def _isotope_layer_pattern():
    """ Build the pattern for the isotope layer: an 'i' sublayer, optionally
        followed by an 'h' sublayer and by a stereo layer, each one preceded
        by SLASH.

        :returns: the concatenated pattern
    """
    i_ptt = _sublayer_pattern(key_ptt='i')
    h_ptt = _sublayer_pattern(key_ptt='h')

    # Both trailing pieces are optional
    optional_h = app.maybe(SLASH + h_ptt)
    optional_stereo = app.maybe(SLASH + _stereo_layer_pattern())

    return i_ptt + optional_h + optional_stereo
def anharmonic_frequencies_reader(output_string):
    """ Get the anharmonic vibrational frequencies.

        The text between the last 'Fundamental Bands (DE w.r.t. Ground State)'
        header and the following 'Overtones (DE w.r.t. Ground State)' header
        is captured, and the second float of every matching line is read.
        Lines are first matched assuming three trailing floats; if nothing
        matches, a second form is tried in which two of those columns are
        printed as runs of '*'.

        :param output_string: string of the program's output file
        :type output_string: str
        :returns: the captured values; empty if the block or its lines
            are not found
        :rtype: list[float]
    """
    # Block between the two section headers
    block = apf.last_capture(
        (app.escape('Fundamental Bands (DE w.r.t. Ground State)') +
         app.capturing(app.one_or_more(app.WILDCARD, greedy=False)) +
         app.escape('Overtones (DE w.r.t. Ground State)')),
        output_string)

    # No block found: nothing to parse (avoids handing None to the search)
    if block is None:
        return []

    spaces = app.one_or_more(app.SPACE)
    stars = app.one_or_more(app.escape('*'))

    # Part of the line shared by both forms, ending after the captured float
    line_start = (
        app.INTEGER + app.escape('(1)') + app.SPACE +
        app.maybe(app.one_or_more(app.LOWERCASE_LETTER)) +
        spaces + app.FLOAT +
        spaces + app.capturing(app.FLOAT) +
        spaces)

    pattern = line_start + app.FLOAT + spaces + app.FLOAT + spaces + app.FLOAT
    pattern2 = line_start + stars + spaces + stars + spaces + app.FLOAT

    # Get list of values, falling back to the '*' form only if needed
    anharm_freq = [float(val) for val in apf.all_captures(pattern, block)]
    if not anharm_freq:
        anharm_freq = [float(val)
                       for val in apf.all_captures(pattern2, block)]

    return anharm_freq
def _stereo_layer(ich):
    """ Capture the stereo layer from a string.

        The stereo layer is expected after the version, the formula sublayer
        and the optional main and charge layers.

        :param ich: string to search
        :returns: the first capture of the stereo layer pattern
    """
    # Everything that precedes the stereo layer (not captured)
    leading_ptt = (
        _version_pattern() +
        SLASH + _formula_sublayer_pattern() +
        app.maybe(SLASH + _main_layer_pattern()) +
        app.maybe(SLASH + _charge_layer_pattern()))

    full_ptt = leading_ptt + SLASH + app.capturing(_stereo_layer_pattern())
    return apf.first_capture(full_ptt, ich)
def _stereo_layer_pattern():
    """ Build the pattern for the stereo layer: a 'b' or 't' sublayer,
        optionally followed by 't', 'm' and 's' sublayers (in that order),
        each one preceded by SLASH.

        :returns: the concatenated pattern
    """
    b_ptt, t_ptt, m_ptt, s_ptt = (
        _sublayer_pattern(key_ptt=key) for key in ('b', 't', 'm', 's'))

    # Mandatory first sublayer, then the optional ones in fixed order
    ptt = app.one_of_these([b_ptt, t_ptt])
    for optional_ptt in (t_ptt, m_ptt, s_ptt):
        ptt = ptt + app.maybe(SLASH + optional_ptt)

    return ptt
def inp_zmatrix(inp_str):
    """ Read the Z-matrix from the input file string.

        The matrix block is read after a 'comment:' header and the values
        from a 'Variables:' block; both are handed to the automol
        constructor flagged as angstrom and degree.
        NOTE(review): the earlier docstring said the result is in Bohr and
        radians -- presumably the constructor converts; confirm.

        :param inp_str: string of the program's input file
        :type inp_str: str
        :returns: the Z-matrix from automol.zmat.from_data, or None if the
            matrix block could not be read
    """
    # Read the matrix from the beginning of the input
    header_ptt = app.padded(app.NEWLINE).join([
        app.escape('comment:'), app.LINE, app.LINE, ''])
    atom_ptt = (
        ar.par.Pattern.ATOM_SYMBOL +
        app.not_followed_by(app.SPACES + app.FLOAT) +
        app.maybe(app.UNSIGNED_INTEGER))
    symbs, key_mat, name_mat = ar.vmat.read(
        inp_str,
        start_ptt=header_ptt,
        symb_ptt=atom_ptt,
        key_ptt=app.one_of_these([app.UNSIGNED_INTEGER, app.VARIABLE_NAME]),
        line_end_ptt=app.maybe(app.UNSIGNED_INTEGER),
        last=False)

    # Nothing to build if any part of the matrix is missing
    if any(item is None for item in (symbs, key_mat, name_mat)):
        return None

    # Read the values from the input
    if len(symbs) == 1:
        val_mat = ((None, None, None),)
    else:
        variables_start_ptt = app.padded(app.NEWLINE).join([
            app.padded('Variables:', app.NONNEWLINE), ''])
        val_dct = ar.setval.read(
            inp_str,
            start_ptt=variables_start_ptt,
            entry_sep_ptt='',
            entry_start_ptt='',
            sep_ptt=app.maybe(app.LINESPACES).join([app.NEWLINE]),
            last=True)
        val_mat = ar.setval.convert_dct_to_matrix(val_dct, name_mat)

    # Keys given as names instead of integers are replaced by the (1-based)
    # position of that name in symbs; numeric keys are left as they are
    position_of = {symb: idx for idx, symb in enumerate(symbs)}
    position_of[None] = 0
    key_mat = [
        [key if isinstance(key, numbers.Real) else position_of[key] + 1
         for key in row]
        for row in key_mat]

    # Keep only the leading atom-symbol part of each entry
    leading_symb_ptt = (
        app.STRING_START + app.capturing(ar.par.Pattern.ATOM_SYMBOL))
    symbs = [apf.first_capture(leading_symb_ptt, symb) for symb in symbs]

    # Call the automol constructor
    return automol.zmat.from_data(
        symbs, key_mat, val_mat, name_mat,
        one_indexed=True, angstrom=True, degree=True)
def opt_zmatrix(output_string): """ get optimized z-matrix geometry from output """ # read the matrix from the beginning of the output syms, key_mat, name_mat = ar.zmatrix.matrix.read( output_string, start_ptt=app.maybe(app.SPACES).join( ['geometry', app.escape('='), app.escape('{'), '']), entry_start_ptt=app.maybe(','), entry_sep_ptt=',', last=False, case=False) # read the initial z-matrix values from the beginning out the output if len(syms) == 1: val_dct = {} else: val_dct = ar.zmatrix.setval.read( output_string, entry_start_ptt='SETTING', val_ptt=app.one_of_these([app.EXPONENTIAL_FLOAT_D, app.NUMBER]), last=False, case=False) names = sorted(set(numpy.ravel(name_mat)) - {None}) caps_names = list(map(str.upper, names)) name_dct = dict(zip(caps_names, names)) assert set(caps_names) <= set(val_dct) val_dct = { name_dct[caps_name]: val_dct[caps_name] for caps_name in caps_names } # read optimized z-matrix values from the end of the output opt_val_dct = ar.zmatrix.setval.read( output_string, start_ptt=app.padded('Optimized variables') + app.NEWLINE, entry_end_ptt=app.one_of_these(['ANGSTROM', 'DEGREE']), last=True, case=False) opt_val_dct = { name_dct[caps_name]: opt_val_dct[caps_name] for caps_name in opt_val_dct.keys() } assert set(opt_val_dct) <= set(val_dct) val_dct.update(opt_val_dct) # call the automol constructor zma = automol.zmatrix.from_data(syms, key_mat, name_mat, val_dct, one_indexed=True, angstrom=True, degree=True) return zma
def _isotope_layers_pattern():
    """ Build the autoparse regex pattern for the isotope layer: an 'i'
        layer, optionally followed by an 'h' layer and by the stereo layers,
        each one preceded by SLASH.

        :rtype: str
    """
    i_ptt = _layer_pattern(key_ptt='i')
    h_ptt = _layer_pattern(key_ptt='h')

    # Both trailing pieces are optional
    optional_h = app.maybe(SLASH + h_ptt)
    optional_stereo = app.maybe(SLASH + _stereo_layers_pattern())

    return i_ptt + optional_h + optional_stereo
def _stereo_layers_pattern():
    """ Build the autoparse regex pattern for the stereochemistry layer: a
        'b' or 't' layer, optionally followed by 't', 'm' and 's' layers (in
        that order), each one preceded by SLASH.

        :rtype: str
    """
    b_ptt, t_ptt, m_ptt, s_ptt = (
        _layer_pattern(key_ptt=key) for key in ('b', 't', 'm', 's'))

    # Mandatory first layer, then the optional ones in fixed order
    ptt = app.one_of_these([b_ptt, t_ptt])
    for optional_ptt in (t_ptt, m_ptt, s_ptt):
        ptt = ptt + app.maybe(SLASH + optional_ptt)

    return ptt
def _stereo_layer(ich):
    """ Parse the InChI string for the stereochemistry layer.

        The layer is expected after the version, the formula sublayer and
        the optional main and charge layers.

        :param ich: InChI string
        :type ich: str
        :rtype: str
    """
    # Everything that precedes the stereo layer (not captured)
    leading_ptt = (
        version_pattern() +
        SLASH + _formula_sublayer_pattern() +
        app.maybe(SLASH + _main_layer_pattern()) +
        app.maybe(SLASH + _charge_layer_pattern()))

    full_ptt = leading_ptt + SLASH + app.capturing(_stereo_layer_pattern())
    return apf.first_capture(full_ptt, ich)
def opt_zmatrix(output_string):
    """ Get the optimized z-matrix geometry from the output.

        The matrix is read from the first 'Symbolic Z-matrix:' block and the
        values from the last 'Optimized Parameters' block.

        :param output_string: string of the program's output file
        :type output_string: str
        :returns: the Z-matrix built by automol.zmatrix.from_data
    """
    # Read the matrix from the beginning of the output
    matrix_start_ptt = app.padded(app.NEWLINE).join(
        [app.escape('Symbolic Z-matrix:'), app.LINE, ''])
    syms, key_mat, name_mat = ar.zmatrix.matrix.read(
        output_string,
        start_ptt=matrix_start_ptt,
        sym_ptt=ar.par.Pattern.ATOM_SYMBOL + app.maybe(app.UNSIGNED_INTEGER),
        key_ptt=app.one_of_these([app.UNSIGNED_INTEGER, app.VARIABLE_NAME]),
        line_end_ptt=app.maybe(app.UNSIGNED_INTEGER),
        last=False)

    # Read the values from the end of the output
    if len(syms) == 1:
        val_dct = {}
    else:
        # header line followed by four further lines before the entries
        values_start_ptt = app.padded(app.NEWLINE).join(
            [app.padded('Optimized Parameters', app.NONNEWLINE)] +
            [app.LINE] * 4 +
            [''])
        values_sep_ptt = app.maybe(app.LINESPACES).join([
            app.escape('-DE/DX ='), app.FLOAT,
            app.escape('!'), app.NEWLINE])
        val_dct = ar.zmatrix.setval.read(
            output_string,
            start_ptt=values_start_ptt,
            entry_sep_ptt='',
            entry_start_ptt=app.escape('!'),
            sep_ptt=values_sep_ptt,
            last=True)

    # Keys given as names instead of integers are replaced by the (1-based)
    # position of that name in syms; numeric keys are left as they are
    position_of = {sym: idx for idx, sym in enumerate(syms)}
    position_of[None] = 0
    key_mat = [
        [key if isinstance(key, numbers.Real) else position_of[key] + 1
         for key in row]
        for row in key_mat]

    # Keep only the leading atom-symbol part of each entry
    leading_sym_ptt = (
        app.STRING_START + app.capturing(ar.par.Pattern.ATOM_SYMBOL))
    syms = [apf.first_capture(leading_sym_ptt, sym) for sym in syms]

    # Call the automol constructor
    return automol.zmatrix.from_data(
        syms, key_mat, name_mat, val_dct,
        one_indexed=True, angstrom=True, degree=True)
def stereo_layers(chi):
    """ Parse the ChI string for the stereo layers ('b', 't', 'm', and 's').

        The stereo layers are expected after the version, the formula and
        the optional main and charge layers; the captured string is passed
        on to _layers.

        :param chi: ChI string
        :type chi: str
        :returns: the stereo layers, as a dictionary keyed by layer prefixes
        :rtype: dict[str: str]
    """
    # Everything that precedes the stereo layers (not captured)
    leading_ptt = (
        _version_pattern() +
        SLASH + _formula_pattern() +
        app.maybe(SLASH + _main_layers_pattern()) +
        app.maybe(SLASH + _charge_layers_pattern()))

    full_ptt = leading_ptt + SLASH + app.capturing(_stereo_layers_pattern())
    return _layers(apf.first_capture(full_ptt, chi))
def reaction_unit_names(mech_str):
    """ Read the unit names specified at the start of the reaction block.

        Either unit name may come first; each one is captured with the other
        allowed as an optional leading token.

        :param mech_str: mechanism string
        :returns: the names found from A_UNITS and from E_UNITS (a, e)
        :rtype: tuple
    """
    block_str = remove_blanks(reactions_block(mech_str))

    # First column of each units table holds the names
    a_names, _ = zip(*A_UNITS)
    e_names, _ = zip(*E_UNITS)
    any_a = one_of_these(a_names)
    any_e = one_of_these(e_names)

    a_pattern = STRING_START + maybe(any_e + LINESPACES) + capturing(any_a)
    e_pattern = STRING_START + maybe(any_a + LINESPACES) + capturing(any_e)

    return (find_first_capture(a_pattern, block_str),
            find_first_capture(e_pattern, block_str))
def troe(rxn_str):
    """ Parses the data string for a reaction in the reactions block
        for a line containing the Troe fitting parameters,
        then reads the parameters from this line.

        Only gets the 4 Troe-specific parameters: alpha, T***, T*, and T**.
        The fourth number is optional in the pattern.

        :param rxn_str: raw Chemkin string for a single reaction
        :type rxn_str: str
        :return params: Troe fitting parameters (None if no TROE line found;
            a capture that is None stays None)
        :rtype: list(float)
    """
    spaces = app.one_or_more(app.SPACE)
    opt_spaces = app.zero_or_more(app.SPACE)
    number = app.capturing(app.NUMBER)

    pattern = (
        'TROE' +
        opt_spaces + app.escape('/') +
        opt_spaces + number +
        spaces + number +
        spaces + number +
        app.maybe(spaces + number) +
        opt_spaces + app.escape('/'))

    captures = apf.first_capture(pattern, rxn_str)
    if captures is None:
        return None

    return [None if val is None else float(val) for val in captures]
def _interpret_reagent_count(rgt_cnt_str):
    """ Expand a reagent string with an optional leading digit into a tuple
        holding the reagent name that many times (once if no digit).

        :param rgt_cnt_str: reagent string, optionally prefixed by a digit
        :type rgt_cnt_str: str
        :rtype: tuple
    """
    count_ptt = app.capturing(app.maybe(app.DIGIT))
    name_ptt = app.capturing(app.one_or_more(app.NONSPACE))

    count_str, name = apf.first_capture(
        app.STRING_START + count_ptt + name_ptt, rgt_cnt_str)

    # An empty count capture means a single occurrence
    repeats = int(count_str) if count_str else 1
    return (name,) * repeats
def troe_parameters(rxn_dstr):
    """ Parses the data string for a reaction in the reactions block
        for a line containing the Troe fitting parameters,
        then reads the parameters from this line.

        Three numbers are required between the slashes; a fourth is optional.

        :param rxn_dstr: data string for species in reaction block
        :type rxn_dstr: str
        :return params: Troe fitting parameters (None if no TROE line found;
            a capture that is None stays None)
        :rtype: list(float)
    """
    spaces = app.one_or_more(app.SPACE)
    opt_spaces = app.zero_or_more(app.SPACE)
    number = app.capturing(app.NUMBER)

    pattern = (
        'TROE' +
        opt_spaces + app.escape('/') +
        opt_spaces + number +
        spaces + number +
        spaces + number +
        app.maybe(spaces + number) +
        opt_spaces + app.escape('/'))

    captures = apf.first_capture(pattern, rxn_dstr)
    if captures is None:
        return None

    return [None if val is None else float(val) for val in captures]
def third_body(rxn_str):
    """ Parses the data string for a reaction in the reactions block
        for the line containing the chemical equation in order to
        read the name of the third body collider, if present.

        The reactant side is captured and stripped of line spaces; a
        '(+X)' form takes precedence over a bare 'M' term.

        :param rxn_str: raw Chemkin string for a single reaction
        :type rxn_str: str
        :return trd_body: one-element tuple with '(+X)', '+M' or None
        :rtype: tuple(str)
    """
    first_line_ptt = _first_line_pattern(
        rct_ptt=app.capturing(SPECIES_NAMES_PATTERN),
        prd_ptt=SPECIES_NAMES_PATTERN,
        param_ptt=app.maybe(COEFF_PATTERN))

    rcts_str = apf.first_capture(first_line_ptt, rxn_str)
    rcts_str = apf.remove(app.LINESPACES, rcts_str)

    paren_pieces = apf.split(CHEMKIN_PAREN_PLUS, rcts_str)
    plus_pieces = apf.split(app.PLUS, rcts_str)

    if len(paren_pieces) > 1:
        # Name is whatever sits between the '(+' and the closing paren
        collider = apf.split(CHEMKIN_PAREN_CLOSE, paren_pieces[1])[0]
        trd_body = '(+' + collider + ')'
    elif 'M' in plus_pieces:
        trd_body = '+M'
    else:
        trd_body = None

    return (trd_body, )
def entry_pattern(name_ptt=NAME_PATTERN,
                  val_ptt=VALUE_PATTERN,
                  sep_ptt=ENTRY_SEP_PATTERN,
                  start_ptt=None,
                  end_ptt=None):
    """ Builds a pattern that matches a line of a setvalue block where the
        value of a single coordinate of a Z-matrix is assigned.

        The pieces (start, name, separator, value, end) are joined with
        optional line spaces in between and the result is padded.

        :param name_ptt: matches the variable name in the setval block
        :type name_ptt: str
        :param val_ptt: matches the numeric value in the setval block
        :type val_ptt: str
        :param sep_ptt: matches the separator between a variable name and
            its value, such as the equals sign in 'R1 = 5.00'
        :type sep_ptt: str
        :param start_ptt: matches at the start of a setval entry
            (left out when None)
        :type start_ptt: str
        :param end_ptt: matches at the end of a setval entry
            (left out when None)
        :type end_ptt: str
        :rtype: str
    """
    pieces = [name_ptt, sep_ptt, val_ptt]
    if start_ptt is not None:
        pieces.insert(0, start_ptt)
    if end_ptt is not None:
        pieces.append(end_ptt)

    return app.padded(app.maybe(app.LINESPACES).join(pieces))
def pressure_region_specification(rxn_dstr):
    """ Parses the data string for a reaction in the reactions block
        for the line containing the chemical equation in order to
        check if a body M is given, indicating pressure dependence.

        :param rxn_dstr: data string for species in reaction block
        :type rxn_dstr: str
        :return pressure_region: 'falloff', 'lowp', 'all' or 'indep';
            None if the equation line is not found
        :rtype: str
    """
    first_line_ptt = app.capturing(
        _first_line_pattern(
            rct_ptt=SPECIES_NAMES_PATTERN,
            prd_ptt=SPECIES_NAMES_PATTERN,
            param_ptt=app.maybe(COEFF_PATTERN)))

    first_line = apf.first_capture(first_line_ptt, rxn_dstr)
    if first_line is None:
        return None
    first_line = first_line.strip()

    # Presence of M denotes specific region assumptions
    if '(+M)' in first_line:
        return 'falloff'
    if 'M' in first_line:
        return 'lowp'

    # No M can be independent or not, depending on subsequent info
    if 'PLOG' in rxn_dstr or 'CHEB' in rxn_dstr:
        return 'all'
    return 'indep'
def _interpret_reagent_count(rgt_cnt_str):
    """ Expand a reagent string with an optional leading digit into a tuple
        holding the reagent name that many times (once if no digit).

        :param rgt_cnt_str: reagent string, optionally prefixed by a digit
        :type rgt_cnt_str: str
        :rtype: tuple
    """
    count_ptt = capturing(maybe(DIGIT))
    name_ptt = capturing(one_or_more(NONSPACE))

    count_str, name = find_first_capture(
        STRING_START + count_ptt + name_ptt, rgt_cnt_str)

    # An empty count capture means a single occurrence
    repeats = int(count_str) if count_str else 1
    return (name,) * repeats
class Pattern:
    """ Regex patterns shared by the readers.
    """
    # One letter, optionally followed by a second letter
    ATOM_SYMBOL = app.LETTER + app.maybe(app.LETTER)
    # Any number
    NUMERIC_VALUE = app.NUMBER
def formula(ich):
    """ Generate a formula dictionary from a ChI string.

        The string is split into its components, each component's formula
        string is parsed into symbol/count pairs (a missing count means 1),
        and the per-component formulas are joined.

        :param ich: ChI string
        :type ich: str
        :rtype: dict[str: int]
    """
    symb_ptt = app.UPPERCASE_LETTER + app.zero_or_more(app.LOWERCASE_LETTER)
    count_ptt = app.maybe(app.UNSIGNED_INTEGER)
    entry_ptt = app.capturing(symb_ptt) + app.capturing(count_ptt)

    # Split it up to handle hard-coded molecules in multi-component inchis
    fmls = []
    for component in split(ich):
        fml_str = formula_string(component)
        fmls.append({
            symb: (int(count) if count else 1)
            for symb, count in apf.all_captures(entry_ptt, fml_str)})

    return functools.reduce(automol.formula.join, fmls)
def block_pattern(sym_ptt=par.Pattern.ATOM_SYMBOL,
                  key_ptt=KEY_PATTERN,
                  name_ptt=NAME_PATTERN,
                  val_ptt=par.Pattern.NUMERIC_VALUE,
                  mat_entry_start_ptt=None,
                  mat_entry_sep_ptt=MAT_ENTRY_SEP_PATTERN,
                  mat_entry_end_ptt=None,
                  mat_line_start_ptt=None,
                  mat_line_end_ptt=None,
                  setv_start_ptt=SETVAL_START_PATTERN,
                  setv_entry_sep_ptt=SETVAL_ENTRY_SEP_PATTERN,
                  setv_entry_start_ptt=None,
                  setv_sep_ptt=SETVAL_SEP_PATTERN,
                  capture_matrix_block=False,
                  capture_setval_block=False):
    """ Build the full z-matrix pattern: the matrix block followed by an
        optional setval block, joined by the padded setval start pattern.

        The mat_* arguments are forwarded to the matrix block pattern and the
        setv_* arguments to the setval block pattern; name_ptt is used by
        both.

        :param capture_matrix_block: wrap the matrix block in a capture?
        :type capture_matrix_block: bool
        :param capture_setval_block: wrap the (optional) setval block in a
            capture?
        :type capture_setval_block: bool
        :returns: the combined block pattern
    """
    matrix_ptt = _matrix_block_pattern(
        sym_ptt=sym_ptt,
        key_ptt=key_ptt,
        name_ptt=name_ptt,
        entry_start_ptt=mat_entry_start_ptt,
        entry_sep_ptt=mat_entry_sep_ptt,
        entry_end_ptt=mat_entry_end_ptt,
        line_start_ptt=mat_line_start_ptt,
        line_end_ptt=mat_line_end_ptt)

    setval_ptt = app.maybe(
        _setval_block_pattern(
            name_ptt=name_ptt,
            val_ptt=val_ptt,
            entry_sep_ptt=setv_entry_sep_ptt,
            entry_start_ptt=setv_entry_start_ptt,
            sep_ptt=setv_sep_ptt))

    if capture_matrix_block:
        matrix_ptt = app.capturing(matrix_ptt)
    if capture_setval_block:
        setval_ptt = app.capturing(setval_ptt)

    return app.padded(setv_start_ptt).join([matrix_ptt, setval_ptt])
def gradient(output_string):
    """ Read the gradient from the output string.

        The text between the last 'Molecular gradient' header and the
        following 'Molecular gradient norm' is captured, trimmed so that it
        starts at its third blank line, and read as a matrix whose lines
        start with a letter, '#<integer>' and an optional integer.

        :param output_string: string of the program's output file
        :type output_string: str
        :returns: the gradient as read by ar.matrix.read (three columns), or
            None if the gradient block or its third blank line is not found
    """
    # Grab a block of text containing the gradient
    block_ptt = ('Molecular gradient' +
                 app.capturing(app.one_or_more(app.WILDCARD, greedy=False)) +
                 'Molecular gradient norm')
    block = apf.last_capture(block_ptt, output_string)
    if block is None:
        return None

    # Trim the block to start it at the gradient lines, i.e. at the third
    # blank line; without one the layout is not what this reader expects
    lines = block.splitlines()
    blank_idxs = [i for i, line in enumerate(lines) if not line.strip()]
    if len(blank_idxs) < 3:
        return None
    trim_block = '\n'.join(lines[blank_idxs[2]:])

    # Grab the gradient from the trimmed block string
    grad = ar.matrix.read(
        trim_block,
        line_start_ptt=app.LINESPACES.join([
            app.LETTER,
            app.escape('#') + app.UNSIGNED_INTEGER,
            app.maybe(app.UNSIGNED_INTEGER)]))

    assert numpy.shape(grad)[1] == 3
    return grad
def remove_blanks(mech_str):
    """ Remove blank lines as well as leading and trailing blanks.

        :param mech_str: string to clean
        :type mech_str: str
        :returns: the result of find_remove with the combined pattern
    """
    removable_ptts = [
        LINE_START + maybe(LINESPACES) + NEWLINE,   # blank line
        LINESPACES + LINE_END,                      # trailing blanks
        LINE_START + LINESPACES,                    # leading blanks
    ]
    return find_remove(one_of_these(removable_ptts), mech_str)
def _ccsd_t_f12_energy(output_string):
    """ Read the energy that follows either the '!CCSD(T)-F12b total energy'
        label (optional trailing colon) or the '!RHF-UCCSD(T)-F12b energy'
        label.

        :param output_string: string of the program's output file
        :type output_string: str
        :returns: the value returned by ar.energy.read
    """
    label_ptt = app.one_of_these([
        app.escape('!CCSD(T)-F12b total energy') + app.maybe(':'),
        app.escape('!RHF-UCCSD(T)-F12b energy'),
    ])
    return ar.energy.read(output_string, label_ptt)
def _mp2_energy(output_string):
    """ Read the energy that follows either the '!MP2 total energy' label
        (optional trailing colon) or the '!RMP2 energy' label.

        :param output_string: string of the program's output file
        :type output_string: str
        :returns: the value returned by ar.energy.read
    """
    label_ptt = app.one_of_these([
        app.escape('!MP2 total energy') + app.maybe(':'),
        app.escape('!RMP2 energy'),
    ])
    return ar.energy.read(output_string, label_ptt)
def clean_up_whitespace(string):
    """ Remove leading spaces, trailing spaces, and empty lines from a
        string.

        :param string: string to clean
        :type string: str
        :returns: the result of apf.remove with the combined pattern
    """
    removable_ptts = [
        app.LINE_START + app.maybe(app.LINESPACES) + app.NEWLINE,  # empty
        app.LINESPACES + app.LINE_END,                             # trailing
        app.LINE_START + app.LINESPACES,                           # leading
    ]
    return apf.remove(app.one_of_these(removable_ptts), string)
def _doub_hyb_dft_energy(output_string):
    """ Read the energy from a line of the form
        'E(<NAME>) = <float> E(<NAME>) =', where <NAME> is any upper-cased
        entry of DOUB_HYB_DFT and the 'E' may be followed by a '2'; the value
        read is the one after the second label.

        :param output_string: string of the program's output file
        :type output_string: str
        :returns: the value returned by ar.energy.read
    """
    functional_ptt = app.one_of_these(
        [name.upper() for name in DOUB_HYB_DFT])
    label_ptt = (
        app.escape('E') + app.maybe('2') +
        app.escape('(') + functional_ptt + app.escape(')'))

    # First label with its value, then the second label up to its '='
    start_ptt = (
        label_ptt + app.SPACES + '=' + app.SPACES +
        app.EXPONENTIAL_FLOAT_D + app.SPACES +
        label_ptt + app.SPACES + '=')

    return ar.energy.read(output_string, start_ptt=start_ptt)
def _charge_layers_pattern():
    """ Build the autoparse regex pattern for the charge layer: a 'q' or 'p'
        layer, optionally followed by SLASH and a 'p' layer.

        :rtype: str
    """
    q_ptt = _layer_pattern(key_ptt='q')
    p_ptt = _layer_pattern(key_ptt='p')

    first_ptt = app.one_of_these([q_ptt, p_ptt])
    return first_ptt + app.maybe(SLASH + p_ptt)
def _main_layers_pattern():
    """ Build the autoparse regex pattern for the connectivity layer: a 'c'
        or 'h' layer, optionally followed by SLASH and an 'h' layer.

        :rtype: str
    """
    c_ptt = _layer_pattern(key_ptt='c')
    h_ptt = _layer_pattern(key_ptt='h')

    first_ptt = app.one_of_these([c_ptt, h_ptt])
    return first_ptt + app.maybe(SLASH + h_ptt)