def _formula_pattern():
    """ Build the autoparse regex pattern for the chemical formula layer.

        The layer is delegated to `_layer_pattern`, with a key pattern
        produced by `app.not_followed_by(app.LOWERCASE_LETTER)`.

        :rtype: str
    """
    key_ptt = app.not_followed_by(app.LOWERCASE_LETTER)
    return _layer_pattern(key_ptt=key_ptt)
def _split_reagent_string(rgt_str):
    """ Parse out the names of all the species given in a string with the
        chemical equation within the reactions block.

        A species written with a leading count digit is repeated that many
        times in the result; a species without one appears once.

        :param rgt_str: string with the reaction chemical equation
        :type rgt_str: str
        :return: names of the species in the reaction
        :rtype: tuple(str)
    """
    # An optional leading digit (the count), then the species name
    counted_species_ptt = (
        app.STRING_START +
        app.capturing(app.maybe(app.DIGIT)) +
        app.capturing(app.one_or_more(app.NONSPACE)))

    def _expand_counted_species(term):
        """ Turn one '+'-separated term into a tuple holding the species
            name repeated according to its leading count.

            :param term: one term of one side of the chemical equation
            :type term: str
            :rtype: tuple(str)
        """
        count_str, name = apf.first_capture(counted_species_ptt, term)
        # An empty count capture means the species appears once
        return (name,) * (int(count_str) if count_str else 1)

    # Strip whitespace and the CHEMKIN_*_EM patterns
    # (presumably the '(+M)' and '+M' collider markers -- TODO confirm)
    cleaned = apf.remove(app.LINESPACES, rgt_str)
    cleaned = apf.remove(CHEMKIN_PAREN_PLUS_EM, cleaned)
    cleaned = apf.remove(CHEMKIN_PLUS_EM, cleaned)

    # Split on a PLUS that is not directly followed by another PLUS
    plus_ptt = app.PLUS + app.not_followed_by(app.PLUS)
    terms = apf.split(plus_ptt, cleaned)

    return tuple(itertools.chain.from_iterable(
        map(_expand_counted_species, terms)))
def inp_zmatrix(inp_str):
    """ Read the input Z-Matrix from the input file string.

        The symbolic matrix (atom symbols, keys, coordinate names) is
        searched for after a 'comment:' header, and the coordinate values
        are searched for after a 'Variables:' header. The values are handed
        to `automol.zmat.from_data` flagged as Angstrom and degrees
        (presumably converted to Bohr and radians there -- TODO confirm).

        :param inp_str: string of the program's input file
        :type inp_str: str
        :return: the Z-Matrix, or None if the symbolic matrix was not read
        :rtype: automol Z-Matrix data structure or None
    """
    # Read the symbolic matrix from the beginning of the input
    mat_start_ptt = app.padded(app.NEWLINE).join([
        app.escape('comment:'), app.LINE, app.LINE, ''])
    atom_ptt = (
        ar.par.Pattern.ATOM_SYMBOL +
        app.not_followed_by(app.SPACES + app.FLOAT) +
        app.maybe(app.UNSIGNED_INTEGER))
    symbs, key_mat, name_mat = ar.vmat.read(
        inp_str,
        start_ptt=mat_start_ptt,
        symb_ptt=atom_ptt,
        key_ptt=app.one_of_these([app.UNSIGNED_INTEGER, app.VARIABLE_NAME]),
        line_end_ptt=app.maybe(app.UNSIGNED_INTEGER),
        last=False)

    # Nothing to build if any part of the matrix is missing
    if any(part is None for part in (symbs, key_mat, name_mat)):
        return None

    # Read the coordinate values from the input
    if len(symbs) == 1:
        # A single atom has no coordinates to look up
        val_mat = ((None, None, None),)
    else:
        val_start_ptt = app.padded(app.NEWLINE).join([
            app.padded('Variables:', app.NONNEWLINE), ''])
        val_dct = ar.setval.read(
            inp_str,
            start_ptt=val_start_ptt,
            entry_sep_ptt='',
            entry_start_ptt='',
            sep_ptt=app.maybe(app.LINESPACES).join([app.NEWLINE]),
            last=True)
        val_mat = ar.setval.convert_dct_to_matrix(val_dct, name_mat)

    # For the case when atom labels are used instead of integer keys
    # (integer keys pass through untouched). The lookup must be built from
    # the raw labels, i.e. before `symbs` is reduced to bare atom symbols.
    # NOTE(review): None is not a numbers.Real, so a None key comes out as
    # 0 + 1 = 1 -- confirm this is what from_data expects.
    label_idx = {label: idx for idx, label in enumerate(symbs)}
    label_idx[None] = 0
    key_mat = [
        [key if isinstance(key, numbers.Real) else label_idx[key] + 1
         for key in row]
        for row in key_mat]

    # Reduce each label to its leading atom symbol
    bare_symb_ptt = (
        app.STRING_START + app.capturing(ar.par.Pattern.ATOM_SYMBOL))
    symbs = [apf.first_capture(bare_symb_ptt, label) for label in symbs]

    # Call the automol constructor
    return automol.zmat.from_data(
        symbs, key_mat, val_mat, name_mat,
        one_indexed=True, angstrom=True, degree=True)
def thermo_data_strings(mech_str):
    """ Find all thermo data strings in a mechanism string.

        The thermo block is extracted with `thermo_block`, cleaned with
        `remove_blanks`, and cut into sections with `_headlined_sections`.
        A section headline is a line that does not start with a digit,
        PLUS or '=' and that ends in '1'. Each section is asserted to
        consist of exactly four lines.

        :param mech_str: mechanism string
        :return: the sections returned by `_headlined_sections`
    """
    thm_block_str = remove_blanks(thermo_block(mech_str))

    headline_start_ptt = LINE_START + not_followed_by(
        one_of_these([DIGIT, PLUS, escape('=')]))
    headline_end_ptt = '1' + LINE_END
    headline_ptt = (
        headline_start_ptt + one_or_more(NONNEWLINE) + headline_end_ptt)

    sections = _headlined_sections(headline_ptt, thm_block_str)

    # Sanity check: every entry must span exactly four lines
    assert not any(
        len(find_split_lines(section)) != 4 for section in sections)
    return sections
def data_strings(block_str):
    """ Split a thermo block into its individual thermo strings.

        The stripped block is cut with `headlined_sections`, where a
        headline is a line that does not start with a digit, PLUS or '='
        and that ends in '1'.

        :param block_str: string for the thermo block
        :type block_str: str
        :return: the sections returned by `headlined_sections`
    """
    not_data_line_ptt = app.not_followed_by(
        app.one_of_these([app.DIGIT, app.PLUS, app.escape('=')]))
    entry_head_ptt = (
        app.LINE_START +
        not_data_line_ptt +
        app.one_or_more(app.NONNEWLINE) +
        app.escape('1') +
        app.LINE_END)

    return headlined_sections(
        string=block_str.strip(),
        headline_pattern=entry_head_ptt)
def _has_scf_nonconvergence_error_message(output_str):
    """ Assess whether the output file string contains the message
        signaling the failure of the SCF procedure.

        The message looked for is 'No convergence', excluding occurrences
        that continue with 'in max. number of iterations'. The search is
        run with `case=False`.

        :param output_str: string of the program's output file
        :type output_str: str
        :rtype: bool
    """
    message_ptt = app.escape('No convergence')
    excluded_tail_ptt = app.not_followed_by(
        app.padded('in max. number of iterations'))
    return apf.has_match(
        message_ptt + excluded_tail_ptt, output_str, case=False)
def _split_reagent_string(rgt_str):
    """ Parse the species names out of a string with the chemical equation.

        A species written with a leading count digit is repeated that many
        times in the result; a species without one appears once.

        :param rgt_str: string with the chemical equation
        :type rgt_str: str
        :return: names of the species
        :rtype: tuple(str)
    """
    # An optional leading digit (the count), then the species name
    counted_species_ptt = (
        STRING_START +
        capturing(maybe(DIGIT)) +
        capturing(one_or_more(NONSPACE)))

    # Strip whitespace and the CHEMKIN_*_EM patterns
    # (presumably the '(+M)' and '+M' collider markers -- TODO confirm)
    cleaned = find_remove(LINESPACES, rgt_str)
    cleaned = find_remove(CHEMKIN_PAREN_PLUS_EM, cleaned)
    cleaned = find_remove(CHEMKIN_PLUS_EM, cleaned)

    # Split on a PLUS that is not directly followed by another PLUS
    plus_ptt = PLUS + not_followed_by(PLUS)

    species = []
    for term in find_split(plus_ptt, cleaned):
        count_str, name = find_first_capture(counted_species_ptt, term)
        # An empty count capture means the species appears once
        multiplicity = int(count_str) if count_str else 1
        species.extend([name] * multiplicity)
    return tuple(species)
def _split_reagent_string(rgt_str):
    """ Parse the species names out of a string with the chemical equation.

        A species written with a leading count digit is repeated that many
        times in the result; a species without one appears once.

        :param rgt_str: string with the chemical equation
        :type rgt_str: str
        :return: names of the species
        :rtype: tuple(str)
    """
    # An optional leading digit (the count), then the species name
    counted_species_ptt = (
        app.STRING_START +
        app.capturing(app.maybe(app.DIGIT)) +
        app.capturing(app.one_or_more(app.NONSPACE)))
    # A PLUS that is not directly followed by another PLUS
    plus_ptt = app.PLUS + app.not_followed_by(app.PLUS)

    # Strip whitespace and the CHEMKIN_*_EM patterns
    # (presumably the '(+M)' and '+M' collider markers -- TODO confirm)
    cleaned = apf.remove(app.LINESPACES, rgt_str)
    cleaned = apf.remove(CHEMKIN_PAREN_PLUS_EM, cleaned)
    cleaned = apf.remove(CHEMKIN_PLUS_EM, cleaned)

    # (count, name) capture pair for every '+'-separated term
    captures = (
        apf.first_capture(counted_species_ptt, term)
        for term in apf.split(plus_ptt, cleaned))

    # An empty count capture means the species appears once
    return tuple(itertools.chain.from_iterable(
        (name,) * (int(count_str) if count_str else 1)
        for count_str, name in captures))
def data_strings(block_str):
    """ Parse all of the NASA polynomials given in the thermo block of the
        mechanism input file and collect them, one string per species.

        A new entry starts at each headline, i.e. a line that does not
        start with a digit, PLUS or '=' and that ends in '1'.

        :param block_str: string for thermo block
        :type block_str: str
        :return: strings containing NASA polynomials for all species
        :rtype: list(str)
    """
    excluded_starts = [app.DIGIT, app.PLUS, app.escape('=')]
    headline_start_ptt = (
        app.LINE_START +
        app.not_followed_by(app.one_of_these(excluded_starts)))
    headline_end_ptt = app.escape('1') + app.LINE_END
    headline_ptt = (
        headline_start_ptt +
        app.one_or_more(app.NONNEWLINE) +
        headline_end_ptt)

    stripped_block = block_str.strip()
    return headlined_sections(
        string=stripped_block, headline_pattern=headline_ptt)
def _has_scf_nonconvergence_error_message(output_string):
    """ Tell whether the output string has an SCF non-convergence message.

        The message looked for is 'No convergence', excluding occurrences
        that continue with 'in max. number of iterations'. The search is
        run with `case=False`.

        :param output_string: string of the program's output file
        :type output_string: str
        :rtype: bool
    """
    error_ptt = ''.join([
        app.escape('No convergence'),
        app.not_followed_by(app.padded('in max. number of iterations')),
    ])
    found = apf.has_match(error_ptt, output_string, case=False)
    return found
def _formula_sublayer_pattern():
    """ Build the pattern for a formula sublayer.

        The work is delegated to `_sublayer_pattern`, with a key pattern
        produced by `app.not_followed_by(app.LOWERCASE_LETTER)`.

        :return: whatever `_sublayer_pattern` returns
            (presumably a regex pattern string -- TODO confirm)
    """
    key_ptt = app.not_followed_by(app.LOWERCASE_LETTER)
    return _sublayer_pattern(key_ptt=key_ptt)
def opt_zmatrix(output_str):
    """ Read the optimized Z-Matrix from the output file string.

        The symbolic matrix (atom symbols, keys, coordinate names) is
        searched for after the 'Symbolic Z-matrix:' header, and the
        coordinate values are searched for in the 'Optimized Parameters'
        table. The values are handed to `automol.zmat.from_data` flagged as
        Angstrom and degrees (presumably converted to Bohr and radians
        there -- TODO confirm).

        A warning is printed, without aborting, if the text after the first
        'Optimized Parameters' contains a nan gradient.

        :param output_str: string of the program's output file
        :type output_str: str
        :return: the Z-Matrix, or None if there is no 'Optimized Parameters'
            text or the symbolic matrix was not read
        :rtype: automol Z-Matrix data structure or None
    """
    # Without the optimized-parameter table there is nothing to read
    if 'Optimized Parameters' not in output_str:
        return None

    # Read the symbolic matrix from the beginning of the output
    mat_start_ptt = app.padded(app.NEWLINE).join(
        [app.escape('Symbolic Z-matrix:'), app.LINE, ''])
    atom_ptt = (
        ar.par.Pattern.ATOM_SYMBOL +
        app.not_followed_by(app.SPACES + app.FLOAT) +
        app.maybe(app.UNSIGNED_INTEGER))
    symbs, key_mat, name_mat = ar.vmat.read(
        output_str,
        start_ptt=mat_start_ptt,
        symb_ptt=atom_ptt,
        key_ptt=app.one_of_these([app.UNSIGNED_INTEGER, app.VARIABLE_NAME]),
        line_end_ptt=app.maybe(app.UNSIGNED_INTEGER),
        last=False)

    # Nothing to build if any part of the matrix is missing
    if any(part is None for part in (symbs, key_mat, name_mat)):
        return None

    # Read the values from the end of the output
    if len(symbs) == 1:
        # A single atom has no coordinates to look up
        val_mat = ((None, None, None), )
    else:
        # The gradient column may hold a number or a (signed) nan
        grad_val = app.one_of_these([app.FLOAT, 'nan', '-nan'])
        table_start_ptt = app.padded(app.NEWLINE).join([
            app.padded('Optimized Parameters', app.NONNEWLINE),
            app.LINE, app.LINE, app.LINE, app.LINE, ''
        ])
        row_end_ptt = app.maybe(app.LINESPACES).join([
            app.escape('-DE/DX ='), grad_val, app.escape('!'), app.NEWLINE
        ])
        val_dct = ar.setval.read(
            output_str,
            start_ptt=table_start_ptt,
            entry_sep_ptt='',
            entry_start_ptt=app.escape('!'),
            sep_ptt=row_end_ptt,
            last=True)
        val_mat = ar.setval.convert_dct_to_matrix(val_dct, name_mat)

    # Warn (but carry on) if a nan gradient appears after the first
    # 'Optimized Parameters' occurrence
    nan_grad_ptt = app.LINESPACES.join(
        [app.escape('-DE/DX ='), app.one_of_these(['nan', '-nan'])])
    after_marker = output_str.split('Optimized Parameters')[1]
    if apf.has_match(nan_grad_ptt, after_marker):
        print('Warning: Bad gradient value (nan)',
              'in "Optimized Parameters" list.')

    # For case when atom labels are used instead of integer keys
    # (integer keys pass through untouched). The lookup must be built from
    # the raw labels, i.e. before `symbs` is reduced to bare atom symbols.
    # NOTE(review): None is not a numbers.Real, so a None key comes out as
    # 0 + 1 = 1 -- confirm this is what from_data expects.
    label_idx = {label: idx for idx, label in enumerate(symbs)}
    label_idx[None] = 0
    key_mat = [
        [key if isinstance(key, numbers.Real) else label_idx[key] + 1
         for key in row]
        for row in key_mat]

    # Reduce each label to its leading atom symbol
    bare_symb_ptt = (
        app.STRING_START + app.capturing(ar.par.Pattern.ATOM_SYMBOL))
    symbs = [apf.first_capture(bare_symb_ptt, label) for label in symbs]

    # Call the automol constructor
    return automol.zmat.from_data(
        symbs, key_mat, val_mat, name_mat,
        one_indexed=True, angstrom=True, degree=True)
""" molecular geometry and structure readers """ import numpy import autoread as ar import autoparse.pattern as app import autoparse.find as apf import automol MOLPRO_ENTRY_START_PATTERN = ( 'SETTING' + app.not_followed_by(app.padded('MOLPRO_ENERGY')) ) def opt_geometry(output_string): """ get optimized geometry from output """ ptt = app.padded(app.NEWLINE).join([ app.escape('Current geometry (xyz format, in Angstrom)'), '', app.UNSIGNED_INTEGER, (app.one_or_more(app.NONNEWLINE) + app.SPACES + 'ENERGY=' + app.FLOAT), '' ]) # app.padded(app.NEWLINE).join([ # app.escape('ATOMIC COORDINATES'), # app.LINE, app.LINE, app.LINE, '']), syms, xyzs = ar.geom.read( output_string,