def process_bin_max_min_vals(raw_key_val_tuple_list):
    """
    Split raw entries into a hop (step) size and, optionally, min and max bounds.

    Each value must contain either one float (the max hop size) or three
    comma-separated floats (max hop size, min value, max value).
    @param raw_key_val_tuple_list: iterable of (key, comma-separated string) entries
    @return: three dictionaries keyed like the input: hop sizes, min values, max values
        (the last two only hold keys for which three values were given)
    @raise InvalidDataError: if a value is not a float, the number of values is not
        1 or 3, or the min value is not less than the max value
    """
    hops = {}
    lower_bounds = {}
    upper_bounds = {}
    for key, val in raw_key_val_tuple_list:
        try:
            floats = [float(piece.strip()) for piece in val.split(',')]
            num_floats = len(floats)
            if num_floats != 1 and num_floats != 3:
                raise InvalidDataError(
                    "Unexpected number of values ({})".format(num_floats))
            hops[key] = floats[0]
            if num_floats == 3:
                low, high = floats[1], floats[2]
                if not low < high:
                    raise InvalidDataError(
                        "Min value ({}) is not less than max value ({})"
                        "".format(round(low, 6), round(high, 6)))
                lower_bounds[key] = low
                upper_bounds[key] = high
        except (ValueError, InvalidDataError) as e:
            # Re-wrap every problem with the key, section, and expected format
            raise InvalidDataError(
                "Encountered error '{}' For key '{}' in section {}, read: {}.\n"
                "Expected 1 or 3 comma-separated floats for each variable (key): the max "
                "hop (step) size, \noptionally followed by the min value, max value that "
                "should be obtained from hopping.".format(
                    e.args[0], key, BASIN_HOP_MIN_MAX, val))
    return hops, lower_bounds, upper_bounds
def process_coords(cp2k_file, data_tpl_content):
    """
    Creates the new atoms section based on coordinates from the cp2k file

    Reads one atom per line until the atom number in the first column equals
    the number of atoms in the template; the line after that atom must be
    blank (reaching the end of the file is treated the same way).
    @param cp2k_file: file being read (a line iterator with a 'name' attribute)
    @param data_tpl_content: data from the template file
    @return: new atoms section, with replaced coordinates
    @raise InvalidDataError: if a blank line appears before all atoms are read, if
        the line after the last atom is not blank, or if the input ends before
        all atoms are read
    """
    num_atoms = data_tpl_content[NUM_ATOMS]
    # Copy every row: copying only the outer list would let the slice assignment
    # below overwrite the coordinates stored in the template itself.
    new_atoms = [list(atom_row) for atom_row in data_tpl_content[ATOMS_CONTENT]]
    atom_count = 0
    atom_num = 0
    for line in cp2k_file:
        split_line = line.split()
        if len(split_line) == 0:
            raise InvalidDataError(
                "Encountered an empty line after reading {} atoms. Expected to read "
                "coordinates for {} atoms before encountering a blank line."
                "".format(atom_num, num_atoms))
        atom_num = int(split_line[0])
        new_atoms[atom_count][4:7] = [float(coord) for coord in split_line[3:6]]
        atom_count += 1
        if atom_num == num_atoms:
            # If that is the end of the atoms, the next line should be blank.
            # The default makes end-of-file count as blank instead of letting
            # StopIteration escape to the caller.
            line = next(cp2k_file, "").strip()
            if len(line) == 0:
                return new_atoms
            raise InvalidDataError(
                "After reading the number of atoms found in the template data file "
                "({}), did not encounter a blank line, but: {}"
                "".format(num_atoms, line))
    # if went through every line and didn't get all the atoms, catch the error
    raise InvalidDataError(
        "Did not read coordinates from {} atoms in file: {}".format(
            num_atoms, cp2k_file.name))
def process_max_min_vals(raw_key_val_tuple_list, default_penalty):
    """
    Convert (key, string) entries into a dictionary of two-element float lists.

    @param raw_key_val_tuple_list: iterable of (key, comma-separated string) entries
    @param default_penalty: default penalty for the flat-bottomed potential; used as
        the second list element when only one value is given
    @return: dictionary mapping each key to [edge of the potential, penalty]
    @raise InvalidDataError: if a value is not a float or there are not 1 or 2 values
    """
    parsed = {}
    for key, val in raw_key_val_tuple_list:
        try:
            floats = [float(item.strip()) for item in val.split(',')]
            count = len(floats)
            if count not in (1, 2):
                raise InvalidDataError(
                    "For key '{}' in max or min section, read: {}. \nExpected 1 or 2 values: "
                    "either the edge of the potential and the penalty stiffness, or only the "
                    "edge of the potential, which will be used with "
                    "the default penalty for the flat-bottomed potential"
                    "".format(key, val))
            if count == 1:
                # Only the edge was given; pair it with the default penalty
                floats = floats + [default_penalty]
            parsed[key] = floats
        except ValueError as e:
            raise InvalidDataError(
                "Error in reading max or min value provided for key '{}': {}"
                "".format(key, e.args[0]))
    return parsed
def find_atom_data(lammps_f, atom_ids):
    """Searches and returns the given file location for atom data for the given IDs.

    Time steps for which not all requested atoms are found are reported with a
    warning and skipped.

    :param lammps_f: The LAMMPS data file to search.
    :param atom_ids: The set of atom IDs to collect.
    :return: A nested dict of the atoms found keyed first by time step, then by atom ID,
        and a dict of the box dimensions keyed by time step.
    :raises: InvalidDataError If the file is missing atom data or is otherwise malformed.
    """
    tstep_atoms = OrderedDict()
    tstep_box = {}
    atom_count = len(atom_ids)
    empty_dims = np.full(3, np.nan)

    with open(lammps_f) as lfh:
        file_name = os.path.basename(lammps_f)
        tstep_id = None
        box_dim = np.copy(empty_dims)
        tstep_val = "(no value)"
        for line in lfh:
            if line.startswith(TSTEP_LINE):
                try:
                    tstep_val = next(lfh).strip()
                    tstep_id = int(tstep_val)
                except ValueError as e:
                    raise InvalidDataError(
                        "Invalid timestep value {}: {}".format(tstep_val, e))
            elif line.startswith(NUM_ATOM_LINE):
                # not needed, so just move along
                next(lfh)
            elif line.startswith(BOX_LINE):
                try:
                    for coord_id in range(len(box_dim)):
                        box_vals = list(map(float, next(lfh).strip().split()))
                        if len(box_vals) == 2:
                            box_dim[coord_id] = box_vals[1] - box_vals[0]
                except (ValueError, KeyError) as e:
                    raise InvalidDataError(
                        "Invalid PBC value read on timestep {}: {}".format(
                            tstep_val, e))
            elif tstep_id is not None:
                atom_lines = find_atom_lines(lfh, atom_ids, tstep_id, file_name)
                if len(atom_lines) != atom_count:
                    try:
                        missing_atoms_err(atom_ids, atom_lines, tstep_id, file_name)
                    except InvalidDataError as e:
                        warning(e)
                        warning("Skipping timestep and continuing.")
                else:
                    tstep_atoms[tstep_id] = atom_lines
                    tstep_box[tstep_id] = box_dim
                tstep_id = None
                # Start every time step with its own fresh array. Re-binding to
                # empty_dims itself would make all later time steps share (and
                # overwrite) one array, and would corrupt the NaN template.
                box_dim = np.copy(empty_dims)
    return tstep_atoms, tstep_box
def process_cfg_conv(raw_cfg, def_cfg_vals=None, req_keys=None, int_list=True):
    """
    Converts the given raw configuration, filling in defaults and converting the specified value (if any) to the
    default value's type.

    After conversion, the optimization method name is lower-cased when it differs
    from the default (with a warning if it is not among the tested methods), the
    TEMP and NITER_SUCCESS entries are converted to float when not None, and
    empty or repeated entries are removed from the OPT_PARAMS list.
    @param raw_cfg: The configuration map.
    @param def_cfg_vals: dictionary of default values (None is treated as empty)
    @param req_keys: dictionary mapping each required key to the function used to
        convert its raw value (None is treated as empty)
    @param int_list: flag to specify if lists should converted to a list of integers
    @return: The processed configuration.
    @raise InvalidDataError: for an unexpected key or a value that cannot be converted
    @raise KeyError: if a required key is missing from raw_cfg
    """
    # Treat omitted dictionaries as empty; the membership tests and loops below
    # would otherwise fail with a TypeError on None.
    if def_cfg_vals is None:
        def_cfg_vals = {}
    if req_keys is None:
        req_keys = {}

    proc_cfg = {}
    for key in raw_cfg:
        if not (key in def_cfg_vals or key in req_keys):
            raise InvalidDataError(
                "Unexpected key '{}' in configuration ('ini') file.".format(
                    key))
    # 'key' is reused below so the error messages can name the entry being processed
    key = None
    try:
        for key, def_val in def_cfg_vals.items():
            proc_cfg[key] = conv_raw_val(raw_cfg.get(key), def_val, int_list)
        for key, type_func in req_keys.items():
            proc_cfg[key] = type_func(raw_cfg[key])
    except KeyError:
        raise KeyError("Missing config val for key '{}'".format(key))
    except Exception as e:
        raise InvalidDataError('Problem with config vals on key {}: {}'.format(
            key, e))

    if proc_cfg[SCIPY_OPT_METHOD] != DEF_OPT_METHOD:
        proc_cfg[SCIPY_OPT_METHOD] = proc_cfg[SCIPY_OPT_METHOD].lower()
        if proc_cfg[SCIPY_OPT_METHOD] not in TESTED_SCIPY_MIN:
            warning(
                "Only the following optimization methods have been tested: scipy.optimize.minimize with {}."
                "".format(TESTED_SCIPY_MIN))

    for int_key in [TEMP, NITER_SUCCESS]:
        if proc_cfg[int_key] is not None:
            proc_cfg[int_key] = float(proc_cfg[int_key])

    # Remove any repeated parameters, or zero-character-length params (can happen if accidentally an additional comma)
    if len(proc_cfg[OPT_PARAMS]) > 0:
        filtered_opt_params = []
        for param in proc_cfg[OPT_PARAMS]:
            if len(param) > 0:
                if param in filtered_opt_params:
                    warning("'{}' repeated in '{}'; skipping repeated entry".
                            format(param, OPT_PARAMS))
                else:
                    filtered_opt_params.append(param)
        proc_cfg[OPT_PARAMS] = filtered_opt_params
    return proc_cfg
def read_cfg(f_loc, cfg_proc=process_cfg):
    """
    Reads the given configuration file, returning a dict with the converted values supplemented by default values.

    The main section is processed with cfg_proc; the template value and template
    equation sections are merged into one ordered dictionary of template values,
    and the names of the equation parameters are recorded separately.
    :param f_loc: The location of the file to read.
    :param cfg_proc: The processor to use for the raw configuration values.  Uses default values when the raw
        value is missing.
    :return: A dict of the processed configuration file's data.
    :raises IOError: If the file could not be read.
    :raises InvalidDataError: If a section header is missing, the main section is absent,
        a section name is not valid, or the main section cannot be processed.
    """
    config = ConfigParser()
    try:
        good_files = config.read(f_loc)
    except MissingSectionHeaderError:
        raise InvalidDataError(MISSING_SEC_HEADER_ERR_MSG.format(f_loc))
    if not good_files:
        raise IOError('Could not read file {}'.format(f_loc))

    # Start with empty template value dictionaries to be filled
    proc = {TPL_VALS: OrderedDict(), TPL_EQ_PARAMS: OrderedDict()}

    if MAIN_SEC not in config.sections():
        raise InvalidDataError(
            "The configuration file is missing the required '{}' section".
            format(MAIN_SEC))

    for section in config.sections():
        if section == MAIN_SEC:
            try:
                proc.update(
                    cfg_proc(dict(config.items(MAIN_SEC)), DEF_CFG_VALS,
                             REQ_KEYS))
            except InvalidDataError as e:
                # str(e) rather than e.message: the latter does not exist on
                # Python 3 exceptions
                err_msg = str(e)
                if 'Unexpected key' in err_msg:
                    raise InvalidDataError(
                        err_msg +
                        " Does this belong \nin a template value section such as '[{}]'?"
                        "".format(TPL_VALS_SEC))
                # Any other problem must still reach the caller instead of
                # being silently dropped
                raise
        elif section in [TPL_VALS_SEC, TPL_EQS_SEC]:
            val_ordered_dict = process_tpl_vals(config.items(section))
            if section == TPL_EQS_SEC:
                # just keep the names, so we know special processing is required
                proc[TPL_EQ_PARAMS] = val_ordered_dict.keys()
            proc[TPL_VALS].update(val_ordered_dict)
        else:
            raise InvalidDataError(
                "Section name '{}' in not one of the valid section names: {}"
                "".format(section, VALID_SEC_NAMES))
    return proc
def parse_cmdline(argv=None):
    """
    Parse the command line for the pair-distance script.

    :param argv: A list of arguments, or `None` for ``sys.argv[1:]``.
    :return: The parsed arguments (None if parsing exited early) and a return code.
    """
    if argv is None:
        argv = sys.argv[1:]

    # initialize the parser object:
    parser = argparse.ArgumentParser(
        description='Finds the distances between each pair of atoms listed '
                    'in the pair file for each time step in the given '
                    'LAMMPS dump file.')
    parser.add_argument(
        "-p", "--pair_files", action="append", default=[],
        help="One or more files containing atom pairs (default {0})".format(
            DEF_PAIRS_FILE))
    parser.add_argument("-f", "--file", default=None,
                        help="The dump file to process")
    parser.add_argument("-l", "--list_file", default=None,
                        help="The file with a list of dump files to process")

    args = None
    try:
        args = parser.parse_args(argv)
        if not args.pair_files:
            # Nothing given on the command line: fall back to the default pair file
            args.pair_files.append(DEF_PAIRS_FILE)
            if not os.path.isfile(DEF_PAIRS_FILE):
                raise InvalidDataError(
                    "No pair file specified and did not find the default "
                    "pair file: {}".format(DEF_PAIRS_FILE))
        if args.file is None and args.list_file is None:
            raise InvalidDataError(
                "Specify either a file or list of files to process.")
    except (KeyError, InvalidDataError, SystemExit) as e:
        # argparse exits with code 0 after printing help; that is not an error
        if getattr(e, 'code', None) == 0:
            return args, GOOD_RET
        warning(e)
        parser.print_help()
        return args, INPUT_ERROR
    return args, GOOD_RET
def make_tpl(cfg, tpl_name, filled_tpl_name):
    """
    Fill and save the template once for every combination of the template values.

    For each combination, the equation parameters are formatted with the current
    values and evaluated before the template is filled.
    @param cfg: configuration for the run
    @param tpl_name: passed to read_tpl to obtain the template text, and on to fill_save_tpl
    @param filled_tpl_name: passed on to fill_save_tpl for the filled template
    @raise KeyError: if an equation refers to a parameter without a value
    @raise InvalidDataError: if an equation cannot be evaluated because of an unknown name
    """
    template_text = read_tpl(tpl_name)
    param_names = cfg[TPL_VALS].keys()
    tpl_vals_dict = {}

    for value_set in itertools.product(*cfg[TPL_VALS].values()):
        # Reset every parameter (including equations) to this combination's raw value
        tpl_vals_dict.update(zip(param_names, value_set))

        for eq_param in cfg[TPL_EQ_PARAMS]:
            equation = tpl_vals_dict[eq_param]
            try:
                string_to_eval = equation.format(**tpl_vals_dict)
            except KeyError as e:
                raise KeyError(
                    "Missing parameter value {} needed to evaluate '{}' for the parameter '{}'."
                    "".format(e, tpl_vals_dict[eq_param], eq_param))
            try:
                # NOTE(review): eval executes text taken from the configuration
                # file; only use configuration files from trusted sources.
                tpl_vals_dict[eq_param] = eval(string_to_eval)
            except NameError:
                raise InvalidDataError(
                    "Could not evaluate the string '{}' specifying the value for the parameter "
                    "'{}'. Check order of equation entry and/or input parameter values."
                    "".format(string_to_eval, eq_param))

        fill_save_tpl(cfg, template_text, tpl_vals_dict, tpl_name, filled_tpl_name)
def main(argv=None):
    """
    Parse the command line and process the requested file.

    :param argv: A list of arguments, or `None` to let parse_cmdline choose.
    :return: GOOD_RET on success; otherwise the parser's return code, IO_ERROR,
        or INVALID_DATA.
    """
    # Read input
    args, ret = parse_cmdline(argv)
    if args is None or ret != GOOD_RET:
        return ret

    try:
        len_buffer = None
        if args.buffer is not None:
            try:
                len_buffer = float(args.buffer)
            except ValueError:
                raise InvalidDataError("Input for buffer ({}) could not be converted to a float.".format(args.buffer))

        # Default to writing next to the input file
        if args.out_dir is None:
            args.out_dir = os.path.dirname(args.file)

        if args.min_max_file is not None:
            min_max_dict = read_csv(args.min_max_file, quote_style=csv.QUOTE_NONNUMERIC)
        else:
            min_max_dict = None

        process_file(args.file, args.out_dir, len_buffer, args.delimiter, min_max_dict,
                     header=args.names, make_hist=args.histogram)
    except IOError as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except InvalidDataError as e:
        warning("Problems reading data:", e)
        return INVALID_DATA

    return GOOD_RET  # success
def read_cfg(f_loc, cfg_proc=process_cfg):
    """
    Reads the given configuration file, returning a dict with the converted values supplemented by default values.

    Also gathers the data files to process: every non-blank line of the list
    file (if that file exists) followed by the single data file (if specified).
    :param f_loc: The location of the file to read.
    :param cfg_proc: The processor to use for the raw configuration values.  Uses default values when the raw
        value is missing.
    :return: A dict of the processed configuration file's data.
    :raises IOError: If the configuration file could not be read.
    :raises InvalidDataError: If no data files were found to process.
    """
    config = ConfigParser()
    good_files = config.read(f_loc)
    if not good_files:
        raise IOError('Could not read file {}'.format(f_loc))
    main_proc = cfg_proc(dict(config.items(MAIN_SEC)), DEF_CFG_VALS, REQ_KEYS)

    # To fix; have this as default!
    main_proc[DATA_FILES] = []
    if os.path.isfile(main_proc[DATA_FILES_FILE]):
        with open(main_proc[DATA_FILES_FILE]) as f:
            for data_file in f:
                data_file = data_file.strip()
                # A blank (e.g. trailing) line is not a file name; appending it
                # would add an empty file name to the list to process
                if data_file:
                    main_proc[DATA_FILES].append(data_file)
    if main_proc[DATA_FILE] is not None:
        main_proc[DATA_FILES].append(main_proc[DATA_FILE])
    if len(main_proc[DATA_FILES]) == 0:
        raise InvalidDataError("No files to process: no '{}' specified and "
                               "no list of files found for: {}".format(
                                   DATA_FILE, main_proc[DATA_FILES_FILE]))
    return main_proc
def process_cp2k_file(cp2k_file, data_tpl_content, data_template_fname):
    """
    Read energy and coordinates from a cp2k output file and write a new data file.

    The first header line of the template content is replaced with a provenance
    note, the energy found (or None if no line matched) is printed, and the
    template head, new atoms section, and template tail are written to a
    '.data' file named after the cp2k file.
    @param cp2k_file: location of the cp2k output file to read
    @param data_tpl_content: processed data from the template file
    @param data_template_fname: name of the template file (used in the header note)
    @raise InvalidDataError: if no coordinates section was found in the file
    """
    new_atoms_section = None
    # Stays None when no line matches ENERGY_PAT, so the report below never
    # fails on an unbound name
    qmmm_energy = None
    with open(cp2k_file) as f:
        data_tpl_content[HEAD_CONTENT][0] = "Created on {} by {} version {} from template file {} and " \
                                            "cp2k output file {}".format(datetime.now(), __name__, __version__,
                                                                         data_template_fname, cp2k_file)
        for line in f:
            line = line.strip()
            if ENERGY_PAT.match(line):
                qmmm_energy = line.split()[-1]
            if COORD_PAT.match(line):
                # Now advance to first line of coordinates. The default keeps a
                # truncated file from raising StopIteration here; process_coords
                # then reports the missing coordinates.
                for _ in range(3):
                    next(f, None)
                new_atoms_section = process_coords(f, data_tpl_content)

    # If we successfully returned the new_atoms_section, make new file
    if new_atoms_section is None:
        raise InvalidDataError(
            "Did not file atoms coordinates in file: {}".format(cp2k_file))
    print("{} energy: {}".format(cp2k_file, qmmm_energy))
    f_name = create_out_fname(cp2k_file, ext='.data')
    list_to_file(data_tpl_content[HEAD_CONTENT] + new_atoms_section +
                 data_tpl_content[TAIL_CONTENT],
                 f_name,
                 print_message=False)
def process_file(file_to_process, cfg):
    """
    Combine two columns of a csv file into one column and write the result.

    Each output row is the concatenation of the prefix, the first column's
    value, the middle text, the second column's value, and the suffix.
    @param file_to_process: the file with column to be combined
    @param cfg: the configuration of this run
    @raise InvalidDataError: if either column header is not found in the file
    """
    # determine if any type conversion has been specified & create conv dict if needed
    conv_dict = {}
    for col_key, conv_key in ((COL1, COL1_CONV), (COL2, COL2_CONV)):
        if cfg[conv_key] is not None:
            conv_dict[cfg[col_key]] = cfg[conv_key]
    if not conv_dict:
        conv_dict = None

    raw_col_data = read_csv(file_to_process, data_conv=conv_dict, quote_style=csv.QUOTE_NONNUMERIC)

    first_row = raw_col_data[0]
    for header in (cfg[COL1], cfg[COL2]):
        if header not in first_row:
            raise InvalidDataError("Specified column header '{}' was not found in file: {}"
                                   "".format(header, file_to_process))

    to_print = [
        ["".join(str(part) for part in (cfg[PREFIX], row[cfg[COL1]], cfg[MIDDLE],
                                        row[cfg[COL2]], cfg[SUFFIX]))]
        for row in raw_col_data
    ]
    list_to_csv(to_print, cfg[OUT_FILE], delimiter=',', quote_style=csv.QUOTE_MINIMAL)
def find_section_state(line, current_section, section_order, content, highlight_content):
    """
    Determine which section the given line belongs to.

    When the line matches a section pattern, the section is recorded in
    section_order and given fresh, empty lists in both content dictionaries.
    @param line: current line of data file
    @param current_section: current section (None if no section has been found yet)
    @param section_order: list keeping track of when find an new section
    @param content: dictionary; add a new key for each section found
    @param highlight_content: keep a list of selected content to output (interactions with specified atoms)
    @return: the section currently reading, count (the count returned is always 1)
    @raise InvalidDataError: if no pattern matches and there is no current section
    """
    for section, pattern in SEC_PAT_DICT.items():
        if not pattern.match(line):
            continue
        # New section header found: start tracking it
        section_order.append(section)
        content[section] = []
        highlight_content[section] = []
        return section, 1

    if current_section is not None:
        return current_section, 1
    raise InvalidDataError(
        "Could not identify section from line: {}".format(line))
def check_vals(config, sec_name):
    """
    Reads the max or min vals section of the given config file,
    returning a dict containing the original string key paired with a float representing the max or min value.
    If there is no specified section, an empty dict is returned.  Invalid values result in DataExceptions.

    :param config: The parsed config file that contains a max and/or min section.
    :param sec_name: the name of the section with string/float pairs to digest
    :return: A dict mapping the original column key to the float limit value.
    :raises InvalidDataError: If a value cannot be converted to a float.
    """
    try:
        entries = config.items(sec_name)
    except NoSectionError:
        # not a problem; there are simply no limits of this kind
        return {}

    limit_vals = {}
    for col_name, limit_val in entries:
        # No test for non-unique column names: if a name appears twice, the parser
        # has already handled it by overwriting the value for that key
        try:
            limit_vals[col_name] = float(limit_val)
        except ValueError:
            raise InvalidDataError(
                "For section '{}' key '{}', could not convert value '{}' to a float."
                .format(
                    sec_name,
                    col_name,
                    limit_val,
                ))
    return limit_vals
def read_cfg(floc, cfg_proc=process_cfg):
    """
    Reads the given configuration file, returning a dict with the converted values supplemented by default values.

    Warns about any section that is not among the expected section names and
    replaces each specified column conversion with the result of conv_str_to_func.
    :param floc: The location of the file to read.
    :param cfg_proc: The processor to use for the raw configuration values.  Uses default values when the raw
        value is missing.
    :return: A dict of the processed configuration file's data.
    :raises IOError: If the file could not be read.
    :raises InvalidDataError: If the file cannot be parsed or processing raises a KeyError.
    """
    parser = ConfigParser()
    try:
        if not parser.read(floc):
            raise IOError('Could not read file {}'.format(floc))
        raw_main = dict(parser.items(MAIN_SEC))
        main_proc = cfg_proc(raw_main, DEF_CFG_VALS, REQ_KEYS, int_list=False)
    except (ParsingError, KeyError) as e:
        raise InvalidDataError(e)

    # Check the config file does not have sections that will be ignored
    ignored_sections = [sec for sec in parser.sections() if sec not in SECTIONS]
    for section in ignored_sections:
        warning(
            "Found section '{}', which will be ignored. Expected section names are: {}"
            .format(section, ", ".join(SECTIONS)))

    # Validate conversion input
    for conv in (COL1_CONV, COL2_CONV):
        requested = main_proc[conv]
        if requested:
            main_proc[conv] = conv_str_to_func(requested)
    return main_proc
def process_file(data_file, mcfg, delimiter=','):
    """
    Clamp the configured columns of a csv file to their max and min values.

    Values above a column's max are replaced by the max, values below a
    column's min are replaced by the min, and the result (with headers) is
    written to a comma-separated '.csv' file named after the data file.
    @param data_file: the csv file to read
    @param mcfg: configuration with, for each sub-section, a dict of column name to limit value
    @param delimiter: delimiter used to read the data file
    @raise InvalidDataError: if a configured column name is not a header in the data file
    """
    list_vectors, headers = read_csv_to_list(data_file, delimiter=delimiter, header=True)

    # Translate column names to column indices once, before touching the rows
    col_index_dict = {}
    for section in SUB_SECTIONS:
        index_to_limit = {}
        for key, val in mcfg[section].items():
            if key not in headers:
                raise InvalidDataError(
                    "Key '{}' found in configuration file but not in data file: "
                    "{}".format(key, data_file))
            # Parser already made sure that unique entries
            index_to_limit[headers.index(key)] = val
        col_index_dict[section] = index_to_limit

    upper_limits = col_index_dict[MAX_SEC]
    lower_limits = col_index_dict[MIN_SEC]
    edited_vectors = []
    for row in list_vectors:
        for col, max_val in upper_limits.items():
            if row[col] > max_val:
                row[col] = max_val
        for col, min_val in lower_limits.items():
            if row[col] < min_val:
                row[col] = min_val
        edited_vectors.append(row)

    out_name = create_out_fname(data_file, ext='.csv')
    list_to_csv([headers] + edited_vectors, out_name, delimiter=',')
def adjust_atom_xyz(cfg, data_tpl_content):
    """
    Write a series of data files in which one atom is moved along a line.

    The step vector is the difference between the two configured points divided
    by the number of steps; one file is written per integer multiple of that
    vector (added to the first point), with the multiple as the file suffix.
    @param cfg: configuration for the run
    @param data_tpl_content: processed data from the template
    @return: will print new data files or raise InvalidDataError
    """
    atom_id = cfg[ADJUST_ATOM]
    num_atoms = data_tpl_content[NUM_ATOMS]
    if atom_id > num_atoms:
        raise InvalidDataError(
            "Keyword '{}' specified atom index {} to have its XYZ coordinates adjusted, "
            "but found only "
            "{} atoms in the data template file: {}".format(
                ADJUST_ATOM, atom_id, num_atoms, cfg[DATA_TPL_FILE]))

    diff_vector = np.asarray((np.subtract(cfg[XYZ2], cfg[XYZ1])))
    inc_vector = np.divide(diff_vector, cfg[XYZ_STEPS])

    head_content = data_tpl_content[HEAD_CONTENT]
    atoms_content = data_tpl_content[ATOMS_CONTENT]
    tail_content = data_tpl_content[TAIL_CONTENT]

    # since python is zero-based, must subtract 1
    atom_row = atoms_content[atom_id - 1]

    first_multiplier = -cfg[XYZ_STEPS_EXTEND]
    stop_multiplier = cfg[XYZ_STEPS] + cfg[XYZ_STEPS_EXTEND]
    for multiplier in range(first_multiplier, stop_multiplier):
        f_name = create_out_fname(cfg[DATA_TPL_FILE],
                                  suffix='_' + str(multiplier),
                                  ext='.data')
        atom_row[4:7] = np.round(multiplier * inc_vector + cfg[XYZ1], 6)
        list_to_file(head_content + atoms_content + tail_content, f_name)
def process_output_file(cfg):
    """
    Reads in an initial set of parameters values from a space-separated list, as provided by 'fit.best' output from
    fitEVB. The order is important; thus read through the sections and parameters from the (ordered) lists
    (specified in the constants

    @param cfg: the configuration for this run
    @return: initial values to use in fitting, with both the high and low values set to that initial value
        (an empty dictionary when no 'best' file is specified)
    @raise InvalidDataError: if the number of values read does not equal the expected number of parameters
    """
    vals = {}
    best_file = cfg[MAIN_SEC][BEST_FILE]
    if best_file is None:
        return vals

    # loadtxt returns a 0-d array for a file holding a single value, on which
    # len() fails; atleast_1d leaves any other shape unchanged
    raw_vals = np.atleast_1d(np.loadtxt(best_file, dtype=np.float64))
    if len(raw_vals) != cfg[MAIN_SEC][PARAM_NUM]:
        raise InvalidDataError(
            "The total number of parameters for the specified sections ({}) does not "
            "equal the total number of values ({}) in the specified fitEVB output file: {}"
            "".format(cfg[MAIN_SEC][PARAM_NUM], len(raw_vals), best_file))

    # Values are consumed in order: section by section, parameter by parameter
    param_index = 0
    for section in cfg[MAIN_SEC][SECTIONS]:
        vals[section] = {}
        for param in FIT_PARAMS[section]:
            vals[section][param] = {
                LOW: raw_vals[param_index],
                HIGH: raw_vals[param_index]
            }
            param_index += 1
    return vals
def read_cfg(floc, cfg_proc=process_cfg):
    """
    Reads the given configuration file, returning a dict with the converted values supplemented by default values.

    Also gathers the cp2k files to process: the rows of the list file (if that
    file exists) followed by the single cp2k file (if specified).
    :param floc: The location of the file to read.
    :param cfg_proc: The processor to use for the raw configuration values.  Uses default values when the raw
        value is missing.
    :return: A dict of the processed configuration file's data.
    :raises IOError: If the configuration file could not be read.
    :raises InvalidDataError: If no file names to process were found.
    """
    parser = ConfigParser()
    if not parser.read(floc):
        raise IOError("Could not read file '{}'".format(floc))
    main_proc = cfg_proc(dict(parser.items(MAIN_SEC)),
                         DEF_CFG_VALS,
                         REQ_KEYS,
                         int_list=False)

    cp2k_files = []
    main_proc[CP2K_FILES] = cp2k_files
    list_file = main_proc[CP2K_LIST_FILE]
    if os.path.isfile(list_file):
        cp2k_files.extend(file_rows_to_list(list_file))
    single_file = main_proc[CP2K_FILE]
    if single_file is not None:
        cp2k_files.append(single_file)

    if not len(cp2k_files) == 0:
        return main_proc
    raise InvalidDataError(
        "Found no file names to process. Use the configuration ('ini') file to specify the name "
        "of a single file with the keyword '{}' or a file with listing files to process "
        "(one per line) with the keyword '{}'.".format(
            CP2K_FILE, CP2K_LIST_FILE))
def read_cfg(floc, cfg_proc=process_cfg):
    """
    Reads the given configuration file, returning a dict with the converted values supplemented by default values.

    If any of the new parameters is specified, all of them must be given and
    are converted to floats (using the text before the first comma).
    :param floc: The location of the file to read.
    :param cfg_proc: The processor to use for the raw configuration values.  Uses default values when the raw
        value is missing.
    :return: A dict of the processed configuration file's data.
    :raises IOError: If the file could not be read.
    :raises InvalidDataError: If a new parameter is missing or not a float, the alignment
        column is not supported, or CEC distances are requested without a summary file.
    """
    parser = ConfigParser()
    if not parser.read(floc):
        raise IOError('Could not read file {}'.format(floc))
    main_proc = cfg_proc(dict(parser.items(MAIN_SEC)), DEF_CFG_VALS, REQ_KEYS)

    # first see if we will calculate it
    main_proc[CALC_HIJ_NEW] = any(main_proc[key] is not None for key in NEW_PARAMS)

    if main_proc[CALC_HIJ_NEW]:
        for key in NEW_PARAMS:
            raw_val = main_proc[key]
            try:
                main_proc[key] = float(raw_val.split(',')[0])
            except (TypeError, ValueError, AttributeError):
                if raw_val is None:
                    first_warn = "Missing input value for key '{}'. ".format(
                        key)
                else:
                    first_warn = "Found '{}' for key '{}'. ".format(
                        raw_val, key)
                raise InvalidDataError(first_warn +
                                       "Require float inputs for keys: {}"
                                       "".format(NEW_PARAMS))

    if not (main_proc[ALIGN_COL] in [TIMESTEP, FILE_NAME]):
        raise InvalidDataError(
            "The program currently can only align CEC data on either '{}' or '{}'"
            .format(TIMESTEP, FILE_NAME))

    if main_proc[CALC_CEC_DIST] and main_proc[EVB_SUM_FILE] is None:
        raise InvalidDataError(
            "To calculate CEC distances ('{}' set to True), an '{}' must be specified."
            .format(CALC_CEC_DIST, EVB_SUM_FILE))

    return main_proc
def parse_cmdline(argv):
    """
    Parse the command line for the log-file energy script.

    On success, args.file_list holds the log files to process and
    args.source_name the file (or list file) they came from.
    `argv` is a list of arguments, or `None` for ``sys.argv[1:]``.
    Returns the parsed argument list and return code.
    """
    if argv is None:
        argv = sys.argv[1:]

    # initialize the parser object:
    parser = argparse.ArgumentParser(
        description='For each timestep, gather the energy information '
                    'output by LAMMPS in the log file.')
    parser.add_argument("-f", "--file", default=None,
                        help="The log file to be processed.")
    parser.add_argument(
        "-l", "--list_file", default=None,
        help="The a file with a list of log files to be processes.")

    args = None
    try:
        args = parser.parse_args(argv)

        if args.file is None:
            args.file_list = []
        elif os.path.isfile(args.file):
            args.file_list = [args.file]
            args.source_name = args.file
        else:
            raise IOError("Could not find specified log file: {}".format(
                args.file))

        if args.list_file is not None:
            # The list file takes precedence as the source name
            args.file_list.extend(file_rows_to_list(args.list_file))
            args.source_name = args.list_file

        if len(args.file_list) < 1:
            raise InvalidDataError(
                "Found no log file names to process. Specify one or more files as specified in "
                "the help documentation ('-h').")
    except IOError as e:
        warning("Problems reading file:", e)
        parser.print_help()
        return args, IO_ERROR
    except (KeyError, InvalidDataError, SystemExit) as e:
        # argparse exits with code 0 after printing help; that is not an error
        if getattr(e, 'code', None) == 0:
            return args, GOOD_RET
        warning(e)
        parser.print_help()
        return args, INPUT_ERROR
    return args, GOOD_RET
def read_cfg(f_loc, cfg_proc=process_cfg):
    """
    Reads the given configuration file, returning a dict with the converted values supplemented by default values.

    The residue ID lists are converted to integers, printing for cp2k is
    switched on when either list is non-empty, and default dictionary files are
    filled in when none are specified.
    :param f_loc: The location of the file to read.
    :param cfg_proc: The processor to use for the raw configuration values.  Uses default values when the raw
        value is missing.
    :return: A dict of the processed configuration file's data.
    :raises IOError: If the file could not be read.
    :raises InvalidDataError: If a residue ID is not an integer, or both molecule
        renumbering options are specified.
    """
    config = ConfigParser()
    good_files = config.read(f_loc)
    if not good_files:
        raise IOError('Could not read file {}'.format(f_loc))

    # since not all string lists and not all int lists, import as string and selectively make ints
    main_proc = cfg_proc(dict(config.items(MAIN_SEC)),
                         DEF_CFG_VALS,
                         REQ_KEYS,
                         int_list=False)
    for key in [RESID_QMMM, RESID_QM]:
        for index, entry in enumerate(main_proc[key]):
            try:
                main_proc[key][index] = int(entry)
            except (ValueError, TypeError):
                # Only conversion failures are reported as bad data; a bare
                # except would also trap KeyboardInterrupt and SystemExit
                raise InvalidDataError(
                    "Encountered '{}' when expected only integers in list for keyword '{}'"
                    "".format(entry, key))
    if (len(main_proc[RESID_QMMM]) + len(main_proc[RESID_QM])) > 0:
        main_proc[PRINT_FOR_CP2K] = True

    if main_proc[ELEMENT_DICT_FILE] is None:
        main_proc[ELEMENT_DICT_FILE] = DEF_ELEM_DICT_FILE

    if main_proc[RADII_DICT_FILE] is None:
        main_proc[RADII_DICT_FILE] = DEF_RADII_DICT_FILE

    if main_proc[RENUM_MOL] and main_proc[MOL_RENUM_FILE] is not None:
        raise InvalidDataError(
            "This program does not currently support both '{}' and '{}'"
            "".format(RENUM_MOL, MOL_RENUM_FILE))

    return main_proc
def missing_atoms_err(atom_ids, found_atoms, tstep_id, file_name):
    """Raises an exception listing the requested atom IDs that were not found.

    :param atom_ids: The atoms that were requested (must support ``difference``).
    :param found_atoms: The collection of atoms found, keyed by atom ID.
    :param tstep_id: The time step ID where the atom data was missing.
    :param file_name: the file name with the time step ID where atom was missing.
    :raises: InvalidDataError Describing the missing atom data.
    """
    absent_ids = atom_ids.difference(found_atoms.keys())
    id_text = ",".join(str(atom_id) for atom_id in absent_ids)
    raise InvalidDataError(
        MISSING_ATOMS_MSG.format(id_text, tstep_id, file_name))
def process_conv_tpl_keys(raw_key_val_tuple_list):
    """
    Split each entry on commas into a value and an initial search direction.

    A single value is converted with conv_num and paired with the default
    direction; two values are both converted to floats (initial value, then
    initial search direction).
    @param raw_key_val_tuple_list: key-value pairs read from configuration file; commas
        separate multiple values for one key
    @return val_dict: an ordered dictionary of values
    @return dir_dict: a dictionary of initial directions for minimization
    @raise InvalidDataError: if two values cannot be converted to floats, or if there
        are more than two values
    """
    val_dict = OrderedDict()
    dir_dict = {}
    for key, val in raw_key_val_tuple_list:
        pieces = [piece.strip() for piece in val.split(',')]
        val_num = len(pieces)

        if val_num > 2 or val_num < 1:
            raise InvalidDataError(
                "For key '{}', {} values were found ({}). Each parameter should have either one or "
                "two specified values (x0, optionally followed by initial search direction, which "
                "defaults to {}.".format(key, val_num, val, DEF_DIR))

        if val_num == 2:
            # if there are two values, assume that it is a float with the ability to be optimized
            try:
                val_dict[key] = float(pieces[0])
                dir_dict[key] = float(pieces[1])
            except ValueError:
                raise InvalidDataError(
                    "For key '{}', read '{}', which could not be converted to floats. When two "
                    "values are provided, they are read as an initial float that may be optimized, "
                    "and the initial search direction for optimization.".
                    format(key, val))
        else:
            # if it can be converted, do so; this helps with printing formatting
            val_dict[key] = conv_num(pieces[0])
            dir_dict[key] = DEF_DIR
    return val_dict, dir_dict
def check_atom_num(req_atom_num, last_line, file_name):
    """
    If a required number of atoms was specified, make sure that many were read.

    @param req_atom_num: None if not specified; otherwise an int
    @param last_line: the last line read before a summary section; the first number of that line identifies
        the number of atoms in the last section
    @param file_name: name of file used for error message
    @return: None; raises InvalidDataError if the number read differs from the required number
    """
    if req_atom_num is None:
        # Nothing to verify
        return
    num_atoms = int(last_line.split()[0])
    if num_atoms == req_atom_num:
        return
    raise InvalidDataError(
        "Based on user specified num_atoms, expected to have read {} atoms, "
        "but read {} in file: {}".format(req_atom_num, num_atoms,
                                         file_name))
def gather_out_field_names(cfg):
    """
    Collect, without repeats, the field names of every option that is switched on.

    @param cfg: configuration for run
    @return: list of field names to be printed for selected options
    @raise InvalidDataError: if none of the options were selected
    """
    chosen = []
    for option_name, fieldnames in OPT_FIELD_NAME_DICT.items():
        if not cfg[option_name]:
            continue
        for f_name in fieldnames:
            # Options may share field names; keep the first occurrence only
            if f_name in chosen:
                continue
            chosen.append(f_name)

    if len(chosen) == 0:
        raise InvalidDataError(
            'None of the following options were selected, so no data will be collected: {}'
            ''.format(OPT_FIELD_NAME_DICT.keys()))
    return chosen
def find_header_values(line, nums_dict):
    """
    Record the number found on a lammps data file header line.

    The line is tested against the pattern of every header entry that has not
    been filled yet; the first match stores the integer from the pattern's
    first group and ends the search.
    @param line: line in header section
    @param nums_dict: dictionary keep track of total numbers for types (lammps header data);
        updated in place
    @return: None (nums_dict is updated in place)
    @raise InvalidDataError: if a key is missing from nums_dict or the match is not an integer
    """
    try:
        for num_key, pattern in HEADER_PAT_DICT.items():
            if nums_dict[num_key] is not None:
                # Already found on an earlier line
                continue
            found = pattern.match(line)
            if found:
                # regex groups are 1-based
                nums_dict[num_key] = int(found.group(1))
                return
    except (ValueError, KeyError) as e:
        raise InvalidDataError(
            "While reading a data file, encountered error '{}' on line: {}".
            format(e, line))
def read_cfg(f_loc, cfg_proc=process_cfg):
    """
    Reads the given configuration file, returning a dict with the converted values supplemented by default values.

    Each entry of the relative energy section (if present) is read as
    'file name,timestep' and stored, keyed by the entry's key, with a
    case-insensitive prefix pattern and placeholders for the reference energy
    and minimum diabatic energy.
    :param f_loc: The location of the file to read.
    :param cfg_proc: The processor to use for the raw configuration values.  Uses default values when the raw
        value is missing.
    :return: A dict of the processed configuration file's data.
    :raises IOError: If the file could not be read.
    :raises InvalidDataError: If the second value of a relative energy entry is missing
        or is not an integer.
    """
    config = ConfigParser()
    good_files = config.read(f_loc)
    if not good_files:
        raise IOError('Could not read file {}'.format(f_loc))
    main_proc = cfg_proc(dict(config.items(MAIN_SEC)), DEF_CFG_VALS, REQ_KEYS)

    rel_e_proc = {}
    if REL_E_SEC in config.sections():
        for entry in config.items(REL_E_SEC):
            section_prefix = entry[0]
            vals = entry[1].split(',')
            # when the ini file is read, upper case becomes lower, so I'll ignore case in pattern matching
            base_e_match_pat = re.compile(r"^" + section_prefix + ".*", re.I)
            base_e_file_name = vals[0]
            try:
                base_e_timestep = int(vals[1])
            except (ValueError, IndexError):
                # IndexError: the entry has no comma, so there is no second value
                raise InvalidDataError(
                    "Could not convert second entry in '{}' to an integer (expected an "
                    "integer timestep)".format(entry[1]))
            rel_e_proc[section_prefix] = {
                REL_E_PAT: base_e_match_pat,
                FILE_NAME: base_e_file_name,
                TIMESTEP: base_e_timestep,
                REL_E_REF: np.nan,
                MIN_DIAB_ENE: np.inf,
            }
    main_proc[REL_E_SEC] = rel_e_proc

    # Make sure that at least one kind of output is printed
    if not main_proc[PRINT_PER_FILE] and not main_proc[PRINT_PER_LIST]:
        main_proc[PRINT_PER_LIST] = True
        warning("'{}' set to '{}'; setting '{}' to '{}'".format(
            PRINT_PER_FILE, main_proc[PRINT_PER_FILE], PRINT_PER_LIST,
            main_proc[PRINT_PER_LIST]))

    return main_proc
def deprotonate(cfg, protonatable_res, excess_proton, dump_h3o_mol, water_mol_dict, box, tpl_data): """ Deprotonate a the residue and assign the proton to the closest water so that the output data matches with the template. """ # Convert excess proton to a hydronium proton excess_proton[1] = tpl_data[H3O_MOL][0][1] # molecule number excess_proton[2] = cfg[H3O_H_TYPE] # type excess_proton[3] = tpl_data[H3O_H_CHARGE] # charge dump_h3o_mol.append(excess_proton) min_dist_id = None min_dist = np.linalg.norm(box) for mol_id, molecule in water_mol_dict.items(): for atom in molecule: if atom[2] == cfg[WAT_O_TYPE]: dist = pbc_dist(np.asarray(excess_proton[4:7]), np.asarray(atom[4:7]), box) if dist < min_dist: min_dist_id = mol_id min_dist = dist logger.debug('Deprotonated residue: the molecule ID of the closest water ' '(to become a hydronium) is {}.'.format(min_dist_id)) # Now that have the closest water, add its atoms to the hydronium list for atom in water_mol_dict[min_dist_id]: dump_h3o_mol.append(atom) # Remove the closest water from the dictionary of water molecules, and convert it to a hydronium del water_mol_dict[min_dist_id] for atom in dump_h3o_mol: if atom[2] == cfg[WAT_O_TYPE]: atom[2] = cfg[H3O_O_TYPE] atom[3] = tpl_data[H3O_O_CHARGE] elif atom[2] == cfg[WAT_H_TYPE]: atom[2] = cfg[H3O_H_TYPE] atom[3] = tpl_data[H3O_H_CHARGE] # Make the atom type and charge of the protonatable residue the same as for the template file (switching # from protonated to deprotonated residue) if len(tpl_data[PROT_RES_MOL]) != len(protonatable_res): raise InvalidDataError( 'Encountered dump file in which the number of atoms in the ' 'protonatable residue does not equal the number of atoms in the template data file.' )
def eval_eqs(cfg, tpl_vals_dict):
    """
    Evaluates the equations in the configuration, storing each result in tpl_vals_dict.

    Each equation string is formatted with the current values and then
    evaluated, in the order given by the configuration.
    @param cfg: configuration for the run
    @param tpl_vals_dict: dictionary of variable values to be used to evaluate equations and fill templates;
        updated in place with each evaluated equation
    @raise KeyError: if an equation refers to a parameter without a value
    @raise InvalidDataError: if an equation cannot be evaluated because of an unknown name
    """
    for eq_param in cfg[TPL_EQ_PARAMS]:
        equation = cfg[TPL_VALS][eq_param]
        try:
            string_to_eval = equation.format(**tpl_vals_dict)
        except KeyError as e:
            # Report the equation text itself. Looking it up in tpl_vals_dict
            # could raise a second KeyError (hiding this message) or show a
            # previously evaluated result instead of the equation.
            raise KeyError(
                "Missing parameter value {} needed to evaluate '{}' for the parameter '{}'."
                "".format(e, equation, eq_param))
        try:
            # NOTE(review): eval executes text taken from the configuration
            # file; only use configuration files from trusted sources.
            tpl_vals_dict[eq_param] = eval(string_to_eval)
        except NameError:
            raise InvalidDataError(
                "Could not evaluate the string '{}' specifying the value for the parameter "
                "'{}'. Check equation order, equations, and/or parameter values."
                "".format(string_to_eval, eq_param))