def __init__(self, part2_string):
    """Keep the raw Part2 text, parse it and refuse an empty result.

    Args:
        part2_string (string): string of Part2 in qfep output

    Raises:
        QFepOutputError: if 'data' holds no rows after parsing
    """
    self._part2_string = part2_string
    self.data = DataContainer(self._COLUMN_TITLES)
    self._parse()

    parsed_rows = self.data.get_rows()
    if not parsed_rows:
        raise QFepOutputError("Part2 is empty (no rows).")
def dg_all(self):
    """Build a DataContainer with all main and subcalc free energies.

    One row is added per qfep output in 'qfos' (sorted). The row holds the
    relative path of the output followed by dG*, dG0 and dG_lambda of the
    main calculation and then of every sub-calculation (sorted by name).
    Values missing for an output are stored as None.
    """
    subcalc_names = sorted(self.sub_calcs)

    titles = ["Qfep_output", "dG*", "dG0", "dG_lambda"]
    for name in subcalc_names:
        for suffix in ("dG*", "dG0", "dG_lambda"):
            titles.append("{}_{}".format(name, suffix))
    table = DataContainer(titles)

    # the main calculation first, then the sub-calculations in title order
    sources = [self] + [self.sub_calcs[name] for name in subcalc_names]

    for qfo in sorted(self.qfos):
        row = [os.path.relpath(qfo)]
        for source in sources:
            row.append(source.dgas.get(qfo, None))
            row.append(source.dg0s.get(qfo, None))
            row.append(source.dgs_fep.get(qfo, None))
        table.add_row(row)

    return table
class _QFepPart1(object):
    """Class for parsing and storing data from Part1 in Qfep output.

    Part1 contains free energies vs. lambda (FEP).
    If parsing is unsuccessful QFepOutputError is raised,
    else all the data is stored in DataContainer object 'data'.

    Usage:
    cols=["Lambda", "dG"]
    dG_lambda = _QFepPart1.data.get_rows(columns=cols)

    Args:
        part1_string (string): string of Part1 in qfep output

    """

    # Expected column-name comment line. It is compared token by token, so
    # the amount of whitespace between the column names does not matter.
    _PART1_HEADER = "# lambda(1) dGf sum(dGf) dGr sum(dGr) <dG>"

    _COLUMN_TITLES = ["Lambda", "dGf", "sum_dGf", "dGr", "sum_dGr", "dG"]

    def __init__(self, part1_string):
        self._part1_string = part1_string
        self.data = DataContainer(self._COLUMN_TITLES)

        self._parse()
        if not self.data.get_rows():
            raise QFepOutputError("Part1 is empty (no rows).")

    @property
    def dg(self):
        """Return final dG(lambda) (FEP)"""
        return self.data.get_columns(["dG"])[0][-1]

    def _parse(self):
        """Fill 'data' with the numeric rows of Part1.

        Raises:
            QFepOutputError: if the column header is missing or different
                             from the expected one, or if a data row
                             contains a value that is not a number
        """
        lines = self._part1_string.split('\n')
        # the first line is a comment
        lines.pop(0)

        # In newer versions of Q, two additional lines are printed
        # to distinguish between 'full', 'exclusions' and 'qcp'
        # check for the two extra lines and remove them
        # (the length check keeps a truncated Part1 from raising IndexError)
        if len(lines) > 1 and "Calculation" in lines[1]:
            lines = lines[2:]

        # comment with column names; a truncated Part1 yields an empty
        # header, which is reported as a wrong header below
        header = lines.pop(0) if lines else ""
        if header.split() != self._PART1_HEADER.split():
            raise QFepOutputError("Part1 has a wrong header, did the qfep5 "
                                  "binary change?")

        for line in lines:
            # drop everything after a '#' or '!' comment character
            line = re.split(r"[#!]", line)[0].strip()
            if not line:
                continue
            try:
                row = [float(x) for x in line.split()]
            except ValueError as error_msg:
                raise QFepOutputError("Part1 contains a non-numeric value: "
                                      "{}".format(error_msg))
            self.data.add_row(row)
def lra_stats(self):
    """Calculate average and st.dev of LRA and reorg energies.

    The rows of all containers in 'lras' are grouped by their position
    (first rows together, second rows together, ...) and the mean and
    standard deviation of columns 1-4 are calculated for each group.

    Returns:
        DataContainer with one row per group:
        energy type, followed by mean and st.dev of (E2-E1)_10,
        (E2-E1)_01, LRA and REORG
    """
    average_lras = DataContainer([
        "E_type", "(E2-E1)_10_mean", "(E2-E1)_10_std", "(E2-E1)_01_mean",
        "(E2-E1)_01_std", "LRA_mean", "LRA_std", "REORG_mean", "REORG_std"
    ])

    # grouped[i] collects the i-th row of every LRA container:
    # [
    #   [
    #     ["EQtot", EQtot_de_st1_1, EQtot_de_st2_1, EQtot_lra_1, EQtot_reorg_1],
    #     ["EQtot", EQtot_de_st1_2, EQtot_de_st2_2, ...], ...
    #   ],
    #   [
    #     ["EQbond", EQbond_de_st1_1, EQbond_de_st2_1, EQbond_lra_1, EQbond_reorg_1],
    #     ["EQbond", EQbond_de_st1_2, EQbond_de_st2_2, ...], ...
    #   ]
    # ]
    grouped = []
    for lra in self.lras.values():
        for irow, row in enumerate(lra.get_rows()):
            if irow == len(grouped):
                grouped.append([])
            grouped[irow].append(row)

    for rows in grouped:
        # transpose to get [ ["EQtot","EQtot"...],
        #                    [ EQtot_de_st1_1, EQtot_de_st1_2,...],
        #                    [ EQtot_de_st2_1, EQtot_de_st2_2,...], ...]
        # list() is required because zip() returns a lazy iterator on
        # Python 3, which cannot be indexed
        columns = list(zip(*rows))

        stats = [columns[0][0]]
        # columns 1-4: (E2-E1)_10, (E2-E1)_01, LRA, REORG
        for icol in (1, 2, 3, 4):
            stats.append(np.mean(columns[icol]))
            stats.append(np.std(columns[icol]))
        average_lras.add_row(stats)

    return average_lras
def __init__(self, part3_string):
    """Keep the raw Part3 text, parse it and refuse an empty result.

    The cached values (_dga, _dg0, _maxima_bins, _minima_bins) and
    'warning' are reset to None before the parsing starts.

    Args:
        part3_string (string): string of Part3 in qfep output

    Raises:
        QFepOutputError: if 'data' holds no rows after parsing
    """
    self._part3_string = part3_string
    self.data = DataContainer(self._COLUMN_TITLES)

    self._dga, self._dg0 = None, None
    self._maxima_bins, self._minima_bins = None, None
    self.warning = None

    self._parse()

    parsed_rows = self.data.get_rows()
    if not parsed_rows:
        raise QFepOutputError("Part3 is empty (no rows).")
class _QFepPart2(object):
    """Class for parsing and storing data from Part2 in Qfep output.

    Part2 contains the results of the binning process - EVB ground state
    free energy vs. lambda and Egap, as well as the diabatic
    free energy profiles.
    If parsing is unsuccessful QFepOutputError is raised,
    else all the data is stored in DataContainer object 'data'.

    Usage:
    cols=["Lambda", "dGg"]
    dGg_lambda = _QFepPart2.data.get_rows(columns=cols)

    Args:
        part2_string (string): string of Part2 in qfep output

    """

    # Expected column-name comment line. It is compared token by token, so
    # the amount of whitespace between the column names does not matter.
    _PART2_HEADER = "# Lambda(1) bin Energy gap dGa dGb dGg "\
                    "# pts c1**2 c2**2"

    _COLUMN_TITLES = [
        "Lambda", "bin", "Egap", "dGa", "dGb", "dGg", "points", "c1**2",
        "c2**2"
    ]

    def __init__(self, part2_string):
        self._part2_string = part2_string
        self.data = DataContainer(self._COLUMN_TITLES)

        self._parse()
        if not self.data.get_rows():
            raise QFepOutputError("Part2 is empty (no rows).")

    def _parse(self):
        """Fill 'data' with the numeric rows of Part2.

        Raises:
            QFepOutputError: if the column header is missing or different
                             from the expected one, or if a data row
                             contains a value that is not a number
        """
        lines = self._part2_string.split('\n')
        # the first line is a comment
        lines.pop(0)

        # comment with column names; a truncated Part2 yields an empty
        # header, which is reported as a wrong header below
        header = lines.pop(0) if lines else ""
        if header.split() != self._PART2_HEADER.split():
            raise QFepOutputError("Part2 has a wrong header, did the qfep5 "
                                  "binary change?")

        for line in lines:
            # drop everything after a '#' or '!' comment character
            line = re.split(r"[#!]", line)[0].strip()
            if not line:
                continue
            try:
                row = [float(x) for x in line.split()]
            except ValueError as error_msg:
                raise QFepOutputError("Part2 contains a non-numeric value: "
                                      "{}".format(error_msg))
            self.data.add_row(row)
def get_temps(self, stride=1):
    """Merge the temperature rows of all logfiles into one container.

    The column titles are taken from the first analysed logfile.

    Args:
        stride (int, optional): use only every Nth point, default=1

    Returns:
        temperatures (DataContainer)
    """
    # "Time", "T_tot", "T_free", "T_free_solute", "T_free_solvent"
    merged = DataContainer(self.analysed[0].data_temp.column_titles)

    for parsed_log in self.analysed:
        for picked in parsed_log.data_temp.get_rows()[::stride]:
            merged.add_row(picked)

    return merged
def __init__(self, part0_string, num_evb_states, calc_index):
    """Keep the raw Part0 text and parse it into one container per state.

    Args:
        part0_string (string): string of Part0 in qfep output
        num_evb_states (int): number of EVB states, one DataContainer
                              is created in 'data_state' for each
        calc_index (int): forwarded to the parser

    Raises:
        QFepOutputError: if any of the state containers holds no rows
                         after parsing
    """
    self._part0_string = part0_string
    self._num_evb_states = num_evb_states
    self._calc_index = calc_index

    self.data_state = []
    for _ in range(num_evb_states):
        self.data_state.append(DataContainer(self._COLUMN_TITLES))

    self._parse(calc_index)

    if any(not state_dc.get_rows() for state_dc in self.data_state):
        raise QFepOutputError("Part0 is empty (no rows).")
def get_offdiags(self, stride=1):
    """Merge the off-diagonal distance rows of all logfiles.

    The column titles are taken from the first analysed logfile.

    Args:
        stride (int, optional): use only every Nth point, default=1

    Returns:
        distances (DataContainer): rows of 'data_offdiags' from all
                                   logfiles, in the order of 'analysed'
    """
    merged = DataContainer(self.analysed[0].data_offdiags.column_titles)

    for parsed_log in self.analysed:
        for picked in parsed_log.data_offdiags.get_rows()[::stride]:
            merged.add_row(picked)

    return merged
def __init__(self, qcalc_exec, calcdirs, pdb_file, en_list_fn, lambdas_A,
             lambdas_B, resid_first, resid_last, scale_ionized, nthreads,
             qmask=None):
    """Store the settings, load the PDB and create empty result holders.

    Raises:
        QGroupContribError: if the PDB file cannot be parsed
    """
    self._en_list_fn = en_list_fn
    self._qcalc_exec = qcalc_exec

    try:
        self._pdb_qstruct = QStruct(pdb_file, "pdb")
    except QStructError as error_msg:
        raise QGroupContribError("Can't parse PDB file '{}': {}"
                                 "".format(pdb_file, error_msg))

    # directories are stored relative to the current working directory
    self._calcdirs = [os.path.relpath(cd) for cd in calcdirs]

    self._nthreads = nthreads
    self._lambdas_A, self._lambdas_B = lambdas_A, lambdas_B
    self._resid_first, self._resid_last = resid_first, resid_last
    self._scale_ionized = scale_ionized
    self._qmask = qmask

    # results, filled in by the calculations
    self._qcalc_io = ODict()
    self.gcs = ODict()
    self.failed = ODict()
    self.qcalc_version = None
    self.kill_event = threading.Event()

    # column titles carry the first lambda of both states and,
    # for electrostatics, the scaling factor of ionized residues
    lambda_tag = "(l={:5.4f}->l={:5.4f})".format(lambdas_A[0], lambdas_B[0])
    scale_tag = "_(scale={})".format(self._scale_ionized)

    colnames = ["Residue id", "Residue name", "N"]
    for quantity in ("VdW" + lambda_tag, "El" + lambda_tag + scale_tag):
        colnames.append(quantity + "_mean")
        colnames.append(quantity + "_stdev")

    self.gcs_stats = DataContainer(colnames)
def get_energies(self, e_type, stride=1):
    """Merge the energy rows of one type from all logfiles.

    The column titles are taken from the first analysed logfile.

    Args:
        e_type (string): keys in QDynOutput.map_en_section dictionary
        stride (int, optional): use only every Nth point, default=1

    Returns:
        energies (DataContainer)
    """
    titles = self.analysed[0].map_en_section[e_type].column_titles
    merged = DataContainer(titles)

    for parsed_log in self.analysed:
        section_rows = parsed_log.map_en_section[e_type].get_rows()
        for picked in section_rows[::stride]:
            merged.add_row(picked)

    return merged
def get_offdiags(self, percent_skip=0, stride=1):
    """Merge the off-diagonal distance rows of all logfiles.

    The column titles are copied from the first analysed logfile.

    Args:
        percent_skip (int, optional): percent of datapoints in each
                                      logfile to skip, default=0
        stride (int, optional): use only every Nth point, default=1

    Returns:
        distances (DataContainer): rows of 'data_offdiags' from all
                                   logfiles, in the order of 'analysed'
    """
    first_log = self.analysed[0]
    merged = DataContainer(list(first_log.data_offdiags.get_column_titles()))

    for analysed_log in self.analysed:
        all_rows = analysed_log.data_offdiags.get_rows()
        # number of leading rows to drop, rounded to the nearest integer
        n_skip = int(round(len(all_rows) * percent_skip / 100.0))
        for picked in all_rows[n_skip::stride]:
            merged.add_row(picked)

    return merged
def get_temps(self, percent_skip=0, stride=1):
    """Merge the temperature rows of all logfiles into one container.

    The column titles are copied from the first analysed logfile.

    Args:
        percent_skip (int, optional): percent of datapoints in each
                                      logfile to skip, default=0
        stride (int, optional): use only every Nth point, default=1

    Returns:
        temperatures (DataContainer)
    """
    # "Time", "T_tot", "T_free", "T_free_solute", "T_free_solvent"
    first_log = self.analysed[0]
    merged = DataContainer(list(first_log.data_temp.get_column_titles()))

    for analysed_log in self.analysed:
        all_rows = analysed_log.data_temp.get_rows()
        # number of leading rows to drop, rounded to the nearest integer
        n_skip = int(round(len(all_rows) * percent_skip / 100.0))
        for picked in all_rows[n_skip::stride]:
            merged.add_row(picked)

    return merged
def get_energies(self, e_type, percent_skip=0, stride=1):
    """Merge the energy rows of one type from all logfiles.

    The column titles are copied from the first analysed logfile.

    Args:
        e_type (string): keys in QAnalyseDyn.map_en_section dictionary
        percent_skip (int, optional): percent of datapoints in each
                                      logfile to skip, default=0
        stride (int, optional): use only every Nth point, default=1

    Returns:
        energies (DataContainer)
    """
    first_section = self.analysed[0].map_en_section[e_type]
    merged = DataContainer(list(first_section.get_column_titles()))

    for analysed_log in self.analysed:
        all_rows = analysed_log.map_en_section[e_type].get_rows()
        # number of leading rows to drop, rounded to the nearest integer
        n_skip = int(round(len(all_rows) * percent_skip / 100.0))
        for picked in all_rows[n_skip::stride]:
            merged.add_row(picked)

    return merged
def _parse_dyn(self):
    """Parses the dynamics part of the Qdyn output (called by init)

    Extracts all the temperatures, energies, Q energies and off-diagonals.
    Reading starts right after the header (at '_header_length').
    Energies and off-diagonal distances of step 0 are skipped.

    Raises:
        QDynOutputError: if a second "Initialising dynamics" line is
                         found (concatenated outputs)
    """
    # tmp temperature vars
    t_free, t_tot = None, None
    temps_q6 = {"Total": [], "Free": [], "Solute": [],
                "Solvent": [], "time": []}

    # tmp offdiagonal vars, one list of [time, distance] per atom pair
    tmp_offdiags = ODict()
    for atom1, atom2 in self.header.offdiagonals:
        tmp_offdiags["{}_{}".format(atom1, atom2)] = []

    # line of '=' characters that closes an energy section
    section_end = ("===================================================="
                   "======================")

    time = self.time_begin
    insection = False
    step = 0

    with gzopen(self._qdyn_output) as qdyn_output:
        qdyn_output.seek(self._header_length)
        for line in qdyn_output:
            lf = line.split()
            if not lf:
                continue
            if "Initialising dynamics" in line:
                raise QDynOutputError("Found more than one qdyn_output... "
                                      "Please don't concatenate...")

            # Temperature
            # NOTE(review): the version is compared with the string "6";
            # if qdyn_version is a string this is a lexicographic
            # comparison - confirm it holds for all supported versions.
            if self.header.qdyn_version > "6":
                if "temperature at step" in line:
                    # fix for large step numbers
                    lf = line.replace("step", "step ")
                    lf = lf.replace("System", "").split()
                    t_type, t, step = lf[0], float(lf[6]), int(lf[4])
                    temps_q6[t_type].append(t)
                    if t_type == "Total":
                        time = step * self.header.stepsize \
                               * self._timeconv + self.time_begin
                        temps_q6["time"].append(time)
            else:
                # second line with temps (pre Q6)
                if t_free is not None:
                    try:
                        tf_solute = float(lf[1])
                    except (IndexError, ValueError):
                        # gas phase
                        tf_solute = 0
                    try:
                        tf_solvent = float(lf[3])
                    except (IndexError, ValueError):
                        # gas phase
                        tf_solvent = 0
                    self.data_temp.add_row((time, t_tot, t_free,
                                            tf_solute, tf_solvent))
                    t_free, t_tot = None, None
                # first line with temps (pre Q6)
                elif "Temperature at step" in line:
                    # fix for large step numbers
                    lf = line.replace("step", "step ").split()
                    step = int(lf[3].strip(":"))
                    time = step * self.header.stepsize \
                           * self._timeconv + self.time_begin
                    t_tot, t_free = float(lf[5]), float(lf[7])

            # start/end of the energy sections
            if "Energy summary at step" in line or \
                    "Q-atom energies at step" in line:
                insection = True
                step = int(lf[5])
                time = step * self.header.stepsize \
                       * self._timeconv + self.time_begin
            elif "FINAL Energy summary" in line or \
                    "FINAL Q-atom energies" in line:
                insection = True
                time = self.header.nsteps * self.header.stepsize \
                       * self._timeconv + self.time_begin
            elif section_end in line:
                insection = False

            # skip the 0th step
            if step == 0:
                continue

            if insection:
                key = lf[0]
                if key in self.map_en_section:
                    row = [time,] + [float(x) for x in lf[1:]]
                    self.map_en_section[key].add_row(row)
                    continue
                if key in self.map_qen_section:
                    evb_index = int(lf[1]) - 1
                    row = [time,] + [float(x) for x in lf[2:]]
                    self.map_qen_section[key][evb_index].add_row(row)
                    continue

            # off-diagonal distance lines are recognised both inside and
            # outside of an energy section
            if "dist. between" in line:
                atom1, atom2, dist = lf[8], lf[9], float(lf[11])
                k = "{}_{}".format(atom1, atom2)
                tmp_offdiags[k].append([time, dist])

    # join temperatures to one DataContainer (Q6+)
    for i, time in enumerate(temps_q6["time"]):
        try:
            t_solv = temps_q6["Solvent"][i]
        except IndexError:
            # gas phase
            t_solv = 0
        self.data_temp.add_row((time, temps_q6["Total"][i],
                                temps_q6["Free"][i], temps_q6["Solute"][i],
                                t_solv))

    # join Offdiagonal distances to single DataContainer
    offd_keys = list(tmp_offdiags.keys())
    self.data_offdiags = DataContainer(["Time",] + offd_keys)
    # without off-diagonals in the header the container stays empty
    if offd_keys:
        for i, (time, _) in enumerate(tmp_offdiags[offd_keys[0]]):
            row = [time,] + [tmp_offdiags[k][i][1] for k in offd_keys]
            self.data_offdiags.add_row(row)
def calcall(self):
    """Run the GC calcs, update .gcs, .failed and .gcs_stats.

    One thread is started per calculation directory. Directories whose
    thread reports an error, or whose output cannot be parsed, are stored
    in .failed together with the error. For all the others the LRA and
    reorganization contributions per residue are stored in .gcs, and the
    means and standard deviations over all directories in .gcs_stats.

    If 'kill_event' is set while waiting for the threads, the method tries
    to terminate the process of the thread it is waiting for and returns
    without parsing any results.
    """
    semaphore = threading.BoundedSemaphore(self._nthreads)

    self._qcalc_io.clear()
    self.gcs.clear()
    self.gcs_stats.delete_rows()
    self.failed.clear()

    threads = []
    for calcdir in self._calcdirs:
        threads.append(_QGroupContribThread(self, semaphore, calcdir))
        threads[-1].start()

    for t in threads:
        # is_alive() works on Python 2.6+ and 3 (isAlive() is gone in 3.9)
        while t.is_alive():
            t.join(1.0)
            if self.kill_event.is_set():
                try:
                    t.qcalc.process.terminate()
                except Exception:
                    # best effort, a failed termination is ignored
                    pass
                return

        if t.error:
            self.failed[t.calcdir] = t.error
        else:
            self._qcalc_io[t.calcdir] = (t.qinps, t.qouts)

    # parse the output for results and
    # calculate LRAs for each dir
    for _dir, (_, qouts) in self._qcalc_io.items():
        gcs = []
        failed_flag = False
        for qout in qouts:
            try:
                qco = QCalcOutput(qout)
                res = qco.results["gc"]
                if not self.qcalc_version:
                    self.qcalc_version = qco.qcalc_version
            except (QCalcError, KeyError) as error_msg:
                self.failed[_dir] = error_msg
                failed_flag = True
                break

            gc = {}
            for row in res.get_rows():
                resid, vdw, el = int(row[0]), float(row[1]), float(row[2])
                gc[resid] = {"vdw": vdw, "el": el}
            gcs.append(gc)

        if failed_flag:
            continue

        resids = sorted(gcs[0].keys())
        resnames = [self._pdb_qstruct.residues[ri - 1].name
                    for ri in resids]

        # do the LRA thingy
        # LRA = 0.5*(<E2-E1>_conf1+<E2-E1>_conf2)
        # REORG = <E2-E1>_conf1 - LRA
        # (outputs 0 and 1 are used for conf1, outputs 2 and 3 for conf2)
        e2e1_st1_vdw = [gcs[1][key]["vdw"] - gcs[0][key]["vdw"]
                        for key in resids]
        e2e1_st1_el = [gcs[1][key]["el"] - gcs[0][key]["el"]
                       for key in resids]
        e2e1_st2_vdw = [gcs[3][key]["vdw"] - gcs[2][key]["vdw"]
                        for key in resids]
        e2e1_st2_el = [gcs[3][key]["el"] - gcs[2][key]["el"]
                       for key in resids]

        vdw_lra = [0.5 * (a + b)
                   for a, b in zip(e2e1_st1_vdw, e2e1_st2_vdw)]
        el_lra = [0.5 * (a + b)
                  for a, b in zip(e2e1_st1_el, e2e1_st2_el)]
        vdw_reorg = [0.5 * (a - b)
                     for a, b in zip(e2e1_st1_vdw, e2e1_st2_vdw)]
        el_reorg = [0.5 * (a - b)
                    for a, b in zip(e2e1_st1_el, e2e1_st2_el)]

        # scale the ionized residues
        if abs(self._scale_ionized - 1.0) > 1e-7:
            for i, resname in enumerate(resnames):
                if resname in ("ARG", "LYS", "HIP", "ASP", "GLU"):
                    e2e1_st1_el[i] = e2e1_st1_el[i] / self._scale_ionized
                    e2e1_st2_el[i] = e2e1_st2_el[i] / self._scale_ionized
                    el_lra[i] = el_lra[i] / self._scale_ionized
                    el_reorg[i] = el_reorg[i] / self._scale_ionized

        # write the DataContainer
        lambda_tag = "(l={:5.4f}->l={:5.4f})".format(self._lambdas_A[0],
                                                     self._lambdas_B[0])
        iscale_tag = "_(iscale={})".format(self._scale_ionized)
        gc_lra = DataContainer([
            "Residue_id", "Residue name",
            "<E2-E1>1_VdW" + lambda_tag,
            "<E2-E1>1_El" + lambda_tag + iscale_tag,
            "<E2-E1>2_VdW" + lambda_tag,
            "<E2-E1>2_El" + lambda_tag + iscale_tag,
            "LRA_VdW" + lambda_tag,
            "LRA_El" + lambda_tag + iscale_tag,
            "REORG_VdW" + lambda_tag,
            "REORG_El" + lambda_tag + iscale_tag
        ])

        for row in zip(resids, resnames, e2e1_st1_vdw, e2e1_st1_el,
                       e2e1_st2_vdw, e2e1_st2_el, vdw_lra, el_lra,
                       vdw_reorg, el_reorg):
            gc_lra.add_row(row)

        self.gcs[_dir] = gc_lra

    # get GC stats over all directories
    self.gcs_stats.delete_rows()
    # "resid.resname" -> one list of values per contribution column
    gcs = {}
    for gc in self.gcs.values():
        for row in gc.get_rows():
            resid, resname = row[0:2]
            res_key = "{}.{}".format(resid, resname)
            values = [[val,] for val in row[2:]]
            if res_key not in gcs:
                gcs[res_key] = values
            else:
                for i, val in enumerate(gcs[res_key]):
                    val.extend(values[i])

    # iterate through each residue and calculate
    # means and stdevs
    # (sort by residue index)
    for res_key in sorted(gcs.keys(), key=lambda x: int(x.split(".")[0])):
        rc = gcs[res_key]
        resid, resname = res_key.split(".")
        # residue id, residue name, number of values, then mean and stdev
        # of: <E2-E1>1 vdw, <E2-E1>1 el, <E2-E1>2 vdw, <E2-E1>2 el,
        #     LRA vdw, LRA el, REORG vdw, REORG el
        rc_stats = [int(resid), resname, len(rc[0])]
        for icol in range(8):
            rc_stats.append(np.mean(rc[icol]))
            rc_stats.append(np.std(rc[icol]))

        self.gcs_stats.add_row(rc_stats)
def __init__(self, qdyn_output, time_unit="ps", step_size=None,
             start_time=0):
    """Parse the header, create the data containers and parse the rest.

    Args:
        qdyn_output (string): Qdyn output filename
        time_unit (string): fs, ps or ns (case-insensitive)
        step_size (float): passed on to the header parser
        start_time (float): time of the first step in the given unit

    Raises:
        QDynOutputError: if the time unit is not one of fs, ps, ns
    """
    self._qdyn_output = qdyn_output

    # conversion factors from femtoseconds to the requested unit
    unit = time_unit.lower()
    conversions = {"fs": 1.0, "ps": 1e-3, "ns": 1e-6}
    if unit not in conversions:
        raise QDynOutputError("Timeunit has to be either 'fs','ps' or 'ns'")
    self._timeconv = conversions[unit]

    self._stepsize_user = step_size

    # parse the header
    self.time_begin = start_time
    self.time_unit = unit
    self._parse_header()

    ### Datacontainer variables for storing all the data

    # temperature
    self.data_temp = DataContainer(["Time", "T_tot", "T_free",
                                    "T_free_solute", "T_free_solvent"])

    # energies
    bonded_titles = ["Time", "El", "VdW", "Bond", "Angle", "Torsion",
                     "Improper"]
    restraint_titles = ["Time", "Total", "Fix", "Solvent_rad",
                        "Solvent_pol", "Shell", "Solute"]
    sum_titles = ["Time", "Total", "Potential", "Kinetic"]

    self.data_E_solute = DataContainer(bonded_titles)
    self.data_E_solvent = DataContainer(bonded_titles)
    self.data_E_solute_solvent = DataContainer(["Time", "El", "VdW"])
    self.data_E_LRF = DataContainer(["Time", "El"])
    self.data_E_Q_atom = DataContainer(bonded_titles)
    self.data_E_restraints = DataContainer(restraint_titles)
    self.data_E_SUM = DataContainer(sum_titles)

    # Q energies, one container per EVB state in each list
    q_nonbonded = ("Time", "Lambda", "El", "VdW")
    q_all = ("Time", "Lambda", "El", "VdW", "Bond", "Angle", "Torsion",
             "Improper")
    q_sum = ("Time", "Lambda", "Total", "Restraint")

    self.data_EQ_Q, self.data_EQ_prot = [], []
    self.data_EQ_wat, self.data_EQ_surr = [], []
    self.data_EQ_any, self.data_EQ_SUM = [], []

    per_state = ((self.data_EQ_Q, q_nonbonded),
                 (self.data_EQ_prot, q_nonbonded),
                 (self.data_EQ_wat, q_nonbonded),
                 (self.data_EQ_surr, q_nonbonded),
                 (self.data_EQ_any, q_all),
                 (self.data_EQ_SUM, q_sum))
    for _ in range(self.header.nstates):
        for state_list, titles in per_state:
            state_list.append(DataContainer(titles))

    # mapping of energy types (label in the output) with containers
    self.map_en_section = ODict([
        ("solute", self.data_E_solute),
        ("solvent", self.data_E_solvent),
        ("solute-solvent", self.data_E_solute_solvent),
        ("LRF", self.data_E_LRF),
        ("Q-atom", self.data_E_Q_atom),
        ("restraints", self.data_E_restraints),
        ("SUM", self.data_E_SUM),
    ])

    self.map_qen_section = ODict([
        ("Q-Q", self.data_EQ_Q),
        ("Q-prot", self.data_EQ_prot),
        ("Q-wat", self.data_EQ_wat),
        ("Q-surr.", self.data_EQ_surr),
        ("Q-any", self.data_EQ_any),
        ("Q-SUM", self.data_EQ_SUM),
    ])

    # parse the rest
    self._parse_dyn()

    # time of the last step, in the requested unit
    self.time_end = self.header.nsteps \
                    * self.header.stepsize \
                    * self._timeconv \
                    + self.time_begin
class QDynOutput(object):
    """Class for parsing Qdyn output and storing the data.

    Supports Qdyn versions 5.10 or higher.
    Typically used indirectly by wrapper QAnalyseDyns.

    Args:
        qdyn_output (string): Qdyn output filename
        time_unit (string): fs,ps,ns (optional, default is ps)
        step_size (float): use in case the output reads 0.000
        start_time (float): redefine the start time in given units\
                            in case of continuation simulation (default is 0)

    Examples:
        # Load a qdyn output
        >>> qdo = QDynOutput("qdyn.log")
        # get the Q-Q electrostatic energy of the first state vs time
        >>> rows = qdo.data_EQ_Q[0].get_rows(["Time", "El"])

    """
    # TODO: write examples above

    def __init__(self, qdyn_output, time_unit="ps", step_size=None,
                 start_time=0):
        self._qdyn_output = qdyn_output

        # conversion factors from femtoseconds to the requested unit
        _MAP_TIME = {"fs": 1.0, "ps": 1e-3, "ns": 1e-6}
        if time_unit.lower() not in _MAP_TIME:
            raise QDynOutputError("Timeunit has to be either 'fs',"
                                  "'ps' or 'ns'")
        self._timeconv = _MAP_TIME[time_unit.lower()]

        self._stepsize_user = step_size

        # parse the header
        self.time_begin = start_time
        self.time_unit = time_unit.lower()
        self._parse_header()

        ### Datacontainer variables for storing all the data

        # temperature
        self.data_temp = DataContainer(["Time", "T_tot", "T_free",
                                        "T_free_solute", "T_free_solvent"])

        # energies
        columns1 = ["Time", "El", "VdW", "Bond", "Angle", "Torsion",
                    "Improper"]
        columns2 = ["Time", "Total", "Fix", "Solvent_rad", "Solvent_pol",
                    "Shell", "Solute"]
        columns3 = ["Time", "Total", "Potential", "Kinetic"]

        self.data_E_solute = DataContainer(columns1)
        self.data_E_solvent = DataContainer(columns1)
        self.data_E_solute_solvent = DataContainer(["Time", "El", "VdW"])
        self.data_E_LRF = DataContainer(["Time", "El"])
        self.data_E_Q_atom = DataContainer(columns1)
        self.data_E_restraints = DataContainer(columns2)
        self.data_E_SUM = DataContainer(columns3)

        # Q energies, one container per EVB state in each list
        q_columns1 = ("Time", "Lambda", "El", "VdW")
        q_columns2 = ("Time", "Lambda", "El", "VdW", "Bond", "Angle",
                      "Torsion", "Improper")
        q_columns3 = ("Time", "Lambda", "Total", "Restraint")

        self.data_EQ_Q, self.data_EQ_prot = [], []
        self.data_EQ_wat, self.data_EQ_surr = [], []
        self.data_EQ_any, self.data_EQ_SUM = [], []
        for _ in range(self.header.nstates):
            self.data_EQ_Q.append(DataContainer(q_columns1))
            self.data_EQ_prot.append(DataContainer(q_columns1))
            self.data_EQ_wat.append(DataContainer(q_columns1))
            self.data_EQ_surr.append(DataContainer(q_columns1))
            self.data_EQ_any.append(DataContainer(q_columns2))
            self.data_EQ_SUM.append(DataContainer(q_columns3))

        # mapping of energy types (label in the output) with containers
        self.map_en_section = ODict([
            ("solute", self.data_E_solute),
            ("solvent", self.data_E_solvent),
            ("solute-solvent", self.data_E_solute_solvent),
            ("LRF", self.data_E_LRF),
            ("Q-atom", self.data_E_Q_atom),
            ("restraints", self.data_E_restraints),
            ("SUM", self.data_E_SUM),
        ])

        self.map_qen_section = ODict([
            ("Q-Q", self.data_EQ_Q),
            ("Q-prot", self.data_EQ_prot),
            ("Q-wat", self.data_EQ_wat),
            ("Q-surr.", self.data_EQ_surr),
            ("Q-any", self.data_EQ_any),
            ("Q-SUM", self.data_EQ_SUM),
        ])

        # parse the rest
        self._parse_dyn()
        self.time_end = self.header.nsteps \
                        * self.header.stepsize \
                        * self._timeconv \
                        + self.time_begin

    def _parse_header(self):
        """Parses the header of the Qdyn output (called by init)

        The header is everything up to and including the first line that
        contains "Initialising dynamics". Its length is stored so that
        the dynamics parser can continue right after it.

        Raises:
            QDynOutputError: if the output file cannot be read
        """
        header_lines = []
        try:
            with gzopen(self._qdyn_output) as qdo:
                for line in qdo:
                    header_lines.append(line)
                    if "Initialising dynamics" in line:
                        break
        except IOError as e:
            raise QDynOutputError("Could not read the Qdyn output: {}"
                                  "".format(e))

        # joined once instead of growing a string line by line
        header_string = "".join(header_lines)
        self.header = _QDynHeader(header_string,
                                  step_size=self._stepsize_user)
        self._header_length = len(header_string)

    def _parse_dyn(self):
        """Parses the dynamics part of the Qdyn output (called by init)

        Extracts all the temperatures, energies, Q energies and
        off-diagonals. Reading starts right after the header.
        Energies and off-diagonal distances of step 0 are skipped.

        Raises:
            QDynOutputError: if a second "Initialising dynamics" line is
                             found (concatenated outputs)
        """
        # tmp temperature vars
        t_free, t_tot = None, None
        temps_q6 = {"Total": [], "Free": [], "Solute": [],
                    "Solvent": [], "time": []}

        # tmp offdiagonal vars, one list of [time, distance] per atom pair
        tmp_offdiags = ODict()
        for atom1, atom2 in self.header.offdiagonals:
            tmp_offdiags["{}_{}".format(atom1, atom2)] = []

        # line of '=' characters that closes an energy section
        section_end = ("===================================================="
                       "======================")

        time = self.time_begin
        insection = False
        step = 0

        with gzopen(self._qdyn_output) as qdyn_output:
            qdyn_output.seek(self._header_length)
            for line in qdyn_output:
                lf = line.split()
                if not lf:
                    continue
                if "Initialising dynamics" in line:
                    raise QDynOutputError("Found more than one "
                                          "qdyn_output... "
                                          "Please don't concatenate...")

                # Temperature
                # NOTE(review): the version is compared with the string
                # "6"; if qdyn_version is a string this is a lexicographic
                # comparison - confirm it holds for all supported versions.
                if self.header.qdyn_version > "6":
                    if "temperature at step" in line:
                        # fix for large step numbers
                        lf = line.replace("step", "step ")
                        lf = lf.replace("System", "").split()
                        t_type, t, step = lf[0], float(lf[6]), int(lf[4])
                        temps_q6[t_type].append(t)
                        if t_type == "Total":
                            time = step * self.header.stepsize \
                                   * self._timeconv + self.time_begin
                            temps_q6["time"].append(time)
                else:
                    # second line with temps (pre Q6)
                    if t_free is not None:
                        try:
                            tf_solute = float(lf[1])
                        except (IndexError, ValueError):
                            # gas phase
                            tf_solute = 0
                        try:
                            tf_solvent = float(lf[3])
                        except (IndexError, ValueError):
                            # gas phase
                            tf_solvent = 0
                        self.data_temp.add_row((time, t_tot, t_free,
                                                tf_solute, tf_solvent))
                        t_free, t_tot = None, None
                    # first line with temps (pre Q6)
                    elif "Temperature at step" in line:
                        # fix for large step numbers
                        lf = line.replace("step", "step ").split()
                        step = int(lf[3].strip(":"))
                        time = step * self.header.stepsize \
                               * self._timeconv + self.time_begin
                        t_tot, t_free = float(lf[5]), float(lf[7])

                # start/end of the energy sections
                if "Energy summary at step" in line or \
                        "Q-atom energies at step" in line:
                    insection = True
                    step = int(lf[5])
                    time = step * self.header.stepsize \
                           * self._timeconv + self.time_begin
                elif "FINAL Energy summary" in line or \
                        "FINAL Q-atom energies" in line:
                    insection = True
                    time = self.header.nsteps * self.header.stepsize \
                           * self._timeconv + self.time_begin
                elif section_end in line:
                    insection = False

                # skip the 0th step
                if step == 0:
                    continue

                if insection:
                    key = lf[0]
                    if key in self.map_en_section:
                        row = [time,] + [float(x) for x in lf[1:]]
                        self.map_en_section[key].add_row(row)
                        continue
                    if key in self.map_qen_section:
                        evb_index = int(lf[1]) - 1
                        row = [time,] + [float(x) for x in lf[2:]]
                        self.map_qen_section[key][evb_index].add_row(row)
                        continue

                # off-diagonal distance lines are recognised both inside
                # and outside of an energy section
                if "dist. between" in line:
                    atom1, atom2, dist = lf[8], lf[9], float(lf[11])
                    k = "{}_{}".format(atom1, atom2)
                    tmp_offdiags[k].append([time, dist])

        # join temperatures to one DataContainer (Q6+)
        for i, time in enumerate(temps_q6["time"]):
            try:
                t_solv = temps_q6["Solvent"][i]
            except IndexError:
                # gas phase
                t_solv = 0
            self.data_temp.add_row((time, temps_q6["Total"][i],
                                    temps_q6["Free"][i],
                                    temps_q6["Solute"][i], t_solv))

        # join Offdiagonal distances to single DataContainer
        offd_keys = list(tmp_offdiags.keys())
        self.data_offdiags = DataContainer(["Time",] + offd_keys)
        # without off-diagonals in the header the container stays empty
        if offd_keys:
            for i, (time, _) in enumerate(tmp_offdiags[offd_keys[0]]):
                row = [time,] + [tmp_offdiags[k][i][1] for k in offd_keys]
                self.data_offdiags.add_row(row)
class QGroupContrib(object):
    """Calculate LRA group contributions from EVB trajectories.

    calcall() starts one _QGroupContribThread per calculation directory,
    parses the qcalc outputs and fills:

        gcs        - ODict, directory -> DataContainer with per-residue
                     <E2-E1>, LRA and REORG contributions
        failed     - ODict, directory -> error message
        gcs_stats  - DataContainer with per-residue means and standard
                     deviations over all successful directories

    Args:
        qcalc_exec (string): path of qcalc executable
        calcdirs (list of strings): list of directories
        pdb_file (string): PDB created with qprep
        en_list_fn (string): energy-files-list filename
        lambdas_A (tuple of floats): lambdas of state A (1.0, 0.0)
        lambdas_B (tuple of floats): lambdas of state B (0.5, 0.5)
        resid_first (int): index of first residue used for calcs
        resid_last (int): index of last residue used for calcs
        scale_ionized (float): scale down ionized residues (ARG, LYS,
                               HIP, GLU, ASP) by this factor
                               (see doi:10.1021/jp962478o)
        nthreads (int): number of threads
        qmask (list of ints): list of atom indexes to be used as the
                              Q mask for the GC calculations. By default,
                              this is obtained from the FEP file.

    Raises:
        QGroupContribError: if the PDB file cannot be parsed.
    """

    def __init__(self, qcalc_exec, calcdirs, pdb_file, en_list_fn,
                 lambdas_A, lambdas_B, resid_first, resid_last,
                 scale_ionized, nthreads, qmask=None):
        self._en_list_fn = en_list_fn
        self._qcalc_exec = qcalc_exec
        try:
            self._pdb_qstruct = QStruct(pdb_file, "pdb")
        except QStructError as error_msg:
            raise QGroupContribError("Can't parse PDB file '{}': {}"
                                     "".format(pdb_file, error_msg))

        self._calcdirs = [os.path.relpath(cd) for cd in calcdirs]
        self._nthreads = nthreads
        self._lambdas_A = lambdas_A
        self._lambdas_B = lambdas_B
        self._resid_first = resid_first
        self._resid_last = resid_last
        self._scale_ionized = scale_ionized
        self._qmask = qmask

        self._qcalc_io = ODict()
        self.gcs = ODict()
        self.failed = ODict()
        self.qcalc_version = None
        self.kill_event = threading.Event()

        # Column layout of gcs_stats (plotdata and get_pdbgc index into it):
        #   0 resid, 1 resname, 2 N,
        #   3/4   <E2-E1>1 VdW mean/stdev,   5/6   <E2-E1>1 El mean/stdev,
        #   7/8   <E2-E1>2 VdW mean/stdev,   9/10  <E2-E1>2 El mean/stdev,
        #   11/12 LRA VdW mean/stdev,        13/14 LRA El mean/stdev,
        #   15/16 REORG VdW mean/stdev,      17/18 REORG El mean/stdev
        lrange = "(l={:5.4f}->l={:5.4f})".format(lambdas_A[0], lambdas_B[0])
        iscale = "_(iscale={})".format(self._scale_ionized)
        labels = [
            "<E2-E1>1_VdW" + lrange,
            "<E2-E1>1_El" + lrange,
            "<E2-E1>2_VdW" + lrange,
            "<E2-E1>2_El" + lrange,
            "LRA_VdW" + lrange,
            "LRA_El" + lrange + iscale,
            "REORG_VdW" + lrange,
            "REORG_El" + lrange + iscale,
        ]
        colnames = ["Residue id", "Residue name", "N"]
        for label in labels:
            colnames.extend([label + "_mean", label + "_stdev"])
        self.gcs_stats = DataContainer(colnames)

    def calcall(self):
        """Run the GC calcs, update .gcs, .failed and .gcs_stats.

        Returns early (without parsing anything) if kill_event gets set
        while waiting for the worker threads.
        """
        semaphore = threading.BoundedSemaphore(self._nthreads)

        self._qcalc_io.clear()
        self.gcs.clear()
        self.gcs_stats.delete_rows()
        self.failed.clear()

        threads = []
        for calcdir in self._calcdirs:
            threads.append(_QGroupContribThread(self, semaphore, calcdir))
            threads[-1].start()

        for t in threads:
            # is_alive() exists on both Python 2.6+ and 3; isAlive() was
            # removed in Python 3.9
            while t.is_alive():
                t.join(1.0)
                if self.kill_event.is_set():
                    # best-effort: the process may not exist (yet/anymore)
                    try:
                        t.qcalc.process.terminate()
                    except Exception:
                        pass
                    return

            if t.error:
                self.failed[t.calcdir] = t.error
            else:
                self._qcalc_io[t.calcdir] = (t.qinps, t.qouts)

        lrange = "(l={:5.4f}->l={:5.4f})".format(self._lambdas_A[0],
                                                 self._lambdas_B[0])
        iscale = "_(iscale={})".format(self._scale_ionized)

        # parse the output for results and
        # calculate LRAs for each dir
        for _dir, (_, qouts) in self._qcalc_io.items():
            gcs = []
            failed_flag = False
            for qout in qouts:
                try:
                    qco = QCalcOutput(qout)
                    res = qco.results["gc"]
                    if not self.qcalc_version:
                        self.qcalc_version = qco.qcalc_version
                except (QCalcError, KeyError) as error_msg:
                    self.failed[_dir] = error_msg
                    failed_flag = True
                    break
                gc = {}
                for row in res.get_rows():
                    resid, vdw, el = int(row[0]), float(row[1]), float(row[2])
                    gc[resid] = {"vdw": vdw, "el": el}
                gcs.append(gc)

            if failed_flag:
                continue

            resids = sorted(gcs[0].keys())
            resnames = [self._pdb_qstruct.residues[ri - 1].name
                        for ri in resids]

            # gcs holds, in order: E1_conf1, E2_conf1, E1_conf2, E2_conf2
            # LRA   = 0.5*(<E2-E1>_conf1 + <E2-E1>_conf2)
            # REORG = 0.5*(<E2-E1>_conf1 - <E2-E1>_conf2)
            e2e1_st1_vdw = [gcs[1][key]["vdw"] - gcs[0][key]["vdw"]
                            for key in resids]
            e2e1_st1_el = [gcs[1][key]["el"] - gcs[0][key]["el"]
                           for key in resids]
            e2e1_st2_vdw = [gcs[3][key]["vdw"] - gcs[2][key]["vdw"]
                            for key in resids]
            e2e1_st2_el = [gcs[3][key]["el"] - gcs[2][key]["el"]
                           for key in resids]

            vdw_lra = [0.5 * (a + b)
                       for a, b in zip(e2e1_st1_vdw, e2e1_st2_vdw)]
            el_lra = [0.5 * (a + b)
                      for a, b in zip(e2e1_st1_el, e2e1_st2_el)]
            vdw_reorg = [0.5 * (a - b)
                         for a, b in zip(e2e1_st1_vdw, e2e1_st2_vdw)]
            el_reorg = [0.5 * (a - b)
                        for a, b in zip(e2e1_st1_el, e2e1_st2_el)]

            # scale the ionized residues
            if abs(self._scale_ionized - 1.0) > 1e-7:
                for i, resname in enumerate(resnames):
                    if resname in ("ARG", "LYS", "HIP", "ASP", "GLU"):
                        e2e1_st1_el[i] = e2e1_st1_el[i] / self._scale_ionized
                        e2e1_st2_el[i] = e2e1_st2_el[i] / self._scale_ionized
                        el_lra[i] = el_lra[i] / self._scale_ionized
                        el_reorg[i] = el_reorg[i] / self._scale_ionized

            # write the DataContainer
            gc_lra = DataContainer([
                "Residue_id",
                "Residue name",
                "<E2-E1>1_VdW" + lrange,
                "<E2-E1>1_El" + lrange + iscale,
                "<E2-E1>2_VdW" + lrange,
                "<E2-E1>2_El" + lrange + iscale,
                "LRA_VdW" + lrange,
                "LRA_El" + lrange + iscale,
                "REORG_VdW" + lrange,
                "REORG_El" + lrange + iscale,
            ])
            for row in zip(resids, resnames, e2e1_st1_vdw, e2e1_st1_el,
                           e2e1_st2_vdw, e2e1_st2_el, vdw_lra, el_lra,
                           vdw_reorg, el_reorg):
                gc_lra.add_row(row)

            self.gcs[_dir] = gc_lra

        # get GC stats over all directories:
        # "resid.resname" -> one list of values per energy column
        self.gcs_stats.delete_rows()
        per_residue = {}
        for gc in self.gcs.values():
            for row in gc.get_rows():
                resid, resname = row[0:2]
                res_key = "{}.{}".format(resid, resname)
                collected = per_residue.setdefault(res_key,
                                                   [[] for _ in row[2:]])
                for values, val in zip(collected, row[2:]):
                    values.append(val)

        # iterate through each residue (sorted by residue index) and
        # calculate means and stdevs of all eight energy columns
        for res_key in sorted(per_residue,
                              key=lambda x: int(x.split(".")[0])):
            rc = per_residue[res_key]
            resid, resname = res_key.split(".")
            rc_stats = [int(resid), resname, len(rc[0])]
            for values in rc[:8]:
                rc_stats.extend([np.mean(values), np.std(values)])
            self.gcs_stats.add_row(rc_stats)

    def _calcsingle(self, calcdir, qcalc):
        """Run the four qcalc calculations for one directory.

        Finds the two Qdyn inputs whose state-1 lambda matches lambdas_A[0]
        and lambdas_B[0] (and whose energy file is in the energy-files
        list), then runs qcalc for each combination of trajectory and
        potential, e.g.:

            fep_000_1.000.dcd, "1.00 0.00"
            fep_000_1.000.dcd, "0.50 0.50"
            fep_025_0.500.dcd, "1.00 0.00"
            fep_025_0.500.dcd, "0.50 0.50"

        Args:
            calcdir (string): calculation directory
            qcalc: object whose run(input_string, workdir=...) returns the
                   qcalc output string

        Returns:
            ( [inp1, inp2, inp3, inp4], [out1, out2, out3, out4] )

        Raises:
            QGroupContribError: on any failure
        """
        # get the list of energy-files
        en_list_fn = os.path.join(calcdir, self._en_list_fn)
        try:
            with open(en_list_fn, 'r') as en_list_file:
                en_list_fn_str = en_list_file.read()
        except IOError:
            raise QGroupContribError("No energy-files list '{}'."
                                     "".format(self._en_list_fn))

        en_list = [enf for enf in en_list_fn_str.split("\n")
                   if enf.strip() != ""]
        if not en_list:
            raise QGroupContribError("No energy files in '{}'."
                                     "".format(self._en_list_fn))

        # parse all input files in calcdir for
        # a valid energy file and lambda values
        inp_fns = [inp for inp in os.listdir(calcdir) if inp.endswith(".inp")]
        lambda_inp_map = {}
        for inp in inp_fns:
            inp_file = os.path.join(calcdir, inp)
            try:
                with open(inp_file, "r") as inp_handle:
                    qdi = QDynInput(input_string=inp_handle.read())
            except (IOError, QDynInputError) as error_msg:
                logger.debug("Error reading Q input '{}': {}"
                             "".format(inp, error_msg))
                continue

            try:
                lambda_st1 = float(qdi.parameters["lambdas"].split()[0])
                en_file = qdi.parameters["files"]["energy"]
            except KeyError:
                logger.debug("Input '{}' missing lambda or energy file"
                             "".format(inp))
                continue

            if en_file not in en_list:
                continue

            lambda_key = "{:.6f}".format(lambda_st1)
            if lambda_key in lambda_inp_map:
                inp2 = lambda_inp_map[lambda_key][0]
                raise QGroupContribError("Same lambda values in Qdyn "
                                         "inputs: '{}', '{}' ??"
                                         "".format(inp, inp2))
            lambda_inp_map[lambda_key] = (inp, qdi)

        # get inputs that match specified state1 lambda values
        inputs = []
        for lamb_st1 in (self._lambdas_A[0], self._lambdas_B[0]):
            lamb_key = "{:.6f}".format(lamb_st1)
            try:
                inputs.append(lambda_inp_map[lamb_key])
            except KeyError:
                raise QGroupContribError("QDyn input with lambda=='{}' "
                                         "(and energy file in '{}') not found."
                                         "".format(lamb_st1, en_list_fn))

        # get topology, fep and trajectory filenames from the inputs
        top_fn, fep_fn, dcd_fns = None, None, []
        for inp, qdi in inputs:
            try:
                tmp_top_fn = qdi.parameters["files"]["topology"]
            except KeyError:
                raise QGroupContribError("Topology not found in Qdyn "
                                         "input '{}'.".format(inp))
            if top_fn and top_fn != tmp_top_fn:
                raise QGroupContribError("Qdyn inputs with different "
                                         "topologies: '{}', '{}' ??"
                                         "".format(top_fn, tmp_top_fn))
            try:
                tmp_fep_fn = qdi.parameters["files"]["fep"]
            except KeyError:
                raise QGroupContribError("Fep file not found in Qdyn "
                                         "input '{}'.".format(inp))
            if fep_fn and fep_fn != tmp_fep_fn:
                raise QGroupContribError("Qdyn inputs with different "
                                         "fep files: '{}', '{}' ??"
                                         "".format(fep_fn, tmp_fep_fn))
            try:
                tmp_dcd_fn = qdi.parameters["files"]["trajectory"]
            except KeyError:
                raise QGroupContribError("Trajectory file not found in Qdyn "
                                         "input '{}'.".format(inp))

            top_fn = tmp_top_fn
            fep_fn = tmp_fep_fn
            dcd_fns.append(tmp_dcd_fn)

        # check if files are missing
        for fn in [top_fn, fep_fn] + dcd_fns:
            if not os.path.lexists(os.path.join(calcdir, fn)):
                raise QGroupContribError("Missing file: {}".format(fn))

        if not self._qmask:
            # parse fep for q atom numbers (second field of every line
            # in the [atoms] section)
            q_atoms = []
            section = ""
            with open(os.path.join(calcdir, fep_fn), "r") as fep:
                for line in fep:
                    line = line.split("#")[0].split("!")[0].strip()
                    if line == "":
                        continue
                    elif line[0] == "[":
                        section = line
                    elif section == "[atoms]":
                        q_atoms.append(line.split()[1])
        else:
            q_atoms = self._qmask

        masks = ["{} {}".format(ai, ai) for ai in q_atoms]

        # make qcalc inputs for every combination of
        # configuration (dcd) and potential (lambda),
        # run them and return the inputs and outputs
        combs = ((dcd_fns[0], self._lambdas_A),   # E1_conf1
                 (dcd_fns[0], self._lambdas_B),   # E2_conf1
                 (dcd_fns[1], self._lambdas_A),   # E1_conf2
                 (dcd_fns[1], self._lambdas_B))   # E2_conf2

        input_strings = []
        output_strings = []
        for dcdfile, lambdas in combs:
            qci = QCalcInput(top_fn, [dcdfile, ], fep_fn, lambdas)
            qci.add_residue_nb_mon(self._resid_first, self._resid_last, masks)
            qcalc_inp_str = qci.get_string()
            try:
                qcalc_out_str = qcalc.run(qcalc_inp_str, workdir=calcdir)
            except QCalcError as error_msg:
                raise QGroupContribError(error_msg)

            input_strings.append(qcalc_inp_str)
            output_strings.append(qcalc_out_str)

        return (input_strings, output_strings)

    @property
    def details(self):
        """Return a human-readable summary (string) of the GC calculation.

        Contains the versions, paths, date, command line, the list of
        calculation directories and the failures (or "None").
        """
        fails = "\n".join(["{}: {}".format(cd, e)
                           for cd, e in self.failed.items()])
        calcdirs = ", ".join(self._calcdirs)
        outstr = """
---------------------------------- GC details ---------------------------------
# Calculated with: Qtools ({version}), Qcalc ({qcalc_version})
# Qcalc path: {qcalc_exec}
# Work dir: {cwd}
# Date: {date}
# CMDline: {cmdline}

Directories:
{dirs}

Fails:
{fails}
-------------------------------------------------------------------------------
""".format(version=__version__, cwd=os.getcwd(), date=time.ctime(),
           cmdline=" ".join(sys.argv), qcalc_version=self.qcalc_version,
           fails=fails or "None", dirs=calcdirs,
           qcalc_exec=os.path.abspath(self._qcalc_exec))
        return outstr

    @property
    def plotdata(self):
        """Return GC data as a dictionary of PlotData objects.

        Keys in the returned (ordered) dictionary; every PlotData has one
        subplot with the means (error bars are standard deviations):

        'gc_lra_el_top':    top 20 (by absolute mean) electrostatic LRA
                            GCs vs "Resname_resid"
        'gc_reorg_el_top':  top 20 (by absolute mean) electrostatic REORG
                            GCs vs "Resname_resid"
        'gc_lra_el', 'gc_lra_vdw':      LRA GCs vs residue index
        'gc_reorg_el', 'gc_reorg_vdw':  REORG GCs vs residue index
        'gc_de1_el', 'gc_de1_vdw':      <E2-E1>_1 vs residue index
        'gc_de2_el', 'gc_de2_vdw':      <E2-E1>_2 vs residue index

        An empty dictionary is returned if there are no results.
        """
        plots = ODict()

        # all failed
        if not self.gcs:
            return plots

        lamb1, lamb2 = self._lambdas_A[0], self._lambdas_B[0]
        el_args = (lamb1, lamb2, self._scale_ionized)
        vdw_args = (lamb1, lamb2)
        top_labels = ("Residue", "Free energy [kcal/mol]")
        all_labels = ("Residue index", "Energy [kcal/mol]")

        # (key, title template, title arguments, (xlabel, ylabel))
        plot_defs = (
            ("gc_lra_el_top",
             "Top LRA GC (El, {}->{}, iscale={}), top 20",
             el_args, top_labels),
            ("gc_reorg_el_top",
             "Top REORG GC (El, {}->{}, iscale={}), top 20",
             el_args, top_labels),
            ("gc_lra_el", "LRA GC (El, {}->{}, iscale={})",
             el_args, all_labels),
            ("gc_lra_vdw", "LRA GC (VdW, {}->{})", vdw_args, all_labels),
            ("gc_reorg_el", "REORG GC (El, {}->{}, iscale={})",
             el_args, all_labels),
            ("gc_reorg_vdw", "REORG GC (VdW, {}->{})",
             vdw_args, all_labels),
            ("gc_de1_el", "<E1-E2>_1 (El, {}->{})", vdw_args, all_labels),
            ("gc_de1_vdw", "<E1-E2>_1 (VdW, {}->{})", vdw_args, all_labels),
            ("gc_de2_el", "<E1-E2>_2 (El, {}->{})", vdw_args, all_labels),
            ("gc_de2_vdw", "<E1-E2>_2 (VdW, {}->{})", vdw_args, all_labels),
        )
        for key, title_fmt, title_args, (xlabel, ylabel) in plot_defs:
            plots[key] = PlotData(title_fmt.format(*title_args),
                                  xlabel=xlabel, ylabel=ylabel,
                                  plot_type="bar")

        cols = self.gcs_stats.get_columns()
        resids = cols[0]
        title = "mean_N={}".format(len(self.gcs))

        # (plot key, index of the 'mean' column); stdev is the next column
        mean_columns = (("gc_de1_vdw", 3), ("gc_de1_el", 5),
                        ("gc_de2_vdw", 7), ("gc_de2_el", 9),
                        ("gc_lra_vdw", 11), ("gc_lra_el", 13),
                        ("gc_reorg_vdw", 15), ("gc_reorg_el", 17))
        for key, ci in mean_columns:
            plots[key].add_subplot(title, resids, cols[ci],
                                   yerror=cols[ci + 1])

        # top 20 LRA el and REORG el: rank by the absolute value of the
        # same column that gets plotted
        for key, ci in (("gc_lra_el_top", 13), ("gc_reorg_el_top", 17)):
            top_rows = sorted(self.gcs_stats.get_rows(),
                              key=lambda row: -abs(row[ci]))[:20]
            # list() is needed on Python 3, where zip() is not indexable
            top_cols = list(zip(*top_rows))
            top_resids, top_resnames = top_cols[0], top_cols[1]
            keys = ["{}_{}".format(rn.capitalize(), ri)
                    for ri, rn in zip(top_resids, top_resnames)]
            plots[key].add_subplot(title, keys, top_cols[ci],
                                   yerror=top_cols[ci + 1])

        return plots

    def get_pdbgc(self):
        """Return the structure in PDB format (string) with added GC values.

        Fill the Occupancy fields with LRA contributions and Temperature
        factor fields with REORG contributions (electrostatic means, i.e.
        columns 13 and 17 of gcs_stats). Residues without a GC value get
        zeros. A "GAP" line is added after every molecule.
        """
        try:
            resids, lras, reorgs = self.gcs_stats.get_columns([0, 13, 17])
        except IndexError:
            resids, lras, reorgs = [], [], []

        # resid -> (LRA, REORG); setdefault keeps the first occurrence
        gc_by_resid = {}
        for resid, lra, reorg in zip(resids, lras, reorgs):
            gc_by_resid.setdefault(resid, (lra, reorg))

        # NOTE(review): the field spacing of the ATOM record below should be
        # checked against the fixed-column PDB layout expected downstream.
        atom_fmt = "ATOM {:>5d} {:<4s} {:3s} {:>4d} "\
                   "{:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f}"

        pdb = []
        for mol in self._pdb_qstruct.molecules:
            for res in mol.residues:
                lra_gc, reorg_gc = gc_by_resid.get(res.index, (0, 0))
                for atom in res.atoms:
                    x, y, z = atom.coordinates
                    pdb.append(atom_fmt.format(atom.index, atom.name,
                                               atom.residue.name,
                                               atom.residue.index,
                                               x, y, z, lra_gc, reorg_gc))
            pdb.append("GAP")

        return "\n".join(pdb)
class _QFepPart3(object):
    """Class for parsing and storing data from Part3 in Qfep output.

    Part3 contains the bin-averaged dGg values, points and
    squared eigenvectors from Part2.
    If parsing is unsuccessful QFepOutputError is raised,
    else all the data is stored in DataContainer object 'data'.

    The activation and reaction free energies (properties 'dga' and 'dg0')
    and the bins of the extrema ('minima_bins', 'maxima_bins') are
    calculated lazily on first access. That calculation also shifts the
    'dGg_norm' column in 'data' so that the reactants minimum is at zero,
    and may set 'warning' if the profile is rough.

    Args:
        part3_string (string): string of Part3 in qfep output

    Usage:

    >>> cols = ["Lambda", "dGg"]
    >>> dGg_lambda = _QFepPart3.data.get_rows(columns=cols)
    """

    _PART3_HEADER = "# bin energy gap <dGg> <dGg norm> pts <c1**2> "\
                    "<c2**2> <r_xy>"
    _COLUMN_TITLES = ["bin", "Egap", "dGg", "dGg_norm",
                      "points", "c1**2", "c2**2", "r_xy"]

    def __init__(self, part3_string):
        self._part3_string = part3_string
        self.data = DataContainer(self._COLUMN_TITLES)
        self._dga = None
        self._dg0 = None
        self._maxima_bins = None
        self._minima_bins = None
        self.warning = None

        self._parse()
        if not self.data.get_rows():
            raise QFepOutputError("Part3 is empty (no rows).")

    def _parse(self):
        """Fill 'data' with the rows of floats found in the Part3 string.

        Raises:
            QFepOutputError: if the column header is not the expected one
        """
        lines = self._part3_string.split('\n')
        # the first line is a comment
        lines.pop(0)
        # comment with column names
        header = lines.pop(0).strip()
        if header != self._PART3_HEADER:
            raise QFepOutputError("Part3 has a wrong header, did the qfep "
                                  "binary change?")
        for line in lines:
            # drop trailing comments ('#' or '!')
            line = re.split(r"#|!", line)[0].strip()
            if not line:
                continue
            row = [float(x) for x in line.split()]
            self.data.add_row(row)

    @property
    def dga(self):
        """Activation free energy: dGg_norm(maximum) - dGg_norm(reactants)."""
        if self._dga is None:
            self._get_dgs()
        return self._dga

    @property
    def dg0(self):
        """Reaction free energy: dGg_norm(products) - dGg_norm(reactants)."""
        if self._dg0 is None:
            self._get_dgs()
        return self._dg0

    @property
    def minima_bins(self):
        """Bins of the reactants and products minima (list of two)."""
        if self._minima_bins is None:
            self._get_dgs()
        return self._minima_bins

    @property
    def maxima_bins(self):
        """Bin of the maximum (list of one)."""
        if self._maxima_bins is None:
            self._get_dgs()
        return self._maxima_bins

    def _get_dgs(self):
        """Locate the extrema of dGg_norm and calculate dG* and dG0.

        Minima and maxima are located without any smoothing. Maxima are
        searched for only between 0.2*nbins and 0.8*nbins (bad sampling on
        the edges would otherwise raise an error).

        If more than two minima or more than one maximum are found, the
        highest maximum and the lowest minimum on each side of it are used
        and 'warning' is set.

        Raises:
            QFepOutputError: if the profile does not have exactly two
                             minima and one maximum after this treatment
        """
        bins, dgs = self.data.get_columns(["bin", "dGg_norm"])
        minima, maxima = [], []
        nbins = len(bins)
        for i in range(1, nbins - 1):   # from the second to the second last
            dg, dgnext, dgprev = dgs[i], dgs[i + 1], dgs[i - 1]
            if dgprev >= dg and dg < dgnext:
                minima.append(i)
            elif dgprev <= dg and dg > dgnext and \
                    nbins * 0.2 < i < nbins * 0.8:
                maxima.append(i)

        if len(minima) > 2 or len(maxima) > 1:
            # Bad sampling, more minima and maxima than wanted.
            if not maxima:
                # several minima but no maximum inside the search window;
                # max() below would otherwise fail with a bare ValueError
                raise QFepOutputError("Bad reaction free energy profile - {} "
                                      "local maxima (instead of 1)"
                                      "".format(len(maxima)))
            # Get the highest maxima from those found so far.
            # Get the absolute minima to the left and to the right of this
            # maxima. Save the warning.
            max1 = max(maxima, key=lambda i: dgs[i])
            react = [(dgs[i], i) for i in minima if i < max1]
            prod = [(dgs[i], i) for i in minima if i > max1]
            try:
                min1 = min(react)[1]   # min() returns tuple with lowest dg
                min2 = min(prod)[1]
            except ValueError:
                # multiple minima on one side, none on the other
                # (starts/ends at the lowest point)
                raise QFepOutputError("Bad reaction free energy profile - "
                                      "reactants minima: {}, products minima: "
                                      "{}".format(len(react), len(prod)))

            self.warning = "Rough Free energy profile ({} minima and {} "\
                           "maxima found), look at the graphs!"\
                           "".format(len(minima), len(maxima))
            maxima = [max1, ]
            minima = [min1, min2]

        if len(minima) != 2:
            raise QFepOutputError("Bad reaction free energy profile - {} "
                                  "local minima (instead of 2)"
                                  "".format(len(minima)))
        elif len(maxima) != 1:
            raise QFepOutputError("Bad reaction free energy profile - {} "
                                  "local maxima (instead of 1)"
                                  "".format(len(maxima)))

        self._dga = dgs[maxima[0]] - dgs[minima[0]]
        self._dg0 = dgs[minima[1]] - dgs[minima[0]]
        self._minima_bins = [bins[mini] for mini in minima]
        self._maxima_bins = [bins[maxi] for maxi in maxima]

        # adjust the values in data so that the reactants are zero
        colindex = self.data.column_titles.index("dGg_norm")
        for row in self.data.get_rows():
            row[colindex] = row[colindex] - dgs[minima[0]]
def calc_lra(self, lambda_a, lambda_b):
    """Calculate LRA and reorganization energies between two states.

    LRA = 0.5*(<E2-E1>_10+<E2-E1>_01)
    REO = 0.5*(<E2-E1>_10-<E2-E1>_01)

    E1 == Potential energy of state A
    E2 == Potential energy of state B
    <>_10 == Configuration space A (lambda_a)
    <>_01 == Configuration space B (lambda_b)
    E2_10 == Potential energy of state B at lambda_a

    Args:
        lambda_a (float): lambda value of first state, usually 1.0
        lambda_b (float): lambda value of second state, usually 0.0

    Returns:
        lra (DataContainer): LRA and reorganization energies,
                             as well as contributions from
                             individual states; one row per energy type

    Raises:
        QFepOutputError: if the number of EVB states is not two, or if
                         there are no energies (of either EVB state) for
                         one of the lambda values
    """
    if self._num_evb_states != 2:
        raise QFepOutputError("LRA works only with two states")

    lra = DataContainer(["E_type", "(E2-E1)_10", "(E2-E1)_01",
                         "LRA", "REORG"])

    e1_a, e1_b, e2_a, e2_b = None, None, None, None
    # get the appropriate rows of energies
    # note that these energies are not scaled by lambda
    # [4:] ignores 'file', 'state', 'points' and 'lambda'
    for row in self.data_state[0].get_rows():
        if abs(row[3] - lambda_a) < 1e-7:
            e1_a = row[4:]
        if abs(row[3] - lambda_b) < 1e-7:
            e1_b = row[4:]

    # lambda2 in data_state[1] is actually (1-lambda), correct for that
    for row in self.data_state[1].get_rows():
        if abs((1 - row[3]) - lambda_a) < 1e-7:
            e2_a = row[4:]
        if abs((1 - row[3]) - lambda_b) < 1e-7:
            e2_b = row[4:]

    # both EVB states are needed at both lambdas; checking only the first
    # state would let a missing second-state row fail later inside zip()
    if not e1_a or not e2_a:
        raise QFepOutputError("LRA: No energy values for lambda == '{}'"
                              "".format(lambda_a))
    if not e1_b or not e2_b:
        raise QFepOutputError("LRA: No energy values for lambda == '{}'"
                              "".format(lambda_b))

    la, lb = lambda_a, lambda_b
    # calculate total E=(l1*E1 + l2*E2) energies:
    # e1_* use the potential of lambda_a, e2_* the potential of lambda_b;
    # *_state1 are from configurations A, *_state2 from configurations B
    e1_state1 = [la*e1a + (1-la)*e2a for e1a, e2a in zip(e1_a, e2_a)]
    e1_state2 = [la*e1b + (1-la)*e2b for e1b, e2b in zip(e1_b, e2_b)]
    e2_state1 = [lb*e1a + (1-lb)*e2a for e1a, e2a in zip(e1_a, e2_a)]
    e2_state2 = [lb*e1b + (1-lb)*e2b for e1b, e2b in zip(e1_b, e2_b)]

    # (E2-E1)_10 (reactant state)
    # (E2-E1)_01 (products state)
    des_st1 = [e2 - e1 for e1, e2 in zip(e1_state1, e2_state1)]
    des_st2 = [e2 - e1 for e1, e2 in zip(e1_state2, e2_state2)]

    # LRA=0.5*(<E2-E1>_10+<E2-E1>_01)
    # REO=0.5*(<E2-E1>_10-<E2-E1>_01)
    es_lra = [0.5 * (de_st1 + de_st2)
              for de_st1, de_st2 in zip(des_st1, des_st2)]
    es_reo = [0.5 * (de_st1 - de_st2)
              for de_st1, de_st2 in zip(des_st1, des_st2)]

    e_types = self.data_state[0].column_titles[4:]

    for row in zip(e_types, des_st1, des_st2, es_lra, es_reo):
        lra.add_row(row)

    return lra
def _parse(self):
    """Parse the qcalc output string into self.results.

    Sets self.qcalc_version and fills self.results with one DataContainer
    per calculation ("Frame" plus the calculated quantity), keyed by the
    first field of the calculation's line in the list of calculations.
    If average residue nonbond energies are present, they are stored in
    self.results["gc"] (columns "Residue", "E_LJ", "E_EL").

    Raises:
        QCalcError: if the output contains errors or cannot be parsed
    """
    # find the version
    try:
        self.qcalc_version = self._VERSION_RE.findall(self.qcalc_output)[0]
    except IndexError:
        self.qcalc_version = "Unknown, likely ancient"

    # look for errors
    err = "\n".join(re.findall("ERROR.*", self.qcalc_output))
    if err:
        raise QCalcError("Errors in qcalc output: {}".format(err))

    # parse the list of calculations
    calc_list = self._CALCLIST_RE.findall(self.qcalc_output)
    if not calc_list:
        raise QCalcError("Failed to parse qcalc output")

    # (text identifying the calculation, name of the result column);
    # checked in this order, the first match wins. None means that no
    # per-frame container is created (residue nonbond energies are
    # parsed separately below).
    # TODO: extract the energies as well (bond/qbond, angle, torsion)
    calc_types = (
        ("Root Mean Square Deviation", "RMSD"),
        ("distance between", "distance"),
        ("distance, bond energy between", "distance"),
        ("distance, qbond energy between", "distance"),
        ("angle between", "angle"),
        ("angle, angle energy between", "angle"),
        ("angle, qangle energy between", "angle"),
        ("torsion between", "torsion"),
        ("torsion, torsion energy between", "torsion"),
        ("torsion, qtorsion energy between", "torsion"),
        ("nonbond monitor for residues", None),
    )
    for line in calc_list[0].split("\n"):
        lf = line.split()
        if not lf:
            # blank line, skipped like in the result loops below
            continue
        calc_i = lf[0]
        for marker, column in calc_types:
            if marker in line:
                if column is not None:
                    self.results[calc_i] = DataContainer(["Frame", column])
                break
        else:
            logger.warning("Ignoring unknown QCalc5 results: {}"
                           "".format(line))

    # parse the normal results (distances, rmsds)
    res_list = self._RES_RE.findall(self.qcalc_output)
    if not res_list:
        raise QCalcError("Failed to parse qcalc output")
    # skip first row (--- Calculation results ---)
    res_list = res_list[0].split("\n")[1:]

    colheaders = res_list.pop(0)
    coltitles = []
    # join "N: type" into a single "N:type" token
    colheaders = colheaders.replace(": ", ":")
    for colheader in colheaders.split():
        if ":" in colheader:
            colheader, calctype = colheader.split(":")
            if not calctype:
                continue   # residue nonbond calc
        coltitles.append(colheader)

    if coltitles and res_list:
        tmpdata = DataContainer(coltitles)
        for line in res_list:
            lf = line.split()
            if not lf:
                continue
            tmpdata.add_row(lf)

        # copy each calculation's column into its own container,
        # using the row number as the frame index
        for k, datac in self.results.items():
            values = tmpdata.get_columns(columns=[k, ])[0]
            for i, value in enumerate(values):
                datac.add_row((i, float(value)))

    # parse the average residue nonbond energies (if they exist)
    res_resnb = self._RESNB_RE.findall(self.qcalc_output)
    if res_resnb:
        self.results["gc"] = DataContainer(["Residue", "E_LJ", "E_EL"])
        # skip two lines
        # TODO: extract qatoms indexes?
        res_resnb = res_resnb[0].split("\n")[2:]
        for line in res_resnb:
            lf = line.split()
            if lf:
                resid, elj, eel = int(lf[0]), float(lf[1]), float(lf[2])
                self.results["gc"].add_row((resid, elj, eel))
def __init__(self, logfile, timeunit="ps", stepsize=None, starttime=0):
    """Parse a Q dynamics logfile and extract data (temperature, energies...)

    For interfacing, use QAnalyseDyns.

    Args:
        logfile (string): path/filename of Q logfile
        timeunit (string): fs,ps,ns (optional, default is ps)
        stepsize (float): in case the one in Q is 0.000
                          (a non-zero stepsize in the logfile is never
                          overridden)
        starttime (float): time of the first step, in 'timeunit'
                           (optional, default is 0)

    Raises:
        QAnalyseDynError: if the logfile cannot be read, if mandatory
                          information is missing in its header, or if the
                          stepsize is missing/conflicting

    Usage looks like this:

        # parse
        qad = QAnalyseDyns(.....).analysed[0]

        # print out nicely formatted temperature stats
        print(qad.get_temp_stats())

        # get averages for second half (time >= 50%) of all the temperatures
        temps = qad.data_temp.get_columns()
        coltitles = qad.data_temp.get_column_titles()
        # [ "Time", "T_tot", "T_free", "T_free_solute", "T_free_solvent" ]
        midpoint = temps[0][-1] / 2    # 0 == "Time", -1 == last frame
        for i, colt in enumerate(coltitles[1:], 1):
            print(colt, np.mean([x for j, x in enumerate(temps[i])
                                 if temps[0][j] >= midpoint]))

        # get the potential energy data and just print it out
        Epot = qad.data_E_SUM.get_columns(["Time", "Potential"])
        print(Epot)
    """
    # parse the logfile:
    # first the header using RE
    # then dynamics (_parse_dyn()) line by line
    self._logfile = logfile
    self._starttime = starttime

    # conversion factors from fs to the requested unit
    self.MAP_TIME = {"fs": 1.0, "ps": 1e-3, "ns": 1e-6}
    if timeunit not in self.MAP_TIME:
        raise QAnalyseDynError(
            "Timeunit has to be either 'fs', 'ps' or 'ns'")
    self._timeconv = self.MAP_TIME[timeunit]

    # the header is everything up to (and including) the line
    # with "Initialising dynamics"
    header_lines = []
    try:
        with open(self._logfile, 'r') as lf:
            for line in lf:
                header_lines.append(line)
                if "Initialising dynamics" in line:
                    break
    except IOError as e:
        raise QAnalyseDynError("Could not read the logfile: " + str(e))
    self._header = "".join(header_lines)

    # use RE to get some info about the simulations
    m = re.search(r"Build number\s*([\d\.]+)", self._header)
    if m:
        self._qversion = m.group(1)
    else:
        m = re.search('QDyn version 5.06', self._header)
        if m:
            self._qversion = '5.06'
        else:
            raise QAnalyseDynError("Not a valid Q log file or Q version "
                                   "is very old...")

    m = re.search(r"Topology file =\s*(\S+)", self._header)
    if m:
        self._topfile = m.group(1)
    else:
        raise QAnalyseDynError("Couldn't find the topology filename!?")

    m = re.search(r"Number of MD steps =\s*(\d+)", self._header)
    if m:
        self._md_steps = int(m.group(1))
    else:
        raise QAnalyseDynError("Couldn't find number of steps!?")

    m = re.search(r"Stepsize \(fs\) =\s*([\d\.]+)", self._header)
    if m:
        self._stepsize = float(m.group(1))
    else:
        raise QAnalyseDynError("Couldn't find the stepsize!?")

    if not stepsize:
        if abs(self._stepsize - 0.0) < 1e-8:
            raise QAnalyseDynError("Can't convert steps to time, stepsize "
                                   "is 0.0 in the logfile (Q sucks). Set "
                                   "the stepsize please.")
    else:
        if self._stepsize:
            raise QAnalyseDynError("Will not override the non-zero "
                                   "stepsize in the logfile...")
        else:
            self._stepsize = stepsize

    m = re.search(r"FEP input file =\s*(\S+)", self._header)
    if m:
        self._fepfile = m.group(1)
    else:
        self._fepfile = None

    # without a FEP file there are no EVB states (and no Q energies);
    # the attribute must exist because it is used below
    self._evb_states = 0
    if self._fepfile:
        m = re.search(r"No. of fep/evb states =\s*(\d+)", self._header)
        if m:
            self._evb_states = int(m.group(1))
        else:
            raise QAnalyseDynError("Couldn't find the number of states!?")

    # offdiagonal section (up to the first empty line); it may be absent
    m = re.search(r"(No. of offdiagonal \(Hij\) functions =.*?^$)",
                  self._header, re.MULTILINE | re.DOTALL)
    offdgs = []
    if m:
        offdgs = re.findall(
            r"\s+\d+\s+\d+\s+(\d+)\s+(\d+)\s+[\d\.]+\s+[\d\.]+",
            m.group(1))

    #
    # make datacontainer variables for storing all the data
    #

    # offdiags
    offdiags = ["{}_{}".format(a1, a2) for a1, a2 in offdgs]
    self._tmp_offdiags = {}
    for k in offdiags:
        self._tmp_offdiags[k] = DataContainer(["Time", "Distance"])

    # temperature
    self.data_temp = DataContainer(
        ["Time", "T_tot", "T_free", "T_free_solute", "T_free_solvent"])

    # energies
    self.data_E_solute = DataContainer(
        ["Time", "El", "VdW", "Bond", "Angle", "Torsion", "Improper"])
    self.data_E_solvent = DataContainer(
        ["Time", "El", "VdW", "Bond", "Angle", "Torsion", "Improper"])
    self.data_E_solute_solvent = DataContainer(["Time", "El", "VdW"])
    self.data_E_LRF = DataContainer(["Time", "El"])
    self.data_E_Q_atom = DataContainer(
        ["Time", "El", "VdW", "Bond", "Angle", "Torsion", "Improper"])
    self.data_E_restraints = DataContainer([
        "Time", "Total", "Fix", "Solvent_rad", "Solvent_pol", "Shell",
        "Solute"
    ])
    self.data_E_SUM = DataContainer(
        ["Time", "Total", "Potential", "Kinetic"])

    # Q energies, one container per EVB state
    q_columns1 = ("Time", "Lambda", "El", "VdW")
    q_columns2 = ("Time", "Lambda", "El", "VdW", "Bond", "Angle",
                  "Torsion", "Improper")
    q_columns3 = ("Time", "Lambda", "Total", "Restraint")

    self.data_EQ_Q, self.data_EQ_prot = [], []
    self.data_EQ_wat, self.data_EQ_surr = [], []
    self.data_EQ_any, self.data_EQ_SUM = [], []
    for _ in range(self._evb_states):
        self.data_EQ_Q.append(DataContainer(q_columns1))
        self.data_EQ_prot.append(DataContainer(q_columns1))
        self.data_EQ_wat.append(DataContainer(q_columns1))
        self.data_EQ_surr.append(DataContainer(q_columns1))
        self.data_EQ_any.append(DataContainer(q_columns2))
        self.data_EQ_SUM.append(DataContainer(q_columns3))

    # mapping of energy types (label in the output) with containers
    self.map_en_section = {
        "solute": self.data_E_solute,
        "solvent": self.data_E_solvent,
        "solute-solvent": self.data_E_solute_solvent,
        "LRF": self.data_E_LRF,
        "Q-atom": self.data_E_Q_atom,
        "SUM": self.data_E_SUM
    }

    self.map_qen_section = {
        "Q-Q": self.data_EQ_Q,
        "Q-prot": self.data_EQ_prot,
        "Q-wat": self.data_EQ_wat,
        "Q-surr.": self.data_EQ_surr,
        "Q-any": self.data_EQ_any,
        "Q-SUM": self.data_EQ_SUM
    }

    self._parse_dyn()

    # join the offdiagonal distances into a single DataContainer:
    # the "Time" column of the first offdiagonal, followed by
    # the "Distance" column of every offdiagonal
    offd_keys = list(self._tmp_offdiags)
    d_dcs = [self._tmp_offdiags[k] for k in offd_keys]
    self.data_offdiags = DataContainer(["Time", ] + offd_keys)
    if d_dcs:
        times = d_dcs[0].get_columns([0, ])[0]
        dists = [d_dc.get_columns([1, ])[0] for d_dc in d_dcs]
        for d_row in zip(times, *dists):
            self.data_offdiags.add_row(d_row)

    self._endtime = self._md_steps * self._stepsize * self._timeconv \
        + self._starttime