def get_temp_stats(self, percent_skip=0, stride=1):
    """
    Returns temperature stats in string format (used for cmdline printout)
    for all logfiles combined.
    """
    temps = self.get_temps(percent_skip=percent_skip, stride=stride)
    tt, tf, tf_solu, tf_solv = temps.get_columns(
        ("T_tot", "T_free", "T_free_solute", "T_free_solvent"))

    tt_mean, tt_std = np.mean(tt), np.std(tt)
    tf_mean, tf_std = np.mean(tf), np.std(tf)
    tf_solu_mean, tf_solu_std = np.mean(tf_solu), np.std(tf_solu)
    tf_solv_mean, tf_solv_std = np.mean(tf_solv), np.std(tf_solv)

    tt_max_dev = max(map(lambda x: abs(x - tt_mean), tt))
    tf_max_dev = max(map(lambda x: abs(x - tf_mean), tf))
    tf_solu_max_dev = max(map(lambda x: abs(x - tf_solu_mean), tf_solu))
    tf_solv_max_dev = max(map(lambda x: abs(x - tf_solv_mean), tf_solv))

    outstr = """\
Temperature stats:
{0:20s}{1:>20s}{2:>20s}{3:>20s}
{4:20s}{5:>20.2f}{6:>20.2f}{7:>20.2f}
{8:20s}{9:>20.2f}{10:>20.2f}{11:>20.2f}
{12:20s}{13:>20.2f}{14:>20.2f}{15:>20.2f}
{16:20s}{17:>20.2f}{18:>20.2f}{19:>20.2f}
""".format("", "Mean", "Stdev", "Max.Abs.Dev.",
           "T_total", tt_mean, tt_std, tt_max_dev,
           "T_free", tf_mean, tf_std, tf_max_dev,
           "T_free_solute", tf_solu_mean, tf_solu_std, tf_solu_max_dev,
           "T_free_solvent", tf_solv_mean, tf_solv_std, tf_solv_max_dev)
    return outstr
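# A small worked illustration of the Max.Abs.Dev. column reported above
# (hypothetical numbers, not taken from any real logfile): for T_free samples
# [298.1, 300.5, 301.4] the mean is 300.0, the absolute deviations are
# [1.9, 0.5, 1.4], and the reported maximum absolute deviation is 1.9.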
def stats_str(self):
    """Free energy stats in string format."""
    dgas = self.dgas.values()
    dg0s = self.dg0s.values()
    dgs_fep = self.dgs_fep.values()
    allres = {}
    allres["calc_type"] = self._subcalc_key or ""
    allres["dg_n"] = len(dgas)
    allres["dga"] = (np.mean(dgas), np.std(dgas),
                     np.median(dgas), np.std_error(dgas))
    allres["dg0"] = (np.mean(dg0s), np.std(dg0s),
                     np.median(dg0s), np.std_error(dg0s))
    allres["dg_fep_n"] = len(dgs_fep)
    allres["dg_fep"] = (np.mean(dgs_fep), np.std(dgs_fep),
                        np.median(dgs_fep), np.std_error(dgs_fep))

    return """\
# {calc_type:<15}      Mean    Std.dev     Median  Std.error          N
dG*        {dga[0]:10.2f} {dga[1]:10.2f} {dga[2]:10.2f} {dga[3]:10.2f} {dg_n:10}
dG0        {dg0[0]:10.2f} {dg0[1]:10.2f} {dg0[2]:10.2f} {dg0[3]:10.2f} {dg_n:10}
dG_lambda  {dg_fep[0]:10.2f} {dg_fep[1]:10.2f} {dg_fep[2]:10.2f} \
{dg_fep[3]:10.2f} {dg_fep_n:10}
""".format(**allres)
def lra_stats(self):
    """Calculate average and st.dev of LRA and reorg energies."""

    average_lras = DataContainer(["E_type",
                                  "(E2-E1)_10_mean", "(E2-E1)_10_std",
                                  "(E2-E1)_01_mean", "(E2-E1)_01_std",
                                  "LRA_mean", "LRA_std",
                                  "REORG_mean", "REORG_std"])
    allvals = []
    for lra in self.lras.values():
        rows = lra.get_rows()
        for irow, row in enumerate(rows):
            try:
                allvals[irow].append(row)
            except IndexError:
                allvals.append([row, ])

    # allvals now looks like this:
    # [
    #  [
    #   ["EQtot", EQtot_de_st1_1, EQtot_de_st2_1, EQtot_lra_1, EQtot_reorg_1],
    #   ["EQtot", EQtot_de_st1_2, EQtot_de_st2_2, ...], ...
    #  ],
    #  [
    #   ["EQbond", EQbond_de_st1_1, EQbond_de_st2_1, EQbond_lra_1, EQbond_reorg_1],
    #   ["EQbond", EQbond_de_st1_2, EQbond_de_st2_2, ...], ...
    #  ]
    # ]
    #
    for values in allvals:
        # transpose to get [ ["EQtot", "EQtot", ...],
        #                    [ EQtot_de_st1_1, EQtot_de_st1_2, ...],
        #                    [ EQtot_de_st2_1, EQtot_de_st2_2, ...], ...]
        values = zip(*values)
        # now they can be easily averaged and std-ed
        e_type = values[0][0]
        de_st1_mean = np.mean(values[1])
        de_st2_mean = np.mean(values[2])
        lra_mean = np.mean(values[3])
        reo_mean = np.mean(values[4])
        de_st1_std = np.std(values[1])
        de_st2_std = np.std(values[2])
        lra_std = np.std(values[3])
        reo_std = np.std(values[4])

        average_lras.add_row([e_type,
                              de_st1_mean, de_st1_std,
                              de_st2_mean, de_st2_std,
                              lra_mean, lra_std,
                              reo_mean, reo_std])

    return average_lras
def calcall(self):
    """Run the GC calcs, update .gcs, .failed and .gcs_stats."""
    semaphore = threading.BoundedSemaphore(self._nthreads)
    self._qcalc_io.clear()
    self.gcs.clear()
    self.gcs_stats.delete_rows()
    self.failed.clear()

    threads = []
    for calcdir in self._calcdirs:
        threads.append(_QGroupContribThread(self, semaphore, calcdir))
        threads[-1].start()

    for t in threads:
        while t.isAlive():
            t.join(1.0)
            if self.kill_event.is_set():
                try:
                    t.qcalc.process.terminate()
                except Exception as e:
                    pass
                return
        if t.error:
            self.failed[t.calcdir] = t.error
        else:
            self._qcalc_io[t.calcdir] = (t.qinps, t.qouts)

    # parse the output for results and
    # calculate LRAs for each dir
    for _dir, (_, qouts) in self._qcalc_io.iteritems():
        gcs = []
        failed_flag = False
        for qout in qouts:
            try:
                qco = QCalcOutput(qout)
                res = qco.results["gc"]
                if not self.qcalc_version:
                    self.qcalc_version = qco.qcalc_version
            except (QCalcError, KeyError) as error_msg:
                self.failed[_dir] = error_msg
                failed_flag = True
                break

            gc = {}
            for row in res.get_rows():
                resid, vdw, el = int(row[0]), float(row[1]), float(row[2])
                gc[resid] = {"vdw": vdw, "el": el}
            gcs.append(gc)

        if failed_flag:
            continue

        resids = sorted(gcs[0].keys())
        resnames = [self._pdb_qstruct.residues[ri - 1].name
                    for ri in resids]

        # do the LRA thingy
        # LRA = 0.5*(<E2-E1>_conf1 + <E2-E1>_conf2)
        # REORG = <E2-E1>_conf1 - LRA
        e2e1_st1_vdw = [gcs[1][key]["vdw"] - gcs[0][key]["vdw"]
                        for key in resids]
        e2e1_st1_el = [gcs[1][key]["el"] - gcs[0][key]["el"]
                       for key in resids]
        e2e1_st2_vdw = [gcs[3][key]["vdw"] - gcs[2][key]["vdw"]
                        for key in resids]
        e2e1_st2_el = [gcs[3][key]["el"] - gcs[2][key]["el"]
                       for key in resids]

        # super efficient stuff here
        vdw_lra = [0.5 * (a + b) for a, b in zip(e2e1_st1_vdw, e2e1_st2_vdw)]
        el_lra = [0.5 * (a + b) for a, b in zip(e2e1_st1_el, e2e1_st2_el)]
        vdw_reorg = [0.5 * (a - b) for a, b in zip(e2e1_st1_vdw, e2e1_st2_vdw)]
        el_reorg = [0.5 * (a - b) for a, b in zip(e2e1_st1_el, e2e1_st2_el)]

        # scale the ionized residues
        if abs(self._scale_ionized - 1.0) > 1e-7:
            for i, resname in enumerate(resnames):
                if resname in ("ARG", "LYS", "HIP", "ASP", "GLU"):
                    e2e1_st1_el[i] = e2e1_st1_el[i] / self._scale_ionized
                    e2e1_st2_el[i] = e2e1_st2_el[i] / self._scale_ionized
                    el_lra[i] = el_lra[i] / self._scale_ionized
                    el_reorg[i] = el_reorg[i] / self._scale_ionized

        # write the DataContainer
        lambda1_st1 = self._lambdas_A[0]
        lambda2_st1 = self._lambdas_B[0]
        gc_lra = DataContainer([
            "Residue_id",
            "Residue name",
            "<E2-E1>1_VdW(l={:5.4f}->l={:5.4f})"
            "".format(lambda1_st1, lambda2_st1),
            "<E2-E1>1_El(l={:5.4f}->l={:5.4f})_(iscale={})"
            "".format(lambda1_st1, lambda2_st1, self._scale_ionized),
            "<E2-E1>2_VdW(l={:5.4f}->l={:5.4f})"
            "".format(lambda1_st1, lambda2_st1),
            "<E2-E1>2_El(l={:5.4f}->l={:5.4f})_(iscale={})"
            "".format(lambda1_st1, lambda2_st1, self._scale_ionized),
            "LRA_VdW(l={:5.4f}->l={:5.4f})"
            "".format(lambda1_st1, lambda2_st1),
            "LRA_El(l={:5.4f}->l={:5.4f})_(iscale={})"
            "".format(lambda1_st1, lambda2_st1, self._scale_ionized),
            "REORG_VdW(l={:5.4f}->l={:5.4f})"
            "".format(lambda1_st1, lambda2_st1),
            "REORG_El(l={:5.4f}->l={:5.4f})_(iscale={})"
            "".format(lambda1_st1, lambda2_st1, self._scale_ionized)])

        for row in zip(resids, resnames,
                       e2e1_st1_vdw, e2e1_st1_el,
                       e2e1_st2_vdw, e2e1_st2_el,
                       vdw_lra, el_lra,
                       vdw_reorg, el_reorg):
            gc_lra.add_row(row)

        self.gcs[_dir] = gc_lra

    # get GC stats over all directories
    self.gcs_stats.delete_rows()
    gcs = {}
    for _, gc in self.gcs.iteritems():
        for row in gc.get_rows():
            resid, resname = row[0:2]
            res_key = "{}.{}".format(resid, resname)

            values = [[val, ] for val in row[2:]]
            if not gcs.has_key(res_key):
                gcs[res_key] = values
            else:
                for i, val in enumerate(gcs[res_key]):
                    val.extend(values[i])

    # iterate through each residue and calculate
    # means and stdevs
    # (sort by residue index)
    for res_key in sorted(gcs.keys(), key=lambda x: int(x.split(".")[0])):
        rc = gcs[res_key]
        resid, resname = res_key.split(".")
        # get mean and stdev
        rc_stats = [int(resid), resname, len(rc[0]),
                    np.mean(rc[0]), np.std(rc[0]),  # <E2-E1>1 vdw
                    np.mean(rc[1]), np.std(rc[1]),  # <E2-E1>1 el
                    np.mean(rc[2]), np.std(rc[2]),  # <E2-E1>2 vdw
                    np.mean(rc[3]), np.std(rc[3]),  # <E2-E1>2 el
                    np.mean(rc[4]), np.std(rc[4]),  # LRA vdw
                    np.mean(rc[5]), np.std(rc[5]),  # LRA el
                    np.mean(rc[6]), np.std(rc[6]),  # REORG vdw
                    np.mean(rc[7]), np.std(rc[7])]  # REORG el

        self.gcs_stats.add_row(rc_stats)
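# A small worked illustration of the LRA/REORG arithmetic used in calcall()
# above (hypothetical numbers, not from any real calculation):
# with <E2-E1>_conf1 = -10.0 and <E2-E1>_conf2 = -4.0,
#   LRA   = 0.5 * (-10.0 + (-4.0)) = -7.0
#   REORG = <E2-E1>_conf1 - LRA = -10.0 - (-7.0) = -3.0
# which is the same as the 0.5 * (a - b) form used for vdw_reorg/el_reorg.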
def test_std(self):
    # sample standard deviation (N-1 denominator) of the first 20 squares
    vals = [i**2 for i in range(1, 21)]
    assert is_close(np.std(vals), 127.9023064686)
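# Companion checks in the same style (a minimal sketch; they assume the same
# `np` stats helper and `is_close` comparison used in test_std above). The
# expected values follow directly from the first 20 squares: their sum is
# 2870, so the mean is 143.5, and the two middle values 100 and 121 give a
# median of 110.5.
def test_mean(self):
    vals = [i**2 for i in range(1, 21)]
    assert is_close(np.mean(vals), 143.5)

def test_median(self):
    vals = [i**2 for i in range(1, 21)]
    assert is_close(np.median(vals), 110.5)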