def test_encoder(self):
    """Check PlotDataJSONEncoder output against the stored reference JSON.

    Builds a bar-type PlotData with two subplots, encodes it and compares
    the rounded JSON string with the contents of data/PlotData.json.
    """
    pl = PlotData("random_title", "bar", "xlab", "ylab", "zlab")
    pl.add_subplot("1.subplot",
                   [1, 2, 3, 4], [2, 4, 8, 16], [2, 4, 8, 16], [1, 2, 3, 4])
    pl.add_subplot("2.subplot",
                   [1, 2, 3, 4], [3, 4, 5, 6], [2, 3, 4, 5], [1, 1, 1, 1])
    jsonenc = PlotDataJSONEncoder(indent=2, separators=(',', ': '))

    # context manager guarantees the reference file is closed even if
    # reading fails
    with open("data/PlotData.json") as ref_file:
        ref_data = ref_file.read()

    assert ref_data == round_json(jsonenc.encode(pl))
def plotdata(self):
    """Return GC data as a dictionary of PlotData objects.

    All plots are of type 'bar' and contain a single subplot labelled
    'mean_N=<number of group contribution calculations>'.

    Keys in the returned (ordered) dictionary:
        'gc_lra_el_top':   top 20 electrostatic LRA GCs (ranked by absolute
                           value), means vs "Resname_resid"
        'gc_reorg_el_top': top 20 electrostatic REORG GCs (ranked by
                           absolute value), means vs "Resname_resid"
        'gc_lra_el', 'gc_lra_vdw':     LRA GCs, means vs residue index
        'gc_reorg_el', 'gc_reorg_vdw': REORG GCs, means vs residue index
        'gc_de1_el', 'gc_de1_vdw':     <E1 - E2>_1, means vs residue index
        'gc_de2_el', 'gc_de2_vdw':     <E1 - E2>_2, means vs residue index

    Returns an empty dictionary if there are no group contribution
    results (self.gcs is empty).
    """
    plots = ODict()

    # all failed
    if not self.gcs:
        return plots

    lamb1, lamb2 = self._lambdas_A[0], self._lambdas_B[0]
    iscale = self._scale_ionized

    # make PlotData objects
    # (key, title) pairs; the order here defines the key order in `plots`
    top_specs = [
        ("gc_lra_el_top",
         "Top LRA GC (El, {}->{}, iscale={}), top 20"
         "".format(lamb1, lamb2, iscale)),
        ("gc_reorg_el_top",
         "Top REORG GC (El, {}->{}, iscale={}), top 20"
         "".format(lamb1, lamb2, iscale)),
    ]
    full_specs = [
        ("gc_lra_el",
         "LRA GC (El, {}->{}, iscale={})".format(lamb1, lamb2, iscale)),
        ("gc_lra_vdw",
         "LRA GC (VdW, {}->{})".format(lamb1, lamb2)),
        ("gc_reorg_el",
         "REORG GC (El, {}->{}, iscale={})".format(lamb1, lamb2, iscale)),
        ("gc_reorg_vdw",
         "REORG GC (VdW, {}->{})".format(lamb1, lamb2)),
        ("gc_de1_el",
         "<E1-E2>_1 (El, {}->{})".format(lamb1, lamb2)),
        ("gc_de1_vdw",
         "<E1-E2>_1 (VdW, {}->{})".format(lamb1, lamb2)),
        ("gc_de2_el",
         "<E1-E2>_2 (El, {}->{})".format(lamb1, lamb2)),
        ("gc_de2_vdw",
         "<E1-E2>_2 (VdW, {}->{})".format(lamb1, lamb2)),
    ]
    for key, plot_title in top_specs:
        plots[key] = PlotData(plot_title,
                              xlabel="Residue",
                              ylabel="Free energy [kcal/mol]",
                              plot_type="bar")
    for key, plot_title in full_specs:
        plots[key] = PlotData(plot_title,
                              xlabel="Residue index",
                              ylabel="Energy [kcal/mol]",
                              plot_type="bar")

    cols = self.gcs_stats.get_columns()
    resids = cols[0]
    title = "mean_N={}".format(len(self.gcs))

    # key -> index of the plotted column; the column right after it is
    # passed as the y-error
    value_columns = [
        ("gc_de1_vdw", 3),
        ("gc_de1_el", 5),
        ("gc_de2_vdw", 7),
        ("gc_de2_el", 9),
        ("gc_lra_vdw", 11),
        ("gc_lra_el", 13),
        ("gc_reorg_vdw", 15),
        ("gc_reorg_el", 17),
    ]
    for key, idx in value_columns:
        plots[key].add_subplot(title, resids, cols[idx],
                               yerror=cols[idx + 1])

    def _add_top20(key, idx):
        """Add the 20 rows with the largest |row[idx]| to plots[key]."""
        # rank by the same column that gets plotted, so that "top 20"
        # really refers to the quantity shown in the plot
        top_rows = sorted(self.gcs_stats.get_rows(),
                          key=lambda row: -abs(row[idx]))[:20]
        # list() so the transposed columns can be indexed on Python 3 too
        top_cols = list(zip(*top_rows))
        top_resids, top_resnames = top_cols[0], top_cols[1]
        keys = ["{}_{}".format(rn.capitalize(), ri)
                for ri, rn in zip(top_resids, top_resnames)]
        plots[key].add_subplot(title, keys, top_cols[idx],
                               yerror=top_cols[idx + 1])

    # top 20 LRA el
    _add_top20("gc_lra_el_top", 13)
    # top 20 reorg el
    _add_top20("gc_reorg_el_top", 17)

    return plots
def plotdata(self):
    """Return 'useful data' as a dictionary of PlotData objects.

    Each qfep_output will be a subplot in one PlotData, except in the
    case of LRA where there is only one subplot: the average and stdev
    over all outputs.

    Useful data:
    - All energies from part 0
    - FEP back, forward and average dG profiles vs lambda
    - FEP delta (forward - reverse) vs lambda
    - Sampling profiles
    - LRA contributions (statistics)
    - Free energy profiles vs Egap (bin-averaged)
    - Coefficients vs Egap (part3)

    Returns an empty dictionary if there are no parsed outputs
    (self.qfos is empty).
    """
    plots = ODict()

    # no QFepOutput objects (all failed to parse)
    if not self.qfos:
        return plots

    # make PlotData objects
    plots["dgde"] = PlotData("Free energy profile",
                             xlabel="E1-E2 [kcal/mol]",
                             ylabel="Free energy [kcal/mol]")

    if self._lra_lambdas:
        l1, l2 = self._lra_lambdas
        lra_de_st1 = "lra_de_st1_{}".format(l1)
        lra_de_st2 = "lra_de_st2_{}".format(l2)
        lra_lra = "lra_lra_{}{}".format(l1, l2)
        lra_reo = "lra_reo_{}{}".format(l1, l2)
        plots[lra_de_st1] = PlotData("E2-E1 (lambda={})".format(l1),
                                     xlabel="Energy type",
                                     ylabel="Potential energy [kcal/mol]",
                                     plot_type="bar")
        plots[lra_de_st2] = PlotData("E2-E1 (lambda={})".format(l2),
                                     xlabel="Energy type",
                                     ylabel="Potential energy [kcal/mol]",
                                     plot_type="bar")
        plots[lra_lra] = PlotData("LRA (l={} -> l={})".format(l1, l2),
                                  xlabel="Energy type",
                                  ylabel="Potential energy [kcal/mol]",
                                  plot_type="bar")
        plots[lra_reo] = PlotData("Reorganization energy (l={} -> "
                                  "l={})".format(l1, l2),
                                  xlabel="Energy type",
                                  ylabel="Potential energy [kcal/mol]",
                                  plot_type="bar")

    plots["lambda_egap"] = PlotData("Sampling (binning): "
                                    "Check the overlap between lambda "
                                    "frames in each bin",
                                    xlabel="Egap [kcal/mol]",
                                    ylabel="Lambda",
                                    plot_type="scatter")
    plots["pts_egap"] = PlotData("Sampling (total counts): "
                                 "Check for breaks.",
                                 xlabel="Egap [kcal/mol]",
                                 ylabel="Number of points")
    plots["pts_egap_hists"] = PlotData("Sampling (histograms, 1st output "
                                       "only): Check overlap ",
                                       xlabel="Egap",
                                       ylabel="Number of points,")
    plots["pts_egap_l"] = PlotData("Sampling3D (1st output only)",
                                   xlabel="Egap",
                                   ylabel="Lambda",
                                   zlabel="Number of points",
                                   plot_type="wireframe")
    plots["dgl"] = PlotData("dG vs Lambda",
                            xlabel="Lambda",
                            ylabel="Free energy [kcal/mol]")
    plots["dgl_delta"] = PlotData("(dGf-dGr) vs Lambda: Lower, better",
                                  xlabel="Lambda",
                                  ylabel="Free energy [kcal/mol]")
    plots["dgl_forw"] = PlotData("dG vs Lambda (forward)",
                                 xlabel="Lambda",
                                 ylabel="Free energy [kcal/mol]")
    plots["dgl_rev"] = PlotData("dG vs Lambda (reverse)",
                                xlabel="Lambda",
                                ylabel="Free energy [kcal/mol]")
    plots["rxy"] = PlotData("Reactive distance",
                            xlabel="E1-E2 [kcal/mol]",
                            ylabel=u"Rxy [Å]")

    # get the column names from the first output
    # (next(iter(...)) works for both list and view return types)
    qfo0 = next(iter(self.qfos.values()))
    evb_states = qfo0.header.nstates
    part0_coltitles = qfo0.part0.data_state[0].get_column_titles()
    # column index 3 holds lambda, indices 4.. hold the energies
    energy_titles = part0_coltitles[4:]

    for col in energy_titles:
        for est in range(1, evb_states + 1):
            key = "e{}l_{}".format(est, col)
            plots[key] = PlotData("E{} vs Lambda ({})".format(est, col),
                                  xlabel="Lambda (state {})".format(est),
                                  ylabel="E{} ({}) [kcal/mol]"
                                         "".format(est, col))

    # populate PlotData subplots (each output is a subplot)
    for qfo_path, qfo in self.qfos.items():
        relp = os.path.relpath(qfo_path)

        # Part 0 energies
        for evb_state in range(evb_states):
            est = evb_state + 1
            data = qfo.part0.data_state[evb_state].get_columns()
            for i, colname in enumerate(energy_titles):
                key = "e{}l_{}".format(est, colname)
                plots[key].add_subplot(relp, data[3], data[i + 4])

        # Part 1 FEP
        data = qfo.part1.data.get_columns(["Lambda", "dGf", "dGr", "dG"])
        # |dGf| of step i minus |dGr| of step i-1; first point has no
        # counterpart and is set to 0
        delta = [0]
        for dgf, dgb in zip(data[1][1:], data[2][:-1]):
            delta.append(abs(dgf) - abs(dgb))
        plots["dgl_delta"].add_subplot(relp, data[0], delta)
        plots["dgl_forw"].add_subplot(relp, data[0], data[1])
        plots["dgl_rev"].add_subplot(relp, data[0], data[2])
        plots["dgl"].add_subplot(relp, data[0], data[3])

        # Part 2 (sampling/binning)
        data = qfo.part2.data.get_columns(["Lambda", "Egap", "points"])
        plots["lambda_egap"].add_subplot(relp, data[1], data[0])

        ## use only the first one, too much data otherwise
        if not plots["pts_egap_hists"].subplots:
            # group (Egap, points) pairs by lambda in a single pass over
            # the rows; avoids re-iterating a (possibly one-shot) zip
            by_lambda = {}
            for lam, egap, npts in zip(*data):
                by_lambda.setdefault(lam, []).append((egap, npts))
            for lam in sorted(by_lambda):
                eg, pts = zip(*by_lambda[lam])  # rows -> columns
                plots["pts_egap_hists"].add_subplot(
                    "{}_{}".format(relp, lam), eg, pts)

        ## use only the first one, too much data otherwise
        if not plots["pts_egap_l"].subplots:
            plots["pts_egap_l"].add_subplot(relp, data[1], data[0],
                                            data[2])

        # Part 3
        data = qfo.part3.data.get_columns(
            ["Egap", "dGg_norm", "r_xy", "points"])
        plots["dgde"].add_subplot(relp, data[0], data[1])
        plots["rxy"].add_subplot(relp, data[0], data[2])
        plots["pts_egap"].add_subplot(relp, data[0], data[3])

    # NOTE: the lra_* keys are only bound when self._lra_lambdas is set,
    # so this branch relies on self.lras being empty otherwise
    if self.lras:
        data = self.lra_stats.get_columns()
        plots[lra_de_st1].add_subplot("average", data[0], data[1],
                                      yerror=data[2])
        plots[lra_de_st2].add_subplot("average", data[0], data[3],
                                      yerror=data[4])
        plots[lra_lra].add_subplot("average", data[0], data[5],
                                   yerror=data[6])
        plots[lra_reo].add_subplot("average", data[0], data[7],
                                   yerror=data[8])

    return plots
def plotdata(self):
    """Return GC data as a dictionary of PlotData objects.

    All plots are of type 'bar' with a single subplot labelled
    'mean_N=<number of group contribution calculations>'.

    Keys in the returned (ordered) dictionary:
        'gc_el':     electrostatic GCs, means vs residue index
        'gc_vdw':    VdW GCs, means vs residue index
        'gc_el_top': electrostatic GCs sorted by absolute contribution,
                     only the first 20, means vs "Resname_resid"

    An empty dictionary is returned when self.gcs is empty.
    """
    plots = ODict()

    # nothing to plot (all failed)
    if not self.gcs:
        return plots

    lamb1 = self._lambdas_A[0]
    lamb2 = self._lambdas_B[0]

    # statistics columns: residue index, two unused columns, then
    # VdW value/error and electrostatic value/error
    res_idx, _, _, vdw_val, vdw_err, el_val, el_err = \
        self.gcs_stats.get_columns()
    subplot_label = "mean_N={}".format(len(self.gcs))

    el_title = ("LRA GC (electrostatic): "
                "dG( l={} -> l={} ) (iscale={})"
                "".format(lamb1, lamb2, self._scale_ionized))
    el_plot = PlotData(el_title,
                       xlabel="Residue index",
                       ylabel="Free energy [kcal/mol]",
                       plot_type="bar")
    plots["gc_el"] = el_plot
    el_plot.add_subplot(subplot_label, res_idx, el_val, yerror=el_err)

    vdw_title = ("LRA GC (VdW): "
                 "dG( l={} -> l={} )".format(lamb1, lamb2))
    vdw_plot = PlotData(vdw_title,
                        xlabel="Residue index",
                        ylabel="Free energy [kcal/mol]",
                        plot_type="bar")
    plots["gc_vdw"] = vdw_plot
    vdw_plot.add_subplot(subplot_label, res_idx, vdw_val, yerror=vdw_err)

    def _neg_abs_el(row):
        # largest absolute electrostatic contribution sorts first
        return -abs(row[5])

    ranked = sorted(self.gcs_stats.get_rows(), key=_neg_abs_el)
    top_idx, top_names, _, _, _, top_el, top_el_err = zip(*ranked[:20])
    residue_keys = []
    for idx, name in zip(top_idx, top_names):
        residue_keys.append("{}_{}".format(name.capitalize(), idx))

    top_title = ("LRA GC (electrostatic): "
                 "dG( l={} -> l={} ) (iscale={}), top 20"
                 "".format(lamb1, lamb2, self._scale_ionized))
    top_plot = PlotData(top_title,
                        xlabel="Residue",
                        ylabel="Free energy [kcal/mol]",
                        plot_type="bar")
    plots["gc_el_top"] = top_plot
    top_plot.add_subplot(subplot_label, residue_keys, top_el,
                         yerror=top_el_err)

    return plots
def get_plotdata(self, stride=1):
    """Return 'useful data' as a dictionary of PlotData objects.

    Useful data:
    - Temperatures ('temp')
    - Offdiagonal distances ('offdiags')
    - Energies ('E_<section>') and Q energies ('EQ<state>_<section>')

    In every data set the first column (index 0) is used as the x-axis
    (time) and each remaining column becomes one subplot, labelled with
    its column title. Data sets without any columns produce a PlotData
    with no subplots.

    Args:
        stride (int, optional):  use only every Nth point, default=1

    Returns:
        ordered dictionary of PlotData objects
    """
    plots = ODict()
    time_label = "Time [{}]".format(self.time_unit)

    def _add_columns(plot, container):
        """Add all columns except the first one to `plot` as subplots."""
        columns, titles = container.get_columns(), container.column_titles
        # no data: leave the plot empty instead of failing on columns[0]
        if not columns:
            return
        for i, col_title in enumerate(titles[1:], 1):
            plot.add_subplot(col_title, columns[0], columns[i])  # 0==Time

    # make PlotData objects
    plots["temp"] = PlotData("Temperature",
                             xlabel=time_label,
                             ylabel="T [K]")
    plots["offdiags"] = PlotData("Offdiagonal distances",
                                 xlabel=time_label,
                                 ylabel="Distance [A]")

    _add_columns(plots["temp"], self.get_temps(stride=stride))
    _add_columns(plots["offdiags"], self.get_offdiags(stride=stride))

    for k in self.en_section_keys:
        key = "E_{}".format(k)
        plots[key] = PlotData("Energy: " + k,
                              xlabel=time_label,
                              ylabel="Energy [kcal/mol]")
        _add_columns(plots[key], self.get_energies(k, stride=stride))

    for k in self.qen_section_keys:
        for evb_state in range(1, self.n_evb_states + 1):
            key = "EQ{}_{}".format(evb_state, k)
            plots[key] = PlotData("Q Energy: {} (state {})"
                                  "".format(k, evb_state),
                                  xlabel=time_label,
                                  ylabel="Energy [kcal/mol]")
            _add_columns(plots[key],
                         self.get_q_energies(k, evb_state, stride=stride))

    return plots