示例#1
0
 def test_encoder(self):
     """Check PlotDataJSONEncoder output against the stored reference file.

     Builds a PlotData object with two subplots, encodes it with
     PlotDataJSONEncoder and compares the result (passed through
     round_json) with the contents of "data/PlotData.json".
     """
     pl = PlotData("random_title", "bar", "xlab", "ylab", "zlab")
     pl.add_subplot("1.subplot", [1, 2, 3, 4], [2, 4, 8, 16], [2, 4, 8, 16],
                    [1, 2, 3, 4])
     pl.add_subplot("2.subplot", [1, 2, 3, 4], [3, 4, 5, 6], [2, 3, 4, 5],
                    [1, 1, 1, 1])
     jsonenc = PlotDataJSONEncoder(indent=2, separators=(',', ': '))
     # Use a context manager so the reference file is always closed,
     # even when the read fails.
     with open("data/PlotData.json") as ref_file:
         ref_data = ref_file.read()
     assert ref_data == round_json(jsonenc.encode(pl))
示例#2
0
    def plotdata(self):
        """Return GC data as a dictionary of PlotData objects.

        Every PlotData is a bar plot with exactly one subplot titled
        "mean_N=<len(self.gcs)>". The values come from self.gcs_stats;
        the y-error of each quantity is read from the column directly
        after the column holding its values.

        Keys in the returned dictionary, in insertion order:
            'gc_lra_el_top': 20 residues with the largest absolute
                             electrostatic LRA contribution,
                             values vs "Resname_resid" labels

            'gc_reorg_el_top': 20 residues with the largest absolute
                               electrostatic REORG contribution,
                               values vs "Resname_resid" labels

            'gc_lra_el', 'gc_lra_vdw': LRA GCs vs residue index

            'gc_reorg_el', 'gc_reorg_vdw': REORG GCs vs residue index

            'gc_de1_el', 'gc_de1_vdw': <E1 - E2>_1 vs residue index

            'gc_de2_el', 'gc_de2_vdw': <E1 - E2>_2 vs residue index

        Returns:
            ODict of PlotData objects; empty if self.gcs is empty.
        """

        plots = ODict()

        # all failed
        if not self.gcs:
            return plots

        lamb1, lamb2 = self._lambdas_A[0], self._lambdas_B[0]
        # str.format() ignores surplus positional arguments, so the same
        # tuple serves both the titles with and without "iscale".
        fmt_args = (lamb1, lamb2, self._scale_ionized)

        top_labels = ("Residue", "Free energy  [kcal/mol]")
        index_labels = ("Residue index", "Energy  [kcal/mol]")

        # (key, title template, (xlabel, ylabel)); list order defines the
        # order of the keys in the returned dictionary
        plot_specs = [
            ("gc_lra_el_top",
             "Top LRA GC (El, {}->{}, iscale={}), top 20", top_labels),
            ("gc_reorg_el_top",
             "Top REORG GC (El, {}->{}, iscale={}), top 20", top_labels),
            ("gc_lra_el", "LRA GC (El, {}->{}, iscale={})", index_labels),
            ("gc_lra_vdw", "LRA GC (VdW, {}->{})", index_labels),
            ("gc_reorg_el", "REORG GC (El, {}->{}, iscale={})", index_labels),
            ("gc_reorg_vdw", "REORG GC (VdW, {}->{})", index_labels),
            ("gc_de1_el", "<E1-E2>_1 (El, {}->{})", index_labels),
            ("gc_de1_vdw", "<E1-E2>_1 (VdW, {}->{})", index_labels),
            ("gc_de2_el", "<E1-E2>_2 (El, {}->{})", index_labels),
            ("gc_de2_vdw", "<E1-E2>_2 (VdW, {}->{})", index_labels),
        ]
        for key, template, (xlabel, ylabel) in plot_specs:
            plots[key] = PlotData(template.format(*fmt_args),
                                  xlabel=xlabel,
                                  ylabel=ylabel,
                                  plot_type="bar")

        cols = self.gcs_stats.get_columns()
        resids = cols[0]
        title = "mean_N={}".format(len(self.gcs))

        # column index of the plotted values for each per-residue plot;
        # the matching y-error sits in the following column
        value_columns = [
            ("gc_de1_vdw", 3),
            ("gc_de1_el", 5),
            ("gc_de2_vdw", 7),
            ("gc_de2_el", 9),
            ("gc_lra_vdw", 11),
            ("gc_lra_el", 13),
            ("gc_reorg_vdw", 15),
            ("gc_reorg_el", 17),
        ]
        for key, val_i in value_columns:
            plots[key].add_subplot(title,
                                   resids,
                                   cols[val_i],
                                   yerror=cols[val_i + 1])

        # Top 20 plots. Rows are ranked by the absolute value of the same
        # column that is plotted (13 = LRA el, 17 = REORG el); the earlier
        # code ranked by columns 5 and 9, i.e. by a different quantity
        # than the one shown.
        for key, val_i in (("gc_lra_el_top", 13), ("gc_reorg_el_top", 17)):
            top_rows = sorted(self.gcs_stats.get_rows(),
                              key=lambda row: -abs(row[val_i]))[:20]
            # list() keeps the transposed columns indexable on Python 3,
            # where zip() returns an iterator
            top_cols = list(zip(*top_rows))
            top_resids, top_resnames = top_cols[0], top_cols[1]
            labels = ["{}_{}".format(rn.capitalize(), ri)
                      for ri, rn in zip(top_resids, top_resnames)]
            plots[key].add_subplot(title,
                                   labels,
                                   top_cols[val_i],
                                   yerror=top_cols[val_i + 1])

        return plots
示例#3
0
    def plotdata(self):
        """Return 'useful data' as a dictionary of PlotData objects.

        Each qfep_output will be a subplot in one PlotData, except in the case
        of LRA where there is only one subplot ("average") built from
        self.lra_stats.

        Useful data:
        - All energies from part 0 (keys "e<state>l_<column title>")
        - FEP reverse, forward and average dG profiles vs lambda
          ("dgl_rev", "dgl_forw", "dgl")
        - FEP delta vs lambda ("dgl_delta"); computed here as
          abs(dGf[i+1]) - abs(dGr[i]), with 0 as the first point
        - Sampling profiles ("lambda_egap", "pts_egap", "pts_egap_hists",
          "pts_egap_l"); the last two use only the first output
        - LRA contributions (statistics), only if self._lra_lambdas is set
        - Free energy profiles vs Egap ("dgde")
        - Reactive distance vs Egap ("rxy")

        Returns:
            ODict of PlotData objects; empty if self.qfos is empty.
        """

        plots = ODict()

        # no QFepOutput objects (all failed to parse)
        if not self.qfos:
            return plots

        # make PlotData objects
        plots["dgde"] = PlotData("Free energy profile",
                                 xlabel="E1-E2  [kcal/mol]",
                                 ylabel="Free energy  [kcal/mol]")

        # Keys of the four LRA plots; stays None when no LRA lambdas are
        # set so that the LRA section at the end can be skipped safely.
        lra_keys = None
        if self._lra_lambdas:
            l1, l2 = self._lra_lambdas
            lra_de_st1 = "lra_de_st1_{}".format(l1)
            lra_de_st2 = "lra_de_st2_{}".format(l2)
            lra_lra = "lra_lra_{}{}".format(l1, l2)
            lra_reo = "lra_reo_{}{}".format(l1, l2)
            lra_keys = (lra_de_st1, lra_de_st2, lra_lra, lra_reo)
            lra_titles = ("E2-E1 (lambda={})".format(l1),
                          "E2-E1 (lambda={})".format(l2),
                          "LRA (l={} -> l={})".format(l1, l2),
                          "Reorganization energy (l={} -> "
                          "l={})".format(l1, l2))
            for lra_key, lra_title in zip(lra_keys, lra_titles):
                plots[lra_key] = PlotData(
                    lra_title,
                    xlabel="Energy type",
                    ylabel="Potential energy  [kcal/mol]",
                    plot_type="bar")

        plots["lambda_egap"] = PlotData(
            "Sampling (binning): "
            "Check the overlap between lambda "
            "frames in each bin",
            xlabel="Egap [kcal/mol]",
            ylabel="Lambda",
            plot_type="scatter")
        plots["pts_egap"] = PlotData(
            "Sampling (total counts): "
            "Check for breaks.",
            xlabel="Egap [kcal/mol]",
            ylabel="Number of points")
        plots["pts_egap_hists"] = PlotData(
            "Sampling (histograms, 1st output "
            "only): Check overlap ",
            xlabel="Egap",
            ylabel="Number of points,")
        plots["pts_egap_l"] = PlotData("Sampling3D (1st output only)",
                                       xlabel="Egap",
                                       ylabel="Lambda",
                                       zlabel="Number of points",
                                       plot_type="wireframe")
        plots["dgl"] = PlotData("dG vs Lambda",
                                xlabel="Lambda",
                                ylabel="Free energy  [kcal/mol]")
        plots["dgl_delta"] = PlotData("(dGf-dGr) vs Lambda: Lower, better",
                                      xlabel="Lambda",
                                      ylabel="Free energy  [kcal/mol]")
        plots["dgl_forw"] = PlotData("dG vs Lambda (forward)",
                                     xlabel="Lambda",
                                     ylabel="Free energy  [kcal/mol]")
        plots["dgl_rev"] = PlotData("dG vs Lambda (reverse)",
                                    xlabel="Lambda",
                                    ylabel="Free energy  [kcal/mol]")
        plots["rxy"] = PlotData("Reactive distance",
                                xlabel="E1-E2  [kcal/mol]",
                                ylabel=u"Rxy  [Å]")

        # get the column names from the first output
        # next(iter(...)) works for both list-returning (Python 2) and
        # view-returning (Python 3) values()
        qfo0 = next(iter(self.qfos.values()))
        evb_states = qfo0.header.nstates
        part0_coltitles = qfo0.part0.data_state[0].get_column_titles()
        # energy columns start at index 4 (index 3 is lambda, see below)
        energy_titles = part0_coltitles[4:]

        for col in energy_titles:
            for evb_state in range(evb_states):
                est = evb_state + 1
                key = "e{}l_{}".format(est, col)
                plots[key] = PlotData("E{} vs Lambda ({})".format(est, col),
                                      xlabel="Lambda (state {})".format(est),
                                      ylabel="E{} ({})  [kcal/mol]"
                                      "".format(est, col))

        # populate PlotData subplots (each output is a subplot)
        for qfo_path, qfo in self.qfos.items():

            relp = os.path.relpath(qfo_path)

            # Part 0 energies
            for evb_state in range(evb_states):
                est = evb_state + 1
                data = qfo.part0.data_state[evb_state].get_columns()
                for col_i, colname in enumerate(energy_titles, 4):
                    key = "e{}l_{}".format(est, colname)
                    # column index 3 is lambda, 4,5,6,7.. are energies
                    plots[key].add_subplot(relp, data[3], data[col_i])

            # Part 1 FEP
            data = qfo.part1.data.get_columns(["Lambda", "dGf", "dGr", "dG"])

            # pair forward value i+1 with reverse value i; first point is 0
            delta = [0]
            delta.extend(abs(dgf) - abs(dgr)
                         for dgf, dgr in zip(data[1][1:], data[2][:-1]))

            plots["dgl_delta"].add_subplot(relp, data[0], delta)
            plots["dgl_forw"].add_subplot(relp, data[0], data[1])
            plots["dgl_rev"].add_subplot(relp, data[0], data[2])
            plots["dgl"].add_subplot(relp, data[0], data[3])

            # Part 2 (sampling/binning)
            data = qfo.part2.data.get_columns(["Lambda", "Egap", "points"])
            plots["lambda_egap"].add_subplot(relp, data[1], data[0])

            ## use only the first one, too much data otherwise
            if not plots["pts_egap_hists"].subplots:
                # transpose columns to rows; materialized because the rows
                # are scanned once per lambda value
                rows = list(zip(*data))
                for lam_value in sorted(set(data[0])):
                    binned = [(eg, pts) for lam, eg, pts in rows
                              if lam == lam_value]
                    egaps, points = zip(*binned)  # rows back to columns

                    plots["pts_egap_hists"].add_subplot(
                        "{}_{}".format(relp, lam_value), egaps, points)
            ## use only the first one, too much data otherwise
            if not plots["pts_egap_l"].subplots:
                plots["pts_egap_l"].add_subplot(relp, data[1], data[0],
                                                data[2])

            # Part 3
            data = qfo.part3.data.get_columns(
                ["Egap", "dGg_norm", "r_xy", "points"])
            plots["dgde"].add_subplot(relp, data[0], data[1])
            plots["rxy"].add_subplot(relp, data[0], data[2])
            plots["pts_egap"].add_subplot(relp, data[0], data[3])

        # LRA statistics: value columns 1, 3, 5, 7 with the y-error in the
        # column after each. Skipped when the LRA plots were not created
        # above (previously this raised an UnboundLocalError).
        if self.lras and lra_keys is not None:
            data = self.lra_stats.get_columns()
            for lra_key, val_i in zip(lra_keys, (1, 3, 5, 7)):
                plots[lra_key].add_subplot("average",
                                           data[0],
                                           data[val_i],
                                           yerror=data[val_i + 1])

        return plots
示例#4
0
    def plotdata(self):
        """Return GC data as a dictionary of PlotData objects.

        All plots are bar plots with a single subplot titled
        "mean_N=<len(self.gcs)>".

        Keys in the returned dictionary, in insertion order:
            'gc_el': electrostatic GCs with y-errors vs residue index

            'gc_vdw': VdW GCs with y-errors vs residue index

            'gc_el_top': electrostatic GCs of the 20 rows with the largest
                         absolute value in column 5 of self.gcs_stats,
                         plotted vs "Resname_resid" labels

        Returns:
            ODict of PlotData objects; empty if self.gcs is empty.
        """

        plots = ODict()

        # nothing to plot (all failed)
        if not self.gcs:
            return plots

        lamb1, lamb2 = self._lambdas_A[0], self._lambdas_B[0]

        # the statistics container is expected to hold exactly 7 columns
        stat_columns = self.gcs_stats.get_columns()
        resids, _, _, vdws, vdwss, els, elss = stat_columns

        subplot_title = "mean_N={}".format(len(self.gcs))
        ylabel = "Free energy  [kcal/mol]"

        # electrostatic contributions, all residues
        el_title = ("LRA GC (electrostatic): "
                    "dG( l={} -> l={} ) (iscale={})")
        el_plot = PlotData(el_title.format(lamb1, lamb2, self._scale_ionized),
                           xlabel="Residue index",
                           ylabel=ylabel,
                           plot_type="bar")
        plots["gc_el"] = el_plot
        el_plot.add_subplot(subplot_title, resids, els, yerror=elss)

        # VdW contributions, all residues
        vdw_title = "LRA GC (VdW): dG( l={} -> l={} )"
        vdw_plot = PlotData(vdw_title.format(lamb1, lamb2),
                            xlabel="Residue index",
                            ylabel=ylabel,
                            plot_type="bar")
        plots["gc_vdw"] = vdw_plot
        vdw_plot.add_subplot(subplot_title, resids, vdws, yerror=vdwss)

        # electrostatic contributions, 20 largest by absolute value
        def descending_abs_el(row):
            return -abs(row[5])

        ranked = sorted(self.gcs_stats.get_rows(), key=descending_abs_el)
        top_resids, top_resnames, _, _, _, top_els, top_elss = \
            zip(*ranked[:20])

        labels = []
        for resid, resname in zip(top_resids, top_resnames):
            labels.append("{}_{}".format(resname.capitalize(), resid))

        top_title = ("LRA GC (electrostatic): "
                     "dG( l={} -> l={} ) (iscale={}), top 20")
        top_plot = PlotData(top_title.format(lamb1, lamb2,
                                             self._scale_ionized),
                            xlabel="Residue",
                            ylabel=ylabel,
                            plot_type="bar")
        plots["gc_el_top"] = top_plot
        top_plot.add_subplot(subplot_title, labels, top_els, yerror=top_elss)

        return plots
示例#5
0
    def get_plotdata(self, stride=1):
        """Return 'useful data' as a dictionary of PlotData objects.

        Useful data (keys in the returned dictionary):
        - Temperatures ("temp")
        - Offdiagonal distances ("offdiags")
        - Energies ("E_<key>" for each key in self.en_section_keys)
        - Q energies ("EQ<state>_<key>" for each key in
          self.qen_section_keys and each EVB state 1..self.n_evb_states)

        In every PlotData, column 0 of the underlying data (time) is the
        x-axis and each remaining column becomes one subplot named after
        its column title. A PlotData whose data has no columns is still
        returned, just without subplots.

        Args:
           stride (int, optional):  use only every Nth point, default=1

        Returns:
            ODict of PlotData objects.
        """

        def _add_columns(plot, container):
            # Add one subplot per data column, using column 0 as x-axis.
            columns = container.get_columns()
            titles = container.column_titles
            if not columns:
                return
            for col_i, col_title in enumerate(titles[1:], 1):
                plot.add_subplot(col_title, columns[0], columns[col_i])

        plots = ODict()
        time_label = "Time [{}]".format(self.time_unit)

        # make PlotData objects
        plots["temp"] = PlotData("Temperature",
                                 xlabel=time_label,
                                 ylabel="T [K]")

        plots["offdiags"] = PlotData("Offdiagonal distances",
                                     xlabel=time_label,
                                     ylabel="Distance [A]")

        _add_columns(plots["temp"], self.get_temps(stride=stride))
        _add_columns(plots["offdiags"], self.get_offdiags(stride=stride))

        for k in self.en_section_keys:
            key = "E_{}".format(k)
            plots[key] = PlotData("Energy: " + k,
                                  xlabel=time_label,
                                  ylabel="Energy [kcal/mol]")
            _add_columns(plots[key], self.get_energies(k, stride=stride))

        for k in self.qen_section_keys:
            for evb_state in range(1, self.n_evb_states + 1):
                key = "EQ{}_{}".format(evb_state, k)
                plots[key] = PlotData("Q Energy: {} (state {})"
                                      "".format(k, evb_state),
                                      xlabel=time_label,
                                      ylabel="Energy [kcal/mol]")
                _add_columns(plots[key],
                             self.get_q_energies(k, evb_state, stride=stride))

        return plots