def Report(self, est, title):
    """Write a residual report for the estimates `est` to the HTML writer.

    The report consists of the title, a CDF figure of the absolute
    residuals |self.b - est| (restricted to entries where `est` is
    finite), and a toggleable table with one row per column of `self.b`,
    sorted by descending absolute error.

    est   -- estimated values, indexed as est[0, i] like self.b
    title -- heading written above the figure
    """
    self.html_writer.write('</br><b>%s</b><br>\n' % title)

    # Only entries with a finite estimate take part in the residual CDF.
    is_finite = np.isfinite(est)
    abs_resid = abs(self.b[is_finite] - est[is_finite])

    fig = plt.figure(figsize=(5, 5), dpi=60)
    cdf(list(abs_resid.flat), figure=fig)
    plt.title("RMSE = %.1f, N = %d" %
              (rms_flat(abs_resid.flat), abs_resid.shape[1]))
    plt.xlabel(r"$|\Delta_r G^{'\circ} obs - \Delta_r G^{'\circ} est|$ [kJ/mol]")
    plt.ylabel(r"CDF")
    self.html_writer.embed_matplotlib_figure(fig)

    table_rows = []
    for col in range(self.b.shape[1]):
        entry = {
            'row': col,
            'type': self.obs_types[col],
            'reaction': UnifiedGroupContribution.row2hypertext(
                self.S[:, col], self.cids),
            'anchored': self.anchored[0, col],
            'obs': self.b[0, col],
            'est': est[0, col],
        }
        # A non-finite estimate has no meaningful error; it is reported
        # as 0, which places it among the smallest errors after sorting.
        if np.isfinite(entry['est']):
            entry['|err|'] = abs(entry['obs'] - entry['est'])
        else:
            entry['|err|'] = 0
        table_rows.append(entry)

    # Largest errors first.
    table_rows = sorted(table_rows,
                        key=lambda entry: entry['|err|'],
                        reverse=True)

    self.html_writer.insert_toggle(start_here=True, label="Show table")
    column_order = ['row', 'type', 'reaction', 'anchored', 'obs', 'est',
                    '|err|']
    self.html_writer.write_table(table_rows, headers=column_order, decimal=1)
    self.html_writer.div_end()
Exemplo n.º 2
0
def plot_histogram(histogram, html_writer, title='', max_pathway_length=8, xmin=None, xlim=20, error_bars=True, min_to_show=20, legend_loc='upper left'):
    """Plot one CDF per group in `histogram` and return the figure.

    histogram          -- mapping from a group key to a sequence of values
    html_writer        -- not used by this function
    title              -- figure title, also used as a prefix for the
                          printed rank-sum results
    max_pathway_length -- not used by this function
    xmin               -- lower x-limit; defaults to -xlim when None
    xlim               -- upper x-limit
    error_bars         -- when true, a bootstrap standard deviation of the
                          median is computed and handed to plotting.cdf
                          as `std`
    min_to_show        -- groups with fewer values than this are skipped
    legend_loc         -- legend location passed to pylab.legend

    Rank-sum comparisons are printed for the pairs (1, 'Not first') and
    ('Inhibited', 'No known regulation') when both groups of a pair exist.
    """
    fig = pylab.figure()

    pylab.hold(True)

    reps = 1000  # number of bootstrap replicates

    y_offset = 0
    offset_step = 0.007
    colors = {1:'r', 2:'orange', 3:'green', 4:'cyan', 5:'blue', 'Rest':'violet', 'Not first':'k--', 'No known regulation':'grey', 'Activated':'green', 'Inhibited':'r', 'Mixed regulation':'blue'}
    for key, value in histogram.iteritems():
        if len(value) < min_to_show:
            continue

        m = stats.cmedian(value)

        sample_std = None
        if error_bars:
            # Bootstrap: resample with replacement to the original size and
            # record the median of every replicate.
            sample_vals = []
            for _rep in range(reps):
                resampled = [random.choice(value) for _item in value]
                sample_vals.append(pylab.median(resampled))
            sample_std = pylab.std(sample_vals)

        plotting.cdf(value, label='%s (med=%.1f, N=%d)' % (key, m, len(value)),
                     style=colors.get(key, 'grey'), std=sample_std,
                     y_offset=y_offset)
        # Each curve is passed a slightly larger y_offset than the last.
        y_offset += offset_step

    if xmin is None:
        xmin = -1 * xlim
    pylab.xlim(xmin, xlim)
    pylab.xlabel('Irreversability')
    pylab.ylabel('Cumulative distribution')
    legendfont = matplotlib.font_manager.FontProperties(size=11)
    pylab.legend(loc=legend_loc, prop=legendfont)
    pylab.title(title)
    pylab.hold(False)

    # Both groups of a comparison must exist; previously a missing partner
    # group raised KeyError.
    if 'Not first' in histogram and 1 in histogram:
        print('%s, first vs. non-first ranksum test: ' % title + '(%f, %f)' % stats.ranksums(histogram[1], histogram['Not first']))

    if 'Inhibited' in histogram and 'No known regulation' in histogram:
        print('%s, inhibited vs. non-regulated ranksum test: ' % title + '(%f, %f)' % stats.ranksums(histogram['Inhibited'], histogram['No known regulation']))

    return fig
Exemplo n.º 3
0
def try_kegg_api():
    """Compute the reversibility of every E. coli pathway reaction from KEGG.

    Lists the 'eco' pathways through the KEGG SOAP service, writes each
    distinct reaction ID to '../res/eco_rids.txt', computes a reversibility
    value per reaction with CalculateReversability, prints the hit/miss
    counts and the median, and shows a CDF of the values.

    Reactions for which MissingCompoundFormationEnergy is raised are
    counted as misses and skipped.
    """
    db = SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter('../res/dG0_test.html')
    G = GroupContribution(db, html_writer=html_writer)
    G.init()

    wsdl = 'http://soap.genome.jp/KEGG.wsdl'
    serv = WSDL.Proxy(wsdl)

    rids = set()
    # The context manager closes the file even if a service call fails.
    with open('../res/eco_rids.txt', 'w') as rid_file:
        for x in serv.list_pathways('eco'):
            pathway_id = x['entry_id']
            for reaction_id in serv.get_reactions_by_pathway(pathway_id):
                # NOTE(review): presumably drops a 4-character prefix such
                # as 'rn:R' before the numeric ID -- confirm against the
                # service's response format.
                rid = int(reaction_id[4:])
                if rid not in rids:
                    rids.add(rid)
                    rid_file.write('%d\n' % rid)

    c_mid = 1e-3
    pH, pMg, I, T = (7.0, 3.0, 0.1, 298.15)

    rid2reversibility = {}
    misses = 0
    for rid in sorted(rids):
        try:
            reaction = G.kegg.rid2reaction(rid)
            r = CalculateReversability(reaction, G, c_mid, pH, pMg, I, T)
            rid2reversibility[rid] = r
        except thermodynamics.MissingCompoundFormationEnergy:
            misses += 1
            continue

    # Both values must be passed as one tuple: '%' binds tighter than the
    # comma, so the previous form raised TypeError.
    print('hits = %d, misses = %d' % (len(rid2reversibility), misses))
    reversibilities = list(rid2reversibility.values())
    median = pylab.median(reversibilities)
    print('median = %.1f' % median)

    pylab.figure()
    pylab.hold(True)
    plotting.cdf(reversibilities, 'all reactions', 'r', show_median=True)
    pylab.show()
    def Report(self, est, title):
        """Write a residual report for the estimates `est` to the HTML writer.

        The report consists of the title, a CDF figure of the absolute
        residuals |self.b - est| (restricted to entries where `est` is
        finite), and a toggleable table with one row per column of
        `self.b`, sorted by descending absolute error.

        est   -- estimated values, indexed as est[0, i] like self.b
        title -- heading written above the figure
        """
        self.html_writer.write('</br><b>%s</b><br>\n' % title)

        # Only entries with a finite estimate take part in the residual CDF.
        is_finite = np.isfinite(est)
        abs_resid = abs(self.b[is_finite] - est[is_finite])

        fig = plt.figure(figsize=(5, 5), dpi=60)
        cdf(list(abs_resid.flat), figure=fig)
        plt.title("RMSE = %.1f, N = %d" %
                  (rms_flat(abs_resid.flat), abs_resid.shape[1]))
        plt.xlabel(
            r"$|\Delta_r G^{'\circ} obs - \Delta_r G^{'\circ} est|$ [kJ/mol]")
        plt.ylabel(r"CDF")
        self.html_writer.embed_matplotlib_figure(fig)

        table_rows = []
        for col in range(self.b.shape[1]):
            entry = {
                'row': col,
                'type': self.obs_types[col],
                'reaction': UnifiedGroupContribution.row2hypertext(
                    self.S[:, col], self.cids),
                'anchored': self.anchored[0, col],
                'obs': self.b[0, col],
                'est': est[0, col],
            }
            # A non-finite estimate has no meaningful error; it is reported
            # as 0, which places it among the smallest errors after sorting.
            if np.isfinite(entry['est']):
                entry['|err|'] = abs(entry['obs'] - entry['est'])
            else:
                entry['|err|'] = 0
            table_rows.append(entry)

        # Largest errors first.
        table_rows = sorted(table_rows,
                            key=lambda entry: entry['|err|'],
                            reverse=True)

        self.html_writer.insert_toggle(start_here=True, label="Show table")
        column_order = ['row', 'type', 'reaction', 'anchored', 'obs', 'est',
                        '|err|']
        self.html_writer.write_table(table_rows,
                                     headers=column_order,
                                     decimal=1)
        self.html_writer.div_end()
Exemplo n.º 5
0
    def PlotCDF(self):
        """Embed a CDF figure of ddG (min(dGc2 - dGc1)) for gene pairs.

        Gene pairs are read from the table named by GENE_PAIRS_TABLE_NAME
        and split by their maximal score: pairs with a positive score are
        plotted as "interacting", pairs with a zero (or missing) score as
        "non-interacting". A few hand-picked pairs are additionally marked
        with a labelled dashed vertical line.
        """
        # Pairs that get an annotated marker on the plot.
        special_pairs = {
            # malate dehydrogenase -> oxaloacetate -> citrate synthase
            ('eco:b3236', 'eco:b0720'): "mdh:gltA",
            # trpD -> chorismate -> trpE (two components of anthranilate
            # synthase)
            ('eco:b1263', 'eco:b1264'): "trpD:trpE",
        }

        sql = """
                SELECT gene1, gene2, min(dGc2 - dGc1), max(score)
                FROM %s
                WHERE dGc1 + dGc2 < 0
                AND dGc1 > 10
                GROUP BY gene1, gene2
                """ % (self.GENE_PAIRS_TABLE_NAME)

        points = []
        markers = []
        for gene1, gene2, ddG, score in self.db.Execute(sql):
            pair_label = special_pairs.get((gene1, gene2))
            if pair_label is not None:
                markers.append((pair_label, ddG))
            # A missing score is treated as 0.
            points.append([ddG, float(score or 0)])
        data = np.matrix(points)

        # Row indices of the scored and the unscored pairs.
        scores = data[:, 1]
        interacting = list(np.where(scores > 0)[0].flat)
        non_interacting = list(np.where(scores == 0)[0].flat)

        fig = plt.figure(figsize=(6, 6), dpi=90)
        cdf((data[non_interacting, 0]).flat,
            label="non-interacting (N = %d)" % len(non_interacting),
            style='r', figure=fig)
        cdf((data[interacting, 0]).flat,
            label="interacting (N = %d)" % len(interacting),
            style='g', figure=fig)

        for pair_label, ddG in markers:
            plt.plot([ddG, ddG], [0, 1], 'b--', figure=fig)
            plt.text(ddG, 0.1, pair_label)

        plt.xlim(-500, 500)
        plt.xlabel(r"$\Delta G'^c$ (2nd) - $\Delta G'^c$ (1st) [kJ/mol]")
        plt.ylabel(r"Cumulative Distribution Function")
        plt.legend(loc="upper left")

        self.html_writer.embed_matplotlib_figure(fig, width=400, height=400,
                                                 name='channeling_cdf')
Exemplo n.º 6
0
def compare_annotations(reaction_list, thermo, html_writer, cmap, xlim=1e9):
    """Compare annotated reaction directions with the reversibility index.

    For every reaction the reaction energy is predicted and the
    reversibility index (gamma) is computed. The results are written to
    `html_writer` as a toggleable table sorted by gamma, a list of counts,
    and a figure with one CDF of gamma per annotated direction
    ('<=>', '=>', '<=').

    reaction_list -- reactions providing PredictReactionEnergy, direction,
                     name and to_hypertext
    thermo        -- passed to PredictReactionEnergy and
                     CalculateReversability
    html_writer   -- destination for the table, counts and figure
    cmap          -- passed to CalculateReversability as concentration_map
    xlim          -- upper x-limit of the log-scaled plot; the lower limit
                     is its reciprocal

    Reactions raising MissingCompoundFormationEnergy or
    MissingReactionEnergy are logged and counted as misses.
    """
    html_writer.write('<h1>Compare reaction annotations to Reversibility Index</h1>\n')
    histogram = {}
    error_counts = {'hits': 0, 'misses': 0, 'no_gamma': 0}

    debug_dict_list = []
    for reaction in reaction_list:
        try:
            dG0 = reaction.PredictReactionEnergy(thermo)
        except (MissingCompoundFormationEnergy, MissingReactionEnergy) as e:
            logging.warning(str(e))
            error_counts['misses'] += 1
            continue

        gamma = CalculateReversability(reaction, thermo, concentration_map=cmap)
        if gamma is None:
            error_counts['no_gamma'] += 1
            continue

        error_counts['hits'] += 1
        histogram.setdefault(reaction.direction, []).append(gamma)
        debug_dict_list.append({'sortkey': gamma,
                                'Reaction Name': reaction.name,
                                'annotation': reaction.direction,
                                'KEGG Reaction': reaction.to_hypertext(),
                                'Rev. index': "%.3g" % gamma,
                                'dG0': "%.2f" % dG0})

    debug_dict_list.sort(key=lambda row: row['sortkey'])
    div_id = html_writer.insert_toggle()
    html_writer.div_start(div_id)
    html_writer.write_table(debug_dict_list, headers=['Rev. index',
        'dG0', 'Reaction Name', 'KEGG Reaction', 'annotation'])
    html_writer.div_end()
    html_writer.write('</br>\n')

    html_writer.write_ul(["Reactions with known dG0: %d" % error_counts['hits'],
                          "Reactions with unknown dG0: %d" % error_counts['misses'],
                          "Reactions with unknown gamma: %d" % error_counts['no_gamma']])

    # Plot one CDF of the reversibility index per annotated direction.
    fig = pylab.figure(figsize=(6, 6), dpi=90)
    pylab.hold(True)
    cdf_series = [('<=>', 'reversible', 'green'),
                  ('=>', 'forward only', 'red'),
                  ('<=', 'reverse only', 'orange')]
    for direction, description, color in cdf_series:
        # A direction without any reaction has no entry in the histogram;
        # skip it instead of failing with KeyError.
        gammas = histogram.get(direction)
        if not gammas:
            continue
        plotting.cdf(gammas,
                     label='%s (%d reactions)' % (description, len(gammas)),
                     style=color, figure=fig)

    pylab.xlabel(r'Reversability index - $\hat{\gamma}$')
    pylab.ylabel('Cumulative Distribution')
    pylab.xscale('log')
    # 1.0 forces true division: with an integer xlim, Python 2 would
    # otherwise yield a lower limit of 0, which is invalid on a log axis.
    pylab.xlim((1.0 / xlim, xlim))
    pylab.legend(loc='upper left')
    html_writer.embed_matplotlib_figure(fig, width=640, height=480, name='FEIST_CDF')
Exemplo n.º 7
0
def compare_reversibility_to_dG0(reaction_list, thermo, html_writer, cmap=None):
    """Compare the reversibility index with the equilibrium constant.

    For every reaction that can be balanced, K' = exp(-dG0 / (R * T)) and
    the log-scale reversibility index are computed. The results are written
    to `html_writer` as a toggleable table sorted by log(gamma), a log-log
    figure with reference lines for the 1:1, 1:2, 2:1 and 2:2
    stoichiometries and the percentage of reactions per regime, and a small
    CDF figure of exp(|log(gamma)|).

    reaction_list -- reactions providing Balance, PredictReactionEnergy,
                     sparse, name and to_hypertext
    thermo        -- provides T and c_mid; passed to PredictReactionEnergy
                     and CalculateReversability
    html_writer   -- destination for the table and the figures
    cmap          -- concentration map; compounds listed in it are not
                     counted as substrates/products. A falsy value is
                     replaced by GetEmptyConcentrationMap().

    Reactions raising KeggReactionNotBalancedException or OpenBabelError
    while balancing, and reactions with no counted substrates and products,
    are skipped entirely. Reactions with a NaN energy appear in the table
    with an error message only.
    """
    html_writer.write('<h1>Reversibility index vs. equilibrium constants</h1>\n')
    cmap = cmap or GetEmptyConcentrationMap()

    x_range = (1e-9, 1e9)
    y_range = (1e-9, 1e9)

    x_threshold = 1e3
    y_threshold = 1e3

    regime_counters = {}
    stoich_counters = {}
    # One [Keq, log_gamma, Krev, Grev] row per plotted reaction; collected in
    # a list and converted once instead of re-stacking an array per reaction.
    data_rows = []

    debug_dict_list = []
    for reaction in reaction_list:
        debug_dict = {'name': reaction.name,
                      'KEGG Reaction': reaction.to_hypertext()}

        try:
            reaction.Balance(balance_water=True, exception_if_unknown=True)
        except (KeggReactionNotBalancedException, OpenBabelError):
            continue

        dG0 = reaction.PredictReactionEnergy(thermo)
        if np.isnan(dG0):
            debug_dict['sortkey'] = 0
            debug_dict['error'] = "Cannot calculate Gibbs energy"
        else:
            Keq = pylab.exp(-dG0 / (R * thermo.T))

            # Total stoichiometric coefficients of substrates and products,
            # ignoring compounds that appear in the concentration map.
            n_s = -sum(x for cid, x in reaction.sparse.iteritems()
                       if (x < 0 and cid not in cmap))
            n_p = sum(x for cid, x in reaction.sparse.iteritems()
                      if (x > 0 and cid not in cmap))
            if (n_p + n_s) == 0:
                continue
            stoich_counters[n_s, n_p] = stoich_counters.get((n_s, n_p), 0) + 1

            log_gamma = CalculateReversability(reaction, thermo,
                                               concentration_map=cmap,
                                               logscale=True)

            Krev = _threshold_regime(Keq, 1.0 / x_threshold, x_threshold)
            Grev = _threshold_regime(log_gamma, -np.log(y_threshold),
                                     np.log(y_threshold))

            regime_counters[Krev, Grev] = regime_counters.get((Krev, Grev), 0) + 1
            data_rows.append([Keq, log_gamma, Krev, Grev])
            debug_dict['sortkey'] = log_gamma
            debug_dict['log(&gamma;)'] = "%.2e" % log_gamma
            debug_dict[thermodynamic_constants.symbol_dr_G0_prime] = dG0

        debug_dict_list.append(debug_dict)

    data_mat = np.array(data_rows) if data_rows else np.zeros((0, 4))

    debug_dict_list.sort(key=lambda row: row['sortkey'])
    div_id = html_writer.insert_toggle()
    html_writer.div_start(div_id)
    html_writer.write_table(debug_dict_list, headers=['log(&gamma;)',
        thermodynamic_constants.symbol_dr_G0_prime, 'name', 'KEGG Reaction',
        'error'])
    html_writer.div_end()
    html_writer.write('</br>\n')

    fig = pylab.figure(figsize=(6, 6), dpi=90)
    pylab.xlabel("$K'$", figure=fig)
    pylab.ylabel(r"$\hat{\gamma} = \left( K' / Q'' \right)^{2/N}$", figure=fig)

    # Shade the four off-diagonal regimes, i.e. those where exactly one of
    # K' and gamma lies beyond its threshold.
    shading_color = (1.0, 0.7, 0.7)
    pylab.axvspan(x_range[0], 1.0/x_threshold, ymin=1.0/3.0, ymax=2.0/3.0, color=shading_color)
    pylab.axvspan(x_threshold, x_range[1], ymin=1.0/3.0, ymax=2.0/3.0, color=shading_color)
    pylab.axhspan(y_range[0], 1.0/y_threshold, xmin=1.0/3.0, xmax=2.0/3.0, color=shading_color)
    pylab.axhspan(y_threshold, y_range[1], xmin=1.0/3.0, xmax=2.0/3.0, color=shading_color)

    # draw the lines for the specific reaction stoichiometries
    stoichiometries = [(1, 1, '-', '#e49b1c'),
                       (1, 2, '--', '#1ce463'),
                       (2, 1, '--', '#1d1de3'),
                       (2, 2, '-', '#e41c63')]
    fig.hold(True)
    total_counted = sum(stoich_counters.values())
    for n_s, n_p, style, color in stoichiometries:
        # With no counted reaction every share is reported as 0% instead of
        # dividing by zero.
        if total_counted:
            percent = 100.0 * stoich_counters.get((n_s, n_p), 0) / total_counted
        else:
            percent = 0.0
        gamma = [(keq_bound / thermo.c_mid**(n_p - n_s)) ** (2.0/(n_p + n_s))
                 for keq_bound in x_range]
        pylab.plot(x_range, gamma, style, color=color, linewidth=3,
                   figure=fig, label="%d:%d (%d%%)" % (n_s, n_p, np.round(percent)))
    pylab.legend(loc='upper left')

    # Print the share of reactions per regime at threshold ** (2 * regime),
    # i.e. at 1e-6, 1 or 1e6 on each axis.
    n_points = data_mat.shape[0]
    for (Krev, Grev), count in regime_counters.items():
        x_pos = x_threshold ** (Krev*2)
        y_pos = y_threshold ** (Grev*2)
        pylab.text(x_pos, y_pos, "%.1f%%" % (100.0 * count / n_points),
                   horizontalalignment='center',
                   verticalalignment='center')

    pylab.xscale('log', figure=fig)
    pylab.yscale('log', figure=fig)
    pylab.ylim(y_range)
    pylab.xlim(x_range)
    pylab.xticks([1e-9, 1e-6, 1e-3, 1, 1e3, 1e6, 1e9])
    pylab.yticks([1e-9, 1e-6, 1e-3, 1, 1e3, 1e6, 1e9])
    html_writer.embed_matplotlib_figure(fig, width=400, height=400, name="reversibility_vs_keq")

    # Small CDF figure; it carries an in-plot text label instead of axis
    # labels.
    fig = pylab.figure(figsize=(2, 2), dpi=90)
    abs_gamma = np.exp(abs(data_mat[:, 1]))
    plotting.cdf(abs_gamma, label='gamma', figure=fig)
    pylab.plot([x_threshold, x_threshold], [0, 1], 'k--', figure=fig)
    pylab.xscale('log', figure=fig)
    pylab.text(1e6, 0.4, r'CDF($\hat{\gamma}$)', horizontalalignment='center',
               verticalalignment='center')
    pylab.xlim((1, 1e9))
    pylab.xticks([1, 1e3, 1e6, 1e9])
    pylab.yticks([0, 0.5, 1.0])
    pylab.tight_layout()
    html_writer.embed_matplotlib_figure(fig, width=125, height=125, name="reversibility_cdf")


def _threshold_regime(value, low, high):
    """Return -1 if value < low, 0 if value < high, and 1 otherwise."""
    if value < low:
        return -1
    if value < high:
        return 0
    return 1