def main():
    """Write the formation-energy verification report.

    For every estimator name in the (single-element) list below, the matching
    estimator from LoadAllEstimators() is handed to Nist.verify_formation,
    which reports into '../res/formation_resolve.html'. The HTML writer is
    closed once all estimators have been processed.
    """
    report = HtmlWriter("../res/formation_resolve.html")
    all_estimators = LoadAllEstimators()
    estimator_names = ['alberty']
    for estimator_name in estimator_names:
        estimator = all_estimators[estimator_name]
        verifier = Nist()
        verifier.verify_formation(html_writer=report,
                                  thermodynamics=estimator,
                                  name=estimator_name)
    report.close()
def test_single_modules(mids):
    """Run the thermodynamic pathway analysis on each module ID in `mids`.

    A GroupContribution object is initialised on '../res/gibbs.sqlite' and,
    for every module ID, a heading is written to
    '../res/thermodynamic_module_analysis.html' before the module returned by
    `gc.kegg.get_module` is passed to `thermodynamic_pathway_analysis`.

    Args:
        mids: iterable of integer module IDs (formatted with '%05d').
    """
    from pygibbs.groups import GroupContribution

    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    report = HtmlWriter("../res/thermodynamic_module_analysis.html")
    group_contribution = GroupContribution(gibbs_db, report)
    group_contribution.init()

    for module_id in mids:
        report.write("<h2>M%05d</h2>\n" % module_id)
        module = group_contribution.kegg.get_module(module_id)
        S, rids, fluxes, cids = module
        thermodynamic_pathway_analysis(S, rids, fluxes, cids,
                                       group_contribution, report)
def meta_regulated_rxns_cumul_plots(org, id, thermo):
    """Plot the regulation/reversibility histogram for one MetaCyc organism.

    The histogram is computed by
    `calculate_metacyc_regulation_reversibility_histogram` under fixed
    conditions (c_mid = 1e-4, pH 7.0, pMg 14.0, I 0.25, T 298.15), embedded
    in '../res/<org><id>_regulation.html' and saved as
    '../res/<org><id>_regulation.png'.

    Args:
        org: organism identifier, passed to MetaCyc and used in file names.
        id: suffix appended to `org` for file names and the histogram id.
        thermo: thermodynamics object forwarded to the histogram calculation.
    """
    label = org + id
    output_stem = '../res/' + org + id

    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    report = HtmlWriter(output_stem + '_regulation.html')
    metacyc = MetaCyc(org, gibbs_db)

    mid_concentration = 1e-4
    concentration_map = GetConcentrationMap()
    pH = 7.0
    pMg = 14.0
    I = 0.25
    T = 298.15

    histogram = calculate_metacyc_regulation_reversibility_histogram(
        thermo, mid_concentration, pH, pMg, I, T, metacyc,
        cmap=concentration_map, id=label)

    report.write('<h1>Constrained co-factors</h1>')
    figure = plot_histogram(
        histogram, report,
        title=('%s Reactions: With constraints on co-factors' % org),
        xlim=20, min_to_show=5, xmin=0, legend_loc='lower right')
    report.embed_matplotlib_figure(figure, width=640, height=480)
    pylab.savefig(output_stem + '_regulation.png', figure=figure, format='png')
def __init__(self, html_fname):
    """Open the channeling database and HTML report, and set the table names.

    Args:
        html_fname: path of the HTML file handed to HtmlWriter.
    """
    self.serv = None
    self.db = SqliteDatabase('channeling/channeling.sqlite', 'w')
    self.html_writer = HtmlWriter(html_fname)

    # Names of the tables used in the channeling database.
    # Compound / reaction tables:
    self.COMPOUND_TABLE_NAME = 'kegg_compounds'
    self.REACTION_TABLE_NAME = 'kegg_reactions'
    self.EQUATION_TABLE_NAME = 'kegg_equations'
    self.STOICHIOMETRY_TABLE_NAME = 'kegg_stoichiometry'
    self.COFACTOR_TABLE_NAME = 'kegg_cofactors'
    # Gene tables:
    self.GENE_TABLE_NAME = 'kegg_genes'
    self.GENE_REACTION_TABLE_NAME = 'kegg_genes_to_reactions'
    self.GENE_PAIRS_TABLE_NAME = 'kegg_gene_pairs'
    # Energy tables:
    self.GIBBS_ENERGY_TABLE_NAME = 'kegg_gibbs_energies'
    self.GENE_ENERGY_TABLE_NAME = 'kegg_gene_energies'
    # NOTE: the attribute name below is misspelled ("INTERATCTIONS"); it is
    # kept as-is because code outside this method may refer to it.
    self.FUNCTIONAL_INTERATCTIONS_TABLE = 'parkinson_functional_interactions'
def compare_charges():
    """Write an HTML table comparing the 'hatzi' and 'milo' estimators.

    For every compound ID known to either estimator, the most abundant
    pseudoisomer at (default_pH, I=0, pMg=14, default_T) is looked up and its
    nH, charge, nMg, dG0 and dG0 tag are stored per estimator. The table in
    '../res/groups_report.html' shows the compound link, name, both charges
    and the error read from the 'gc_errors' table.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    report = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    pH, I, pMg, T = default_pH, 0, 14, default_T

    # Map compound ID -> error, as stored in the 'gc_errors' table.
    cid2error = {}
    for error_row in gibbs_db.DictReader("gc_errors"):
        cid2error[int(error_row['cid'])] = error_row['error']

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        gibbs_db, 'gc_pseudoisomers', name='Milo Group Contribution')

    all_cids = set(lsum([est.get_all_cids() for est in estimators.values()]))

    table_rows = []
    for cid in all_cids:
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            name = "unknown"
            link = ""

        table_row = {}
        table_row['cid'] = '<a href="%s">C%05d</a>' % (link, cid)
        table_row['name'] = name
        table_row['error'] = cid2error.get(cid, None)

        for est_name, est in estimators.items():
            try:
                pmap = est.cid2PseudoisomerMap(cid)
                most_abundant = pmap.GetMostAbundantPseudoisomer(pH, I, pMg, T)
                dG0, dG0_tag, nH, z, nMg = most_abundant
            except MissingCompoundFormationEnergy:
                # No formation energy for this compound: leave the cells empty.
                dG0 = dG0_tag = nH = z = nMg = ""
            table_row['nH_' + est_name] = nH
            table_row['charge_' + est_name] = z
            table_row['nMg_' + est_name] = nMg
            table_row['dG0_' + est_name] = dG0
            table_row['dG0_tag_' + est_name] = dG0_tag

        table_rows.append(table_row)

    report.write_table(
        table_rows,
        headers=['cid', 'name', 'charge_hatzi', 'charge_milo', 'error'])
    report.close()
def AnalyzeConcentrationGradient(prefix, thermo, csv_output_fname, cid=13):  # default compound is PPi
    """Plot the OBD of every pathway in a KEGG file against one concentration.

    Pathways are read from '../data/thermodynamics/<prefix>.txt'. For each
    concentration on a fixed logarithmic grid, the concentration of compound
    `cid` is pinned (lower bound == upper bound) and `pareto` is re-run; the
    second column of its result is plotted and, optionally, exported as CSV.
    The figure is embedded in '../res/<prefix>.html'.

    Args:
        prefix: base name of the input KEGG file and of the output HTML file.
        thermo: thermodynamics object; `thermo.kegg`, `thermo.I` and
            `thermo.T` are read here and the object is passed to `pareto`.
        csv_output_fname: path of a CSV file that receives one row per
            (pH, concentration) pair, or a false value to skip the export.
        cid: KEGG compound ID whose concentration is scanned.
    """
    compound_name = thermo.kegg.cid2name(cid)
    kegg_file = ParsedKeggFile.FromKeggFile('../data/thermodynamics/%s.txt' % prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)
    null_html_writer = NullHtmlWriter()

    # The CSV export is optional; keep the file handle so it can be closed.
    csv_file = None
    csv_output = None
    if csv_output_fname:
        csv_file = open(csv_output_fname, 'w')
        csv_output = csv.writer(csv_file)

    pH_vec = np.array([7])  # this needs to be fixed so that the txt file will set the pH
    # logarithmic scale: 10**-2 M down to 10**-6 M in steps of 0.25 decades
    conc_vec = 10**(-np.arange(2, 6.0001, 0.25))
    override_bounds = {}

    fig = plt.figure(figsize=(6, 6), dpi=90)
    legend = []
    try:
        if csv_output is not None:
            csv_output.writerow(['pH', 'I', 'T', '[C%05d]' % cid] + kegg_file.entries())

        for pH in pH_vec.flat:
            obd_vec = []
            for conc in conc_vec.flat:
                override_bounds[cid] = (conc, conc)
                logging.info("pH = %g, [%s] = %.1e M" % (pH, compound_name, conc))
                data, labels = pareto(kegg_file, null_html_writer, thermo,
                                      pH=pH, section_prefix="", balance_water=True,
                                      override_bounds=override_bounds)
                obd_vec.append(data[:, 1])
                # Previously this was called even when no CSV file was
                # requested, which failed on the None writer.
                if csv_output is not None:
                    csv_output.writerow([pH, thermo.I, thermo.T, conc] + list(data[:, 1].flat))
            obd_mat = np.matrix(obd_vec)  # one row per concentration
            plt.plot(conc_vec, obd_mat, '.-', figure=fig)
            legend += ['%s, pH = %g' % (l, pH) for l in labels]
    finally:
        if csv_file is not None:
            csv_file.close()

    plt.title("ODB vs. [%s] (I = %gM, T = %gK)" % (compound_name, thermo.I, thermo.T), figure=fig)
    plt.xscale('log')
    plt.xlabel('Concentration of %s [M]' % thermo.kegg.cid2name(cid), figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(legend)
    # The closing tag used to be </h1>, which did not match the opening <h2>.
    html_writer.write('<h2 id="figure_%s">Summary figure</h2>\n' % prefix)
    html_writer.embed_matplotlib_figure(fig, name=prefix)
    html_writer.close()
def AnalyzePareto(pathway_file, output_prefix, thermo, pH=None):
    """Run the OBD analysis on all pathways and plot OBD vs. average dG.

    The per-reaction results of every pathway are written to one sheet of
    '<output_prefix>.xls', and a scatter plot of the OBD against
    -(max total dG) / (sum of fluxes) is embedded in '<output_prefix>.html'.

    Args:
        pathway_file: input passed to KeggFile2PathwayList.
        output_prefix: path prefix of the .html and .xls output files.
        thermo: thermodynamics object forwarded to GetAllOBDs.
        pH: pH forwarded to GetAllOBDs (None is passed through unchanged).
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)
    xls_workbook = Workbook()

    logging.info("running OBD analysis for all pathways")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=pH, section_prefix="pareto", balance_water=True,
                      override_bounds={})

    # One sheet per pathway: a header row followed by one row per reaction.
    column_titles = ["reaction", "formula", "flux", "delta_r G'", "shadow price"]
    for d in data:
        sheet = xls_workbook.add_sheet(d['entry'])
        for col, title in enumerate(column_titles):
            sheet.write(0, col, title)
        for r, rid in enumerate(d['rids']):
            sheet.write(r + 1, 0, rid)
            sheet.write(r + 1, 1, d['formulas'][r])
            sheet.write(r + 1, 2, d['fluxes'][0, r])
            sheet.write(r + 1, 3, d['dG_r_prime'][0, r])
            sheet.write(r + 1, 4, d['reaction prices'][r, 0])
    xls_workbook.save('%s.xls' % output_prefix)

    obds = []
    minus_avg_tg = []
    for d in data:
        obds.append(d['OBD'])
        if d['sum of fluxes']:
            minus_avg_tg.append(-d['max total dG'] / d['sum of fluxes'])
        else:
            # A zero (or otherwise false) flux sum would divide by zero.
            minus_avg_tg.append(0)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    plt.plot(minus_avg_tg, obds, 'o', figure=fig)
    # Diagonal reference line; max() of an empty list raises ValueError, so
    # skip the line when there are no pathways.
    if minus_avg_tg:
        diagonal_end = max(minus_avg_tg)
        plt.plot([0, diagonal_end], [0, diagonal_end], '--g')
    for i, name in enumerate(pathway_names):
        plt.text(minus_avg_tg[i], obds[i], name)
    # Raw string: '\D' is not a valid escape sequence in a normal literal.
    plt.title(r'OBD vs. Average $\Delta_r G$')
    plt.ylim(ymin=0)
    plt.xlim(xmin=0)
    plt.xlabel(r'- Average $\Delta_r G$ [kJ/mol]')
    plt.ylabel(r'Optimized Distributed Bottleneck [kJ/mol]')
    # The closing tag used to be </h1>, which did not match the opening <h2>.
    html_writer.write('<h2>Pareto figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
def AnalyzePHGradient(pathway_file, output_prefix, thermo, conc_range):
    """Compute and plot the OBD of every pathway as a function of pH.

    Outputs:
        '<output_prefix>.html': report of a first test run plus the summary figure.
        '<output_prefix>.csv': one row per pH with the OBD of each pathway.
        '<output_prefix>/<entry>.csv': one file per pathway with the reaction
            prices at each pH.

    Args:
        pathway_file: input passed to KeggFile2PathwayList.
        output_prefix: path prefix of the output files and directory.
        thermo: thermodynamics object forwarded to GetAllOBDs.
        conc_range: range specification parsed by ParseConcentrationRange;
            despite its name, the parsed values are used as pH values here.
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default pH")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=None, section_prefix="test", balance_water=True,
                      override_bounds={})

    # Every CSV file opened below is tracked so that it is closed (and hence
    # flushed) even if one of the optimisation runs raises.
    open_files = []
    try:
        summary_file = open('%s.csv' % output_prefix, 'w')
        open_files.append(summary_file)
        csv_output = csv.writer(summary_file)
        csv_output.writerow(['pH'] + pathway_names)

        util._mkdir(output_prefix)
        shadow_csvs = {}
        for d in data:
            path = '%s/%s.csv' % (output_prefix, d['entry'])
            shadow_file = open(path, 'w')
            open_files.append(shadow_file)
            shadow_csvs[d['entry']] = csv.writer(shadow_file)
            shadow_csvs[d['entry']].writerow(['pH'] + d['rids'])

        pH_vec = ParseConcentrationRange(conc_range)
        obd_mat = []
        for pH in pH_vec.flat:
            logging.info("pH = %.1f" % (pH))
            data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                              pH=pH, section_prefix="", balance_water=True,
                              override_bounds={})
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            csv_output.writerow([data[0]['pH']] + obds)

            for d in data:
                # Entries whose 'reaction prices' is a plain float are
                # skipped. The exact-type test is kept on purpose: an
                # isinstance() check would also skip float subclasses.
                if type(d['reaction prices']) != types.FloatType:
                    prices = list(d['reaction prices'].flat)
                    shadow_csvs[d['entry']].writerow([pH] + prices)
    finally:
        for open_file in open_files:
            open_file.close()

    obd_mat = np.matrix(obd_mat)  # rows are pH values and columns are pathways
    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(pH_vec, obd_mat[:, i], '-', color=colormap[name], figure=fig)
    plt.title("OBD vs. pH", figure=fig)
    plt.ylim(0, np.max(obd_mat.flat))
    plt.xlabel('pH', figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # The closing tag used to be </h1>, which did not match the opening <h2>.
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
def example_reductive(thermo):
    """Search for pathways whose net reaction is "3 C00011 => C00022".

    Uses the GLOBAL optimization method, at most 15 reactions and a maximal
    dG of 0.0; co-factor and redox reactions are added before the search.
    The net reaction is used as written (it is not balanced).

    Args:
        thermo: thermodynamics object handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(db=gibbs_db,
                            public_db=public_db,
                            html_writer=report,
                            thermo=thermo,
                            max_solutions=None,
                            max_reactions=15,
                            maximal_dG=0.0,
                            thermodynamic_method=OptimizationMethods.GLOBAL,
                            update_file=None)
    add_cofactor_reactions(pathologic)
    add_redox_reactions(pathologic)

    net_reaction = Reaction.FromFormula("3 C00011 => C00022")
    pathologic.find_path("reductive", net_reaction)
def example_oxidative(thermo):
    """Search for pathways whose net reaction is "C00022 => 3 C00011".

    Uses the MAX_TOTAL optimization method, at most 10 reactions and a
    maximal dG of 0; co-factor reactions and redox reactions (with
    NAD_only=False) are added before the search. The net reaction is used as
    written (it is not balanced).

    Args:
        thermo: thermodynamics object handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(db=gibbs_db,
                            public_db=public_db,
                            html_writer=report,
                            thermo=thermo,
                            max_solutions=None,
                            max_reactions=10,
                            maximal_dG=0,
                            thermodynamic_method=OptimizationMethods.MAX_TOTAL,
                            update_file=None)
    add_cofactor_reactions(pathologic)
    add_redox_reactions(pathologic, NAD_only=False)

    net_reaction = Reaction.FromFormula("C00022 => 3 C00011")
    pathologic.find_path("oxidative", net_reaction)
def runBeta2Alpha(thermo, reactionList):
    """Search for pathways whose net reaction is "C00099 => C01401".

    Besides the co-factor and redox reactions, every formula in
    `reactionList` is added as an extra reaction named
    "Auto generate #<hash of the formula>".

    Args:
        thermo: thermodynamics object handed to Pathologic.
        reactionList: iterable of reaction formula strings to add.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/Beta2Alpha.html')
    pathologic = Pathologic(db=gibbs_db,
                            public_db=public_db,
                            html_writer=report,
                            thermo=thermo,
                            max_solutions=None,
                            max_reactions=15,
                            maximal_dG=0.0,
                            thermodynamic_method=OptimizationMethods.GLOBAL,
                            update_file=None)
    add_cofactor_reactions(pathologic)
    add_redox_reactions(pathologic)

    for formula in reactionList:
        reaction_name = "Auto generate #%s" % hash(formula)
        extra_reaction = Reaction.FromFormula(formula, reaction_name)
        pathologic.add_reaction(extra_reaction)

    net_reaction = Reaction.FromFormula("C00099 => C01401")
    pathologic.find_path("Beta2Alpha", net_reaction)
def AnalyzePareto(pathway_file, output_prefix, thermo, pH=None):
    """Run the OBD analysis on all pathways and plot OBD vs. average dG.

    The per-reaction results of every pathway are written to one sheet of
    '<output_prefix>.xls', and a scatter plot of the OBD against
    -(max total dG) / (sum of fluxes) is embedded in '<output_prefix>.html'.

    Args:
        pathway_file: input passed to KeggFile2PathwayList.
        output_prefix: path prefix of the .html and .xls output files.
        thermo: thermodynamics object forwarded to GetAllOBDs.
        pH: pH forwarded to GetAllOBDs (None is passed through unchanged).
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)
    xls_workbook = Workbook()

    logging.info("running OBD analysis for all pathways")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=pH, section_prefix="pareto", balance_water=True,
                      override_bounds={})

    # One sheet per pathway: a header row followed by one row per reaction.
    column_titles = ["reaction", "formula", "flux", "delta_r G'", "shadow price"]
    for d in data:
        sheet = xls_workbook.add_sheet(d['entry'])
        for col, title in enumerate(column_titles):
            sheet.write(0, col, title)
        for r, rid in enumerate(d['rids']):
            sheet.write(r + 1, 0, rid)
            sheet.write(r + 1, 1, d['formulas'][r])
            sheet.write(r + 1, 2, d['fluxes'][0, r])
            sheet.write(r + 1, 3, d['dG_r_prime'][0, r])
            sheet.write(r + 1, 4, d['reaction prices'][r, 0])
    xls_workbook.save('%s.xls' % output_prefix)

    obds = []
    minus_avg_tg = []
    for d in data:
        obds.append(d['OBD'])
        if d['sum of fluxes']:
            minus_avg_tg.append(-d['max total dG'] / d['sum of fluxes'])
        else:
            # A zero (or otherwise false) flux sum would divide by zero.
            minus_avg_tg.append(0)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    plt.plot(minus_avg_tg, obds, 'o', figure=fig)
    # Diagonal reference line; max() of an empty list raises ValueError, so
    # skip the line when there are no pathways.
    if minus_avg_tg:
        diagonal_end = max(minus_avg_tg)
        plt.plot([0, diagonal_end], [0, diagonal_end], '--g')
    for i, name in enumerate(pathway_names):
        plt.text(minus_avg_tg[i], obds[i], name)
    # Raw string: '\D' is not a valid escape sequence in a normal literal.
    plt.title(r'OBD vs. Average $\Delta_r G$')
    plt.ylim(ymin=0)
    plt.xlim(xmin=0)
    plt.xlabel(r'- Average $\Delta_r G$ [kJ/mol]')
    plt.ylabel(r'Optimized Distributed Bottleneck [kJ/mol]')
    # The closing tag used to be </h1>, which did not match the opening <h2>.
    html_writer.write('<h2>Pareto figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
def example_lower_glycolysis(thermo):
    """Search for pathways whose net reaction is "C00118 => C00022".

    Uses the GLOBAL optimization method, at most 8 reactions and a maximal
    dG of 0.0; co-factor and redox reactions are added before the search.
    The experiment is named "GAP => PYR" and the net reaction is used as
    written (it is not balanced).

    Args:
        thermo: thermodynamics object handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(db=gibbs_db,
                            public_db=public_db,
                            html_writer=report,
                            thermo=thermo,
                            max_solutions=None,
                            max_reactions=8,
                            maximal_dG=0.0,
                            thermodynamic_method=OptimizationMethods.GLOBAL,
                            update_file=None)
    add_cofactor_reactions(pathologic)
    add_redox_reactions(pathologic)

    net_reaction = Reaction.FromFormula("C00118 => C00022")
    pathologic.find_path("GAP => PYR", net_reaction)
def example_rpi_bypass(thermo):
    """Search for pathways "C00117 => C01182" with two reactions removed.

    Reactions 1056 and 1081 are deleted before the search, co-factor
    reactions are added (redox reactions are not), and at most 10 reactions
    are allowed. The net reaction is used as written (it is not balanced).

    Args:
        thermo: thermodynamics object handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(db=gibbs_db,
                            public_db=public_db,
                            html_writer=report,
                            thermo=thermo,
                            max_solutions=None,
                            max_reactions=10,
                            maximal_dG=0.0,
                            thermodynamic_method=OptimizationMethods.GLOBAL,
                            update_file=None)
    add_cofactor_reactions(pathologic)

    banned_rids = (
        1056,  # ribose-phosphate isomerase
        1081,  # ribose isomerase
    )
    for rid in banned_rids:
        pathologic.delete_reaction(rid)

    net_reaction = Reaction.FromFormula("C00117 => C01182")
    pathologic.find_path("rpi_bypass", net_reaction)
def example_three_acetate(thermo):
    """Search for pathways "C00031 => 3 C00033" with two reactions removed.

    Reactions 761 and 1621 are deleted before the search, co-factor reactions
    are added (redox reactions are not), and at most 20 reactions are
    allowed. The net reaction is used as written (it is not balanced).

    Args:
        thermo: thermodynamics object handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(db=gibbs_db,
                            public_db=public_db,
                            html_writer=report,
                            thermo=thermo,
                            max_solutions=None,
                            max_reactions=20,
                            maximal_dG=0.0,
                            thermodynamic_method=OptimizationMethods.GLOBAL,
                            update_file=None)
    add_cofactor_reactions(pathologic)

    banned_rids = (
        761,   # F6P + Pi = E4P + acetyl-P
        1621,  # X5P + Pi = GA3P + acetyl-P
    )
    for rid in banned_rids:
        pathologic.delete_reaction(rid)

    net_reaction = Reaction.FromFormula("C00031 => 3 C00033")
    pathologic.find_path("three_acetate", net_reaction)
def compare_charges():
    """Write an HTML table comparing the 'hatzi' and 'milo' estimators.

    For every compound ID known to either estimator, the most abundant
    pseudoisomer at (default_pH, I=0, pMg=14, default_T) is looked up and its
    nH, charge, nMg, dG0 and dG0 tag are stored per estimator. The table in
    '../res/groups_report.html' shows the compound link, name, both charges
    and the error read from the 'gc_errors' table.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    report = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    pH, I, pMg, T = default_pH, 0, 14, default_T

    # Map compound ID -> error, as stored in the 'gc_errors' table.
    cid2error = {}
    for error_row in gibbs_db.DictReader("gc_errors"):
        cid2error[int(error_row['cid'])] = error_row['error']

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        gibbs_db, 'gc_pseudoisomers', name='Milo Group Contribution')

    all_cids = set(lsum([est.get_all_cids() for est in estimators.values()]))

    table_rows = []
    for cid in all_cids:
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            name = "unknown"
            link = ""

        table_row = {}
        table_row['cid'] = '<a href="%s">C%05d</a>' % (link, cid)
        table_row['name'] = name
        table_row['error'] = cid2error.get(cid, None)

        for est_name, est in estimators.items():
            try:
                pmap = est.cid2PseudoisomerMap(cid)
                most_abundant = pmap.GetMostAbundantPseudoisomer(pH, I, pMg, T)
                dG0, dG0_tag, nH, z, nMg = most_abundant
            except MissingCompoundFormationEnergy:
                # No formation energy for this compound: leave the cells empty.
                dG0 = dG0_tag = nH = z = nMg = ""
            table_row['nH_' + est_name] = nH
            table_row['charge_' + est_name] = z
            table_row['nMg_' + est_name] = nMg
            table_row['dG0_' + est_name] = dG0
            table_row['dG0_tag_' + est_name] = dG0_tag

        table_rows.append(table_row)

    report.write_table(
        table_rows,
        headers=['cid', 'name', 'charge_hatzi', 'charge_milo', 'error'])
    report.close()
def example_glycolysis(thermo):
    """Search for pathways "C00031 + 3 C00008 => 2 C00186 + 3 C00002".

    Co-factor reactions are added with free_ATP_hydrolysis=False and
    `ban_toxic_compounds` is applied before the search; at most 15 reactions
    are allowed. The net reaction is used as written (it is not balanced).

    Args:
        thermo: thermodynamics object handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(db=gibbs_db,
                            public_db=public_db,
                            html_writer=report,
                            thermo=thermo,
                            max_solutions=None,
                            max_reactions=15,
                            maximal_dG=0.0,
                            thermodynamic_method=OptimizationMethods.GLOBAL,
                            update_file=None)
    add_cofactor_reactions(pathologic, free_ATP_hydrolysis=False)
    ban_toxic_compounds(pathologic)

    net_formula = "C00031 + 3 C00008 => 2 C00186 + 3 C00002"
    net_reaction = Reaction.FromFormula(net_formula)
    pathologic.find_path("GLC => 2 LAC, 3 ATP, No methylglyoxal", net_reaction)
def main():
    """Analyze every KEGG module and write a table sorted by OBD.

    The thermodynamic source and the configuration file come from the command
    line. Conditions (pH, I, T, pMg), the concentration range and the bounds
    are taken from the 'CONFIGURATION' entry of the configuration file, or
    from its first entry when that one is missing. Modules for which
    AnalyzeKeggModule raises KeyError are left out of the table.

    Raises:
        ValueError: if the configuration file has no entries.
    """
    estimators = LoadAllEstimators()
    args = MakeArgParser(estimators).parse_args()
    thermo = estimators[args.thermodynamics_source]

    kegg_file = ParsedKeggFile.FromKeggFile(args.config_fname)
    entries = kegg_file.entries()
    if not entries:
        raise ValueError('No entries in configuration file')

    if 'CONFIGURATION' in entries:
        entry = 'CONFIGURATION'
    else:
        logging.warning(
            'Configuration file does not contain the entry "CONFIGURATION". '
            'Using the first entry by default: %s' % entries[0])
        entry = entries[0]

    pathway_data = PathwayData.FromFieldMap(kegg_file[entry])
    thermo.SetConditions(pH=pathway_data.pH, I=pathway_data.I,
                         T=pathway_data.T, pMg=pathway_data.pMg)
    thermo.c_range = pathway_data.c_range
    bounds = pathway_data.GetBounds()

    report = HtmlWriter(args.output_prefix + ".html")
    table_rows = []
    headers = ['Module', 'Name', 'OBD [kJ/mol]', 'Length']
    kegg = Kegg.getInstance()

    for mid in kegg.get_all_mids():
        # The heading is written before the analysis, so modules that are
        # skipped below still get a heading in the report.
        report.write('<h2 id=M%05d>M%05d: %s</h2>' %
                     (mid, mid, kegg.get_module_name(mid)))
        try:
            row = AnalyzeKeggModule(thermo, mid, bounds, report)
        except KeyError:
            continue
        row['Module'] = '<a href="#M%05d">M%05d</a>' % (mid, mid)
        row['Name'] = kegg.get_module_name(mid)
        table_rows.append(row)

    table_rows.sort(key=lambda row: row['OBD [kJ/mol]'])
    report.write_table(table_rows, headers, decimal=1)
    report.close()
def example_more_than_two_pyruvate(thermo):
    """Search for pathways for a balanced multi-pyruvate net reaction.

    The net reaction
    "3 C00031 + 3 C00011 + C00003 => 7 C00022 + 3 C00001 + C00004" is
    balanced with `Reaction.Balance` before the search. No co-factor or redox
    reactions are added and no reactions are deleted; at most 20 reactions
    are allowed.

    Args:
        thermo: thermodynamics object handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(db=gibbs_db,
                            public_db=public_db,
                            html_writer=report,
                            thermo=thermo,
                            max_solutions=None,
                            max_reactions=20,
                            maximal_dG=0.0,
                            thermodynamic_method=OptimizationMethods.GLOBAL,
                            update_file=None)

    net_formula = "3 C00031 + 3 C00011 + C00003 => 7 C00022 + 3 C00001 + C00004"
    net_reaction = Reaction.FromFormula(net_formula)
    net_reaction.Balance()
    pathologic.find_path("more_than_two_pyr", net_reaction)
def example_glucose_to_ethanol_and_formate(thermo):
    """Search for pathways for "2 C00031 + 3 C00001 => 6 C00058 + 3 C00469".

    The net reaction is balanced with `Reaction.Balance` before the search.
    No co-factor or redox reactions are added and no reactions are deleted;
    at most 15 reactions are allowed.

    Args:
        thermo: thermodynamics object handed to Pathologic.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(db=gibbs_db,
                            public_db=public_db,
                            html_writer=report,
                            thermo=thermo,
                            max_solutions=None,
                            max_reactions=15,
                            maximal_dG=0.0,
                            thermodynamic_method=OptimizationMethods.GLOBAL,
                            update_file=None)

    net_formula = "2 C00031 + 3 C00001 => 6 C00058 + 3 C00469"
    net_reaction = Reaction.FromFormula(net_formula)
    net_reaction.Balance()
    pathologic.find_path("glucose_to_ethanol_and_formate", net_reaction)
def AnalyzeConcentrationGradient(pathway_file, output_prefix, thermo, conc_range, cids=(), pH=None):
    """Compute and plot the OBD of every pathway against a scanned concentration.

    For every concentration in the scan, the concentrations of all compounds
    in `cids` are pinned to that value (lower bound == upper bound) and the
    OBDs of all pathways are recomputed.

    Outputs:
        '<output_prefix>.html': report of a first test run plus the summary figure.
        '<output_prefix>.csv': one row per concentration with the OBD of each pathway.

    Args:
        pathway_file: input passed to KeggFile2PathwayList.
        output_prefix: path prefix of the output files.
        thermo: thermodynamics object; `thermo.kegg` is used for compound names.
        conc_range: range specification parsed by ParseConcentrationRange; the
            parsed values x are turned into concentrations 10**(-x).
        cids: KEGG compound IDs whose concentrations are scanned together.
            The default is an empty tuple rather than a shared mutable list.
        pH: pH forwarded to GetAllOBDs (None is passed through unchanged).
    """
    compound_names = ','.join([thermo.kegg.cid2name(cid) for cid in cids])
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default concentrations")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=pH, section_prefix="test", balance_water=True,
                      override_bounds={})

    # negative-log values from the range specification -> concentrations in M
    conc_vec = 10**(-ParseConcentrationRange(conc_range))
    override_bounds = {}
    obd_mat = []

    # The CSV file is closed (and hence flushed) even if a run raises.
    csv_file = open('%s.csv' % output_prefix, 'w')
    try:
        csv_output = csv.writer(csv_file)
        csv_output.writerow(['pH', '[' + compound_names + ']'] + pathway_names)

        for conc in conc_vec.flat:
            for cid in cids:
                override_bounds[cid] = (conc, conc)
            logging.info("[%s] = %.1e M" % (compound_names, conc))
            data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                              pH=pH, section_prefix="", balance_water=True,
                              override_bounds=override_bounds)
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            csv_output.writerow([data[0]['pH'], conc] + obds)
    finally:
        csv_file.close()

    obd_mat = np.matrix(obd_mat)  # rows are concentrations and columns are pathways
    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(conc_vec, obd_mat[:, i], '-', color=colormap[name], figure=fig)
    plt.title("OBD vs. [%s]" % (compound_names), figure=fig)
    plt.xscale('log')
    plt.ylim(ymin=0)
    plt.xlabel('[%s] (in M)' % compound_names, figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # The closing tag used to be </h1>, which did not match the opening <h2>.
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
def calc_cons_rxns_corr(thermo, name):
    """Plot the reversibility of consecutive reaction pairs at two zoom levels.

    The pairs are computed by `get_reversibility_consecutive_pairs` under
    fixed conditions (c_mid = 1e-4, pH 7.0, pMg 3.0, I 0.25, T 298.15). Two
    dot plots (xlim=50 and xlim=10) are embedded in
    '../res/<name>_rev_pair_corr.html' and saved as PNG files.

    Args:
        thermo: thermodynamics object forwarded to the pair calculation.
        name: label used in output file names, the heading and as the id.
    """
    html_path = '../res/' + name + '_rev_pair_corr.html'
    logging.info('Writing HTML output to %s', html_path)
    report = HtmlWriter(html_path)

    mid_concentration = 1e-4
    concentration_map = GetConcentrationMap()
    pH = 7.0
    pMg = 3.0
    I = 0.25
    T = 298.15

    pairs = get_reversibility_consecutive_pairs(
        thermo, mid_concentration, pH, pMg, I, T,
        cmap=concentration_map, id=name)
    (first, second) = pairs

    report.write('<h1>' + name + ': Constrained co-factors</h1><br>')

    # Full view first, then the zoomed-in view.
    plot_variants = ((50, '_rev_pairs_corr.png'),
                     (10, '_rev_pairs_corr_zoom.png'))
    for x_limit, png_suffix in plot_variants:
        figure = cons_pairs_dot_plot(first, second, xlim=x_limit)
        report.embed_matplotlib_figure(figure, width=640, height=480)
        pylab.savefig('../res/' + name + png_suffix, figure=figure, format='png')
def example_formate(thermo, product_cid=22, co2_conc=1e-5):
    """Search for pathways "5 C06265 + C00058 => C<product_cid>".

    Before the search, the bounds of compounds 11 and 288 in `thermo.bounds`
    are pinned: compound 11 to `co2_conc`, and compound 288 to
    co2_conc * exp(-dG0' / (R * default_T)), where dG0' is the transformed
    energy of "C00011 + C00001 => C00288". Nine reactions are deleted and
    three uptake reactions from C06265 are added.

    Args:
        thermo: thermodynamics object; its `bounds` mapping is modified.
        product_cid: KEGG compound ID of the product.
        co2_conc: concentration assigned to compound 11.
    """
    hydration = Reaction.FromFormula("C00011 + C00001 => C00288")
    hydration_dG0_prime = float(
        thermo.GetTransfromedKeggReactionEnergies([hydration]))
    carbonate_conc = co2_conc * np.exp(-hydration_dG0_prime / (R*default_T))
    thermo.bounds[11] = (co2_conc, co2_conc)
    thermo.bounds[288] = (carbonate_conc, carbonate_conc)

    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/pathologic.html')
    pathologic = Pathologic(db=gibbs_db,
                            public_db=public_db,
                            html_writer=report,
                            thermo=thermo,
                            max_solutions=None,
                            max_reactions=20,
                            maximal_dG=0.0,
                            thermodynamic_method=OptimizationMethods.GLOBAL,
                            update_file=None)
    add_cofactor_reactions(pathologic, free_ATP_hydrolysis=True)
    add_redox_reactions(pathologic, NAD_only=False)

    banned_rids = (
        134,   # formate:NADP+ oxidoreductase
        519,   # Formate:NAD+ oxidoreductase
        24,    # Rubisco
        581,   # L-serine:NAD+ oxidoreductase (deaminating)
        220,   # L-serine ammonia-lyase
        13,    # glyoxylate carboxy-lyase (dimerizing; tartronate-semialdehyde-forming)
        585,   # L-Serine:pyruvate aminotransferase
        1440,  # D-Xylulose-5-phosphate:formaldehyde glycolaldehydetransferase
        5338,  # 3-hexulose-6-phosphate synthase
    )
    for rid in banned_rids:
        pathologic.delete_reaction(rid)

    uptake_reactions = (
        ("C06265 => C00011", "CO2 uptake"),
        ("C06265 => C00288", "carbonate uptake"),
        ("C06265 => C00058", "formate uptake"),
    )
    for formula, uptake_name in uptake_reactions:
        pathologic.add_reaction(Reaction.FromFormula(formula, name=uptake_name))

    # at least one formate to product
    net_reaction = Reaction.FromFormula("5 C06265 + C00058 => C%05d" % product_cid)
    kegg = Kegg.getInstance()
    experiment_name = "formate to %s" % kegg.cid2name(product_cid)
    pathologic.find_path(experiment_name, net_reaction)
def AnalyzeConcentrationGradient(prefix, thermo, csv_output_fname, cid=13):  # default compound is PPi
    """Plot the OBD of every pathway in a KEGG file against one concentration.

    Pathways are read from '../data/thermodynamics/<prefix>.txt'. For each
    concentration on a fixed logarithmic grid, the concentration of compound
    `cid` is pinned (lower bound == upper bound) and `pareto` is re-run; the
    second column of its result is plotted and, optionally, exported as CSV.
    The figure is embedded in '../res/<prefix>.html'.

    Args:
        prefix: base name of the input KEGG file and of the output HTML file.
        thermo: thermodynamics object; `thermo.kegg`, `thermo.I` and
            `thermo.T` are read here and the object is passed to `pareto`.
        csv_output_fname: path of a CSV file that receives one row per
            (pH, concentration) pair, or a false value to skip the export.
        cid: KEGG compound ID whose concentration is scanned.
    """
    compound_name = thermo.kegg.cid2name(cid)
    kegg_file = ParsedKeggFile.FromKeggFile('../data/thermodynamics/%s.txt' % prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)
    null_html_writer = NullHtmlWriter()

    # The CSV export is optional; keep the file handle so it can be closed.
    csv_file = None
    csv_output = None
    if csv_output_fname:
        csv_file = open(csv_output_fname, 'w')
        csv_output = csv.writer(csv_file)

    pH_vec = np.array([7])  # this needs to be fixed so that the txt file will set the pH
    # logarithmic scale: 10**-2 M down to 10**-6 M in steps of 0.25 decades
    conc_vec = 10**(-np.arange(2, 6.0001, 0.25))
    override_bounds = {}

    fig = plt.figure(figsize=(6, 6), dpi=90)
    legend = []
    try:
        if csv_output is not None:
            csv_output.writerow(['pH', 'I', 'T', '[C%05d]' % cid] + kegg_file.entries())

        for pH in pH_vec.flat:
            obd_vec = []
            for conc in conc_vec.flat:
                override_bounds[cid] = (conc, conc)
                logging.info("pH = %g, [%s] = %.1e M" % (pH, compound_name, conc))
                data, labels = pareto(kegg_file, null_html_writer, thermo,
                                      pH=pH, section_prefix="", balance_water=True,
                                      override_bounds=override_bounds)
                obd_vec.append(data[:, 1])
                # Previously this was called even when no CSV file was
                # requested, which failed on the None writer.
                if csv_output is not None:
                    csv_output.writerow([pH, thermo.I, thermo.T, conc] + list(data[:, 1].flat))
            obd_mat = np.matrix(obd_vec)  # one row per concentration
            plt.plot(conc_vec, obd_mat, '.-', figure=fig)
            legend += ['%s, pH = %g' % (l, pH) for l in labels]
    finally:
        if csv_file is not None:
            csv_file.close()

    plt.title("ODB vs. [%s] (I = %gM, T = %gK)" % (compound_name, thermo.I, thermo.T), figure=fig)
    plt.xscale('log')
    plt.xlabel('Concentration of %s [M]' % thermo.kegg.cid2name(cid), figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(legend)
    # The closing tag used to be </h1>, which did not match the opening <h2>.
    html_writer.write('<h2 id="figure_%s">Summary figure</h2>\n' % prefix)
    html_writer.embed_matplotlib_figure(fig, name=prefix)
    html_writer.close()
def main():
    """Analyze every KEGG module and write a table sorted by OBD.

    The thermodynamic source and the configuration file come from the command
    line. Conditions (pH, I, T, pMg), the concentration range and the bounds
    are taken from the 'CONFIGURATION' entry of the configuration file, or
    from its first entry when that one is missing. Modules for which
    AnalyzeKeggModule raises KeyError are left out of the table.

    Raises:
        ValueError: if the configuration file has no entries.
    """
    estimators = LoadAllEstimators()
    args = MakeArgParser(estimators).parse_args()
    thermo = estimators[args.thermodynamics_source]

    kegg_file = ParsedKeggFile.FromKeggFile(args.config_fname)
    entries = kegg_file.entries()
    if not entries:
        raise ValueError('No entries in configuration file')

    if 'CONFIGURATION' in entries:
        entry = 'CONFIGURATION'
    else:
        logging.warning(
            'Configuration file does not contain the entry "CONFIGURATION". '
            'Using the first entry by default: %s' % entries[0])
        entry = entries[0]

    pathway_data = PathwayData.FromFieldMap(kegg_file[entry])
    thermo.SetConditions(pH=pathway_data.pH, I=pathway_data.I,
                         T=pathway_data.T, pMg=pathway_data.pMg)
    thermo.c_range = pathway_data.c_range
    bounds = pathway_data.GetBounds()

    report = HtmlWriter(args.output_prefix + ".html")
    table_rows = []
    headers = ['Module', 'Name', 'OBD [kJ/mol]', 'Length']
    kegg = Kegg.getInstance()

    for mid in kegg.get_all_mids():
        # The heading is written before the analysis, so modules that are
        # skipped below still get a heading in the report.
        report.write('<h2 id=M%05d>M%05d: %s</h2>' %
                     (mid, mid, kegg.get_module_name(mid)))
        try:
            row = AnalyzeKeggModule(thermo, mid, bounds, report)
        except KeyError:
            continue
        row['Module'] = '<a href="#M%05d">M%05d</a>' % (mid, mid)
        row['Name'] = kegg.get_module_name(mid)
        table_rows.append(row)

    table_rows.sort(key=lambda row: row['OBD [kJ/mol]'])
    report.write_table(table_rows, headers, decimal=1)
    report.close()
def runPathologic(thermo, reactionList):
    """Search for pathways whose net reaction is "2 C00288 => C00048".

    Besides the co-factor and redox reactions, every formula in
    `reactionList` is added as an extra reaction named
    "Auto generate #<hash of the formula>"; reactions 134, 344, 575 and 212
    are then deleted. The maximal dG is -3.0 and at most 15 reactions are
    allowed.

    Args:
        thermo: thermodynamics object handed to Pathologic.
        reactionList: iterable of reaction formula strings to add.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    report = HtmlWriter('../res/mog_finder.html')
    pathologic = Pathologic(db=gibbs_db,
                            public_db=public_db,
                            html_writer=report,
                            thermo=thermo,
                            max_solutions=None,
                            max_reactions=15,
                            maximal_dG=-3.0,
                            thermodynamic_method=OptimizationMethods.GLOBAL,
                            update_file=None)
    add_cofactor_reactions(pathologic)
    add_redox_reactions(pathologic)

    for formula in reactionList:
        reaction_name = "Auto generate #%s" % hash(formula)
        extra_reaction = Reaction.FromFormula(formula, reaction_name)
        pathologic.add_reaction(extra_reaction)

    for rid in (134, 344, 575, 212):
        pathologic.delete_reaction(rid)

    net_reaction = Reaction.FromFormula("2 C00288 => C00048")
    pathologic.find_path("MOG_finder", net_reaction)
def find_path(self, experiment_name, net_reaction):
    """Find pathways that realize the given net reaction.

    A minimal-total-flux LP is solved first; if it has no solution the method
    returns early. Otherwise a MILP is solved repeatedly, banning each
    solution after it is written, until no solution is found or
    `self.max_solutions` rounds have been run.

    Output goes to '../res/pathologic/<experiment_name>.html', to the
    directory '../res/pathologic/<experiment_name>/' (LP files and
    'kegg_pathway.txt'), and a link is added to `self.html_writer`.

    Args:
        experiment_name: a name given to this experiment.
        net_reaction: a Reaction describing the net reaction for the desired paths
    """
    dirname = os.path.join('../res/pathologic/', experiment_name)
    logging.info('Writing output to: %s' % dirname)
    util._mkdir(dirname)

    self.html_writer.write('<a href="pathologic/' + experiment_name + '.html">' +
                           experiment_name + '</a><br>\n')
    exp_html = HtmlWriter('../res/pathologic/' + experiment_name + '.html')
    exp_html.write("<p><h1>%s</h1>\n" % experiment_name)
    exp_html.insert_toggle(div_id="__parameters__", start_here=True,
                           label='Show Parameters')

    f, S, compounds, reactions = self.kegg_pathologic.get_unique_cids_and_reactions()

    exp_html.write('<h2>Conditions:</h2>\n')
    exp_html.write_ul(['Optimization method: %s' % self.thermodynamic_method,
                       'Concentration range: %g M < C < %g M' % (self.thermo.c_range[0], self.thermo.c_range[1]),
                       "Max Δ<sub>r</sub>G' = %.1f" % self.maximal_dG,
                       'pH = %g' % self.thermo.pH,
                       'I = %g' % self.thermo.I,
                       'T = %g' % self.thermo.T,
                       # a limit of None (or 0) is displayed as -1
                       'Max no. reactions: %d' % (self.max_reactions or -1),
                       'Max no. solutions: %d' % (self.max_solutions or -1),
                       'Overall Reaction: %s' % net_reaction.to_hypertext(),
                       '%d reactions' % len(reactions),
                       '%d unique compounds' % len(compounds)])
    exp_html.div_end()
    exp_html.write('</br>\n')

    logging.debug("All compounds:")
    for i, compound in enumerate(compounds):
        logging.debug("%05d) C%05d = %s" % (i, compound.cid, compound.name))
    logging.debug("All reactions:")
    for i, reaction in enumerate(reactions):
        logging.debug("%05d) R%05d = %s" % (i, reaction.rid, str(reaction)))

    output_kegg_file = open(dirname + '/kegg_pathway.txt', 'w')
    # From here on both the KEGG output file and the experiment report are
    # closed in the 'finally' clause, so the early return below (and any
    # exception) no longer leaves them open.
    try:
        exp_html.write('<a href="%s/kegg_pathway.txt">All solutions in KEGG format</a></br>\n' % experiment_name)

        # Find a solution with a minimal total flux
        logging.info("Preparing LP solver for the minimal total flux problem")
        exp_html.write('<b>Minimum flux</b>')
        slip = Stoichiometric_LP("Pathologic")
        slip.add_stoichiometric_constraints(f, S, compounds, reactions, net_reaction)
        slip.export("../res/pathologic/%s/%03d_lp.txt" % (experiment_name, 0))
        exp_html.write(' (<a href="%s/%03d_lp.txt">LP file</a>): ' % (experiment_name, 0))
        logging.info("Solving")
        if not slip.solve():
            exp_html.write("<b>There are no solutions!</b>")
            logging.warning("There are no solutions. Quitting!")
            return
        logging.info("writing solution")
        self.write_current_solution(exp_html, slip, experiment_name)

        logging.info("Preparing MILP solver")
        milp = Stoichiometric_LP("Pathologic")
        milp.solution_index = 1
        milp.add_stoichiometric_constraints(f, S, compounds, reactions, net_reaction)
        milp.add_milp_variables()
        if self.max_reactions is not None:
            milp.add_reaction_num_constraint(self.max_reactions)

        if self.thermodynamic_method == OptimizationMethods.LOCALIZED:
            milp.add_localized_dGf_constraints(self.thermo)
        else:
            milp.add_dGr_constraints(self.thermo,
                                     optimization=self.thermodynamic_method,
                                     maximal_dG=self.maximal_dG)

        index = 0
        while (self.max_solutions is None) or (index < self.max_solutions):
            index += 1
            # create the MILP problem to constrain the previous solutions not to reappear again.
            logging.info("Round %03d, solving using MILP" % (milp.solution_index))
            # NOTE(review): the file is exported under milp.solution_index
            # while the link below uses the local `index`; presumably the two
            # stay in step -- confirm against Stoichiometric_LP.
            milp.export("../res/pathologic/%s/%03d_lp.txt" % (experiment_name, milp.solution_index))
            exp_html.write('<b>Solution #%d</b> (<a href="%s/%03d_lp.txt">LP file</a>): ' % (index, experiment_name, index))
            if not milp.solve():
                exp_html.write("<b>No solution found</b>")
                logging.info("No more solutions. Quitting!")
                break
            logging.info("writing solution")
            self.write_current_solution(exp_html, milp, experiment_name, output_kegg_file)
            milp.ban_current_solution()
    finally:
        output_kegg_file.close()
        exp_html.close()
def main():
    """Driver for comparing dG0 estimators against the NIST data.

    Opens '../res/gibbs.sqlite', initialises a GroupContribution object
    (with `override_gc_with_measurements` switched on), loads the NIST data
    from the database and then runs exactly one of several experiments.

    The experiment is selected by hand: only the first `if True:` branch is
    executed; every `elif False:` branch below is a disabled alternative that
    is kept for reference.  The HTML report ('../res/nist/report.html') is
    closed at the end regardless of the branch taken.
    """
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    gc = GroupContribution(db)
    gc.override_gc_with_measurements = True
    gc.init()
    grad = GradientAscent(gc)
    nist = Nist(db, html_writer, gc.kegg())
    nist.FromDatabase()
    alberty = Alberty()
    hatzi = Hatzi()

    if True:
        # Active experiment: load the NIST rows once (using Alberty's table)
        # and verify three estimators against them.
        # NOTE(review): the per-estimator load_nist_data() calls are commented
        # out, so "Hatzimanikatis" and "Milo" are verified against the data
        # that was loaded for Alberty - confirm this is intended.
        grad.load_nist_data(nist, alberty, skip_missing_reactions=False, T_range=(298, 314))
        grad.verify_results("Alberty", alberty, html_writer)

        #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

        #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
        #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
        #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
        #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

        #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Hatzimanikatis", hatzi, html_writer)

        #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Milo", gc, html_writer)
    elif False:
        # Run the gradient ascent algorithm, where the starting point is the same file used for training the GC algorithm
        grad.load_dG0_data("../data/thermodynamics/dG0.csv")
        # load the data for the anchors (i.e. compounds whose dG0 should not be changed - usually their value will be 0).
        grad.anchors = grad.load_dG0_data(
            "../data/thermodynamics/nist_anchors.csv")
        grad.load_nist_data(nist, grad, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        # NOTE(review): verify_results() is called with three arguments in the
        # active branch but with one here - this disabled branch may be stale.
        grad.verify_results("gradient1")
    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient2")
    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        # Use DETERMINISTIC gradient ascent
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True,
                            T_range=(24 + 273.15, 40 + 273.15))
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.deterministic_hill_climb(max_i=200)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient_deterministic")
    elif False:
        # Run the gradient ascent algorithm, where the starting point arbitrary (predict all of the NIST compounds)
        grad = GradientAscent(gc)
        grad.load_nist_data(nist, skip_missing_reactions=False)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient3")
    elif False:
        # Use Alberty's table from (Mathematica 2006) to calculate the dG0 of all possible reactions in KEGG
        grad = GradientAscent(gc)
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        (pH, I, T) = (7, 0, 300)
        counter = 0  # number of reactions whose dG0 could be computed
        for rid in grad.kegg.get_all_rids():
            sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
            try:
                dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                print "R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0)
                counter += 1
            except MissingCompoundFormationEnergy as e:
                # reactions with an unknown formation energy are skipped silently
                #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                pass
        print "Managed to calculate the dG0 of %d reactions" % counter
    elif False:
        # Write an HTML table and a CSV file listing, for every compound that
        # appears in a NIST reaction, its pseudoisomers as (nH, z) pairs.
        util._mkdir("../res/nist/fig")
        # NOTE(review): the CSV file handle is never closed explicitly.
        csv_writer = csv.writer(open("../res/nist/pseudoisomers.csv", "w"))
        cid_set = set()
        for row in nist.data:
            sparce_reaction = row['sparse']
            cid_set.update(sparce_reaction.keys())
        html_writer.write("<table border=1>\n")
        for cid in sorted(list(cid_set)):
            html_writer.write("  <tr><td>C%05d</td><td>%s</td><td>" % (cid, grad.kegg.cid2name(cid)))
            try:
                mol = grad.kegg.cid2mol(cid)
                img_fname = '../res/nist/fig/C%05d.png' % cid
                # the image is referenced in the HTML before it is drawn to disk
                html_writer.embed_img(img_fname, "C%05d" % cid)
                mol.draw(show=False, filename=img_fname)
            except AssertionError as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
            except KeggParseException as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
            html_writer.write("</td><td>")
            if (cid in alberty.cid2pmap_dict):
                # one HTML line and one CSV row per pseudoisomer in Alberty's table
                for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                    html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                    csv_writer.writerow((cid, nH, z))
            else:
                # not in Alberty's table: fall back to the single (nH, z) reported by KEGG
                nH = grad.kegg.cid2num_hydrogens(cid)
                z = grad.kegg.cid2charge(cid)
                html_writer.write("unknown pseudoisomers<br>")
                html_writer.write("(nH=%d, z=%d)" % (nH, z))
                csv_writer.writerow((cid, nH, z))
            html_writer.write("</td></tr>\n")
        html_writer.write("</table>\n")
    html_writer.close()
args, _ = MakeOpts(estimators).parse_args(sys.argv) input_filename = os.path.abspath(args.input_filename) output_filename = os.path.abspath(args.output_filename) if not os.path.exists(input_filename): logging.fatal('Input filename %s doesn\'t exist' % input_filename) print 'Will read pathway definitions from %s' % input_filename print 'Will write output to %s' % output_filename db_loc = args.db_filename print 'Reading from DB %s' % db_loc db = SqliteDatabase(db_loc) thermo = estimators[args.thermodynamics_source] print "Using the thermodynamic estimations of: " + thermo.name kegg = Kegg.getInstance() thermo.bounds = deepcopy(kegg.cid2bounds) dirname = os.path.dirname(output_filename) if not os.path.exists(dirname): print 'Making output directory %s' % dirname _mkdir(dirname) print 'Executing thermodynamic pathway analysis' html_writer = HtmlWriter(output_filename) thermo_analyze = ThermodynamicAnalysis(db, html_writer, thermodynamics=thermo) thermo_analyze.analyze_pathway(input_filename)
continue if self.override_pMg or self.override_I or self.override_T: nist_row_copy = nist_row_data.Clone() if self.override_pMg: nist_row_copy.pMg = self.override_pMg if self.override_I: nist_row_copy.I = self.override_I if self.override_T: nist_row_copy.T = self.override_T rows.append(nist_row_copy) else: rows.append(nist_row_data) return rows def GetUniqueReactionSet(self): return set([row.reaction for row in self.data]) if __name__ == '__main__': #logging.getLogger('').setLevel(logging.DEBUG) _mkdir("../res/nist") html_writer = HtmlWriter("../res/nist/statistics.html") nist = Nist() fp = open('../res/nist_kegg_ids.txt', 'w') for cid in nist.GetAllCids(): fp.write("C%05d\n" % cid) fp.close() nist.AnalyzeStats(html_writer) nist.AnalyzeConnectivity(html_writer) html_writer.close()
def main():
    """Write the NIST comparison report to '../res/nist/report.html'.

    The report contains:
      * the number of NIST rows, in total and within the temperature range
        set on `nist.T_range`;
      * optional pairwise estimator comparisons (the `pairs` list is empty,
        so none are produced unless it is edited);
      * for each estimator in a fixed list, the result of
        `nist.verify_results` (N and RMSE) and its coverage of the KEGG,
        FEIST and NIST reaction lists, summarised in a final table.
    """
    html_writer = HtmlWriter("../res/nist/report.html")
    estimators = LoadAllEstimators()
    nist = Nist()
    nist.T_range = (273.15 + 24, 273.15 + 40)
    #nist.override_I = 0.25
    #nist.override_pMg = 14.0
    #nist.override_T = 298.15

    html_writer.write('<p>\n')
    html_writer.write("Total number of reaction in NIST: %d</br>\n" % len(nist.data))
    html_writer.write("Total number of reaction in range %.1fK < T < %.1fK: %d</br>\n" %
                      (nist.T_range[0], nist.T_range[1], len(nist.SelectRowsFromNist())))
    html_writer.write('</p>\n')

    # Reaction lists against which the coverage of each estimator is measured.
    reactions = {}
    reactions['KEGG'] = []
    for reaction in Kegg.getInstance().AllReactions():
        try:
            reaction.Balance(balance_water=True, exception_if_unknown=True)
            reactions['KEGG'].append(reaction)
        except (KeggReactionNotBalancedException, KeggParseException, OpenBabelError):
            # deliberately skipped: reactions that cannot be balanced are
            # left out of the KEGG list
            pass
    reactions['FEIST'] = Feist.FromFiles().reactions
    reactions['NIST'] = nist.GetUniqueReactionSet()

    # Pairs of estimator keys to compare head-to-head (none by default).
    pairs = []
    #pairs += [('hatzi_gc', 'UGC')], ('PGC', 'PRC'), ('alberty', 'PRC')]

    for t1, t2 in pairs:
        logging.info('Writing the NIST report for %s vs. %s' %
                     (estimators[t1].name, estimators[t2].name))
        html_writer.write('<p><b>%s vs. %s</b> ' %
                          (estimators[t1].name, estimators[t2].name))
        html_writer.insert_toggle(start_here=True)
        two_way_comparison(html_writer=html_writer,
                           thermo1=estimators[t1],
                           thermo2=estimators[t2],
                           reaction_list=reactions['FEIST'],
                           name='%s_vs_%s' % (t1, t2))
        html_writer.div_end()
        html_writer.write('</p>')

    if False:  # disabled comparison, kept as a manual switch
        estimators['alberty'].CompareOverKegg(html_writer,
                                              other=estimators['PRC'],
                                              fig_name='kegg_compare_alberty_vs_nist')

    # First table row: the size of each reaction list.
    rowdicts = []
    rowdict = {'Method': 'Total'}
    for db_name, reaction_list in reactions.iteritems():
        rowdict[db_name + ' coverage'] = len(reaction_list)
    rowdicts.append(rowdict)

    for name in ['UGC', 'PGC', 'PRC', 'alberty', 'merged', 'hatzi_gc']:
        thermo = estimators[name]
        logging.info('Writing the NIST report for %s' % thermo.name)
        html_writer.write('<p><b>%s</b> ' % thermo.name)
        html_writer.insert_toggle(start_here=True)
        num_estimations, rmse = nist.verify_results(html_writer=html_writer,
                                                    thermodynamics=thermo,
                                                    name=name)
        html_writer.div_end()
        html_writer.write('N = %d, RMSE = %.1f</p>\n' % (num_estimations, rmse))
        logging.info('N = %d, RMSE = %.1f' % (num_estimations, rmse))
        rowdict = {'Method': thermo.name,
                   'RMSE (kJ/mol)': "%.1f (N=%d)" % (rmse, num_estimations)}
        for db_name, reaction_list in reactions.iteritems():
            n_covered = thermo.CalculateCoverage(reaction_list)
            n_total = len(reaction_list)
            # An empty reaction list used to raise ZeroDivisionError here;
            # report 0% coverage instead so the rest of the table is written.
            if n_total > 0:
                percent = n_covered * 100.0 / n_total
            else:
                percent = 0.0
            rowdict[db_name + " coverage"] = "%.1f%% (%d)" % (percent, n_covered)
            logging.info(db_name + " coverage = %.1f%%" % percent)
        rowdicts.append(rowdict)

    headers = ['Method', 'RMSE (kJ/mol)'] + \
        [db_name + ' coverage' for db_name in reactions.keys()]
    html_writer.write_table(rowdicts, headers=headers)
def analyse_reversibility(thermo, name):
    """Write the reversibility report for one thermodynamic estimator.

    The reversibility histograms are computed twice - once with the co-factor
    concentration map returned by GetConcentrationMap() and once with an empty
    map - and for each run the absolute and the per-module normalised
    histogram are embedded in '../res/<name>_reversibility.html' and saved as
    PNG files next to it.

    Arguments:
        thermo - the estimator handed to calculate_reversibility_histogram
        name   - label used in the headings and in all output file names
    """
    out_prefix = '../res/' + name
    html_fname = out_prefix + '_reversibility.html'
    logging.info('Writing HTML output to %s', html_fname)
    html_writer = HtmlWriter(html_fname)

    def embed_and_save(fig, png_suffix):
        # put the figure in the report and keep a PNG copy on disk
        html_writer.embed_matplotlib_figure(fig, width=640, height=480)
        pylab.savefig(out_prefix + png_suffix, figure=fig, format='png')

    # ---- run 1: co-factor concentrations constrained ----
    constrained = calculate_reversibility_histogram(
        thermo, cmap=GetConcentrationMap(), id=name)
    histogram, rel_histogram, perc_first_max = constrained
    html_writer.write('<h1>' + name + ': Constrained co-factors</h1>Percentage of modules where first reaction is the maximal: %f<br>' % perc_first_max)

    # deltaG plot; the xlim=80 figure is created but only the xlim=10 one
    # ends up embedded and saved
    fig1 = plot_histogram(histogram, html_writer,
                          title='With constraints on co-factors',
                          legend_loc='lower right', xlim=80)
    fig1 = plot_histogram(histogram, html_writer,
                          title='With constraints on co-factors', xlim=10)
    embed_and_save(fig1, '_kegg_reversibility1.png')

    # (the bootstrap statistics plot is disabled)

    fig1_rel = plot_histogram(rel_histogram, html_writer,
                              title='Normed per module with constraints on co-factors',
                              xlim=5)
    embed_and_save(fig1_rel, '_kegg_reversibility1_rel.png')

    # ---- run 2: no constraints on co-factors ----
    unconstrained = calculate_reversibility_histogram(thermo, cmap={}, id=name)
    histogram, rel_histogram, perc_first_max = unconstrained
    html_writer.write('<h1>' + name + ': Non constrained co-factors</h1>Percentage of modules where first reaction is the maximal: %f<br>' % perc_first_max)

    fig2 = plot_histogram(histogram, html_writer,
                          title='No constraints on co-factors', xlim=20)
    embed_and_save(fig2, '_kegg_reversibility2.png')

    fig2_rel = plot_histogram(rel_histogram, html_writer,
                              title='Normed per module, no constraints on co-factors',
                              xlim=5)
    embed_and_save(fig2_rel, '_kegg_reversibility2_rel.png')
def metacyc_data(org, id, thermo, max_pathway_length_for_fig=8):
    """Write the MetaCyc reversibility report for one organism.

    The reversibility histograms are computed twice - with the co-factor
    concentration map from GetConcentrationMap() and with an empty map.  The
    figures are embedded in '../res/<org>_<id>_reversibility.html' and saved
    as PNG files with the same '<org>_<id>' prefix.  For the constrained run
    the position of regulated reactions is plotted as well (plain and stacked
    bars).

    Arguments:
        org    - organism identifier passed to MetaCyc and used in titles
        id     - label appended to `org` in all output names (the parameter
                 name shadows the builtin but is kept for keyword callers)
        thermo - estimator handed to calculate_metacyc_reversibility_histogram
        max_pathway_length_for_fig - forwarded to the bar-plot helpers
    """
    db = SqliteDatabase('../res/gibbs.sqlite')
    label = org + '_' + id
    out_prefix = '../res/' + label
    html_writer = HtmlWriter(out_prefix + '_reversibility.html')
    metacyc_inst = MetaCyc(org, db)

    # ---- run 1: co-factor concentrations constrained ----
    cmap = GetConcentrationMap()
    (histogram, rel_histogram, perc_first_max, reg_hist) = \
        calculate_metacyc_reversibility_histogram(thermo, metacyc_inst,
                                                  cmap=cmap, id=label)

    html_writer.write('<h1>Constrained co-factors</h1>Percentage of modules where first reaction is the maximal: %f<br>' % perc_first_max)

    # deltaG plot; the xlim=80 figure is created but only the xlim=10 one is
    # embedded and saved
    fig1 = plot_histogram(histogram, html_writer,
                          title=('%s pathways: With constraints on co-factors' % org),
                          legend_loc='lower right', xlim=80)
    fig1 = plot_histogram(histogram, html_writer,
                          title=('%s pathways: With constraints on co-factors' % org),
                          xlim=10)
    html_writer.embed_matplotlib_figure(fig1, width=640, height=480)
    pylab.savefig(out_prefix + '_reversibility1.png', figure=fig1, format='png')

    fig1_reg = plot_bars(reg_hist,
                         title=('%s pathways: Position of regulated reactions' % org),
                         max_pathway_length=max_pathway_length_for_fig)
    html_writer.embed_matplotlib_figure(fig1_reg, width=640, height=480)
    pylab.savefig(out_prefix + '_reg_rxns.png', figure=fig1_reg, format='png')

    fig1_reg_stacked = plot_stacked_bars(reg_hist,
                                         title=('%s pathways: Position of regulated reactions' % org),
                                         max_pathway_length=max_pathway_length_for_fig)
    html_writer.embed_matplotlib_figure(fig1_reg_stacked, width=640, height=480)
    pylab.savefig(out_prefix + '_reg_stacked_rxns.png', figure=fig1_reg_stacked, format='png')

    #fig1_bs = plot_bootstrap_stats(histogram, title=('%s pathways: With constraints on co-factors' % org))
    #html_writer.embed_matplotlib_figure(fig1_bs, width=640, height=480)
    #pylab.savefig('../res/' + org + '_' + id + '_reversibility1_bs.png', figure=fig1_bs, format='png')

    fig1_rel = plot_histogram(rel_histogram, html_writer,
                              title=('%s pathways: Normed per pathway with constraints on co-factors' % org),
                              xlim=5)
    html_writer.embed_matplotlib_figure(fig1_rel, width=640, height=480)
    pylab.savefig(out_prefix + '_reversibility1_rel.png', figure=fig1_rel, format='png')

    # ---- run 2: no constraints on co-factors ----
    (histogram, rel_histogram, perc_first_max, reg_hist) = \
        calculate_metacyc_reversibility_histogram(thermo, metacyc_inst,
                                                  cmap={}, id=label)

    html_writer.write('<h1>Non constrained co-factors</h1>Percentage of modules where first reaction is the maximal: %f<br>' % perc_first_max)

    fig2 = plot_histogram(histogram, html_writer,
                          title=('%s pathways: No constraints on co-factors' % org),
                          xlim=20)
    html_writer.embed_matplotlib_figure(fig2, width=640, height=480)
    # Fixed copy-paste slip: this save used to be given figure=fig1, i.e. the
    # figure of the constrained run, for the '..._reversibility2.png' file.
    pylab.savefig(out_prefix + '_reversibility2.png', figure=fig2, format='png')

    fig2_rel = plot_histogram(rel_histogram, html_writer,
                              title=('%s pathways: Normed per pathway no constraints on co-factors' % org),
                              xlim=5)
    html_writer.embed_matplotlib_figure(fig2_rel, width=640, height=480)
    pylab.savefig(out_prefix + '_reversibility2_rel.png', figure=fig2_rel, format='png')
'--leave_one_out', action='store_true', default=False, help='A flag for running the Leave One Out analysis') return parser if __name__ == "__main__": logger = logging.getLogger('') logger.setLevel(logging.DEBUG) parser = MakeOpts() args = parser.parse_args() util._mkdir('../res') db = SqliteDatabase('../res/gibbs.sqlite', 'w') html_writer = HtmlWriter('../res/ugc.html') ugc = UnifiedGroupContribution(db, html_writer, anchor_all=args.anchor_all_formations) ugc.LoadGroups(FromDatabase=(not args.recalc_groups)) ugc.LoadObservations(FromDatabase=(not args.recalc_observations)) ugc.LoadGroupVectors(FromDatabase=(not args.recalc_groupvectors)) ugc.LoadData(FromDatabase=(not args.recalc_matrices)) if args.dump: ugc.SaveDataToMatfile() sys.exit(0) if args.train: ugc.EstimateKeggCids() sys.exit(0)
def main():
    """Train the NIST regression and write its HTML report.

    Command-line options come from MakeOpts(): `output_filename` is the report
    path, `use_prior` selects Alberty's pseudoisomer table as a fixed prior
    and `from_database` is forwarded to NistRegression.Train().  The report
    has three collapsible sections: the regression tables, the PRC results
    and the comparison of transformed reaction energies with the observations.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    db = SqliteDatabase("../res/gibbs.sqlite")
    public_db = SqliteDatabase("../data/public_data.sqlite")

    output_filename = os.path.abspath(options.output_filename)
    logging.info('Will write output to %s', output_filename)
    html_writer = HtmlWriter(output_filename)

    def begin_section(title):
        # section heading followed by the opening of a collapsible block
        html_writer.write('</br><b>' + title + '</b>\n')
        html_writer.insert_toggle(start_here=True)

    nist_regression = NistRegression(db, html_writer=html_writer,
                                     nist=Nist(T_range=None))
    # the threshold over which to print an analysis of a reaction
    nist_regression.std_diff_threshold = 5
    # (optional overrides of nist.T_range / override_I / override_pMg are
    # left unset here)

    html_writer.write("<h2>NIST regression:</h2>")

    prior_thermo = None
    if options.use_prior:
        logging.info('Using the data from Alberty as fixed prior')
        prior_thermo = PsuedoisomerTableThermodynamics.FromDatabase(
            public_db, 'alberty_pseudoisomers', name="Alberty")

    begin_section('Regression Tables')
    nist_regression.Train(options.from_database, prior_thermo)
    html_writer.div_end()

    begin_section('PRC results')
    nist_regression.WriteDataToHtml(html_writer)
    html_writer.div_end()

    begin_section('Transformed reaction energies - PRC vs. Observed')
    N, rmse = nist_regression.VerifyResults()
    html_writer.div_end()

    logging.info("Regression results for transformed data:")
    logging.info("N = %d, RMSE = %.1f", N, rmse)

    html_writer.close()
return parser if __name__ == '__main__': parser = MakeOpts() args = parser.parse_args() util._mkdir('../res') db = SqliteDatabase('../res/gibbs.sqlite', 'w') if args.transformed: prefix = 'bgc' else: prefix = 'pgc' if args.test_only: html_writer = HtmlWriter('../res/%s_test.html' % prefix) elif args.train_only: html_writer = HtmlWriter('../res/%s_train.html' % prefix) else: html_writer = HtmlWriter('../res/%s.html' % prefix) G = GroupContribution(db=db, html_writer=html_writer, transformed=args.transformed) G.LoadGroups(FromDatabase=args.from_database, FromFile=args.groups_species) G.LoadObservations(args.from_database) G.LoadGroupVectors(args.from_database) if args.test_only: G.LoadContributionsFromDB()
#m = Molecule.FromInChI('InChI=1/CO2/c2-1-3'); m.SetTitle('CO2') #m = Molecule.FromInChI('InChI=1/CO/c1-2'); m.SetTitle('CO') #m = Molecule.FromInChI('InChI=1/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7-,10-/m1/s1'); m.SetTitle('ATP') #m = Molecule.FromSmiles("P(=O)(O)(O)O") #print m.ToFormat('mol') #print m.ToFormat('mol2') #print m.ToFormat('smi') #print m.ToFormat('inchi') #print m.ToFormat('sdf') diss_table = Molecule._GetDissociationTable('C(=O)(O)CN', fmt='smiles', mid_pH=default_pH, min_pKa=0, max_pKa=14, T=default_T) print "glycine\n", diss_table html_writer = HtmlWriter('../res/molecule.html') from pygibbs.kegg import Kegg kegg = Kegg.getInstance() html_writer.write('<h1>pKa estimation using ChemAxon</h1>\n') for cid in [41]: m = kegg.cid2mol(cid) html_writer.write("<h2>C%05d : %s</h2>\n" % (cid, str(m))) diss_table = m.GetDissociationTable() pmap = diss_table.GetPseudoisomerMap() diss_table.WriteToHTML(html_writer) pmap.WriteToHTML(html_writer) html_writer.write("</p>\n") #print m.GetDissociationConstants() #print m.GetMacrospecies() #obmol = m.ToOBMol()
def AnalyzeConcentrationGradient(pathway_file, output_prefix, thermo, conc_range, cids=None, pH=None):
    """Compute the OBD of every pathway while scanning a compound concentration.

    All compounds in `cids` are pinned to the same concentration, the OBD of
    each pathway in `pathway_file` is recomputed for every concentration in
    the scan, and the results are written to '<output_prefix>.csv' (one row
    per concentration) and plotted into '<output_prefix>.html'.

    Arguments:
        pathway_file  - KEGG-format file read by KeggFile2PathwayList
        output_prefix - path prefix of the .html and .csv outputs
        thermo        - estimator passed on to GetAllOBDs; its `kegg`
                        attribute is used to look up compound names
        conc_range    - parsed by ParseConcentrationRange; the concentrations
                        are 10 ** (-value) for each parsed value
        cids          - KEGG compound IDs whose concentration is scanned
                        (default: none)
        pH            - passed on to GetAllOBDs
    """
    # None instead of a mutable [] default; behaves the same for callers.
    if cids is None:
        cids = []

    compound_names = ','.join([thermo.kegg.cid2name(cid) for cid in cids])
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default concentrations")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=pH, section_prefix="test", balance_water=True,
                      override_bounds={})

    # concentrations are the negative powers of ten of the parsed range
    conc_vec = 10**(-ParseConcentrationRange(conc_range))

    override_bounds = {}
    obd_mat = []
    # The CSV file used to be opened without ever being closed; the `with`
    # block guarantees it is flushed and closed even if a pathway fails.
    with open('%s.csv' % output_prefix, 'w') as csv_file:
        csv_output = csv.writer(csv_file)
        csv_output.writerow(['pH', '[' + compound_names + ']'] + pathway_names)

        for conc in conc_vec.flat:
            # pin every scanned compound to exactly this concentration
            for cid in cids:
                override_bounds[cid] = (conc, conc)
            logging.info("[%s] = %.1e M" % (compound_names, conc))
            data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                              pH=pH, section_prefix="", balance_water=True,
                              override_bounds=override_bounds)
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            csv_output.writerow([data[0]['pH'], conc] + obds)

    # rows are concentrations and columns are pathways
    obd_mat = np.matrix(obd_mat)
    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(conc_vec, obd_mat[:, i], '-', color=colormap[name], figure=fig)
    plt.title("OBD vs. [%s]" % (compound_names), figure=fig)
    plt.xscale('log')
    plt.ylim(ymin=0)
    plt.xlabel('[%s] (in M)' % compound_names, figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # the heading used to be closed with a mismatched </h1> tag
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)

    html_writer.close()
class KeggGenes(object):
    """Builds and analyses the 'channeling' SQLite database.

    The class downloads compounds, genes, gene-to-reaction links and reaction
    equations from the KEGG REST service (rest.kegg.jp), stores them in
    'channeling/channeling.sqlite', attaches reaction Gibbs energies and
    finally builds a table of gene pairs that share a compound (one reaction
    producing it, the other consuming it).  The remaining methods correlate
    those pairs with a table of functional interactions and write
    plots/tables to the HTML report.

    The table-name attributes are written in upper case but are instance
    attributes set in __init__.
    """

    def __init__(self, html_fname):
        """Open the channeling database for writing and the HTML report.

        html_fname - path of the HTML report written through HtmlWriter.
        """
        self.serv = None
        self.db = SqliteDatabase('channeling/channeling.sqlite', 'w')
        self.html_writer = HtmlWriter(html_fname)

        self.COMPOUND_TABLE_NAME = 'kegg_compounds'
        self.GENE_TABLE_NAME = 'kegg_genes'
        self.GENE_REACTION_TABLE_NAME = 'kegg_genes_to_reactions'
        self.REACTION_TABLE_NAME = 'kegg_reactions'
        self.EQUATION_TABLE_NAME = 'kegg_equations'
        self.STOICHIOMETRY_TABLE_NAME = 'kegg_stoichiometry'
        self.GIBBS_ENERGY_TABLE_NAME = 'kegg_gibbs_energies'
        self.GENE_ENERGY_TABLE_NAME = 'kegg_gene_energies'
        self.FUNCTIONAL_INTERATCTIONS_TABLE = 'parkinson_functional_interactions'
        self.GENE_PAIRS_TABLE_NAME = 'kegg_gene_pairs'
        self.COFACTOR_TABLE_NAME = 'kegg_cofactors'

    @staticmethod
    def _Fetch(url):
        """Return the full body of `url`, always closing the connection."""
        f = urllib.urlopen(url)
        try:
            return f.read()
        finally:
            f.close()

    @classmethod
    def _FetchRows(cls, url):
        """Return the non-blank lines of the document at `url`."""
        return [row for row in cls._Fetch(url).split('\n')
                if row.strip() != '']

    def GetAllCompounds(self):
        """Recreate the compounds table from the KEGG compound list.

        Each row of the list is '<compound>TAB<names separated by ;>'; the
        first name is stored separately as the compound's name.

        Raises ValueError for a non-blank row that contains no TAB.
        """
        self.db.CreateTable(self.COMPOUND_TABLE_NAME,
                            "compound INT, name TEXT, all_names TEXT",
                            drop_if_exists=True)
        self.db.CreateIndex('compound_idx', self.COMPOUND_TABLE_NAME,
                            'compound', unique=True, drop_if_exists=True)

        for row in self._FetchRows('http://rest.kegg.jp/list/cpd/'):
            if '\t' not in row:
                raise ValueError('Bad compound name: ' + row)
            compound, all_names = row.split('\t', 1)
            name = all_names.split(';')[0]
            self.db.Insert(self.COMPOUND_TABLE_NAME,
                           [compound, name, all_names])
        self.db.Commit()

    def GetAllGenes(self, organism='eco'):
        """Replace the genes of `organism` with the current KEGG gene list.

        Rows of other organisms already in the table are left untouched.
        """
        self.db.CreateTable(self.GENE_TABLE_NAME,
                            ['organism', 'gene', 'desc'],
                            drop_if_exists=False)
        self.db.CreateIndex('gene_idx', self.GENE_TABLE_NAME,
                            'gene', unique=False, drop_if_exists=False)

        # NOTE(review): the statement is built by string interpolation;
        # `organism` must be a trusted KEGG organism code.
        self.db.Execute("DELETE FROM %s WHERE organism = '%s'" %
                        (self.GENE_TABLE_NAME, organism))

        for row in self._FetchRows('http://rest.kegg.jp/list/%s/' % organism):
            # split on the first TAB only, so a description that itself
            # contains a TAB no longer aborts the import
            gene, desc = row.split('\t', 1)
            self.db.Insert(self.GENE_TABLE_NAME, [organism, gene, desc])
        self.db.Commit()

    def GetAllReactions(self, organism='eco'):
        """Replace the gene-to-reaction links of `organism` from KEGG."""
        self.db.CreateTable(self.GENE_REACTION_TABLE_NAME,
                            ['organism', 'gene', 'reaction'],
                            drop_if_exists=False)
        self.db.CreateIndex('reaction_gene_idx',
                            self.GENE_REACTION_TABLE_NAME,
                            'gene', unique=False, drop_if_exists=False)
        self.db.CreateIndex('reaction_idx', self.GENE_REACTION_TABLE_NAME,
                            'reaction', unique=False, drop_if_exists=False)

        # NOTE(review): string-built SQL, see GetAllGenes.
        self.db.Execute("DELETE FROM %s WHERE organism = '%s'" %
                        (self.GENE_REACTION_TABLE_NAME, organism))

        for row in self._FetchRows('http://rest.kegg.jp/link/rn/%s' % organism):
            gene, reaction = row.split('\t')
            self.db.Insert(self.GENE_REACTION_TABLE_NAME,
                           [organism, gene, reaction])
        self.db.Commit()

    def GetAllEquations(self):
        """Recreate the equations table for all reactions linked to a gene.

        One KEGG request is made per distinct reaction in the
        gene-to-reaction table; progress is reported on stderr.
        """
        self.db.CreateTable(self.EQUATION_TABLE_NAME,
                            ['reaction', 'equation'], drop_if_exists=True)
        self.db.CreateIndex('equation_reaction_idx',
                            self.EQUATION_TABLE_NAME,
                            'reaction', unique=False, drop_if_exists=True)
        self.db.CreateIndex('equation_idx', self.EQUATION_TABLE_NAME,
                            'equation', unique=False, drop_if_exists=True)

        # read the whole result first: rows are inserted inside the loop below
        all_reactions = [str(row[0]) for row in
                         self.db.Execute("SELECT distinct(reaction) FROM %s" %
                                         (self.GENE_REACTION_TABLE_NAME))]

        for reaction in all_reactions:
            entry = self._Fetch('http://rest.kegg.jp/get/%s' % reaction)
            for equation in self._ReadReactionEntries(entry):
                self.db.Insert(self.EQUATION_TABLE_NAME, [reaction, equation])
                sys.stderr.write('Equation for reaction %s: %s\n' %
                                 (reaction, equation))
        self.db.Commit()

    def _ReadReactionEntries(self, s):
        """Return the EQUATION field of every entry in KEGG flat-file text `s`.

        Entries are visited in sorted key order; entries without an EQUATION
        field are skipped.
        """
        entry2fields_map = kegg_parser.ParsedKeggFile.FromKeggAPI(s)
        equation_list = []
        for key in sorted(entry2fields_map.keys()):
            field_map = entry2fields_map[key]
            if "EQUATION" in field_map:
                equation_list.append(field_map["EQUATION"])
        return equation_list

    def GetStoichiometries(self):
        """Recreate the stoichiometry table from the stored equations.

        Equations that Reaction.FromFormula cannot parse, or that contain
        non-compound terms, are skipped.
        """
        self.db.CreateTable(self.STOICHIOMETRY_TABLE_NAME,
                            "equation TEXT, compound TEXT, coefficient REAL",
                            drop_if_exists=True)
        self.db.CreateIndex('stoichiometry_equation_idx',
                            self.STOICHIOMETRY_TABLE_NAME,
                            'equation', unique=False, drop_if_exists=True)
        self.db.CreateIndex('stoichiometry_compound_idx',
                            self.STOICHIOMETRY_TABLE_NAME,
                            'compound', unique=False, drop_if_exists=True)

        # parse everything before inserting, so the SELECT is fully consumed
        parsed = []  # (equation text, Reaction) pairs
        for row in self.db.Execute("SELECT distinct(equation) FROM %s" %
                                   (self.EQUATION_TABLE_NAME)):
            equation = str(row[0])
            try:
                parsed.append((equation, Reaction.FromFormula(equation)))
            except (KeggParseException, KeggNonCompoundException):
                # deliberately skipped: unparsable / non-compound equations
                pass

        for equation, kegg_reaction in parsed:
            for compound, coefficient in kegg_reaction.iteritems():
                self.db.Insert(self.STOICHIOMETRY_TABLE_NAME,
                               [equation, "cpd:C%05d" % compound, coefficient])
        self.db.Commit()

    def GetForamtionEnergies(self, thermo):
        """Recreate the Gibbs-energy table with one row per stored equation.

        For every equation the transformed reaction energy is requested from
        `thermo` twice, with conc=1 (stored as dG0) and conc=1e-3 (stored as
        dGc).  Equations that cannot be parsed, that reference compounds
        unknown to KEGG, or that cannot be balanced get a row with NULL
        energies.  (The misspelled method name is kept for existing callers.)
        """
        self.db.CreateTable(self.GIBBS_ENERGY_TABLE_NAME,
                            "equation TEXT, dG0 REAL, dGc REAL",
                            drop_if_exists=True)
        self.db.CreateIndex('gibbs_equation_idx',
                            self.GIBBS_ENERGY_TABLE_NAME,
                            'equation', unique=True, drop_if_exists=True)

        all_equations = set()
        for row in self.db.Execute("SELECT distinct(equation) FROM %s" %
                                   (self.EQUATION_TABLE_NAME)):
            all_equations.add(str(row[0]))

        from pygibbs.kegg import Kegg
        kegg = Kegg.getInstance()
        all_kegg_cids = set(kegg.get_all_cids())
        for equation in all_equations:
            try:
                rxn = Reaction.FromFormula(equation)
                if not rxn.get_cids().issubset(all_kegg_cids):
                    raise KeggNonCompoundException
                rxn.Balance(balance_water=True, exception_if_unknown=True)
                dG0 = thermo.GetTransfromedKeggReactionEnergies([rxn], conc=1)[0, 0]
                dGc = thermo.GetTransfromedKeggReactionEnergies([rxn], conc=1e-3)[0, 0]
                self.db.Insert(self.GIBBS_ENERGY_TABLE_NAME,
                               [equation, dG0, dGc])
            except (KeggParseException, KeggNonCompoundException,
                    KeggReactionNotBalancedException):
                # keep the equation in the table, but without energies
                self.db.Insert(self.GIBBS_ENERGY_TABLE_NAME,
                               [equation, None, None])
        self.db.Commit()

    def LoadCofactors(self):
        """Recreate the co-factor table from 'channeling/cofactors.csv'.

        The CSV must have the columns cid, name, c_min, c_max and ref; an
        empty c_min / c_max is stored as NaN.
        """
        self.db.CreateTable(self.COFACTOR_TABLE_NAME,
                            'compound TEXT, name TEXT, c_min REAL, c_max REAL, ref TEXT',
                            drop_if_exists=True)
        self.db.CreateIndex('cofactor_idx', self.COFACTOR_TABLE_NAME,
                            'compound', unique=True, drop_if_exists=True)

        # `with` closes the CSV file, which used to be left open
        with open('channeling/cofactors.csv', 'r') as csv_file:
            for rowdict in csv.DictReader(csv_file):
                self.db.Insert(self.COFACTOR_TABLE_NAME,
                               ["cpd:C%05d" % int(rowdict['cid']),
                                rowdict['name'],
                                float(rowdict['c_min'] or np.nan),
                                float(rowdict['c_max'] or np.nan),
                                rowdict['ref']])
        self.db.Commit()

    def CreateGeneEnergyTable(self):
        """Recreate the table joining genes, reactions, dGc and stoichiometry.

        Every (gene, reaction, compound) combination of organism 'eco' whose
        equation has a known dG0 is inserted twice: once as written and once
        reversed (dGc and coefficient negated).
        """
        self.db.CreateTable(self.GENE_ENERGY_TABLE_NAME,
                            "gene TEXT, reaction TEXT, dGc REAL, compound INT, coefficient REAL",
                            drop_if_exists=True)
        self.db.CreateIndex('gene_energy_compound_idx',
                            self.GENE_ENERGY_TABLE_NAME,
                            'compound', unique=False)
        self.db.CreateIndex('gene_energy_gene_idx',
                            self.GENE_ENERGY_TABLE_NAME,
                            'gene', unique=False)

        query_template = """
            INSERT INTO %(table)s (gene, reaction, dGc, compound, coefficient)
                SELECT  gen.gene, rxn.reaction, %(sign)seng.dGc, sto.compound, %(sign)ssto.coefficient
                FROM    kegg_genes gen, kegg_genes_to_reactions rxn, kegg_equations eqn, kegg_gibbs_energies eng, kegg_stoichiometry sto
                WHERE   gen.organism = 'eco'
                  AND   gen.gene = rxn.gene
                  AND   rxn.reaction = eqn.reaction
                  AND   eqn.equation = eng.equation
                  AND   eng.dG0 IS NOT NULL
                  AND   eqn.equation = sto.equation
        """
        # forward direction first, then the reversed reactions
        for sign in ('', '-'):
            self.db.Execute(query_template %
                            {'table': self.GENE_ENERGY_TABLE_NAME,
                             'sign': sign})
        self.db.Commit()

    def CreateGenePairsTable(self):
        """Recreate the table of gene pairs that share a non-cofactor compound.

        A pair consists of a reaction with a positive coefficient for the
        compound (gene1) and a different reaction of a different gene with a
        negative coefficient for it (gene2).  The functional-interaction
        score is attached when the pair appears, in either order, in the
        functional-interactions table; otherwise the score is NULL.
        """
        self.db.CreateTable(self.GENE_PAIRS_TABLE_NAME,
                            "gene1 TEXT, gene2 TEXT, reaction1 TEXT, reaction2 TEXT, "
                            "compound TEXT, coeff1 REAL, coeff2 REAL, dGc1 REAL, "
                            "dGc2 REAL, score REAL", drop_if_exists=True)
        self.db.CreateIndex('gene_pairs_gene_idx', self.GENE_PAIRS_TABLE_NAME,
                            'gene1, gene2', unique=False)

        query = """
            INSERT INTO %s (gene1, gene2, reaction1, reaction2, compound, coeff1, coeff2, dGc1, dGc2, score)
                SELECT p.*, pfi.score FROM
                (
                    SELECT  kge1.gene gene1, kge2.gene gene2,
                            kge1.reaction reaction1, kge2.reaction reaction2,
                            kge1.compound compound,
                            kge1.coefficient coeff1, kge2.coefficient coeff2,
                            cast(kge1.dGc as real) dGc1, cast(kge2.dGc as real) dGc2
                    FROM    kegg_gene_energies kge1, kegg_gene_energies kge2
                    WHERE   kge1.compound = kge2.compound
                      AND   kge1.compound NOT IN (SELECT compound FROM %s)
                      AND   kge1.gene != kge2.gene
                      AND   kge1.reaction != kge2.reaction
                      AND   kge1.coefficient > 0
                      AND   kge2.coefficient < 0
                ) p
                LEFT OUTER JOIN %s pfi
                ON (pfi.gene1 = p.gene1 AND pfi.gene2 = p.gene2
                    OR
                    pfi.gene1 = p.gene2 AND pfi.gene2 = p.gene1)
        """ % (self.GENE_PAIRS_TABLE_NAME,
               self.COFACTOR_TABLE_NAME,
               self.FUNCTIONAL_INTERATCTIONS_TABLE)
        self.db.Execute(query)
        self.db.Commit()

    def Correlate(self, dGc1_lower, dGc2_upper, reverse=False):
        """Relate a thermodynamic criterion to functional interactions.

        A gene pair 'qualifies' when at least one of its rows satisfies
        dGc1 > dGc1_lower AND dGc2 < dGc2_upper (with reverse=True:
        dGc1 < dGc1_lower AND dGc2 > dGc2_upper).  A pair 'interacts' when it
        has a functional-interaction score.  A summary is printed to stdout.

        Returns a tuple (interaction rate among unqualifying pairs,
        interaction rate among qualifying pairs).
        NOTE(review): when one of the groups is empty the corresponding rate
        is a division by zero on numpy floats (nan or inf plus a
        RuntimeWarning), exactly as before - callers may want to check.
        """
        # Thresholds are formatted via repr(float(...)); the previous '%d'
        # silently truncated fractional thresholds (2.5 became 2) in the SQL.
        lower, upper = float(dGc1_lower), float(dGc2_upper)
        if reverse:
            cond = "kgp.dGc1 < %r AND kgp.dGc2 > %r" % (lower, upper)
        else:
            cond = "kgp.dGc1 > %r AND kgp.dGc2 < %r" % (lower, upper)

        query = """
            SELECT kgp.gene1, kgp.gene2, sum(%s) nqual, count(*) ntot, max(score)
            FROM %s kgp
            GROUP BY kgp.gene1, kgp.gene2
        """ % (cond, self.GENE_PAIRS_TABLE_NAME)

        # counters[i, j]: i = pair has an interaction score, j = pair qualifies
        counters = np.zeros((2, 2))
        for row in self.db.Execute(query):
            _gene1, _gene2, nqual, _ntot, score = row
            i = int(score is not None)  # is there an PP-interaction
            j = int(nqual > 0)  # is this a qualifying pair (thermodynamically)
            counters[i, j] += 1.0

        inter1 = np.sum(counters[1, :])
        qual0 = np.sum(counters[:, 0])
        qual1 = np.sum(counters[:, 1])
        total = np.sum(counters.flat)

        # print(...) with a single argument behaves the same as the Python 2
        # print statement used elsewhere in this file
        print("-" * 50)
        if reverse:
            print("Checking criterion: first < %s and second > %s" % (dGc1_lower, dGc2_upper))
        else:
            print("Checking criterion: first > %s and second < %s" % (dGc1_lower, dGc2_upper))
        print("Total no. of pairs = %d" % total)
        print("interaction rate among all pairs (%d out of %d) = %.2f%%" %
              (inter1, total, 100*(inter1 / total)))
        print("qualification rate among all pairs (%d out of %d) = %.2f%%" %
              (qual1, total, 100*(qual1 / total)))
        print("interactions between unqualifying pairs (%d out of %d) = %.2f%%" %
              (counters[1, 0], qual0, 100*(counters[1, 0] / qual0)))
        print("interactions between qualifying pairs (%d out of %d) = %.2f%%" %
              (counters[1, 1], qual1, 100*(counters[1, 1] / qual1)))

        return counters[1, 0] / qual0, counters[1, 1] / qual1

    def LoadFunctionalInteractions(self,
            fname='../data/proteomics/coli/functional_interactions.txt'):
        """Recreate the functional-interactions table from a TSV file.

        Each data row is '<gene1>TAB<gene2>TAB<score>'.  Gene names are
        lower-cased and prefixed with 'eco:'.  Rows whose first field starts
        with '#' are comments; blank rows and rows with an empty first field
        are skipped (they used to raise IndexError).
        """
        self.db.CreateTable(self.FUNCTIONAL_INTERATCTIONS_TABLE,
                            ['gene1', 'gene2', 'score'], drop_if_exists=True)
        self.db.CreateIndex('interaction_gene_idx',
                            self.FUNCTIONAL_INTERATCTIONS_TABLE,
                            'gene1, gene2', unique=False)

        # `with` closes the input file, which used to be left open
        with open(fname, 'r') as tsv_file:
            for row in csv.reader(tsv_file, delimiter='\t'):
                if not row or not row[0] or row[0].startswith('#'):
                    continue
                gene1 = 'eco:' + row[0].lower()
                gene2 = 'eco:' + row[1].lower()
                score = float(row[2])
                self.db.Insert(self.FUNCTIONAL_INTERATCTIONS_TABLE,
                               [gene1, gene2, score])
        self.db.Commit()

    def PlotScatter(self):
        """Show a scatter plot of dGc1 vs. dGc2 for all gene-pair rows.

        Rows with a positive interaction score are drawn in green on top of
        the rows with a missing or zero score, drawn in red.
        """
        query = """
            SELECT p.g1, p.g2, pfi.score FROM
            (
                SELECT  kgp.gene1 gene1, kgp.gene2 gene2,
                        cast(kgp.dGc1 as real) g1, cast(kgp.dGc2 as real) g2
                FROM    %s kgp
            ) p
            LEFT OUTER JOIN %s pfi
            ON (pfi.gene1 = p.gene1 AND pfi.gene2 = p.gene2
                OR
                pfi.gene1 = p.gene2 AND pfi.gene2 = p.gene1)
        """ % (self.GENE_PAIRS_TABLE_NAME,
               self.FUNCTIONAL_INTERATCTIONS_TABLE)

        data = []
        for row in self.db.Execute(query):
            g1, g2, score = row
            # a missing (NULL) score is treated as 0
            data.append([float(g1), float(g2), float(score or 0)])
        data = np.matrix(data)

        ind1 = list(np.where(data[:, 2] > 0)[0].flat)   # interacting
        ind2 = list(np.where(data[:, 2] == 0)[0].flat)  # non-interacting

        fig = plt.figure(figsize=(6, 6), dpi=90)
        plt.plot(data[ind2, 0], data[ind2, 1], 'r.', markersize=5, figure=fig)
        plt.plot(data[ind1, 0], data[ind1, 1], 'g.', markersize=5, figure=fig)
        plt.show()

    def PlotCDF(self):
        """Embed the CDF of dGc2 - dGc1 for interacting vs. other gene pairs.

        Only rows with dGc1 + dGc2 < 0 and dGc1 > 10 are considered; for each
        gene pair the minimal difference and the maximal score are used.  Two
        hand-picked pairs are marked with labelled vertical lines.
        """
        special_pairs = {
            # malate dehydrogenase -> oxaloacetate -> citrate synthase
            ('eco:b3236', 'eco:b0720'): "mdh:gltA",
            # trpD -> chorismate -> trpE (two components of anthraline synthase)
            ('eco:b1263', 'eco:b1264'): "trpD:trpE",
        }

        query = """
            SELECT gene1, gene2, min(dGc2 - dGc1), max(score)
            FROM %s
            WHERE dGc1 + dGc2 < 0 AND dGc1 > 10
            GROUP BY gene1, gene2
        """ % (self.GENE_PAIRS_TABLE_NAME)

        data = []
        markers = []
        for row in self.db.Execute(query):
            gene1, gene2, ddG, score = row
            if (gene1, gene2) in special_pairs:
                markers.append((special_pairs[(gene1, gene2)], ddG))
            # a missing (NULL) score is treated as 0
            data.append([ddG, float(score or 0)])
        data = np.matrix(data)

        ind1 = list(np.where(data[:, 1] > 0)[0].flat)   # interacting
        ind2 = list(np.where(data[:, 1] == 0)[0].flat)  # non-interacting

        fig = plt.figure(figsize=(6, 6), dpi=90)
        cdf((data[ind2, 0]).flat, label="non-interacting (N = %d)" % len(ind2),
            style='r', figure=fig)
        cdf((data[ind1, 0]).flat, label="interacting (N = %d)" % len(ind1),
            style='g', figure=fig)
        for label, ddG in markers:
            plt.plot([ddG, ddG], [0, 1], 'b--', figure=fig)
            plt.text(ddG, 0.1, label)
        plt.xlim(-500, 500)
        plt.xlabel(r"$\Delta G'^c$ (2nd) - $\Delta G'^c$ (1st) [kJ/mol]")
        plt.ylabel(r"Cumulative Distribution Function")
        plt.legend(loc="upper left")

        self.html_writer.embed_matplotlib_figure(fig, width=400, height=400,
                                                 name='channeling_cdf')

    def PrintEnergies(self):
        """Write the reaction-energy table to the HTML report and to a CSV."""
        query = """
            SELECT e.reaction, e.equation, g.dG0, g.dGc
            FROM kegg_equations e, kegg_gibbs_energies g
            WHERE e.equation = g.equation
        """
        column_names = ['KEGG Reaction', 'Formula', 'dG0', 'dGc']

        self.html_writer.write('<font size="1">\n')
        self.db.Query2HTML(self.html_writer, query, column_names)
        self.db.Query2CSV('../res/channeling_energy_tabel.csv', query,
                          column_names)
        self.html_writer.write('</font>\n')

    def PrintPairs(self):
        """Write the gene-pair summary table to the HTML report and to a CSV.

        One row per (gene1, gene2, compound) with the maximal dGc1, the
        minimal dGc2, the minimal difference and the maximal score, ordered
        by the difference.
        """
        query = """
            SELECT g.gene1, g.gene2, c.name, g.reaction1, g.reaction2,
                   cast(g.dG1 as int), cast(g.dG2 as int), cast(g.ddG as int),
                   kg1.desc, kg2.desc, g.score
            FROM (SELECT gene1, gene2, reaction1, reaction2, compound,
                         max(dGc1) dG1, min(dGc2) dG2, min(dGc2 - dGc1) ddG,
                         max(score) score
                  FROM kegg_gene_pairs
                  WHERE dGc1 + dGc2 < 1000000 AND dGc1 > -1000000
                  GROUP BY gene1, gene2, compound
                  ORDER BY ddG) g,
                 kegg_genes kg1, kegg_genes kg2, kegg_compounds c
            WHERE g.gene1 = kg1.gene
              AND g.gene2 = kg2.gene
              AND c.compound = g.compound
        """
        column_names = ['Gene 1', 'Gene 2', 'Common Compound', 'Reaction 1',
                        'Reaction 2', 'dGc1', 'dGc2', 'dG2-dG1',
                        'Desc 1', 'Desc 2', 'Score']

        self.html_writer.write('<font size="1">\n')
        self.db.Query2HTML(self.html_writer, query, column_names)
        self.db.Query2CSV('../res/channeling_pairs_table.csv', query,
                          column_names)
        self.html_writer.write('</font>\n')

    def PrintAllPairs(self):
        """Write the unfiltered gene-pair table to the HTML report and a CSV."""
        query = """
            SELECT g.gene1, g.gene2, c.name, g.reaction1, g.reaction2,
                   g.dG1, g.dG2
            FROM (SELECT gene1, gene2, reaction1, reaction2, compound,
                         max(dGc1) dG1, min(dGc2) dG2
                  FROM kegg_gene_pairs
                  GROUP BY gene1, gene2, compound
                  ORDER BY gene1, gene2, reaction1, reaction2, compound) g,
                 kegg_genes kg1, kegg_genes kg2, kegg_compounds c
            WHERE g.gene1 = kg1.gene
              AND g.gene2 = kg2.gene
              AND c.compound = g.compound
        """
        column_names = ['Gene 1', 'Gene 2', 'Common Compound', 'Reaction 1',
                        'Reaction 2', 'dGc1', 'dGc2']

        self.html_writer.write('<font size="1">\n')
        self.db.Query2HTML(self.html_writer, query, column_names)
        self.db.Query2CSV('../res/channeling_all_pairs_table.csv', query,
                          column_names)
        self.html_writer.write('</font>\n')
if __name__ == "__main__":
    # Breadth-first walk over the KEGG compound graph, starting from a single
    # seed compound, writing every newly reached compound to an HTML report.
    kegg = Kegg.getInstance()

    # graph[cid] = set of compounds that appear with a coefficient of the
    # opposite sign to cid's in at least one reaction.
    graph = {}
    for rid in kegg.get_all_rids():
        r = kegg.rid2reaction(rid)
        for cid1 in r.sparse.keys():
            for cid2 in r.sparse.keys():
                if r.sparse[cid1] * r.sparse[cid2] < 0:
                    graph.setdefault(cid1, set()).add(cid2)

    queue = [355]
    # Compounds that are never expanded or reported; visited compounds are
    # added to this set as the search proceeds.
    cofactors = set([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 20, 28, 30])
    html_writer = HtmlWriter('../res/kegg_bfs.html')
    try:
        for i in range(3):
            next_queue = set()
            cofactors.update(queue)
            for cid in queue:
                # A compound without any recorded neighbour (e.g. a seed that
                # takes part in no reaction) simply contributes nothing.
                next_queue.update(graph.get(cid, ()))
            queue = list(next_queue.difference(cofactors))
            for cid in queue:
                try:
                    html_writer.write(kegg.cid2mol(cid).ToSVG())
                    html_writer.write(kegg.cid2name(cid))
                except (KeggParseException, OpenBabelError):
                    # No drawable structure: fall back to the name only.
                    html_writer.write(kegg.cid2name(cid))
    finally:
        # Always finalize the report, even if the walk is interrupted.
        html_writer.close()
def WriteUniqueReactionReport(self, unique_sparse_reactions,
                              unique_nist_row_representatives,
                              unique_data_mat, full_data_mat,
                              cid2nH_nMg=None):
    """Write the per-reaction summary as an HTML table and a CSV file.

    Plots std(dG0) against std(dG'0) for all unique reactions, embeds the
    figure in self.html_writer, builds one row per unique reaction, writes
    the rows (sorted by decreasing "diff") to the HTML report and to
    '../res/nist_regression_unique.csv'.

    Args:
        unique_sparse_reactions: sequence of reaction objects; each must
            provide to_hypertext(), FullReactionString(), .sparse and .name.
        unique_nist_row_representatives: sequence parallel to the reactions;
            each element provides .ref_id and .ec.
        unique_data_mat: indexable as [row, i]; rows 0..4 are used as
            E(dG0), E(dG'0), std(dG0), std(dG'0) and number of observations.
        full_data_mat: rows 2:4 are used for the overall standard deviations.
            NOTE(review): indexed as total_std[k, 0] afterwards, so this
            presumably is a 2-D np.matrix -- confirm against the caller.
        cid2nH_nMg: forwarded unchanged to ReverseTransformReaction.
    """
    total_std = full_data_mat[2:4, :].std(1)

    fig = plt.figure()
    plt.plot(unique_data_mat[2, :].T, unique_data_mat[3, :].T, '.')
    plt.xlabel(r"$\sigma(\Delta_r G^\circ)$")
    plt.ylabel(r"$\sigma(\Delta_r G^{'\circ})$")
    plt.title(r"$\sigma_{total}(\Delta_r G^\circ) = %.1f$ kJ/mol, "
              r"$\sigma_{total}(\Delta_r G^{'\circ}) = %.1f$ kJ/mol"
              % (total_std[0, 0], total_std[1, 0]))
    self.html_writer.embed_matplotlib_figure(fig, width=640, height=480)
    logging.info('std(dG0_r) = %.1f' % total_std[0, 0])
    logging.info("std(dG'0_r) = %.1f" % total_std[1, 0])

    rowdicts = []
    for i, reaction in enumerate(unique_sparse_reactions):
        logging.debug('Analyzing unique reaction: ' + str(reaction))
        ddG0 = self.GetDissociation().ReverseTransformReaction(
            reaction, pH=7, I=0.1, pMg=10, T=298.15, cid2nH_nMg=cid2nH_nMg)

        d = {}
        # NOTE(review): "_reaction" holds the hypertext form yet is the
        # column written to the CSV, while the plain-text "reaction" goes to
        # the HTML table. This looks swapped -- confirm before changing.
        d["_reaction"] = reaction.to_hypertext(show_cids=False)
        d["reaction"] = reaction.FullReactionString(show_cids=False)
        d["Reference ID"] = unique_nist_row_representatives[i].ref_id
        d["EC"] = unique_nist_row_representatives[i].ec
        d["E(" + symbol_dr_G0 + ")"] = unique_data_mat[0, i]
        d["E(" + symbol_dr_G0_prime + ")"] = unique_data_mat[1, i]
        d["E(" + symbol_dr_G0 + ")'"] = unique_data_mat[0, i] + ddG0
        d["std(" + symbol_dr_G0 + ")"] = unique_data_mat[2, i]
        d["std(" + symbol_dr_G0_prime + ")"] = unique_data_mat[3, i]
        d["diff"] = unique_data_mat[2, i] - unique_data_mat[3, i]
        d["#observations"] = "%d" % unique_data_mat[4, i]

        # Flag reactions by the coefficient pattern of compounds 3/4 and 6/5
        # (named NAD/NADH and NADP/NADPH here): +-1 for the first pair,
        # +-2 for the second, 0 otherwise.
        flag = 0
        c_nad = reaction.sparse.get(3, 0)
        c_nadh = reaction.sparse.get(4, 0)
        c_nadp = reaction.sparse.get(6, 0)
        c_nadph = reaction.sparse.get(5, 0)
        if c_nad == 1 and c_nadh == -1:
            flag = 1
        elif c_nad == -1 and c_nadh == 1:
            flag = -1
        elif c_nadp == 1 and c_nadph == -1:
            flag = 2
        elif c_nadp == -1 and c_nadph == 1:
            flag = -2
        d["Arren Flag"] = flag

        # Reactions whose std difference exceeds the threshold get their own
        # analysis page, linked from the main table.
        if d["diff"] > self.std_diff_threshold:
            _mkdir('../res/prc_reactions')
            link = "prc_reactions/%s.html" % reaction.name
            d["analysis"] = '<a href="%s">link</a>' % link
            reaction_html_writer = HtmlWriter(os.path.join('../res', link))
            self.AnalyzeSingleReaction(reaction,
                                       html_writer=reaction_html_writer)
        rowdicts.append(d)

    result_headers = ["E(" + symbol_dr_G0 + ")",
                      "E(" + symbol_dr_G0_prime + ")",
                      "E(" + symbol_dr_G0 + ")'",
                      "std(" + symbol_dr_G0 + ")",
                      "std(" + symbol_dr_G0_prime + ")"]
    rowdicts.sort(key=lambda row: row["diff"], reverse=True)
    self.html_writer.write_table(
        rowdicts,
        ["reaction", "Reference ID"] + result_headers +
        ["EC", "#observations", "analysis"],
        decimal=1)

    # Close the CSV file deterministically so that all rows are flushed to
    # disk before this method returns.
    with open('../res/nist_regression_unique.csv', 'w') as csv_file:
        csv_writer = csv.DictWriter(
            csv_file,
            ["_reaction", "Reference ID", "EC", "#observations"] +
            result_headers + ['Arren Flag'],
            extrasaction='ignore')
        csv_writer.writeheader()
        csv_writer.writerows(rowdicts)
#print m.ToFormat('mol') #print m.ToFormat('mol2') #print m.ToFormat('smi') #print m.ToFormat('inchi') #print m.ToFormat('sdf') diss_table = Molecule._GetDissociationTable('C(=O)(O)CN', fmt='smiles', mid_pH=default_pH, min_pKa=0, max_pKa=14, T=default_T) print "glycine\n", diss_table html_writer = HtmlWriter('../res/molecule.html') from pygibbs.kegg import Kegg kegg = Kegg.getInstance() html_writer.write('<h1>pKa estimation using ChemAxon</h1>\n') for cid in [41]: m = kegg.cid2mol(cid) html_writer.write("<h2>C%05d : %s</h2>\n" % (cid, str(m))) diss_table = m.GetDissociationTable() pmap = diss_table.GetPseudoisomerMap() diss_table.WriteToHTML(html_writer) pmap.WriteToHTML(html_writer) html_writer.write("</p>\n") #print m.GetDissociationConstants() #print m.GetMacrospecies() #obmol = m.ToOBMol()
def main():
    """Fit formation energies to NIST data with a set of anchored compounds.

    Steps:
      1. Load Alberty's pseudoisomer table and record, per compound, the
         dG0 and nH of the most abundant pseudoisomer at default conditions.
      2. Obtain the stoichiometric matrix S, the dG0 vector and the compound
         list, either by reverse-transforming the NIST data (the matrices are
         then cached as '../res/prc_*.txt') or by reading that cache.
      3. Solve the least-squares problem with the anchored compounds fixed.
      4. Write an HTML report ('../res/regression_fast.html'), a CSV file
         ('../res/prc_results.csv') and a LaTeX table
         ('../res/prc_latex.txt').
    """
    kegg = Kegg.getInstance()
    prefix = "../res/prc_"

    # A dictionary from CID to pairs of (nH, dG0) for the anchored compounds.
    fixed_cids = {
        # Alberty formation energies directly measured, linearly independent:
        1: (2, -237.19),    # H2O
        9: (1, -1096.1),    # HPO3(-2)
        14: (4, -79.31),    # NH4(+1)
        59: (0, -744.53),   # SO4(-2)
        288: (1, -586.77),  # HCO3(-1)
        # Alberty zeros:
        3: (26, 0.0),       # NAD(ox)
        10: (32, 0.0),      # CoA
        127: (30, 0.0),     # glutathione(ox)
        376: (28, 0.0),     # retinal(ox)
        # Directly measured values:
        4: (27, 22.65),     # NAD(red) -- relative to NAD(ox)
        212: (13, -194.5),  # adenosine
    }
    # Deliberately not anchored:
    #   294 inosine (12, -409.2) - linearly dependent on other 'anchors'
    # Alberty zeros which are not in NIST:
    #   524 cytochrome c(ox) (0, 0.0), 16 FAD(ox) (31, 0.0),
    #   139 ferredoxin(ox) (0, 0.0), 61 FMN(ox) (19, 0.0),
    #   343 thioredoxin(ox) (0, 0.0), 399 ubiquinone(ox) (90, 0.0)

    public_db = SqliteDatabase("../data/public_data.sqlite")
    alberty = PsuedoisomerTableThermodynamics.FromDatabase(
        public_db, "alberty_pseudoisomers", label=None, name="Alberty")
    alberty_cid2dG0 = {}
    alberty_cid2nH = {}
    for cid in alberty.get_all_cids():
        pmap = alberty.cid2PseudoisomerMap(cid)
        dG0, _dG0_tag, nH, _z, _nMg = pmap.GetMostAbundantPseudoisomer(
            pH=default_pH, I=default_I, pMg=default_pMg, T=default_T)
        alberty_cid2nH[cid] = nH
        alberty_cid2dG0[cid] = dG0

    if not os.path.exists(prefix + "S.txt"):
        # No cached matrices: derive them from the NIST database.
        db = SqliteDatabase("../res/gibbs.sqlite")
        nist_regression = NistRegression(db)

        # Choose nH per compound: anchors first, then Alberty, then the
        # dissociation table, and 0 as the last resort.
        cid2nH = {}
        for cid in nist_regression.nist.GetAllCids():
            if cid in fixed_cids:
                cid2nH[cid] = fixed_cids[cid][0]
            elif cid in alberty_cid2nH:
                cid2nH[cid] = alberty_cid2nH[cid]
            else:
                tmp = nist_regression.dissociation.GetMostAbundantPseudoisomer(
                    cid, pH=default_pH, I=default_I, pMg=default_pMg,
                    T=default_T)
                if tmp is not None:
                    cid2nH[cid] = tmp[0]
                else:
                    logging.warning(
                        "The most abundant pseudoisomer of %s (C%05d) "
                        "cannot be resolved. Using nH = 0."
                        % (kegg.cid2name(cid), cid))
                    cid2nH[cid] = 0

        # nist_regression.std_diff_threshold = 2.0 # the threshold over which to print an analysis of a reaction
        # nist_regression.nist.T_range = None#(273.15 + 24, 273.15 + 40)
        S, dG0, cids = nist_regression.ReverseTransform(cid2nH=cid2nH)

        # export the raw data matrices to text files
        C = np.array([[cid, cid2nH.get(cid, 0)] for cid in cids])
        np.savetxt(prefix + "CID.txt", C, fmt="%d", delimiter=",")
        np.savetxt(prefix + "S.txt", S, fmt="%g", delimiter=",")
        np.savetxt(prefix + "dG0.txt", dG0, fmt="%.2f", delimiter=",")
    else:
        # Reuse the matrices cached by a previous run.
        C = np.loadtxt(prefix + "CID.txt", delimiter=",")
        cids = [int(cid) for cid in C[:, 0]]
        cid2nH = {}
        for i, cid in enumerate(cids):
            cid2nH[cid] = int(C[i, 1])
        S = np.loadtxt(prefix + "S.txt", delimiter=",")
        dG0 = np.loadtxt(prefix + "dG0.txt", delimiter=",")
        dG0 = np.reshape(dG0, (dG0.shape[0], 1))

    html_writer = HtmlWriter("../res/regression_fast.html")
    html_writer.write("<h1>Pseudoisomeric Reactant Contributions</h1>\n")
    html_writer.write("<p>The stoichiometric matrix (S):")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, S, cids)
    html_writer.div_end()
    html_writer.write("</p>")

    # Map the column index of every anchored compound to its fixed dG0.
    index2value = {}
    for cid in fixed_cids.keys():
        _nH, dG0_fixed = fixed_cids[cid]
        index2value[cids.index(cid)] = dG0_fixed

    x, _K = LinearRegression.LeastSquaresWithFixedPoints(S, dG0, index2value)
    cid2dG0 = {}
    for i, cid in enumerate(cids):
        cid2dG0[cid] = x[i]

    # Calculate the Kernel of the reduced stoichiometric matrix (after
    # removing the columns of the fixed compounds).
    cids_red = [cid for cid in cids if cid not in fixed_cids]
    index_red = [i for i in range(len(cids)) if i not in index2value]
    S_red = S[:, index_red]
    K_red = LinearRegression.Kernel(S_red)

    # Find all CIDs that are completely determined and do not depend on any
    # free variable, i.e. all zero-columns of the reduced kernel.
    determined_indices = np.where(np.sum(abs(K_red), 0) < 1e-10)[0]
    determined_cids = set(cids_red[i] for i in determined_indices)

    dict_list = []
    plot_data = []
    for i, cid in enumerate(cids):
        d = {
            "CID": "C%05d" % cid,
            "Compound": kegg.cid2name(cid),
            "nH": "%d" % cid2nH[cid],
            "dG0 (PRC)": "%.1f" % cid2dG0[cid],
        }
        if cid in alberty_cid2dG0:
            d["dG0 (Alberty)"] = "%.1f" % alberty_cid2dG0[cid]
            # Anchored compounds are excluded from the comparison plot.
            if cid not in fixed_cids:
                plot_data.append((alberty_cid2dG0[cid], cid2dG0[cid],
                                  kegg.cid2name(cid)))
        else:
            d["dG0 (Alberty)"] = ""

        if cid in fixed_cids:
            d["Depends on"] = "anchored"
        elif cid in determined_cids:
            d["Depends on"] = "fixed compounds"
        else:
            d["Depends on"] = "kernel dimensions"
        dict_list.append(d)

    dict_list.sort(key=lambda row: (row["Depends on"], row["CID"]))
    html_writer.write(
        "<p>Formation energies determined by the linear constraints:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(
        dict_list,
        headers=["#", "Compound", "CID", "nH", "dG0 (PRC)", "dG0 (Alberty)",
                 "Depends on"])
    html_writer.write("</font>")
    html_writer.div_end()
    html_writer.write("</p>")

    # Plot a comparison between PRC and Alberty formation energies
    fig = plt.figure(figsize=(8, 8), dpi=80)
    plt.plot([point[0] for point in plot_data],
             [point[1] for point in plot_data], "b.", figure=fig)
    for x_val, y_val, name in plot_data:
        plt.text(x_val, y_val, name, fontsize=6)
    plt.xlabel(r"Alberty $\Delta_f G^\circ$")
    plt.ylabel(r"PRC $\Delta_f G^\circ$")
    html_writer.write("<p>Plot comparing PRC and Alberty results:")
    html_writer.insert_toggle(start_here=True)
    html_writer.embed_matplotlib_figure(fig)
    html_writer.div_end()
    html_writer.write("</p>")

    K_sparse = SparseKernel(S_red).Solve()
    html_writer.write(
        "<p>The sparse null-space of the reduced stoichiometric matrix:")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, K_sparse, cids_red)
    html_writer.div_end()
    html_writer.write("</p>")

    # Express every non-anchored formation energy as its fitted value plus a
    # combination of the free variables V_i (one per row of K_sparse).
    dict_list = []
    index2string_html = dict((i, "V<sub>%02d</sub>" % i)
                             for i in range(K_sparse.shape[0]))
    index2string = dict((i, "V%d" % i) for i in range(K_sparse.shape[0]))
    for i, cid in enumerate(cids_red):
        d = {}
        d["KEGG ID"] = '<a href="%s">C%05d</a>' % (kegg.cid2link(cid), cid)
        d["KEGG ID plain"] = "C%05d" % cid
        d["Compound"] = kegg.cid2name(cid)
        d["nH"] = "%d" % cid2nH[cid]
        if cid in alberty_cid2dG0:
            d["dG0 (Alberty)"] = "%.1f" % alberty_cid2dG0[cid]
        else:
            d["dG0 (Alberty)"] = ""
        d["dG0 (PRC)"] = "%.1f" % cid2dG0[cid]
        d["dG0 (PRC) plain"] = "%.1f" % cid2dG0[cid]

        # Sort key: which free variables this compound depends on, with the
        # last variable being the most significant.
        indic = np.where(abs(K_sparse[:, i]) > 1e-10, 1, 0).tolist()
        indic.reverse()
        d["order_key"] = indic
        if mlab.rms_flat(K_sparse[:, i]) > 1e-10:
            d["dG0 (PRC)"] += (
                " + (" + vector2string(K_sparse[:, i], index2string_html)
                + ")")
            d["dG0 (PRC) plain"] += (
                " + (" + vector2string(K_sparse[:, i], index2string) + ")")
        dict_list.append(d)
    dict_list.sort(key=lambda row: (row["order_key"], row["KEGG ID plain"]))

    # Export the results to CSV; the file is closed as soon as it is written.
    with open("../res/prc_results.csv", "w") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(
            ["KEGG ID", "Compound", "nH", "dG0 (PRC)", "dG0 (Alberty)"])
        for d in dict_list:
            csv_writer.writerow(
                [d["KEGG ID plain"], d["Compound"], d["nH"],
                 d["dG0 (PRC) plain"], d["dG0 (Alberty)"]])

    html_writer.write(
        "<p>All formation energies as a function of the free variables:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(
        dict_list,
        headers=["#", "KEGG ID", "Compound", "nH", "dG0 (PRC)",
                 "dG0 (Alberty)"])
    html_writer.write("</font>")
    html_writer.div_end()
    html_writer.write("</p>")

    with open("../res/prc_latex.txt", "w") as fp:
        fp.write(latex.table2LaTeX(
            dict_list,
            headers=["#", "KEGG ID plain", "Compound", "nH",
                     "dG0 (PRC) plain", "dG0 (Alberty)"]))

    # Finalize the HTML report, as the other entry points in this code do.
    html_writer.close()
def main():
    """Compare several dG0 estimators against NIST data and write a report.

    Sets up the database, the HTML report ('../res/nist/report.html'), the
    group-contribution object and a GradientAscent helper, then runs exactly
    one of the branches below. Only the first branch (`if True`) is active;
    every `elif False` branch is a disabled alternative that is switched on
    by editing the conditions by hand.
    """
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    gc = GroupContribution(db)
    gc.override_gc_with_measurements = True
    gc.init()
    grad = GradientAscent(gc)
    # NOTE(review): kegg is called as a method here (gc.kegg()) but is used
    # as an attribute (grad.kegg.…) in the disabled branches -- confirm.
    nist = Nist(db, html_writer, gc.kegg())
    nist.FromDatabase()
    alberty = Alberty()
    hatzi = Hatzi()

    if True:
        # Active branch: load the NIST data once (using Alberty's table) and
        # verify three estimators against it.
        grad.load_nist_data(nist, alberty, skip_missing_reactions=False, T_range=(298, 314))
        grad.verify_results("Alberty", alberty, html_writer)
        #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")
        #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
        #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
        #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
        #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)
        #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Hatzimanikatis", hatzi, html_writer)
        #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Milo", gc, html_writer)
    elif False:
        # Run the gradient ascent algorithm, where the starting point is the same file used for training the GC algorithm
        grad.load_dG0_data("../data/thermodynamics/dG0.csv")
        # load the data for the anchors (i.e. compounds whose dG0 should not be changed - usually their value will be 0).
        grad.anchors = grad.load_dG0_data("../data/thermodynamics/nist_anchors.csv")
        grad.load_nist_data(nist, grad, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient1")
    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient2")
    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        # Use DETERMINISTIC gradient ascent
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True, T_range=(24 + 273.15, 40 + 273.15))
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.deterministic_hill_climb(max_i=200)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient_deterministic")
    elif False:
        # Run the gradient ascent algorithm, where the starting point arbitrary (predict all of the NIST compounds)
        grad = GradientAscent(gc)
        grad.load_nist_data(nist, skip_missing_reactions=False)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient3")
    elif False:
        # Use Alberty's table from (Mathematica 2006) to calculate the dG0 of all possible reactions in KEGG
        grad = GradientAscent(gc)
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        (pH, I, T) = (7, 0, 300)
        counter = 0
        for rid in grad.kegg.get_all_rids():
            sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
            try:
                dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                print "R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0)
                counter += 1
            except MissingCompoundFormationEnergy as e:
                # Reactions with a compound of unknown formation energy are
                # skipped silently and not counted.
                #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                pass
        print "Managed to calculate the dG0 of %d reactions" % counter
    elif False:
        # Write a table of all compounds appearing in the NIST rows, with a
        # picture and the known (nH, z) pseudoisomers, to HTML and CSV.
        util._mkdir("../res/nist/fig")
        # NOTE(review): this file handle is never closed explicitly.
        csv_writer = csv.writer(open("../res/nist/pseudoisomers.csv", "w"))
        cid_set = set()
        for row in nist.data:
            sparce_reaction = row['sparse']
            cid_set.update(sparce_reaction.keys())
        html_writer.write("<table border=1>\n")
        for cid in sorted(list(cid_set)):
            html_writer.write(" <tr><td>C%05d</td><td>%s</td><td>" % (cid, grad.kegg.cid2name(cid)))
            try:
                mol = grad.kegg.cid2mol(cid)
                img_fname = '../res/nist/fig/C%05d.png' % cid
                # The image tag is emitted before the image file is drawn.
                html_writer.embed_img(img_fname, "C%05d" % cid)
                mol.draw(show=False, filename=img_fname)
            except AssertionError as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
            except KeggParseException as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
            html_writer.write("</td><td>")
            if (cid in alberty.cid2pmap_dict):
                for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                    html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                    csv_writer.writerow((cid, nH, z))
            else:
                # Not in Alberty's table: fall back to the hydrogen count and
                # charge reported by KEGG.
                nH = grad.kegg.cid2num_hydrogens(cid)
                z = grad.kegg.cid2charge(cid)
                html_writer.write("unknown pseudoisomers<br>")
                html_writer.write("(nH=%d, z=%d)" % (nH, z))
                csv_writer.writerow((cid, nH, z))
            html_writer.write("</td></tr>\n")
        html_writer.write("</table>\n")
    html_writer.close()
def main():
    """Write the NIST verification report for all loaded estimators.

    For every estimator the report ('../res/nist/report.html') contains the
    output of Nist.verify_results, followed by a summary table with the RMSE
    and the coverage of the KEGG, FEIST and NIST reaction sets.
    """
    html_writer = HtmlWriter("../res/nist/report.html")
    estimators = LoadAllEstimators()
    nist = Nist()
    nist.T_range = (273.15 + 24, 273.15 + 40)
    #nist.override_I = 0.25
    #nist.override_pMg = 14.0
    #nist.override_T = 298.15

    html_writer.write('<p>\n')
    html_writer.write("Total number of reaction in NIST: %d</br>\n"
                      % len(nist.data))
    html_writer.write(
        "Total number of reaction in range %.1fK < T < %.1fK: %d</br>\n"
        % (nist.T_range[0], nist.T_range[1], len(nist.SelectRowsFromNist())))
    html_writer.write('</p>\n')

    # Reaction sets used for the coverage statistics. KEGG reactions that
    # cannot be balanced or parsed are left out.
    reactions = {}
    reactions['KEGG'] = []
    for reaction in Kegg.getInstance().AllReactions():
        try:
            reaction.Balance(balance_water=True, exception_if_unknown=True)
            reactions['KEGG'].append(reaction)
        except (KeggReactionNotBalancedException, KeggParseException,
                OpenBabelError):
            pass
    reactions['FEIST'] = Feist.FromFiles().reactions
    reactions['NIST'] = nist.GetUniqueReactionSet()

    # Pairs of estimators to compare head-to-head; currently none.
    pairs = []
    #pairs += [('hatzi_gc', 'UGC')], ('PGC', 'PRC'), ('alberty', 'PRC')]
    for t1, t2 in pairs:
        logging.info('Writing the NIST report for %s vs. %s'
                     % (estimators[t1].name, estimators[t2].name))
        html_writer.write('<p><b>%s vs. %s</b> '
                          % (estimators[t1].name, estimators[t2].name))
        html_writer.insert_toggle(start_here=True)
        two_way_comparison(html_writer=html_writer,
                           thermo1=estimators[t1],
                           thermo2=estimators[t2],
                           reaction_list=reactions['FEIST'],
                           name='%s_vs_%s' % (t1, t2))
        html_writer.div_end()
        html_writer.write('</p>')

    # Disabled comparison, switched on by hand when needed.
    if False:
        estimators['alberty'].CompareOverKegg(
            html_writer, other=estimators['PRC'],
            fig_name='kegg_compare_alberty_vs_nist')

    # First summary row: the size of every reaction set.
    rowdicts = []
    rowdict = {'Method': 'Total'}
    for db_name, reaction_list in reactions.items():
        rowdict[db_name + ' coverage'] = len(reaction_list)
    rowdicts.append(rowdict)

    for name in ['UGC', 'PGC', 'PRC', 'alberty', 'merged', 'hatzi_gc']:
        thermo = estimators[name]
        logging.info('Writing the NIST report for %s' % thermo.name)
        html_writer.write('<p><b>%s</b> ' % thermo.name)
        html_writer.insert_toggle(start_here=True)
        num_estimations, rmse = nist.verify_results(
            html_writer=html_writer, thermodynamics=thermo, name=name)
        html_writer.div_end()
        html_writer.write('N = %d, RMSE = %.1f</p>\n'
                          % (num_estimations, rmse))
        logging.info('N = %d, RMSE = %.1f' % (num_estimations, rmse))

        rowdict = {
            'Method': thermo.name,
            'RMSE (kJ/mol)': "%.1f (N=%d)" % (rmse, num_estimations),
        }
        for db_name, reaction_list in reactions.items():
            n_covered = thermo.CalculateCoverage(reaction_list)
            n_total = len(reaction_list)
            # An empty reaction set (e.g. no KEGG reaction could be
            # balanced) is reported as 0% instead of dividing by zero.
            percent = n_covered * 100.0 / n_total if n_total else 0.0
            rowdict[db_name + " coverage"] = "%.1f%% (%d)" % (percent,
                                                             n_covered)
            logging.info(db_name + " coverage = %.1f%%" % percent)
        rowdicts.append(rowdict)

    headers = (['Method', 'RMSE (kJ/mol)'] +
               [db_name + ' coverage' for db_name in reactions.keys()])
    html_writer.write_table(rowdicts, headers=headers)

    # Finalize the HTML report, as the other entry points in this code do.
    html_writer.close()
def AnalyzePHGradient(pathway_file, output_prefix, thermo, conc_range):
    """Compute the OBD of every pathway over a range of pH values.

    Outputs:
      * '<output_prefix>.html' - report with a summary figure (OBD vs. pH).
      * '<output_prefix>.csv'  - one row per pH, one column per pathway.
      * '<output_prefix>/<entry>.csv' - per pathway, the reaction prices at
        every pH for which they are not a plain float.

    Args:
        pathway_file: passed to KeggFile2PathwayList.
        output_prefix: common prefix (and directory name) of all outputs.
        thermo: thermodynamics object forwarded to GetAllOBDs.
        conc_range: passed to ParseConcentrationRange to obtain the vector of
            pH values.
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default pH")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=None, section_prefix="test", balance_water=True,
                      override_bounds={})

    # Every file opened below is registered here so that it is closed (and
    # therefore flushed) even if one of the optimizations fails.
    open_files = []
    try:
        summary_file = open('%s.csv' % output_prefix, 'w')
        open_files.append(summary_file)
        csv_output = csv.writer(summary_file)
        csv_output.writerow(['pH'] + pathway_names)

        util._mkdir(output_prefix)
        shadow_csvs = {}
        for d in data:
            path = '%s/%s.csv' % (output_prefix, d['entry'])
            shadow_file = open(path, 'w')
            open_files.append(shadow_file)
            shadow_csvs[d['entry']] = csv.writer(shadow_file)
            shadow_csvs[d['entry']].writerow(['pH'] + d['rids'])

        pH_vec = ParseConcentrationRange(conc_range)
        obd_mat = []
        for pH in pH_vec.flat:
            logging.info("pH = %.1f" % (pH))
            data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                              pH=pH, section_prefix="", balance_water=True,
                              override_bounds={})
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            csv_output.writerow([data[0]['pH']] + obds)

            for d in data:
                # A plain float means there is no price vector to record.
                if type(d['reaction prices']) != types.FloatType:
                    prices = list(d['reaction prices'].flat)
                    shadow_csvs[d['entry']].writerow([pH] + prices)
    finally:
        for open_file in open_files:
            open_file.close()

    # rows are pH values and columns are pathways
    obd_mat = np.matrix(obd_mat)
    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(pH_vec, obd_mat[:, i], '-', color=colormap[name],
                 figure=fig)
    plt.title("OBD vs. pH", figure=fig)
    plt.ylim(0, np.max(obd_mat.flat))
    plt.xlabel('pH', figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # The heading used to be closed with </h1>, producing invalid markup.
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
def analyze(prefix, thermo):
    """Run the Pareto analysis of a KEGG pathway file over several conditions.

    Reads '../data/thermodynamics/<prefix>.txt', evaluates every combination
    of pH and CO2 concentration, and writes '../res/<prefix>.html' with the
    per-condition results and a summary Pareto figure (embedded twice: once
    with all points and once restricted to the feasible quadrant).

    Args:
        prefix: base name of the input text file and of the HTML report.
        thermo: thermodynamics object; must provide
            GetTransfromedKeggReactionEnergies and is forwarded to pareto().
    """
    kegg_file = ParsedKeggFile.FromKeggFile(
        '../data/thermodynamics/%s.txt' % prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)

    co2_hydration = Reaction.FromFormula("C00011 + C00001 => C00288")

    # TODO: this needs to be fixed so that the txt file will set the pH
    #pH_vec = np.arange(5, 9.001, 0.5)
    pH_vec = np.array([6, 7, 8])
    #co2_conc_vec = np.array([1e-5, 1e-3])
    co2_conc_vec = np.array([1e-5])

    data_mat = []
    override_bounds = {}
    for pH in pH_vec.flat:
        co2_hydration_dG0_prime = float(
            thermo.GetTransfromedKeggReactionEnergies([co2_hydration], pH=pH))
        for co2_conc in co2_conc_vec.flat:
            # Carbonate concentration in equilibrium with CO2 at this pH.
            carbonate_conc = co2_conc * np.exp(
                -co2_hydration_dG0_prime / (R * default_T))
            # Pin both concentrations by using equal lower and upper bounds.
            override_bounds[11] = (co2_conc, co2_conc)
            override_bounds[288] = (carbonate_conc, carbonate_conc)

            section_prefix = 'pH_%g_CO2_%g' % (pH, co2_conc*1000)
            section_title = 'pH = %g, [CO2] = %g mM' % (pH, co2_conc*1000)
            html_writer.write('<h1 id="%s_title">%s</h1>\n'
                              % (section_prefix, section_title))
            html_writer.write_ul(
                ['<a href="#%s_tables">Individual result tables</a>'
                 % section_prefix,
                 '<a href="#%s_summary">Summary table</a>' % section_prefix,
                 '<a href="#%s_figure">Summary figure</a>' % section_prefix])

            data, labels = pareto(kegg_file, html_writer, thermo, pH=pH,
                                  section_prefix=section_prefix,
                                  balance_water=True,
                                  override_bounds=override_bounds)
            data_mat.append(data)

    # Indexed below as [condition, pathway, 0 or 1].
    data_mat = np.array(data_mat)
    if data_mat.shape[0] == 1:
        # Single condition: scatter plot with one labelled point per pathway.
        pareto_fig = plt.figure(figsize=(6, 6), dpi=90)
        plt.plot(data_mat[0, :, 0], data_mat[0, :, 1], '.',
                 figure=pareto_fig)
        for i in range(data_mat.shape[1]):
            # Grey out pathways whose second coordinate is negative.
            if data_mat[0, i, 1] < 0:
                text_color = 'grey'
            else:
                text_color = 'black'
            plt.text(data_mat[0, i, 0], data_mat[0, i, 1], labels[i],
                     ha='left', va='bottom', fontsize=8, color=text_color,
                     figure=pareto_fig)
        plt.title(section_title, figure=pareto_fig)
    else:
        # Several conditions: one line per pathway, annotated at both ends
        # with the first and last pH value.
        pareto_fig = plt.figure(figsize=(10, 10), dpi=90)
        for i in range(data_mat.shape[1]):
            plt.plot(data_mat[:, i, 0], data_mat[:, i, 1], '-',
                     figure=pareto_fig)
            plt.text(data_mat[0, i, 0], data_mat[0, i, 1], '%g' % pH_vec[0],
                     ha='center', fontsize=6, color='black',
                     figure=pareto_fig)
            plt.text(data_mat[-1, i, 0], data_mat[-1, i, 1],
                     '%g' % pH_vec[-1],
                     ha='center', fontsize=6, color='black',
                     figure=pareto_fig)
        plt.legend(labels, loc='upper right')
        plt.title('Pareto', figure=pareto_fig)

    plt.xlabel('Optimal Energetic Efficiency [kJ/mol]', figure=pareto_fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]',
               figure=pareto_fig)
    # The heading used to be closed with </h1>, producing invalid markup.
    # The anchor id is that of the last condition evaluated above.
    html_writer.write('<h2 id="%s_figure">Summary figure</h2>\n'
                      % section_prefix)

    # plot the Pareto figure showing all values (including infeasible)
    html_writer.embed_matplotlib_figure(pareto_fig, name=prefix + '_0')

    # set axes to hide infeasible pathways and focus on feasible ones
    pareto_fig.axes[0].set_xlim(None, 0)
    pareto_fig.axes[0].set_ylim(0, None)
    html_writer.embed_matplotlib_figure(pareto_fig, name=prefix + '_1')
    html_writer.close()