def test_all_modules():
    """Scan all KEGG modules over a grid of pH and ionic strength values.

    For every module ID in ``gc.kegg().mid2rid_map`` and every (pH, I)
    combination, a ``dG0_f`` column vector is filled from
    ``gc.pmap_to_dG0`` (NaN where the formation energy is missing), and
    ``find_pCr`` and ``find_mtdf`` are run on the matrix ``S`` returned by
    ``get_module``.  One row per (module, pH, I) is written to
    ``../res/feasibility.csv``; ``pCr`` / ``MTDF`` are written as ``None``
    when the corresponding linear program has no solution.

    Modules that raise ``KeggMissingModuleException`` are skipped with a
    warning on stderr.
    """
    from pygibbs.groups import GroupContribution

    gc = GroupContribution(sqlite_name="gibbs.sqlite", html_name="dG0_test")
    gc.init()
    c_range = (1e-6, 1e-2)
    c_mid = 1e-3
    T = 300
    # CIDs that should be mapped to other CIDs because they are unspecific
    # (like NTP => ATP)
    map_cid = {201: 2, 454: 8}

    # Compounds already reported as missing, so each one is warned about
    # only once across the whole scan.
    cids_with_missing_dG_f = set()

    # 'with' guarantees the CSV file is closed even if the scan aborts.
    with open("../res/feasibility.csv", "w") as f:
        csv_output = csv.writer(f)
        csv_output.writerow(
            ("MID", "module name", "pH", "I", "T", "pCr", "MTDF"))
        for mid in sorted(gc.kegg().mid2rid_map.keys()):
            module_name = gc.kegg().mid2name_map[mid]
            try:
                S, _rids, _fluxes, cids = gc.kegg().get_module(mid)
            except KeggMissingModuleException as e:
                sys.stderr.write("WARNING: " + str(e) + "\n")
                continue
            _Nr, Nc = S.shape
            for pH in [5, 6, 7, 8, 9]:
                for I in [0.0, 0.1, 0.2, 0.3, 0.4]:
                    dG0_f = pylab.zeros((Nc, 1))
                    for c in range(Nc):
                        cid = map_cid.get(cids[c], cids[c])
                        try:
                            pmap = gc.cid2PseudoisomerMap(cid)
                            dG0_f[c] = gc.pmap_to_dG0(pmap, pH, I, T)
                        except MissingCompoundFormationEnergy as e:
                            if cid not in cids_with_missing_dG_f:
                                sys.stderr.write(
                                    "Setting the dG0_f of C%05d to NaN because: %s\n"
                                    % (cid, str(e)))
                                cids_with_missing_dG_f.add(cid)
                            dG0_f[c] = pylab.nan

                    # Bounds are looked up by the original (unmapped) CIDs.
                    bounds = [gc.kegg().cid2bounds.get(cid, (None, None))
                              for cid in cids]

                    try:
                        _dG_f, _concentrations, pCr = find_pCr(
                            S, dG0_f, c_mid=c_mid, ratio=3.0, bounds=bounds)
                    except LinProgNoSolutionException:
                        sys.stderr.write(
                            "M%05d: Pathway is theoretically infeasible\n" % mid)
                        pCr = None

                    try:
                        _dG_f, _concentrations, MTDF = find_mtdf(
                            S, dG0_f, c_range=c_range, bounds=bounds)
                    except LinProgNoSolutionException:
                        sys.stderr.write(
                            "M%05d: Pathway is theoretically infeasible\n" % mid)
                        MTDF = None

                    csv_output.writerow(
                        [mid, module_name, pH, I, T, pCr, MTDF])
def test_all_modules():
    """Scan all KEGG modules over a grid of pH and ionic strength values.

    For every module ID in ``gc.kegg().mid2rid_map`` and every (pH, I)
    combination, a ``dG0_f`` column vector is filled from
    ``gc.pmap_to_dG0`` (NaN where the formation energy is missing), and
    ``find_pCr`` and ``find_mtdf`` are run on the matrix ``S`` returned by
    ``get_module``.  One row per (module, pH, I) is written to
    ``../res/feasibility.csv``; ``pCr`` / ``MTDF`` are written as ``None``
    when the corresponding linear program has no solution.

    Modules that raise ``KeggMissingModuleException`` are skipped with a
    warning on stderr.
    """
    from pygibbs.groups import GroupContribution

    gc = GroupContribution(sqlite_name="gibbs.sqlite", html_name="dG0_test")
    gc.init()
    c_range = (1e-6, 1e-2)
    c_mid = 1e-3
    T = 300
    # CIDs that should be mapped to other CIDs because they are unspecific
    # (like NTP => ATP)
    map_cid = {201: 2, 454: 8}

    # Compounds already reported as missing, so each one is warned about
    # only once across the whole scan.
    cids_with_missing_dG_f = set()

    # 'with' guarantees the CSV file is closed even if the scan aborts.
    with open("../res/feasibility.csv", "w") as f:
        csv_output = csv.writer(f)
        csv_output.writerow(
            ("MID", "module name", "pH", "I", "T", "pCr", "MTDF"))
        for mid in sorted(gc.kegg().mid2rid_map.keys()):
            module_name = gc.kegg().mid2name_map[mid]
            try:
                S, _rids, _fluxes, cids = gc.kegg().get_module(mid)
            except KeggMissingModuleException as e:
                sys.stderr.write("WARNING: " + str(e) + "\n")
                continue
            _Nr, Nc = S.shape
            for pH in [5, 6, 7, 8, 9]:
                for I in [0.0, 0.1, 0.2, 0.3, 0.4]:
                    dG0_f = pylab.zeros((Nc, 1))
                    for c in range(Nc):
                        cid = map_cid.get(cids[c], cids[c])
                        try:
                            pmap = gc.cid2PseudoisomerMap(cid)
                            dG0_f[c] = gc.pmap_to_dG0(pmap, pH, I, T)
                        except MissingCompoundFormationEnergy as e:
                            if cid not in cids_with_missing_dG_f:
                                sys.stderr.write(
                                    "Setting the dG0_f of C%05d to NaN because: %s\n"
                                    % (cid, str(e)))
                                cids_with_missing_dG_f.add(cid)
                            dG0_f[c] = pylab.nan

                    # Bounds are looked up by the original (unmapped) CIDs.
                    bounds = [gc.kegg().cid2bounds.get(cid, (None, None))
                              for cid in cids]

                    try:
                        _dG_f, _concentrations, pCr = find_pCr(
                            S, dG0_f, c_mid=c_mid, ratio=3.0, bounds=bounds)
                    except LinProgNoSolutionException:
                        sys.stderr.write(
                            "M%05d: Pathway is theoretically infeasible\n" % mid)
                        pCr = None

                    try:
                        _dG_f, _concentrations, MTDF = find_mtdf(
                            S, dG0_f, c_range=c_range, bounds=bounds)
                    except LinProgNoSolutionException:
                        sys.stderr.write(
                            "M%05d: Pathway is theoretically infeasible\n" % mid)
                        MTDF = None

                    csv_output.writerow(
                        [mid, module_name, pH, I, T, pCr, MTDF])
def main():
    """Run one of several NIST-based experiments and write an HTML report.

    Sets up the SQLite database, the HTML writer (``../res/nist/report.html``),
    the group-contribution object, the gradient-ascent object and the NIST
    data, then executes exactly one branch of the ``if``/``elif`` chain
    below.  Only the first branch (``if True``) is active; the ``elif False``
    branches are alternative experiments that are switched on by editing the
    conditions by hand.

    The HTML writer is always closed on exit, including when a branch raises.
    """
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    try:
        gc = GroupContribution(db)
        gc.override_gc_with_measurements = True
        gc.init()
        grad = GradientAscent(gc)
        nist = Nist(db, html_writer, gc.kegg())
        nist.FromDatabase()
        alberty = Alberty()
        hatzi = Hatzi()

        if True:
            # Verify three sources of formation energies against the NIST
            # data loaded with Alberty's table.
            grad.load_nist_data(nist, alberty, skip_missing_reactions=False,
                                T_range=(298, 314))
            grad.verify_results("Alberty", alberty, html_writer)

            #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

            #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
            #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
            #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
            #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

            #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
            grad.verify_results("Hatzimanikatis", hatzi, html_writer)

            #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
            grad.verify_results("Milo", gc, html_writer)

        elif False:
            # Run the gradient ascent algorithm, where the starting point is
            # the same file used for training the GC algorithm
            grad.load_dG0_data("../data/thermodynamics/dG0.csv")
            # load the data for the anchors (i.e. compounds whose dG0 should
            # not be changed - usually their value will be 0).
            grad.anchors = grad.load_dG0_data(
                "../data/thermodynamics/nist_anchors.csv")
            grad.load_nist_data(nist, grad, skip_missing_reactions=True)
            print("Training %d compounds using %d reactions: "
                  % (len(grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.hill_climb(max_i=20000)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            # NOTE(review): the active branch passes three arguments to
            # verify_results; confirm this call before enabling the branch.
            grad.verify_results("gradient1")

        elif False:
            # Run the gradient ascent algorithm, where the starting point is
            # Alberty's table from (Mathematica 2006)
            grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
            print("Training %d compounds using %d reactions: "
                  % (len(grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.cid2pmap_dict = alberty.cid2pmap_dict
            grad.hill_climb(max_i=20000)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            grad.verify_results("gradient2")

        elif False:
            # Run the gradient ascent algorithm, where the starting point is
            # Alberty's table from (Mathematica 2006)
            # Use DETERMINISTIC gradient ascent
            grad.load_nist_data(nist, alberty, skip_missing_reactions=True,
                                T_range=(24 + 273.15, 40 + 273.15))
            print("Training %d compounds using %d reactions: "
                  % (len(grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.cid2pmap_dict = alberty.cid2pmap_dict
            grad.deterministic_hill_climb(max_i=200)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            grad.verify_results("gradient_deterministic")

        elif False:
            # Run the gradient ascent algorithm, where the starting point
            # arbitrary (predict all of the NIST compounds)
            grad = GradientAscent(gc)
            # NOTE(review): other branches pass a second positional argument
            # to load_nist_data; confirm this call before enabling the branch.
            grad.load_nist_data(nist, skip_missing_reactions=False)
            print("Training %d compounds using %d reactions: "
                  % (len(grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.hill_climb(max_i=20000)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            grad.verify_results("gradient3")

        elif False:
            # Use Alberty's table from (Mathematica 2006) to calculate the
            # dG0 of all possible reactions in KEGG
            grad = GradientAscent(gc)
            grad.cid2pmap_dict = alberty.cid2pmap_dict
            (pH, I, T) = (7, 0, 300)
            counter = 0
            for rid in grad.kegg.get_all_rids():
                sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
                try:
                    dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                    print("R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0))
                    counter += 1
                except MissingCompoundFormationEnergy:
                    # Reactions with a missing formation energy are skipped
                    # on purpose and simply not counted.
                    #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                    pass
            print("Managed to calculate the dG0 of %d reactions" % counter)

        elif False:
            # Write a table of all compounds appearing in the NIST rows, with
            # a drawing and the (nH, z) pairs of each, to both the HTML
            # report and a CSV file.
            util._mkdir("../res/nist/fig")
            # 'with' closes the CSV file; the original left it open.
            with open("../res/nist/pseudoisomers.csv", "w") as csv_file:
                csv_writer = csv.writer(csv_file)
                cid_set = set()
                for row in nist.data:
                    cid_set.update(row['sparse'].keys())

                html_writer.write("<table border=1>\n")
                for cid in sorted(cid_set):
                    html_writer.write(
                        "  <tr><td>C%05d</td><td>%s</td><td>"
                        % (cid, grad.kegg.cid2name(cid)))
                    try:
                        mol = grad.kegg.cid2mol(cid)
                        img_fname = '../res/nist/fig/C%05d.png' % cid
                        html_writer.embed_img(img_fname, "C%05d" % cid)
                        mol.draw(show=False, filename=img_fname)
                    except (AssertionError, KeggParseException) as e:
                        html_writer.write(
                            "WARNING: cannot draw C%05d - %s" % (cid, str(e)))
                    html_writer.write("</td><td>")
                    if cid in alberty.cid2pmap_dict:
                        for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                            html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                            csv_writer.writerow((cid, nH, z))
                    else:
                        # Not in Alberty's table: fall back to the values
                        # reported by the KEGG wrapper.
                        nH = grad.kegg.cid2num_hydrogens(cid)
                        z = grad.kegg.cid2charge(cid)
                        html_writer.write("unknown pseudoisomers<br>")
                        html_writer.write("(nH=%d, z=%d)" % (nH, z))
                        csv_writer.writerow((cid, nH, z))
                    html_writer.write("</td></tr>\n")
                html_writer.write("</table>\n")
    finally:
        html_writer.close()
def main():
    """Run one of several NIST-based experiments and write an HTML report.

    Sets up the SQLite database, the HTML writer (``../res/nist/report.html``),
    the group-contribution object, the gradient-ascent object and the NIST
    data, then executes exactly one branch of the ``if``/``elif`` chain
    below.  Only the first branch (``if True``) is active; the ``elif False``
    branches are alternative experiments that are switched on by editing the
    conditions by hand.

    The HTML writer is always closed on exit, including when a branch raises.
    """
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    try:
        gc = GroupContribution(db)
        gc.override_gc_with_measurements = True
        gc.init()
        grad = GradientAscent(gc)
        nist = Nist(db, html_writer, gc.kegg())
        nist.FromDatabase()
        alberty = Alberty()
        hatzi = Hatzi()

        if True:
            # Verify three sources of formation energies against the NIST
            # data loaded with Alberty's table.
            grad.load_nist_data(nist, alberty, skip_missing_reactions=False,
                                T_range=(298, 314))
            grad.verify_results("Alberty", alberty, html_writer)

            #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

            #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
            #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
            #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
            #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

            #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
            grad.verify_results("Hatzimanikatis", hatzi, html_writer)

            #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
            grad.verify_results("Milo", gc, html_writer)

        elif False:
            # Run the gradient ascent algorithm, where the starting point is
            # the same file used for training the GC algorithm
            grad.load_dG0_data("../data/thermodynamics/dG0.csv")
            # load the data for the anchors (i.e. compounds whose dG0 should
            # not be changed - usually their value will be 0).
            grad.anchors = grad.load_dG0_data(
                "../data/thermodynamics/nist_anchors.csv")
            grad.load_nist_data(nist, grad, skip_missing_reactions=True)
            print("Training %d compounds using %d reactions: "
                  % (len(grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.hill_climb(max_i=20000)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            # NOTE(review): the active branch passes three arguments to
            # verify_results; confirm this call before enabling the branch.
            grad.verify_results("gradient1")

        elif False:
            # Run the gradient ascent algorithm, where the starting point is
            # Alberty's table from (Mathematica 2006)
            grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
            print("Training %d compounds using %d reactions: "
                  % (len(grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.cid2pmap_dict = alberty.cid2pmap_dict
            grad.hill_climb(max_i=20000)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            grad.verify_results("gradient2")

        elif False:
            # Run the gradient ascent algorithm, where the starting point is
            # Alberty's table from (Mathematica 2006)
            # Use DETERMINISTIC gradient ascent
            grad.load_nist_data(nist, alberty, skip_missing_reactions=True,
                                T_range=(24 + 273.15, 40 + 273.15))
            print("Training %d compounds using %d reactions: "
                  % (len(grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.cid2pmap_dict = alberty.cid2pmap_dict
            grad.deterministic_hill_climb(max_i=200)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            grad.verify_results("gradient_deterministic")

        elif False:
            # Run the gradient ascent algorithm, where the starting point
            # arbitrary (predict all of the NIST compounds)
            grad = GradientAscent(gc)
            # NOTE(review): other branches pass a second positional argument
            # to load_nist_data; confirm this call before enabling the branch.
            grad.load_nist_data(nist, skip_missing_reactions=False)
            print("Training %d compounds using %d reactions: "
                  % (len(grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.hill_climb(max_i=20000)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            grad.verify_results("gradient3")

        elif False:
            # Use Alberty's table from (Mathematica 2006) to calculate the
            # dG0 of all possible reactions in KEGG
            grad = GradientAscent(gc)
            grad.cid2pmap_dict = alberty.cid2pmap_dict
            (pH, I, T) = (7, 0, 300)
            counter = 0
            for rid in grad.kegg.get_all_rids():
                sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
                try:
                    dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                    print("R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0))
                    counter += 1
                except MissingCompoundFormationEnergy:
                    # Reactions with a missing formation energy are skipped
                    # on purpose and simply not counted.
                    #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                    pass
            print("Managed to calculate the dG0 of %d reactions" % counter)

        elif False:
            # Write a table of all compounds appearing in the NIST rows, with
            # a drawing and the (nH, z) pairs of each, to both the HTML
            # report and a CSV file.
            util._mkdir("../res/nist/fig")
            # 'with' closes the CSV file; the original left it open.
            with open("../res/nist/pseudoisomers.csv", "w") as csv_file:
                csv_writer = csv.writer(csv_file)
                cid_set = set()
                for row in nist.data:
                    cid_set.update(row['sparse'].keys())

                html_writer.write("<table border=1>\n")
                for cid in sorted(cid_set):
                    html_writer.write(
                        "  <tr><td>C%05d</td><td>%s</td><td>"
                        % (cid, grad.kegg.cid2name(cid)))
                    try:
                        mol = grad.kegg.cid2mol(cid)
                        img_fname = '../res/nist/fig/C%05d.png' % cid
                        html_writer.embed_img(img_fname, "C%05d" % cid)
                        mol.draw(show=False, filename=img_fname)
                    except (AssertionError, KeggParseException) as e:
                        html_writer.write(
                            "WARNING: cannot draw C%05d - %s" % (cid, str(e)))
                    html_writer.write("</td><td>")
                    if cid in alberty.cid2pmap_dict:
                        for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                            html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                            csv_writer.writerow((cid, nH, z))
                    else:
                        # Not in Alberty's table: fall back to the values
                        # reported by the KEGG wrapper.
                        nH = grad.kegg.cid2num_hydrogens(cid)
                        z = grad.kegg.cid2charge(cid)
                        html_writer.write("unknown pseudoisomers<br>")
                        html_writer.write("(nH=%d, z=%d)" % (nH, z))
                        csv_writer.writerow((cid, nH, z))
                    html_writer.write("</td></tr>\n")
                html_writer.write("</table>\n")
    finally:
        html_writer.close()