Python GroupContribution.kegg示例

def test_all_modules():
    """Write feasibility measures for every KEGG module to a CSV file.

    For each module ID in ``gc.kegg().mid2rid_map`` and for every
    combination of pH in (5, 6, 7, 8, 9) and ionic strength I in
    (0.0, 0.1, 0.2, 0.3, 0.4), at T = 300, the formation energy of each
    compound in the module is computed and passed to ``find_pCr`` and
    ``find_mtdf``.  One row per (module, pH, I) is written to
    ``../res/feasibility.csv``.

    Modules that raise ``KeggMissingModuleException`` are reported on
    stderr and skipped.  Compounds without a formation energy get NaN
    (reported on stderr once per compound).  If either solver raises
    ``LinProgNoSolutionException`` the corresponding column is ``None``.

    The output file is managed by a ``with`` block so that it is flushed
    and closed even when an unexpected exception escapes the loop.
    """
    from pygibbs.groups import GroupContribution
    gc = GroupContribution(sqlite_name="gibbs.sqlite", html_name="dG0_test")
    gc.init()
    c_range = (1e-6, 1e-2)
    c_mid = 1e-3
    T = 300
    # CIDs that should be mapped to other CIDs because they are unspecific
    # (like NTP => ATP)
    map_cid = {201: 2, 454: 8}

    # Remember which compounds were already reported so that each warning
    # is printed only once across all (module, pH, I) combinations.
    cids_with_missing_dG_f = set()

    with open("../res/feasibility.csv", "w") as f:
        csv_output = csv.writer(f)
        csv_output.writerow(("MID", "module name", "pH", "I", "T", "pCr", "MTDF"))
        for mid in sorted(gc.kegg().mid2rid_map.keys()):
            module_name = gc.kegg().mid2name_map[mid]
            try:
                S, _rids, _fluxes, cids = gc.kegg().get_module(mid)
            except KeggMissingModuleException as e:
                sys.stderr.write("WARNING: " + str(e) + "\n")
                continue
            _Nr, Nc = S.shape
            for pH in [5, 6, 7, 8, 9]:
                for I in [0.0, 0.1, 0.2, 0.3, 0.4]:
                    dG0_f = pylab.zeros((Nc, 1))
                    for c in range(Nc):
                        cid = map_cid.get(cids[c], cids[c])
                        try:
                            pmap = gc.cid2PseudoisomerMap(cid)
                            dG0_f[c] = gc.pmap_to_dG0(pmap, pH, I, T)
                        except MissingCompoundFormationEnergy as e:
                            if cid not in cids_with_missing_dG_f:
                                sys.stderr.write(
                                    "Setting the dG0_f of C%05d to NaN because: %s\n"
                                    % (cid, str(e)))
                                cids_with_missing_dG_f.add(cid)
                            dG0_f[c] = pylab.nan

                    # Bounds are looked up by the original (unmapped) CIDs.
                    bounds = [gc.kegg().cid2bounds.get(cid, (None, None))
                              for cid in cids]

                    try:
                        _dG_f, _concentrations, pCr = find_pCr(
                            S, dG0_f, c_mid=c_mid, ratio=3.0, bounds=bounds)
                    except LinProgNoSolutionException:
                        sys.stderr.write("M%05d: Pathway is theoretically infeasible\n" % mid)
                        pCr = None

                    try:
                        _dG_f, _concentrations, MTDF = find_mtdf(
                            S, dG0_f, c_range=c_range, bounds=bounds)
                    except LinProgNoSolutionException:
                        sys.stderr.write("M%05d: Pathway is theoretically infeasible\n" % mid)
                        MTDF = None

                    csv_output.writerow([mid, module_name, pH, I, T, pCr, MTDF])

示例#2

显示文件

def test_all_modules():
    """Write feasibility measures for every KEGG module to a CSV file.

    For each module ID in ``gc.kegg().mid2rid_map`` and for every
    combination of pH in (5, 6, 7, 8, 9) and ionic strength I in
    (0.0, 0.1, 0.2, 0.3, 0.4), at T = 300, the formation energy of each
    compound in the module is computed and passed to ``find_pCr`` and
    ``find_mtdf``.  One row per (module, pH, I) is written to
    ``../res/feasibility.csv``.

    Modules that raise ``KeggMissingModuleException`` are reported on
    stderr and skipped.  Compounds without a formation energy get NaN
    (reported on stderr once per compound).  If either solver raises
    ``LinProgNoSolutionException`` the corresponding column is ``None``.

    The output file is managed by a ``with`` block so that it is flushed
    and closed even when an unexpected exception escapes the loop.
    """
    from pygibbs.groups import GroupContribution
    gc = GroupContribution(sqlite_name="gibbs.sqlite", html_name="dG0_test")
    gc.init()
    c_range = (1e-6, 1e-2)
    c_mid = 1e-3
    T = 300
    map_cid = {
        201: 2,
        454: 8
    }  # CIDs that should be mapped to other CIDs because they are unspecific (like NTP => ATP)

    # Compounds already reported as missing, so each warning appears once.
    cids_with_missing_dG_f = set()

    with open("../res/feasibility.csv", "w") as f:
        csv_output = csv.writer(f)
        csv_output.writerow(
            ("MID", "module name", "pH", "I", "T", "pCr", "MTDF"))
        for mid in sorted(gc.kegg().mid2rid_map.keys()):
            module_name = gc.kegg().mid2name_map[mid]
            try:
                S, _rids, _fluxes, cids = gc.kegg().get_module(mid)
            except KeggMissingModuleException as e:
                sys.stderr.write("WARNING: " + str(e) + "\n")
                continue
            _Nr, Nc = S.shape
            for pH in [5, 6, 7, 8, 9]:
                for I in [0.0, 0.1, 0.2, 0.3, 0.4]:
                    dG0_f = pylab.zeros((Nc, 1))
                    for c in range(Nc):
                        cid = map_cid.get(cids[c], cids[c])
                        try:
                            pmap = gc.cid2PseudoisomerMap(cid)
                            dG0_f[c] = gc.pmap_to_dG0(pmap, pH, I, T)
                        except MissingCompoundFormationEnergy as e:
                            if cid not in cids_with_missing_dG_f:
                                sys.stderr.write(
                                    "Setting the dG0_f of C%05d to NaN because: %s\n"
                                    % (cid, str(e)))
                                cids_with_missing_dG_f.add(cid)
                            dG0_f[c] = pylab.nan

                    # Bounds are looked up by the original (unmapped) CIDs.
                    bounds = [
                        gc.kegg().cid2bounds.get(cid, (None, None))
                        for cid in cids
                    ]

                    try:
                        _dG_f, _concentrations, pCr = find_pCr(S,
                                                               dG0_f,
                                                               c_mid=c_mid,
                                                               ratio=3.0,
                                                               bounds=bounds)
                    except LinProgNoSolutionException:
                        sys.stderr.write(
                            "M%05d: Pathway is theoretically infeasible\n" %
                            mid)
                        pCr = None

                    try:
                        _dG_f, _concentrations, MTDF = find_mtdf(
                            S, dG0_f, c_range=c_range, bounds=bounds)
                    except LinProgNoSolutionException:
                        sys.stderr.write(
                            "M%05d: Pathway is theoretically infeasible\n" %
                            mid)
                        MTDF = None

                    csv_output.writerow(
                        [mid, module_name, pH, I, T, pCr, MTDF])

示例#3

显示文件

文件： nist_gradient_ascent.py 项目： titus0810/milo-lab

def main():
    """Verify formation-energy sources against NIST data; write an HTML report.

    Opens ``../res/gibbs.sqlite``, initializes a ``GroupContribution``
    object (with ``override_gc_with_measurements`` set), loads the NIST
    data from the database and writes to ``../res/nist/report.html``.

    Only the first branch (``if True``) executes: NIST data is loaded
    against the Alberty source and ``verify_results`` is called for the
    "Alberty", "Hatzimanikatis" and "Milo" sources.  The ``elif False``
    branches are disabled alternatives kept as manual toggles.

    The HTML writer is closed in a ``finally`` clause so the report is
    closed even if a step raises, and the pseudoisomer CSV file is managed
    by a ``with`` block so its handle is not leaked.
    """
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    try:
        gc = GroupContribution(db)
        gc.override_gc_with_measurements = True
        gc.init()
        grad = GradientAscent(gc)
        nist = Nist(db, html_writer, gc.kegg())
        nist.FromDatabase()
        alberty = Alberty()
        hatzi = Hatzi()

        if True:
            grad.load_nist_data(nist,
                                alberty,
                                skip_missing_reactions=False,
                                T_range=(298, 314))
            grad.verify_results("Alberty", alberty, html_writer)

            #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

            #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
            #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
            #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
            #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

            #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
            grad.verify_results("Hatzimanikatis", hatzi, html_writer)

            #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
            grad.verify_results("Milo", gc, html_writer)
        elif False:
            # Run the gradient ascent algorithm, where the starting point is the same file used for training the GC algorithm
            grad.load_dG0_data("../data/thermodynamics/dG0.csv")
            # load the data for the anchors (i.e. compounds whose dG0 should not be changed - usually their value will be 0).
            grad.anchors = grad.load_dG0_data(
                "../data/thermodynamics/nist_anchors.csv")
            grad.load_nist_data(nist, grad, skip_missing_reactions=True)
            # Single-argument print(...) behaves the same as the print statement.
            print("Training %d compounds using %d reactions: " % (len(
                grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.hill_climb(max_i=20000)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            grad.verify_results("gradient1")

        elif False:
            # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
            grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
            print("Training %d compounds using %d reactions: " % (len(
                grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.cid2pmap_dict = alberty.cid2pmap_dict
            grad.hill_climb(max_i=20000)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            grad.verify_results("gradient2")

        elif False:
            # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
            # Use DETERMINISTIC gradient ascent
            grad.load_nist_data(nist,
                                alberty,
                                skip_missing_reactions=True,
                                T_range=(24 + 273.15, 40 + 273.15))
            print("Training %d compounds using %d reactions: " % (len(
                grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.cid2pmap_dict = alberty.cid2pmap_dict
            grad.deterministic_hill_climb(max_i=200)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            grad.verify_results("gradient_deterministic")

        elif False:
            # Run the gradient ascent algorithm, where the starting point arbitrary (predict all of the NIST compounds)
            grad = GradientAscent(gc)
            grad.load_nist_data(nist, skip_missing_reactions=False)
            print("Training %d compounds using %d reactions: " % (len(
                grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.hill_climb(max_i=20000)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            grad.verify_results("gradient3")

        elif False:  # Use Alberty's table from (Mathematica 2006) to calculate the dG0 of all possible reactions in KEGG
            grad = GradientAscent(gc)
            grad.cid2pmap_dict = alberty.cid2pmap_dict
            (pH, I, T) = (7, 0, 300)
            counter = 0
            for rid in grad.kegg.get_all_rids():
                sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
                try:
                    dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                    print("R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0))
                    counter += 1
                except MissingCompoundFormationEnergy:
                    # Deliberately skipped: reactions with a missing
                    # formation energy are simply not counted.
                    #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                    pass
            print("Managed to calculate the dG0 of %d reactions" % counter)

        elif False:
            util._mkdir("../res/nist/fig")
            with open("../res/nist/pseudoisomers.csv", "w") as csv_file:
                csv_writer = csv.writer(csv_file)

                # Collect every compound ID that appears in a NIST reaction.
                cid_set = set()
                for row in nist.data:
                    sparse_reaction = row['sparse']
                    cid_set.update(sparse_reaction.keys())

                html_writer.write("<table border=1>\n")
                for cid in sorted(cid_set):
                    html_writer.write("  <tr><td>C%05d</td><td>%s</td><td>" %
                                      (cid, grad.kegg.cid2name(cid)))
                    try:
                        mol = grad.kegg.cid2mol(cid)
                        img_fname = '../res/nist/fig/C%05d.png' % cid
                        html_writer.embed_img(img_fname, "C%05d" % cid)
                        mol.draw(show=False, filename=img_fname)
                    except (AssertionError, KeggParseException) as e:
                        html_writer.write("WARNING: cannot draw C%05d - %s" %
                                          (cid, str(e)))
                    html_writer.write("</td><td>")
                    if cid in alberty.cid2pmap_dict:
                        for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                            html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                            csv_writer.writerow((cid, nH, z))
                    else:
                        nH = grad.kegg.cid2num_hydrogens(cid)
                        z = grad.kegg.cid2charge(cid)
                        html_writer.write("unknown pseudoisomers<br>")
                        html_writer.write("(nH=%d, z=%d)" % (nH, z))
                        csv_writer.writerow((cid, nH, z))

                    html_writer.write("</td></tr>\n")
                html_writer.write("</table>\n")
    finally:
        html_writer.close()

示例#4

显示文件

文件： nist_gradient_ascent.py 项目： issfangks/milo-lab

def main():
    """Verify formation-energy sources against NIST data; write an HTML report.

    Opens ``../res/gibbs.sqlite``, initializes a ``GroupContribution``
    object (with ``override_gc_with_measurements`` set), loads the NIST
    data from the database and writes to ``../res/nist/report.html``.

    Only the first branch (``if True``) executes: NIST data is loaded
    against the Alberty source and ``verify_results`` is called for the
    "Alberty", "Hatzimanikatis" and "Milo" sources.  The ``elif False``
    branches are disabled alternatives kept as manual toggles.

    The HTML writer is closed in a ``finally`` clause so the report is
    closed even if a step raises, and the pseudoisomer CSV file is managed
    by a ``with`` block so its handle is not leaked.
    """
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    try:
        gc = GroupContribution(db)
        gc.override_gc_with_measurements = True
        gc.init()
        grad = GradientAscent(gc)
        nist = Nist(db, html_writer, gc.kegg())
        nist.FromDatabase()
        alberty = Alberty()
        hatzi = Hatzi()

        if True:
            grad.load_nist_data(nist, alberty, skip_missing_reactions=False, T_range=(298, 314))
            grad.verify_results("Alberty", alberty, html_writer)

            #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

            #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
            #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
            #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
            #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

            #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
            grad.verify_results("Hatzimanikatis", hatzi, html_writer)

            #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
            grad.verify_results("Milo", gc, html_writer)
        elif False:
            # Run the gradient ascent algorithm, where the starting point is the same file used for training the GC algorithm
            grad.load_dG0_data("../data/thermodynamics/dG0.csv")
            # load the data for the anchors (i.e. compounds whose dG0 should not be changed - usually their value will be 0).
            grad.anchors = grad.load_dG0_data("../data/thermodynamics/nist_anchors.csv")
            grad.load_nist_data(nist, grad, skip_missing_reactions=True)
            # Single-argument print(...) behaves the same as the print statement.
            print("Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.hill_climb(max_i=20000)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            grad.verify_results("gradient1")

        elif False:
            # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
            grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
            print("Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.cid2pmap_dict = alberty.cid2pmap_dict
            grad.hill_climb(max_i=20000)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            grad.verify_results("gradient2")

        elif False:
            # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
            # Use DETERMINISTIC gradient ascent
            grad.load_nist_data(nist, alberty, skip_missing_reactions=True, T_range=(24 + 273.15, 40 + 273.15))
            print("Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.cid2pmap_dict = alberty.cid2pmap_dict
            grad.deterministic_hill_climb(max_i=200)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            grad.verify_results("gradient_deterministic")

        elif False:
            # Run the gradient ascent algorithm, where the starting point arbitrary (predict all of the NIST compounds)
            grad = GradientAscent(gc)
            grad.load_nist_data(nist, skip_missing_reactions=False)
            print("Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data)))
            grad.hill_climb(max_i=20000)
            grad.save_energies(grad.gc.comm, "gradient_cid2prm")
            grad.verify_results("gradient3")

        elif False: # Use Alberty's table from (Mathematica 2006) to calculate the dG0 of all possible reactions in KEGG
            grad = GradientAscent(gc)
            grad.cid2pmap_dict = alberty.cid2pmap_dict
            (pH, I, T) = (7, 0, 300)
            counter = 0
            for rid in grad.kegg.get_all_rids():
                sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
                try:
                    dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                    print("R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0))
                    counter += 1
                except MissingCompoundFormationEnergy:
                    # Deliberately skipped: reactions with a missing
                    # formation energy are simply not counted.
                    #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                    pass
            print("Managed to calculate the dG0 of %d reactions" % counter)

        elif False:
            util._mkdir("../res/nist/fig")
            with open("../res/nist/pseudoisomers.csv", "w") as csv_file:
                csv_writer = csv.writer(csv_file)

                # Collect every compound ID that appears in a NIST reaction.
                cid_set = set()
                for row in nist.data:
                    sparse_reaction = row['sparse']
                    cid_set.update(sparse_reaction.keys())

                html_writer.write("<table border=1>\n")
                for cid in sorted(cid_set):
                    html_writer.write("  <tr><td>C%05d</td><td>%s</td><td>" % (cid, grad.kegg.cid2name(cid)))
                    try:
                        mol = grad.kegg.cid2mol(cid)
                        img_fname = '../res/nist/fig/C%05d.png' % cid
                        html_writer.embed_img(img_fname, "C%05d" % cid)
                        mol.draw(show=False, filename=img_fname)
                    except (AssertionError, KeggParseException) as e:
                        html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
                    html_writer.write("</td><td>")
                    if cid in alberty.cid2pmap_dict:
                        for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                            html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                            csv_writer.writerow((cid, nH, z))
                    else:
                        nH = grad.kegg.cid2num_hydrogens(cid)
                        z = grad.kegg.cid2charge(cid)
                        html_writer.write("unknown pseudoisomers<br>")
                        html_writer.write("(nH=%d, z=%d)" % (nH, z))
                        csv_writer.writerow((cid, nH, z))

                    html_writer.write("</td></tr>\n")
                html_writer.write("</table>\n")
    finally:
        html_writer.close()