def LoadAllEstimators():
    """Build the available thermodynamic estimators, keyed by short name.

    Opens the public and the 'gibbs' SQLite databases, loads the
    pseudoisomer-table estimators whose tables exist, adds the group
    contribution based estimators and the combined ones, and finally loads
    the concentration bounds into every estimator.

    Returns:
        dict mapping a short key ('alberty', 'PRC', 'hatzi_gc', 'BGC', 'PGC',
        'UGC', 'C1', 'merged', 'merged_C1') to an estimator object. 'alberty',
        'PRC', 'BGC', 'PGC' and the two merged entries are only present when
        the tables they depend on exist.

    Raises:
        KeyError: if the 'alberty_pseudoisomers' table is missing, because
            the 'C1' estimator is built on top of estimators['alberty'].
    """
    db_public = SqliteDatabase('../data/public_data.sqlite')
    db_gibbs = SqliteDatabase('../res/gibbs.sqlite')

    # Run the NIST regression when its table is missing
    # (presumably Train() is what populates 'prc_pseudoisomers' - TODO confirm).
    if not db_gibbs.DoesTableExist('prc_pseudoisomers'):
        nist_regression = NistRegression(db_gibbs)
        nist_regression.Train()

    tables = {
        'alberty': (db_public, 'alberty_pseudoisomers', 'Alberty'),
        'PRC': (db_gibbs, 'prc_pseudoisomers', 'our method (PRC)'),
    }

    estimators = {}
    for key, (db, table_name, thermo_name) in tables.items():
        if db.DoesTableExist(table_name):
            estimators[key] = PsuedoisomerTableThermodynamics.FromDatabase(
                db, table_name, name=thermo_name)
        else:
            logging.warning('The table %s does not exist in %s',
                            table_name, str(db))

    estimators['hatzi_gc'] = Hatzi(use_pKa=False)
    #estimators['hatzi_gc_pka'] = Hatzi(use_pKa=True)

    # The group contribution estimators are built on db_gibbs, so the table
    # checks must query db_gibbs as well. The previous code reused the leaked
    # loop variable `db`, whose value depended on dict iteration order.
    if db_gibbs.DoesTableExist('bgc_pseudoisomers'):
        estimators['BGC'] = GroupContribution(db=db_gibbs, transformed=True)
        estimators['BGC'].init()
        estimators['BGC'].name = 'our method (BGC)'

    if db_gibbs.DoesTableExist('pgc_pseudoisomers'):
        estimators['PGC'] = GroupContribution(db=db_gibbs, transformed=False)
        estimators['PGC'].init()
        estimators['PGC'].name = 'our method (PGC)'

    estimators['UGC'] = UnifiedGroupContribution(db=db_gibbs)
    estimators['UGC'].init()
    estimators['UGC'].name = 'our method (UGC)'

    estimators['C1'] = ReactionThermodynamics.FromCsv(
        '../data/thermodynamics/c1_reaction_thermodynamics.csv',
        estimators['alberty'])

    # The merged estimators need PGC as their second source.
    if 'PGC' in estimators:
        estimators['merged'] = BinaryThermodynamics(estimators['alberty'],
                                                    estimators['PGC'])
        estimators['merged_C1'] = BinaryThermodynamics(estimators['C1'],
                                                       estimators['PGC'])

    for thermo in estimators.values():
        thermo.load_bounds('../data/thermodynamics/concentration_bounds.csv')

    return estimators
def LoadAllEstimators():
    """Build the available thermodynamic estimators, keyed by short name.

    Opens the public and the 'gibbs' SQLite databases, loads the
    pseudoisomer-table estimators whose tables exist, adds the group
    contribution based estimators and the combined ones, and finally loads
    the concentration bounds into every estimator.

    Returns:
        dict mapping a short key ('alberty', 'PRC', 'hatzi_gc', 'BGC', 'PGC',
        'UGC', 'C1', 'merged', 'merged_C1') to an estimator object. 'alberty',
        'PRC', 'BGC', 'PGC' and the two merged entries are only present when
        the tables they depend on exist.

    Raises:
        KeyError: if the 'alberty_pseudoisomers' table is missing, because
            the 'C1' estimator is built on top of estimators['alberty'].
    """
    db_public = SqliteDatabase('../data/public_data.sqlite')
    db_gibbs = SqliteDatabase('../res/gibbs.sqlite')

    # Run the NIST regression when its table is missing
    # (presumably Train() is what populates 'prc_pseudoisomers' - TODO confirm).
    if not db_gibbs.DoesTableExist('prc_pseudoisomers'):
        nist_regression = NistRegression(db_gibbs)
        nist_regression.Train()

    tables = {
        'alberty': (db_public, 'alberty_pseudoisomers', 'Alberty'),
        'PRC': (db_gibbs, 'prc_pseudoisomers', 'our method (PRC)'),
    }

    estimators = {}
    for key, (db, table_name, thermo_name) in tables.items():
        if db.DoesTableExist(table_name):
            estimators[key] = PsuedoisomerTableThermodynamics.FromDatabase(
                db, table_name, name=thermo_name)
        else:
            logging.warning('The table %s does not exist in %s',
                            table_name, str(db))

    estimators['hatzi_gc'] = Hatzi(use_pKa=False)
    #estimators['hatzi_gc_pka'] = Hatzi(use_pKa=True)

    # The group contribution estimators are built on db_gibbs, so the table
    # checks must query db_gibbs as well. The previous code reused the leaked
    # loop variable `db`, whose value depended on dict iteration order.
    if db_gibbs.DoesTableExist('bgc_pseudoisomers'):
        estimators['BGC'] = GroupContribution(db=db_gibbs, transformed=True)
        estimators['BGC'].init()
        estimators['BGC'].name = 'our method (BGC)'

    if db_gibbs.DoesTableExist('pgc_pseudoisomers'):
        estimators['PGC'] = GroupContribution(db=db_gibbs, transformed=False)
        estimators['PGC'].init()
        estimators['PGC'].name = 'our method (PGC)'

    estimators['UGC'] = UnifiedGroupContribution(db=db_gibbs)
    estimators['UGC'].init()
    estimators['UGC'].name = 'our method (UGC)'

    estimators['C1'] = ReactionThermodynamics.FromCsv(
        '../data/thermodynamics/c1_reaction_thermodynamics.csv',
        estimators['alberty'])

    # The merged estimators need PGC as their second source.
    if 'PGC' in estimators:
        estimators['merged'] = BinaryThermodynamics(estimators['alberty'],
                                                    estimators['PGC'])
        estimators['merged_C1'] = BinaryThermodynamics(estimators['C1'],
                                                       estimators['PGC'])

    for thermo in estimators.values():
        thermo.load_bounds('../data/thermodynamics/concentration_bounds.csv')

    return estimators
def Initialize(self, db):
    """Load a UnifiedGroupContribution model from `db` and cache its results.

    After the call, `self` holds the model's `groups_data` and
    `group_decomposer`, plus the 'group_contributions' (as `g_pgc`) and
    'pgc_conservations' (as `P_L_pgc`) entries returned by the model's
    `_GetContributionData`.

    Args:
        db: database object handed to the UnifiedGroupContribution
            constructor.
    """
    # Imported at call time rather than at module level, as before.
    from pygibbs.unified_group_contribution import UnifiedGroupContribution

    model = UnifiedGroupContribution(db)

    # Every loading stage reads from the database; the order is kept as-is.
    loaders = (
        model.LoadGroups,
        model.LoadObservations,
        model.LoadGroupVectors,
        model.LoadData,
    )
    for load_stage in loaders:
        load_stage(FromDatabase=True)
    model.init()

    self.groups_data = model.groups_data
    self.group_decomposer = model.group_decomposer

    # S and b are passed as copies; cids and anchored are passed as they are.
    contributions = model._GetContributionData(
        model.S.copy(), model.cids, model.b.copy(), model.anchored)
    self.g_pgc = contributions['group_contributions']
    self.P_L_pgc = contributions['pgc_conservations']
# Label, title and save the current figure (estimates for unobserved data).
plt.xlabel('value in iAF1260 [kJ/mol]')
plt.ylabel('UGCM estimation [kJ/mol]')
plt.title('Unobserved data, N = %d, RMSE = %.1f [kJ/mol]' %
          (len(non_nist_idx), rms_feist_ugcm2))
plt.tight_layout()
# savefig has no `fmt` keyword; the output format is selected with `format`.
plt.savefig(FIG_FNAME + "_fig2.svg", format='svg')

# Histogram of the two error sets, in 2 kJ/mol bins from -30 up to 30.
plt.figure(figsize=(6, 6), dpi=90)
bins = np.arange(-30, 30, 2)
# NOTE(review): `normed` is only accepted by older matplotlib releases (newer
# ones use `density`); kept unchanged for the matplotlib this code targets.
plt.hist([err_feist_nist, err_ugcm_nist], bins=bins, histtype='bar',
         cumulative=False, normed=False)
plt.xlabel('Error in kJ/mol')
plt.ylabel('# of reactions')
plt.legend(['value in iAF1260', 'UGCM estimation'])
plt.savefig(FIG_FNAME + "_fig3.svg", format='svg')

# Load the unified group contribution model from the database.
db = SqliteDatabase('../res/gibbs.sqlite', 'w')
ugc = UnifiedGroupContribution(db)
ugc.LoadGroups(True)
ugc.LoadObservations(True)
ugc.LoadGroupVectors(True)
ugc.LoadData(True)
ugc.init()

# Reactions to analyse; only one of the candidates is currently enabled.
r_list = []
#r_list += [Reaction.FromFormula("C00036 + C00044 = C00011 + C00035 + C00074")]
#r_list += [Reaction.FromFormula("C00003 + C00037 + C00101 = C00004 + C00011 + C00014 + C00080 + C00143")] # glycine synthase
r_list += [Reaction.FromFormula("C00001 + C00002 + C00064 + C04376 => C00008 + C00009 + C00025 + C04640")]
#r_list += [Reaction.FromFormula("C00001 + 2 C00002 + C00064 + C00288 <=> 2 C00008 + C00009 + C00025 + C00169")]

# Convert the reaction list into a matrix S and the matching compound IDs.
kegg = Kegg.getInstance()
S, cids = kegg.reaction_list_to_S(r_list)
def CalculateThermo():
    """Estimate formation energies for every molecule in an SDF file.

    Command line options are parsed with MakeOpts(). Molecules are read from
    `options.sdf_input_filename`; results are written as CSV either to
    `options.csv_output_filename` or, when that is None, to standard output.

    For each molecule the function writes one row per pseudoisomer
    (ID, nH, charge, nMg, dG0 rounded to one decimal, kernel), or a single
    row carrying an error message when the molecule cannot be processed.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    if options.csv_output_filename is not None:
        out_fp = open(options.csv_output_filename, 'w')
        # Single-argument parenthesized form prints the same text on
        # Python 2 and Python 3.
        print("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        csv_writer = csv.writer(out_fp)
        # Header order matches the data rows written below (nH, charge, nMg);
        # it used to list 'nMg' before 'charge'.
        csv_writer.writerow(['ID', 'error', 'nH', 'charge', 'nMg', 'dG0',
                             'kernel'])

        db = SqliteDatabase('../res/gibbs.sqlite', 'w')
        ugc = UnifiedGroupContribution(db)
        ugc.LoadGroups(True)
        ugc.LoadObservations(True)
        ugc.LoadGroupVectors(True)
        ugc.LoadData(True)

        result_dict = ugc._GetContributionData(ugc.S.copy(), ugc.cids,
                                               ugc.b.copy(), ugc.anchored)
        g_pgc = result_dict['group_contributions']
        P_L_pgc = result_dict['pgc_conservations']

        sdfile = pybel.readfile("sdf", options.sdf_input_filename)
        for m in sdfile:
            # Any failure for a single molecule is converted into an
            # UnknownReactionEnergyError and reported as an error row, so the
            # remaining molecules are still processed.
            try:
                try:
                    mol = Molecule.FromOBMol(m.OBMol)
                except OpenBabelError:
                    raise UnknownReactionEnergyError(
                        "Cannot convert to OBMol object")

                mol.title = m.title
                mol.RemoveHydrogens()
                # The atom limit is checked after hydrogens were removed.
                if mol.GetNumAtoms() > 200:
                    raise UnknownReactionEnergyError(
                        "Compound contains more than 200 atoms (n = %d)"
                        % mol.GetNumAtoms())

                try:
                    decomposition = ugc.group_decomposer.Decompose(
                        mol, ignore_protonations=False, strict=True)
                except GroupDecompositionError:
                    raise UnknownReactionEnergyError("cannot decompose")

                groupvec = decomposition.AsVector()
                gv = np.matrix(groupvec.Flatten())
                # Product of the group contributions with the group vector.
                dG0 = float(g_pgc * gv.T)
                nH = decomposition.Hydrogens()
                nMg = decomposition.Magnesiums()
                ker = list((P_L_pgc * gv.T).round(10).flat)

                try:
                    diss_table = mol.GetDissociationTable()
                    diss_table.SetFormationEnergyByNumHydrogens(
                        dG0=dG0, nH=nH, nMg=nMg)
                except MissingDissociationConstantError:
                    raise UnknownReactionEnergyError("missing pKa data")

                pmap = diss_table.GetPseudoisomerMap()
                for p_nH, p_z, p_nMg, p_dG0 in pmap.ToMatrix():
                    csv_writer.writerow([m.title, None, p_nH, p_z, p_nMg,
                                         round(p_dG0, 1), str(ker)])
            except UnknownReactionEnergyError as e:
                csv_writer.writerow([m.title, str(e), None, None, None, None,
                                     None])
            out_fp.flush()
    finally:
        # Close the output file we opened; never close the shared stdout.
        if out_fp is not sys.stdout:
            out_fp.close()
# savefig has no `fmt` keyword; the output format is selected with `format`.
plt.savefig(FIG_FNAME + "_fig2.svg", format='svg')

# Histogram of the two error sets, in 2 kJ/mol bins from -30 up to 30.
plt.figure(figsize=(6, 6), dpi=90)
bins = np.arange(-30, 30, 2)
# NOTE(review): `normed` is only accepted by older matplotlib releases (newer
# ones use `density`); kept unchanged for the matplotlib this code targets.
plt.hist([err_feist_nist, err_ugcm_nist],
         bins=bins,
         histtype='bar',
         cumulative=False,
         normed=False)
plt.xlabel('Error in kJ/mol')
plt.ylabel('# of reactions')
plt.legend(['value in iAF1260', 'UGCM estimation'])
plt.savefig(FIG_FNAME + "_fig3.svg", format='svg')

# Load the unified group contribution model from the database.
db = SqliteDatabase('../res/gibbs.sqlite', 'w')
ugc = UnifiedGroupContribution(db)
ugc.LoadGroups(True)
ugc.LoadObservations(True)
ugc.LoadGroupVectors(True)
ugc.LoadData(True)
ugc.init()

# Reactions to analyse; only one of the candidates is currently enabled.
r_list = []
#r_list += [Reaction.FromFormula("C00036 + C00044 = C00011 + C00035 + C00074")]
#r_list += [Reaction.FromFormula("C00003 + C00037 + C00101 = C00004 + C00011 + C00014 + C00080 + C00143")] # glycine synthase
r_list += [
    Reaction.FromFormula(
        "C00001 + C00002 + C00064 + C04376 => C00008 + C00009 + C00025 + C04640"
    )
]
#r_list += [Reaction.FromFormula("C00001 + 2 C00002 + C00064 + C00288 <=> 2 C00008 + C00009 + C00025 + C00169")]
def CalculateThermo():
    """Estimate formation energies for every molecule in an SDF file.

    Command line options are parsed with MakeOpts(). Molecules are read from
    `options.sdf_input_filename`; results are written as CSV either to
    `options.csv_output_filename` or, when that is None, to standard output.

    For each molecule the function writes one row per pseudoisomer
    (ID, nH, charge, nMg, dG0 rounded to one decimal, kernel), or a single
    row carrying an error message when the molecule cannot be processed.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    if options.csv_output_filename is not None:
        out_fp = open(options.csv_output_filename, 'w')
        # Single-argument parenthesized form prints the same text on
        # Python 2 and Python 3.
        print("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        csv_writer = csv.writer(out_fp)
        # Header order matches the data rows written below (nH, charge, nMg);
        # it used to list 'nMg' before 'charge'.
        csv_writer.writerow(
            ['ID', 'error', 'nH', 'charge', 'nMg', 'dG0', 'kernel'])

        db = SqliteDatabase('../res/gibbs.sqlite', 'w')
        ugc = UnifiedGroupContribution(db)
        ugc.LoadGroups(True)
        ugc.LoadObservations(True)
        ugc.LoadGroupVectors(True)
        ugc.LoadData(True)

        result_dict = ugc._GetContributionData(ugc.S.copy(), ugc.cids,
                                               ugc.b.copy(), ugc.anchored)
        g_pgc = result_dict['group_contributions']
        P_L_pgc = result_dict['pgc_conservations']

        sdfile = pybel.readfile("sdf", options.sdf_input_filename)
        for m in sdfile:
            # Any failure for a single molecule is converted into an
            # UnknownReactionEnergyError and reported as an error row, so the
            # remaining molecules are still processed.
            try:
                try:
                    mol = Molecule.FromOBMol(m.OBMol)
                except OpenBabelError:
                    raise UnknownReactionEnergyError(
                        "Cannot convert to OBMol object")

                mol.title = m.title
                mol.RemoveHydrogens()
                # The atom limit is checked after hydrogens were removed.
                if mol.GetNumAtoms() > 200:
                    raise UnknownReactionEnergyError(
                        "Compound contains more than 200 atoms (n = %d)" %
                        mol.GetNumAtoms())

                try:
                    decomposition = ugc.group_decomposer.Decompose(
                        mol, ignore_protonations=False, strict=True)
                except GroupDecompositionError:
                    raise UnknownReactionEnergyError("cannot decompose")

                groupvec = decomposition.AsVector()
                gv = np.matrix(groupvec.Flatten())
                # Product of the group contributions with the group vector.
                dG0 = float(g_pgc * gv.T)
                nH = decomposition.Hydrogens()
                nMg = decomposition.Magnesiums()
                ker = list((P_L_pgc * gv.T).round(10).flat)

                try:
                    diss_table = mol.GetDissociationTable()
                    diss_table.SetFormationEnergyByNumHydrogens(dG0=dG0,
                                                                nH=nH,
                                                                nMg=nMg)
                except MissingDissociationConstantError:
                    raise UnknownReactionEnergyError("missing pKa data")

                pmap = diss_table.GetPseudoisomerMap()
                for p_nH, p_z, p_nMg, p_dG0 in pmap.ToMatrix():
                    csv_writer.writerow([
                        m.title, None, p_nH, p_z, p_nMg,
                        round(p_dG0, 1),
                        str(ker)
                    ])
            except UnknownReactionEnergyError as e:
                csv_writer.writerow(
                    [m.title, str(e), None, None, None, None, None])
            out_fp.flush()
    finally:
        # Close the output file we opened; never close the shared stdout.
        if out_fp is not sys.stdout:
            out_fp.close()
# This file reads the data stored in the gibbs.sqlite database, computes the
# group contribution data with UnifiedGroupContribution and saves the
# 'group_contributions' and 'pgc_conservations' results as NumPy files in the
# directory the script was started from.
import os
import sys

# The source root is taken to be two directory levels above the starting
# directory; it becomes the working directory (the database path below is
# relative) and is appended to sys.path so the project imports resolve.
orig_dir = os.getcwd()
pygibbs_path, _ = os.path.split(orig_dir)
src_path, _ = os.path.split(pygibbs_path)
os.chdir(src_path)
# Single-argument parenthesized print behaves the same on Python 2 and 3.
print(src_path)
sys.path.append(src_path)

# These imports must stay below the sys.path change above.
import numpy as np
from pygibbs.unified_group_contribution import UnifiedGroupContribution
from toolbox.database import SqliteDatabase

db = SqliteDatabase('../res/gibbs.sqlite', 'r')
ugc = UnifiedGroupContribution(db)
ugc.LoadGroups(FromDatabase=True)
ugc.LoadObservations(FromDatabase=True)
ugc.LoadGroupVectors(FromDatabase=True)
ugc.LoadData(FromDatabase=True)
ugc.init()

result_dict = ugc._GetContributionData(ugc.S.copy(), ugc.cids,
                                       ugc.b.copy(), ugc.anchored)
g_pgc = result_dict['group_contributions']
P_L_pgc = result_dict['pgc_conservations']

# Return to the starting directory so the output lands next to the script's
# original working directory.
os.chdir(orig_dir)
print(os.getcwd())

# NOTE: np.save appends '.npy' to names that lack it, so the files are written
# as 'g_pgc.gz.npy' and 'P_L_g_pgc.gz.npy'; despite the '.gz' in the name they
# are plain (uncompressed) .npy files.
np.save('g_pgc.gz', g_pgc)
np.save('P_L_g_pgc.gz', P_L_pgc)