def get_atom_vector(self):
    """Return a NumPy row array counting the atoms of each element.

    The column index is the atomic number of the element. The first
    column (index=0) contains the number of electrons, as returned by
    get_num_electrons().

    Returns:
        An integer array of length Molecule.GetNumberOfElements() + 1, or
        None when the atom bag is empty/missing, when the compound
        contains a wildcard element ('R' or 'X'), or when an element has
        no known atomic number.
    """
    atom_bag = self.get_atom_bag()
    if not atom_bag:
        return None

    atom_vector = np.zeros((Molecule.GetNumberOfElements() + 1), dtype='int')
    for elem, count in atom_bag.iteritems():
        if elem in ['R', 'X']:
            return None  # wildcard compound!
        an = Molecule.GetAtomicNum(elem)
        if not an:
            # The values must be passed as separate logging arguments: a
            # single tuple cannot fill the two placeholders and makes the
            # log record fail to format.
            logging.warning("Unsupported element in (C%05d): %s",
                            self.cid, elem)
            return None
        atom_vector[an] = count

    atom_vector[0] = self.get_num_electrons()
    return atom_vector
def ToTableString(self):
    """Render the decomposition as a plain-text table.

    The first section has one row per (group, node set) pair taken from
    self.groups. Groups whose hydrogens, charge and nMg are all None get
    blank numeric columns; otherwise missing values are printed as 0.
    If self.unassigned_nodes is non-empty, a second section lists each
    unassigned atom of self.mol (index, atomic number, symbol, heavy
    valence and formal charge).

    Returns:
        The whole table as a single string.
    """
    rule = '-' * 50 + '\n'
    rows = [
        '%30s | %2s | %2s | %3s | %s\n' %
        ("group name", "nH", "z", "nMg", "nodes"),
        rule,
    ]

    for group, node_sets in self.groups:
        unspecified = (group.hydrogens is None and
                       group.charge is None and
                       group.nMg is None)
        for node_set in node_sets:
            nodes = ','.join(str(node) for node in node_set)
            if unspecified:
                rows.append('%30s | | | | %s\n' % (group.name, nodes))
            else:
                rows.append('%30s | %2d | %2d | %2d | %s\n' %
                            (group.name, group.hydrogens or 0,
                             group.charge or 0, group.nMg or 0, nodes))

    if self.unassigned_nodes:
        rows.append('\nUnassigned nodes: \n')
        rows.append('%10s | %3s | %2s | %10s | %10s\n' %
                    ('index', 'an', 'el', 'valence', 'charge'))
        rows.append(rule)

        atoms = self.mol.GetAtoms()
        for index in self.unassigned_nodes:
            atom = atoms[index]
            rows.append('%10d | %3d | %2s | %10d | %10d\n' %
                        (index, atom.atomicnum,
                         Molecule.GetSymbol(atom.atomicnum),
                         atom.heavyvalence, atom.formalcharge))

    return ''.join(rows)
def add_thermodynamics(cursor):
    """Rebuild the yeast_inchi2thermo table with group-contribution energies.

    Drops and recreates the table (and its index on the inchi column),
    then, for every distinct non-NULL InChI in yeast_species2inchi,
    computes the pseudoisomer map with GroupContribution and inserts one
    row (inchi, charge, nH, dG0_f) per pseudoisomer.

    InChIs that raise IOError, GroupMissingTrainDataError or
    GroupDecompositionError are reported on stderr and skipped.

    Args:
        cursor: a database cursor used for both the query and the inserts.
    """
    from groups import GroupMissingTrainDataError, GroupDecompositionError

    gc = GroupContribution(sqlite_name="gibbs.sqlite", html_name="pathologic")
    gc.init()

    cursor.execute("DROP TABLE IF EXISTS yeast_inchi2thermo")
    cursor.execute(
        "CREATE TABLE yeast_inchi2thermo (inchi TEXT, charge INT, nH INT, dG0_f REAL)"
    )
    cursor.execute("DROP INDEX IF EXISTS yeast_inchi2thermo_idx")
    cursor.execute(
        "CREATE INDEX yeast_inchi2thermo_idx ON yeast_inchi2thermo (inchi);")

    # Collect all InChIs before inserting: the same cursor is reused for
    # the INSERT statements below.
    inchi_list = []
    for row in cursor.execute("SELECT distinct(inchi) "
                              "FROM yeast_species2inchi WHERE inchi IS NOT NULL"):
        inchi_list.append(str(row[0]))

    for inchi in inchi_list:
        try:
            mol = Molecule.FromInChI(str(inchi))
            pmap = gc.Mol2PseudoisomerMap(mol)
            for ((z, nH), dG0) in pmap.iteritems():
                cursor.execute(
                    "INSERT INTO yeast_inchi2thermo VALUES(?,?,?,?)",
                    [inchi, z, nH, dG0])
        except (IOError, GroupMissingTrainDataError, GroupDecompositionError):
            # Name the offending InChI and end the line, so that each
            # failure is reported as a separate, identifiable message.
            sys.stderr.write(
                "Cannot convert the following InChI to a pybel Molecule: %s\n"
                % inchi)
def run(self):
    """Compute the dissociation table of self.smiles and store it in the DB.

    Work is bracketed by self.semaphore, and the database section by
    self.db_lock. When a dissociation table is obtained, every row of
    ToDatabaseRow() is inserted into self.options.table_name prefixed by
    the compound ID and its KEGG name; otherwise a single row padded with
    ten None values is inserted.

    Both the semaphore and the lock are released even if an exception is
    raised, so a failing compound cannot block the other workers.
    """
    from toolbox.molecule import Molecule

    self.semaphore.acquire()
    try:
        start_time = time.time()

        logging.debug("SMILES: " + self.smiles)
        diss_table = Molecule._GetDissociationTable(self.smiles, fmt='smiles',
                                                    mid_pH=default_pH,
                                                    min_pKa=0, max_pKa=14,
                                                    T=default_T)
        # The table may be None (handled below), so only log its fields
        # when it actually exists.
        if diss_table is not None:
            logging.debug("Min charge: %d" % diss_table.min_charge)
            logging.debug("Min nH: %d" % diss_table.min_nH)

        elapsed_time = time.time() - start_time

        self.db_lock.acquire()
        try:
            db = SqliteDatabase(self.options.db_file)
            kegg = Kegg.getInstance()
            name = kegg.cid2name(self.cid)

            if diss_table is not None:
                for row in diss_table.ToDatabaseRow():
                    db.Insert(self.options.table_name,
                              [self.cid, name] + row)
            else:
                db.Insert(self.options.table_name,
                          [self.cid, name] + [None] * 10)
            # Drop the connection object before letting other threads in.
            del db
        finally:
            self.db_lock.release()

        logging.info("Completed C%05d, elapsed time = %.1f sec" %
                     (self.cid, elapsed_time))
    finally:
        self.semaphore.release()
def FromGroupsFile(fp, transformed=False):
    """Factory that builds a GroupsData from an open CSV file of groups.

    Rows with a non-empty SKIP column are ignored (with a warning). Every
    other row must provide the NAME, PROTONS, CHARGE, MAGNESIUMS, SMARTS,
    FOCAL_ATOMS and REMARK columns. Group IDs are assigned consecutively,
    starting at 0, to the rows that are kept.

    Args:
        fp: an open file object (its .name is used for logging).
        transformed: passed through to the GroupsData constructor.

    Returns:
        A GroupsData wrapping the parsed groups.

    Raises:
        GroupsDataError: if a SMARTS string fails Molecule.VerifySmarts.
    """
    groups = []
    logging.info('Reading the list of groups from %s' % fp.name)

    for record in csv.DictReader(fp):
        if record.get('SKIP', False):
            logging.warning('Skipping group %s', record.get('NAME'))
            continue

        name = record['NAME']
        n_protons = int(record['PROTONS'])
        net_charge = int(record['CHARGE'])
        n_mgs = int(record['MAGNESIUMS'])
        smarts = record['SMARTS']
        focal_atoms = FocalSet(record['FOCAL_ATOMS'])
        _remark = record['REMARK']  # read but not used

        # Check that the smarts are good.
        if not Molecule.VerifySmarts(smarts):
            raise GroupsDataError('Cannot parse SMARTS: %s' % smarts)

        # The next group ID is simply the number of groups kept so far.
        groups.append(Group(len(groups), name, n_protons, net_charge,
                            n_mgs, str(smarts), focal_atoms))

    logging.info('Done reading groups data.')
    return GroupsData(groups, transformed)
def GetMolInput(dissociation): mols = [ ] # a list of pairs of Molecule objects and stoichiometric coefficients while mols == []: print 'KEGG ID or SMILES (or Enter to quit):', s_input = raw_input() if not s_input: return [] elif re.findall('C\d\d\d\d\d', s_input) != []: try: cid = int(s_input[1:]) mols = [(GetMostAbundantMol(cid, dissociation), 1)] print "Compound:", mols[0][0].ToInChI() except ValueError: print 'syntax error: KEGG compound ID is bad (%s), please try again' % s_input elif re.findall('R\d\d\d\d\d', s_input) != []: try: rid = int(s_input[1:]) reaction = Kegg.getInstance().rid2reaction(rid) print "Reaction:", str(reaction) for cid, coeff in reaction.iteritems(): mols += [(GetMostAbundantMol(cid, dissociation), coeff)] except ValueError: print 'syntax error: KEGG reaction ID is bad (%s), please try again' % s_input else: try: mols = [(Molecule.FromSmiles(s_input), 1)] print "Compound:", mols[0][0].ToInChI() except Exception: print 'unable to parse SMILES string, please try again' return mols
def CreateEmptyGroupDecomposition(self):
    """Return a decomposition in which every group has no node sets.

    An empty molecule is decomposed (non-strict, ignoring protonations)
    and each (group, node_sets) entry of the result is then replaced by
    (group, []).
    """
    decomposition = self.Decompose(Molecule.FromSmiles(""),
                                   ignore_protonations=True,
                                   strict=False)
    # Replace the contents in place so the same list object is kept.
    decomposition.groups[:] = [(group, [])
                               for group, _unused in decomposition.groups]
    return decomposition
def SetInChI(self, inchi):
    """Set the InChI of this object and refresh the values derived from it.

    Args:
        inchi: an InChI string, or None to clear the structure.

    With None, the inchi, mol, formula and mass attributes are all reset
    to None. Otherwise the molecule is parsed with Molecule.FromInChI and
    the formula and exact mass are read from it.
    """
    # Identity test: None is a singleton, and this avoids invoking any
    # custom __eq__ on the argument.
    if inchi is None:
        self.inchi = None
        self.mol = None
        self.formula = None
        self.mass = None
        return

    self.inchi = inchi
    self.mol = Molecule.FromInChI(inchi)
    self.formula = self.mol.GetFormula()
    self.mass = self.mol.GetExactMass()
def GetMol(self, nH=None, nMg=0):
    """Return the Molecule stored for the (nH, nMg) species, if any.

    Args:
        nH: number of hydrogens; defaults to self.min_nH when None.
        nMg: number of magnesium ions.

    Returns:
        The Molecule for that species, or None if (nH, nMg) is not a key
        of self.mol_dict. A Molecule that has not been built yet is
        created from the stored SMILES and cached back into the dict.
    """
    from toolbox.molecule import Molecule

    key = (self.min_nH if nH is None else nH, nMg)
    if key not in self.mol_dict:
        return None

    smiles, molecule = self.mol_dict[key]
    if molecule is None:
        # Build lazily and remember the result next to its SMILES.
        molecule = Molecule.FromSmiles(smiles)
        self.mol_dict[key] = (smiles, molecule)
    return molecule
def EstimateInChI(self, inchi):
    """Estimate the formation energy of the compound given by an InChI.

    The molecule is decomposed into groups (strict, protonations taken
    into account) and the resulting group vector is passed to
    EstimateGroupVector. The RemoveHydrogens() call is disabled, so the
    molecule is decomposed as parsed.

    Args:
        inchi: the InChI string of the compound.

    Returns:
        A tuple (dG0, nH, charge, nMg, ker), where nH, charge and nMg come
        from the decomposition and dG0 and ker from EstimateGroupVector.
    """
    decomposition = self.group_decomposer.Decompose(
        Molecule.FromInChI(inchi), ignore_protonations=False, strict=True)

    n_hydrogens = decomposition.Hydrogens()
    net_charge = decomposition.NetCharge()
    n_magnesiums = decomposition.Magnesiums()

    dG0, ker = self.EstimateGroupVector(decomposition.AsVector())
    return dG0, n_hydrogens, net_charge, n_magnesiums, ker
def get_nH_and_charge(self):
    """Return the (number of hydrogens, charge) pair of this compound.

    When a molecule is available (built from self.inchi on first use),
    the pair is taken from its GetHydrogensAndCharge(). Otherwise the
    atom bag is used, assuming the formula describes the neutral species:
    the result is (count of 'H', 0), or None when there is no atom bag.
    """
    if not self.mol and self.inchi:
        self.mol = Molecule.FromInChI(self.inchi)

    if not self.mol:
        # No structure: assume self.formula is correct and that it gives
        # the number of H of the neutral species.
        bag = self.get_atom_bag()
        if not bag:
            return None
        return bag.get('H', 0), 0

    return self.mol.GetHydrogensAndCharge()
def get_num_electrons(self):
    """Return the putative number of electrons in the molecule.

    Uses the molecule's GetNumElectrons() when GetMolecule() yields one.
    Otherwise the formula is assumed to be correct and the charge to be
    0, so the result is the total number of protons in the atom bag (or
    None when there is no atom bag).
    """
    # NOTE(review): this fallback is only reached if GetMolecule() returns
    # a falsy value instead of raising - confirm against its implementation.
    mol = self.GetMolecule()
    if mol:
        return mol.GetNumElectrons()

    bag = self.get_atom_bag()
    if not bag:
        return None

    return sum(count * Molecule.GetAtomicNum(elem)
               for elem, count in bag.iteritems())
def GetMolecule(self):
    """Return the Molecule of this compound, creating it if necessary.

    A previously stored self.mol is returned as is. Otherwise the
    molecule is built from self.inchi, titled with self.name, stored in
    self.mol and returned.

    Raises:
        kegg_errors.KeggParseException: if the compound has neither a
            stored molecule nor an InChI.
    """
    if not self.mol:
        if not self.inchi:
            raise kegg_errors.KeggParseException(
                "C%05d (%s) doesn't have an explicit molecular structure" %
                (self.cid, self.name))
        self.mol = Molecule.FromInChI(self.inchi)
        self.mol.SetTitle(self.name)
    return self.mol
def test_dissociation_table(diss, group_decomposer, id,
                            ignore_missing_smiles=False):
    """Check a dissociation table against the group decomposition.

    The most abundant pseudoisomer at the default pH, ionic strength and
    temperature (pMg = 14) is decomposed into groups, and a warning is
    logged if the number of hydrogens of the decomposition differs from
    the one in the table. The check stops silently when the molecule
    cannot be decomposed.

    Args:
        diss: the dissociation table, or None if the compound has none.
        group_decomposer: object whose Decompose() is used.
        id: label of the compound, used only in log messages.
        ignore_missing_smiles: when True, a missing SMILES is not logged.
    """
    if diss is None:
        logging.warning('%s: does not appear in the dissociation table' % id)
        return

    nH, nMg = diss.GetMostAbundantPseudoisomer(pH=default_pH, I=default_I,
                                               pMg=14, T=default_T)
    if nMg != 0:
        logging.warning('%s: default species has nMg = %d' % (id, nMg))
        return

    smiles = diss.GetSmiles(nH=nH, nMg=0)
    if not smiles:
        if not ignore_missing_smiles:
            logging.warning(
                '%s: no SMILES in the dissociation table for nH = %d' %
                (id, nH))
        return

    logging.debug('%s: nH = %d, smiles = %s' % (id, nH, smiles))
    try:
        decomposition = group_decomposer.Decompose(
            Molecule.FromSmiles(smiles),
            ignore_protonations=False, strict=True)
    except GroupDecompositionError:
        return

    logging.debug("%s: decomposition = %s" % (id, decomposition.AsVector()))

    decomposed_nH = decomposition.Hydrogens()
    if decomposed_nH != nH:
        logging.warning(
            '%s: nH doesn\'t match: explicit = %d, decomposition = %d' %
            (id, nH, decomposed_nH))
def __init__(self, uid=None, name=None, all_names=None, mass=None,
             formula=None, inchi=None, pubchem_id=None, cas=None,
             regulates=None, types=None, smiles=None):
    """Initialize the compound record.

    HTML tags are stripped from the name and from every synonym. Missing
    values default to: "" for inchi, cas and smiles, [] for all_names,
    regulates and types, and None for everything else. If a SMILES is
    given without an InChI, the InChI is derived with
    Molecule.Smiles2InChI.

    Args:
        uid: UNIQUE-ID.
        name: COMMON-NAME.
        all_names: SYNONYMS (an iterable of strings).
        mass: MOLECULAR-WEIGHT.
        formula: CHEMICAL-FORMULA.
        inchi: INCHI.
        pubchem_id: PubChem ID (otherwise parsed from DBLINKS).
        cas: CAS number (otherwise parsed from DBLINKS).
        regulates: REGULATES.
        types: TYPES.
        smiles: SMILES.
    """
    self.uid = uid  # UNIQUE-ID

    # COMMON-NAME, with HTML tags removed
    self.name = name
    if self.name:
        self.name = re.sub('<.+?>', '', name)

    # SYNONYMS, with HTML tags removed
    self.all_names = [re.sub('<.+?>', '', s) for s in (all_names or [])]

    self.mass = mass  # MOLECULAR-WEIGHT
    self.formula = formula  # CHEMICAL-FORMULA
    self.inchi = inchi if inchi is not None else ""  # INCHI

    # Honor explicitly supplied values instead of silently discarding
    # them; the defaults are the same as before (None and "").
    self.pubchem_id = pubchem_id  # Parsed from DBLINKS
    self.cas = cas if cas is not None else ""  # Parsed from DBLINKS

    self.regulates = regulates if regulates is not None else []  # REGULATES
    self.types = types if types is not None else []  # TYPES
    self.smiles = smiles if smiles is not None else ""  # SMILES

    if smiles and not inchi:
        self.inchi = Molecule.Smiles2InChI(smiles)
def ConvertFormation2Reaction(self, output_fname):
    """Write all formation energies as formation reactions to a CSV file.

    For every compound in the 'training' and 'testing' sections of
    self.FormationEnergyFileName, the single listed species is used to
    set the formation energy of the compound's dissociation table, which
    is then transformed to the conditions of this object (pH, I, pMg, T).
    The compound is written as a reaction that forms it from its
    elements, in the same column format used for TECRDB. Compounds
    without a dissociation table or without an atom bag are skipped.

    Args:
        output_fname: path of the CSV file to write.

    Raises:
        Exception: if an element of KeggObservation.ATOM2ELEMENT is not
            found in KEGG, or a compound has more than one species.
    """
    logging.info("Converting all formation energies to reactions")

    # The 'with' block guarantees that the output is flushed and closed,
    # also when one of the exceptions below is raised.
    with open(output_fname, 'w') as output_fp:
        output_csv = csv.writer(output_fp)

        # keep the format used for TECRDB
        output_csv.writerow(
            ('ref', 'ID', 'method', 'eval', 'EC', 'name', 'kegg_reaction',
             'reaction', 'dG0\'', 'T', 'I', 'pH', 'pMg'))

        atom2cid = {}
        for atom, (name, stoich) in KeggObservation.ATOM2ELEMENT.iteritems():
            cid, _, _ = self.kegg.name2cid(name, 0)
            if cid is None:
                raise Exception(
                    "Cannot find the element %s in the KEGG database" % name)
            atom2cid[atom] = (cid, stoich)

        for label in ['training', 'testing']:
            ptable = PsuedoisomerTableThermodynamics.FromCsvFile(
                self.FormationEnergyFileName, label=label)
            for cid in ptable.get_all_cids():
                pmatrix = ptable.cid2PseudoisomerMap(cid).ToMatrix()
                if len(pmatrix) != 1:
                    raise Exception("multiple training species for C%05d"
                                    % cid)
                nH, _charge, nMg, dG0 = pmatrix[0]

                # NOTE(review): 'dissociation' is not defined inside this
                # method; presumably a module-level object - confirm.
                diss_table = dissociation.GetDissociationTable(cid, False)
                if diss_table is None:
                    continue
                diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
                dG0_prime = diss_table.Transform(pH=self.pH, I=self.I,
                                                 pMg=self.pMg, T=self.T)
                ref = ptable.cid2SourceString(cid)

                atom_bag = self.kegg.cid2atom_bag(cid)
                if not atom_bag:
                    continue

                ne = self.kegg.cid2num_electrons(cid)
                elem_ne = 0
                sparse = {cid: 1}
                for elem, count in atom_bag.iteritems():
                    if elem == 'H':
                        continue
                    elem_ne += count * Molecule.GetAtomicNum(elem)
                    elem_cid, elem_coeff = atom2cid[elem]
                    sparse.setdefault(elem_cid, 0)
                    sparse[elem_cid] += -count * elem_coeff

                # use the H element to balance the electrons in the
                # formation reactions (we don't need to balance protons
                # since this is a biochemical reaction, so H+ are 'free').
                H_cid, H_coeff = atom2cid['H']
                sparse[H_cid] = (elem_ne - ne) * H_coeff

                reaction_name = "formation of %s" % self.kegg.cid2name(cid)
                reaction = Reaction(reaction_name, sparse)

                output_csv.writerow(
                    (ref, 'C%05d' % cid, 'formation', 'A', '',
                     reaction_name,
                     reaction.FullReactionString(),
                     reaction.FullReactionString(show_cids=False),
                     '%.2f' % dG0_prime, self.T, self.I, self.pH, self.pMg))
def CalculateThermo():
    """Estimate formation energies for the molecules given on the command line.

    The molecules come from a single SMILES string, a single InChI, or a
    CSV file with an "ID" column and either an "InChI" or a "smiles"
    column. Each molecule is decomposed into groups and its formation
    energy is estimated with GroupContribution. Depending on
    options.biochemical, either one transformed energy per molecule or one
    row per pseudoisomer is written as CSV to options.csv_output_filename
    (or to stdout). Molecules that cannot be decomposed, or that use
    groups without training data, get a row with the 'error' column set.
    """
    parser = MakeOpts()
    options, _ = parser.parse_args(sys.argv)
    pH, I, pMg, T = options.pH, options.I, options.pMg, options.T

    db = SqliteDatabase('../res/gibbs.sqlite')
    G = GroupContribution(db=db)
    G.init()
    ignore_protonations = False

    list_of_mols = []
    if options.smiles:
        list_of_mols.append({
            'id': options.smiles,
            'mol': options.smiles,
            'format': 'smiles'
        })
    elif options.inchi:
        list_of_mols.append({
            'id': options.inchi,
            'mol': options.inchi,
            'format': 'inchi'
        })
    elif options.csv_input_filename:
        # Close the input file as soon as all rows have been read.
        with open(options.csv_input_filename, 'r') as in_fp:
            for row in csv.DictReader(in_fp):
                if "InChI" in row:
                    list_of_mols.append({
                        'id': row["ID"],
                        'mol': row["InChI"],
                        'format': 'inchi'
                    })
                elif "smiles" in row:
                    list_of_mols.append({
                        'id': row["ID"],
                        'mol': row["smiles"],
                        'format': 'smiles'
                    })
                else:
                    raise Exception(
                        "There must be one molecular ID column: InChI or smiles")
    else:
        parser.error("must use either -s or -c option")

    if options.biochemical:
        print("Calculating biochemical formation energies for %s compounds"
              " at pH = %.1f, I = %.2f, pMg = %.1f, T = %.2f" %
              (len(list_of_mols), pH, I, pMg, T))
    else:
        print("Calculating chemical formation energies for %s compounds" %
              len(list_of_mols))

    rowdicts = []
    for mol_dict in list_of_mols:
        mol_id = mol_dict['id']
        # Reset for every molecule, so the error handler below can never
        # report the group vector of a previous molecule (or hit an
        # unbound name on the first one).
        groupvec = None
        diss_table = Molecule._GetDissociationTable(mol_dict['mol'],
                                                    fmt=mol_dict['format'])
        try:
            mol = diss_table.GetMostAbundantMol(pH, I, pMg, T) or \
                  diss_table.GetAnyMol()
            if mol is None:
                raise Exception("Cannot convert input string to Molecule: " +
                                mol_dict['mol'])

            decomposition = G.Mol2Decomposition(
                mol, ignore_protonations=ignore_protonations)
            groupvec = decomposition.AsVector()
            dG0 = G.groupvec2val(groupvec)
            nH = decomposition.Hydrogens()
            nMg = decomposition.Magnesiums()
            diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
            pmap = diss_table.GetPseudoisomerMap()

            if options.biochemical:
                dG0_prime = pmap.Transform(pH, pMg, I, T)
                rowdicts.append({
                    'ID': mol_id,
                    'pH': pH,
                    'I': I,
                    'pMg': pMg,
                    'dG0\'': "%.1f" % dG0_prime,
                    'groupvec': str(groupvec)
                })
            else:
                for p_nH, p_z, p_nMg, p_dG0 in pmap.ToMatrix():
                    rowdicts.append({
                        'ID': mol_id,
                        'nH': p_nH,
                        'charge': p_z,
                        'nMg': p_nMg,
                        'dG0': "%.1f" % p_dG0,
                        'groupvec': str(groupvec)
                    })
        except GroupDecompositionError:
            rowdicts.append({'ID': mol_id, 'error': "cannot decompose"})
        except GroupMissingTrainDataError:
            error_row = {'ID': mol_id, 'error': "missing training data"}
            if groupvec is not None:
                error_row['groupvec'] = str(groupvec)
            rowdicts.append(error_row)

    if options.csv_output_filename is not None:
        out_fp = open(options.csv_output_filename, 'w')
        print("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        if options.biochemical:
            titles = ['ID', 'error', 'pH', 'I', 'pMg', 'dG0\'', 'groupvec']
        else:
            titles = ['ID', 'error', 'nH', 'nMg', 'charge', 'dG0', 'groupvec']
        csv_writer = csv.DictWriter(out_fp, titles)
        csv_writer.writeheader()
        csv_writer.writerows(rowdicts)
    finally:
        # Close only a file opened here; stdout stays open for the caller.
        if out_fp is not sys.stdout:
            out_fp.close()
import unittest from pygibbs.groups_data import GroupsData from pygibbs import group_decomposition from toolbox.molecule import Molecule import logging PHOSPHATE = Molecule.FromSmiles('[O-]P([O-])(=O)O') ATP = Molecule.FromSmiles( 'C1=NC2=C(C(=N1)N)N=CN2C3C(C(C(O3)COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])O)O)O' ) A4P = Molecule.FromSmiles( 'C1=NC2=C(C(=N1)N)N=CN2C3C(C(C(O3)COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])OP(=O)([O-])O)O)O' ) class GroupsDecompositionTest(unittest.TestCase): """Tests for GroupsDecomposition""" def setUp(self): self.groups_decomposer = group_decomposition.GroupDecomposer.FromGroupsFile( open('../data/thermodynamics/groups_species.csv', 'r')) def testFindPhosphateChains(self): ps = group_decomposition.GroupDecomposer.FindPhosphateChains( PHOSPHATE, ignore_protonations=False) for unused_grp, l in ps: self.assertTrue(not l) mk_ps_dict = lambda ps: dict((key, l) for key, l in ps) mk_ps_string = lambda ps: ', '.join( ["%s x %d" % (str(key), len(l)) for key, l in ps if l != []])
logging.error(e) continue return atp = 'C1=NC2=C(C(=N1)N)N=CN2C3C(C(C(O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O' coa = 'C1C=CN(C=C1C(=O)N)C2C(C(C(O2)COP(=O)(O)OP(=O)(O)OCC3C(C(C(O3)N4C=NC5=C4N=CN=C5N)O)O)O)O' glucose = 'C(C1C(C(C(C(O1)O)O)O)O)O' mgatp = 'C([C@@H]1[C@H]([C@H]([C@H](n2cnc3c(N)[nH+]cnc23)O1)O)O)OP(=O)([O-])OP(=O)([O-])OP(=O)([O-])[O-].[Mg+2].[Mg+2]' #smiless = [ # ('ATP', atp), # ('CoA', coa), ('Glucose', glucose), ('MgAtp', mgatp), # ] smiless = [('ATP', atp)] mols = [(name, Molecule.FromSmiles(s)) for name, s in smiless] for name, mol in mols: print name decomposition = decomposer.Decompose(mol) print decomposition.ToTableString() print 'Group count', decomposition.group_count print 'Net charge', decomposition.net_charge print 'Hydrogens', decomposition.hydrogens print 'Magnesiums', decomposition.magnesiums print 'Group Vector:' print decomposition.AsVector() print 'Pseudoisomer Vectors:' for v in decomposition.PseudoisomerVectors():
def main():
    """Run a sample compound through an EnzymeMarketplace and print the result.

    The compound is built from the SMILES string 'C(O)(=O)C(=O)O' and
    passed to EnzymeMarketplace.React().
    """
    substrate = Molecule.FromSmiles('C(O)(=O)C(=O)O')
    marketplace = EnzymeMarketplace()
    print(marketplace.React(substrate))
sum_conc = 0 for Ka_subset in itertools.combinations(Ka_list, i+1): # all choices of i values from the Ka list sum_conc += np.prod(Ka_subset) relative_conc.append(sum_conc) Ka_i = relative_conc[i+1] / relative_conc[i] transformed_pKas.append(-np.log10(Ka_i)) return transformed_pKas if __name__ == "__main__": diss_table_example = [4.0, 4.0, 4.0] new_diss_table = _TransformMultiples(diss_table_example) print diss_table_example print new_diss_table from toolbox.molecule import Molecule compound_list = [('glycine', 'C(=O)(O)CN'), ('CO2', 'O=C=O'), ('ATP', 'Nc1ncnc2n(cnc12)C1OC(COP([O-])(=O)OP([O-])(=O)OP(O)([O-])=O)C(O)C1O'), ('3-Ketoarabinitol', 'OCC(O)C(C(O)CO)=O')] for name, smiles in compound_list: diss_table1, major_ms = GetDissociationConstants(smiles, transform_multiples=False) diss_table2, major_ms = GetDissociationConstants(smiles, transform_multiples=True) m = Molecule.FromSmiles(major_ms) print name, m.ToInChI() for i in xrange(len(diss_table1)): print "%.2f %.2f" % (diss_table1[i][0], diss_table2[i][0])
def CalculateThermo():
    """Estimate formation energies for every molecule in an SDF file.

    Each molecule of options.sdf_input_filename is decomposed into groups
    and its formation energy is estimated from the contribution data of
    UnifiedGroupContribution. One CSV row per pseudoisomer is written to
    options.csv_output_filename (or to stdout). A molecule that cannot be
    converted, has more than 200 atoms, cannot be decomposed, or has no
    pKa data gets a single row with only the ID and the error message.

    Columns: ID, error, nH, nMg, charge, dG0, kernel.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    if options.csv_output_filename is not None:
        out_fp = open(options.csv_output_filename, 'w')
        print("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        csv_writer = csv.writer(out_fp)
        csv_writer.writerow(
            ['ID', 'error', 'nH', 'nMg', 'charge', 'dG0', 'kernel'])

        db = SqliteDatabase('../res/gibbs.sqlite', 'w')
        ugc = UnifiedGroupContribution(db)
        ugc.LoadGroups(True)
        ugc.LoadObservations(True)
        ugc.LoadGroupVectors(True)
        ugc.LoadData(True)

        result_dict = ugc._GetContributionData(ugc.S.copy(), ugc.cids,
                                               ugc.b.copy(), ugc.anchored)
        g_pgc = result_dict['group_contributions']
        P_L_pgc = result_dict['pgc_conservations']

        sdfile = pybel.readfile("sdf", options.sdf_input_filename)
        for m in sdfile:
            try:
                try:
                    mol = Molecule.FromOBMol(m.OBMol)
                except OpenBabelError:
                    raise UnknownReactionEnergyError(
                        "Cannot convert to OBMol object")

                mol.title = m.title
                mol.RemoveHydrogens()
                if mol.GetNumAtoms() > 200:
                    raise UnknownReactionEnergyError(
                        "Compound contains more than 200 atoms (n = %d)" %
                        mol.GetNumAtoms())

                try:
                    decomposition = ugc.group_decomposer.Decompose(
                        mol, ignore_protonations=False, strict=True)
                except GroupDecompositionError:
                    raise UnknownReactionEnergyError("cannot decompose")

                groupvec = decomposition.AsVector()
                gv = np.matrix(groupvec.Flatten())
                dG0 = float(g_pgc * gv.T)
                nH = decomposition.Hydrogens()
                nMg = decomposition.Magnesiums()
                ker = list((P_L_pgc * gv.T).round(10).flat)

                try:
                    diss_table = mol.GetDissociationTable()
                    diss_table.SetFormationEnergyByNumHydrogens(
                        dG0=dG0, nH=nH, nMg=nMg)
                except MissingDissociationConstantError:
                    raise UnknownReactionEnergyError("missing pKa data")

                pmap = diss_table.GetPseudoisomerMap()
                for p_nH, p_z, p_nMg, p_dG0 in pmap.ToMatrix():
                    # The values follow the header order (nH, nMg, charge),
                    # which differs from the order yielded by ToMatrix().
                    csv_writer.writerow([
                        m.title, None, p_nH, p_nMg, p_z,
                        round(p_dG0, 1), str(ker)
                    ])
            except UnknownReactionEnergyError as e:
                csv_writer.writerow(
                    [m.title, str(e), None, None, None, None, None])
            # Flush per molecule so partial results are visible early.
            out_fp.flush()
    finally:
        # Close only a file opened here; stdout stays open for the caller.
        if out_fp is not sys.stdout:
            out_fp.close()
def CalculateThermo():
    """Estimate formation energies for the molecules given on the command line.

    The molecules come from a single SMILES string, a single InChI, or a
    CSV file with an "ID" column and either an "InChI" or a "smiles"
    column. Each molecule is decomposed into groups and its formation
    energy is estimated with GroupContribution. Depending on
    options.biochemical, either one transformed energy per molecule or one
    row per pseudoisomer is written as CSV to options.csv_output_filename
    (or to stdout). Molecules that cannot be decomposed, or that use
    groups without training data, get a row with the 'error' column set.
    """
    parser = MakeOpts()
    options, _ = parser.parse_args(sys.argv)
    pH, I, pMg, T = options.pH, options.I, options.pMg, options.T

    db = SqliteDatabase('../res/gibbs.sqlite')
    G = GroupContribution(db=db)
    G.init()
    ignore_protonations = False

    list_of_mols = []
    if options.smiles:
        list_of_mols.append({'id': options.smiles, 'mol': options.smiles,
                             'format': 'smiles'})
    elif options.inchi:
        list_of_mols.append({'id': options.inchi, 'mol': options.inchi,
                             'format': 'inchi'})
    elif options.csv_input_filename:
        # Close the input file as soon as all rows have been read.
        with open(options.csv_input_filename, 'r') as in_fp:
            for row in csv.DictReader(in_fp):
                if "InChI" in row:
                    list_of_mols.append({'id': row["ID"],
                                         'mol': row["InChI"],
                                         'format': 'inchi'})
                elif "smiles" in row:
                    list_of_mols.append({'id': row["ID"],
                                         'mol': row["smiles"],
                                         'format': 'smiles'})
                else:
                    raise Exception("There must be one molecular ID column: InChI or smiles")
    else:
        parser.error("must use either -s or -c option")

    if options.biochemical:
        print("Calculating biochemical formation energies for %s compounds"
              " at pH = %.1f, I = %.2f, pMg = %.1f, T = %.2f" %
              (len(list_of_mols), pH, I, pMg, T))
    else:
        print("Calculating chemical formation energies for %s compounds" %
              len(list_of_mols))

    rowdicts = []
    for mol_dict in list_of_mols:
        mol_id = mol_dict['id']
        # Reset for every molecule, so the error handler below can never
        # report the group vector of a previous molecule (or hit an
        # unbound name on the first one).
        groupvec = None
        diss_table = Molecule._GetDissociationTable(mol_dict['mol'],
                                                    fmt=mol_dict['format'])
        try:
            mol = diss_table.GetMostAbundantMol(pH, I, pMg, T) or \
                  diss_table.GetAnyMol()
            if mol is None:
                raise Exception("Cannot convert input string to Molecule: " +
                                mol_dict['mol'])

            decomposition = G.Mol2Decomposition(
                mol, ignore_protonations=ignore_protonations)
            groupvec = decomposition.AsVector()
            dG0 = G.groupvec2val(groupvec)
            nH = decomposition.Hydrogens()
            nMg = decomposition.Magnesiums()
            diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
            pmap = diss_table.GetPseudoisomerMap()

            if options.biochemical:
                dG0_prime = pmap.Transform(pH, pMg, I, T)
                rowdicts.append({'ID': mol_id, 'pH': pH, 'I': I, 'pMg': pMg,
                                 'dG0\'': "%.1f" % dG0_prime,
                                 'groupvec': str(groupvec)})
            else:
                for p_nH, p_z, p_nMg, p_dG0 in pmap.ToMatrix():
                    rowdicts.append({'ID': mol_id, 'nH': p_nH, 'charge': p_z,
                                     'nMg': p_nMg, 'dG0': "%.1f" % p_dG0,
                                     'groupvec': str(groupvec)})
        except GroupDecompositionError:
            rowdicts.append({'ID': mol_id, 'error': "cannot decompose"})
        except GroupMissingTrainDataError:
            error_row = {'ID': mol_id, 'error': "missing training data"}
            if groupvec is not None:
                error_row['groupvec'] = str(groupvec)
            rowdicts.append(error_row)

    if options.csv_output_filename is not None:
        out_fp = open(options.csv_output_filename, 'w')
        print("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        if options.biochemical:
            titles = ['ID', 'error', 'pH', 'I', 'pMg', 'dG0\'', 'groupvec']
        else:
            titles = ['ID', 'error', 'nH', 'nMg', 'charge', 'dG0', 'groupvec']
        csv_writer = csv.DictWriter(out_fp, titles)
        csv_writer.writeheader()
        csv_writer.writerows(rowdicts)
    finally:
        # Close only a file opened here; stdout stays open for the caller.
        if out_fp is not sys.stdout:
            out_fp.close()