Exemplo n.º 1
0
    def get_atom_vector(self):
        """
            Returns a 1-D NumPy integer array describing the number of atoms
            from each element (the index is the atomic number of that
            element).
            The first entry (index=0) contains the number of electrons.

            Returns None when the atom bag is empty or unavailable, when the
            compound contains a wildcard element ('R' or 'X'), or when an
            element has no known atomic number.
        """
        atom_bag = self.get_atom_bag()
        if not atom_bag:
            return None

        atom_vector = np.zeros((Molecule.GetNumberOfElements() + 1),
                               dtype='int')
        for elem, count in atom_bag.iteritems():
            if elem in ('R', 'X'):
                return None  # wildcard compound!
            an = Molecule.GetAtomicNum(elem)
            if not an:
                # The values must be passed as separate arguments: wrapping
                # them in a single tuple supplies only one argument for two
                # placeholders and the log record fails to format.
                logging.warning("Unsupported element in (C%05d): %s",
                                self.cid, elem)
                return None
            atom_vector[an] = count

        # Index 0 does not correspond to any element, so it is used to store
        # the electron count.
        atom_vector[0] = self.get_num_electrons()
        return atom_vector
Exemplo n.º 2
0
    def ToTableString(self):
        """Returns the decomposition as a tabular string.

        The first table has one row per (group, node set) pair with the
        group's name, nH, charge (z), nMg and the comma-separated node
        indices. Groups for which hydrogens, charge and nMg are all None get
        blank numeric cells; otherwise missing values are printed as 0.

        If there are unassigned nodes, a second table lists each such atom's
        index, atomic number, element symbol, heavy valence and formal charge.
        """
        spacer = '-' * 50 + '\n'
        lines = [
            '%30s | %2s | %2s | %3s | %s\n' %
            ("group name", "nH", "z", "nMg", "nodes"), spacer
        ]

        for group, node_sets in self.groups:
            unspecified = (group.hydrogens is None and
                           group.charge is None and
                           group.nMg is None)
            for n_set in node_sets:
                nodes = ','.join([str(i) for i in n_set])
                if unspecified:
                    lines.append('%30s |    |    |     | %s\n' %
                                 (group.name, nodes))
                else:
                    # The nMg cell is 3 characters wide, like the header and
                    # the blank rows, so that the columns stay aligned.
                    lines.append('%30s | %2d | %2d | %3d | %s\n' %
                                 (group.name, group.hydrogens or 0,
                                  group.charge or 0, group.nMg or 0, nodes))

        if self.unassigned_nodes:
            lines.append('\nUnassigned nodes: \n')
            lines.append('%10s | %3s | %2s | %10s | %10s\n' %
                         ('index', 'an', 'el', 'valence', 'charge'))
            lines.append(spacer)

            all_atoms = self.mol.GetAtoms()
            for i in self.unassigned_nodes:
                a = all_atoms[i]
                lines.append('%10d | %3d | %2s | %10d | %10d\n' %
                             (i, a.atomicnum, Molecule.GetSymbol(
                                 a.atomicnum), a.heavyvalence, a.formalcharge))
        return ''.join(lines)
Exemplo n.º 3
0
def add_thermodynamics(cursor):
    """Rebuild the yeast_inchi2thermo table from group-contribution estimates.

    Drops and recreates the table (and its index on the inchi column), then,
    for every distinct non-NULL InChI in yeast_species2inchi, inserts one row
    (inchi, charge, nH, dG0_f) per entry of the pseudoisomer map returned by
    GroupContribution.Mol2PseudoisomerMap.

    InChIs that raise IOError, GroupMissingTrainDataError or
    GroupDecompositionError are skipped and reported on stderr.

    Args:
        cursor: a database cursor supporting execute() and iteration over the
            result rows (presumably a sqlite3 cursor -- confirm with caller).
    """
    from groups import GroupMissingTrainDataError, GroupDecompositionError

    gc = GroupContribution(sqlite_name="gibbs.sqlite", html_name="pathologic")
    gc.init()

    cursor.execute("DROP TABLE IF EXISTS yeast_inchi2thermo")
    cursor.execute(
        "CREATE TABLE yeast_inchi2thermo (inchi TEXT, charge INT, nH INT, dG0_f REAL)"
    )
    cursor.execute("DROP INDEX IF EXISTS yeast_inchi2thermo_idx")
    cursor.execute(
        "CREATE INDEX yeast_inchi2thermo_idx ON yeast_inchi2thermo (inchi);")

    # Collect all the InChIs before inserting, because the same cursor is
    # reused for the INSERT statements below.
    inchi_list = [
        str(row[0])
        for row in cursor.execute("SELECT distinct(inchi) "
                                  "FROM yeast_species2inchi WHERE inchi IS NOT NULL")
    ]

    for inchi in inchi_list:
        try:
            mol = Molecule.FromInChI(inchi)
            pmap = gc.Mol2PseudoisomerMap(mol)
            for ((z, nH), dG0) in pmap.iteritems():
                cursor.execute(
                    "INSERT INTO yeast_inchi2thermo VALUES(?,?,?,?)",
                    [inchi, z, nH, dG0])
        except (IOError, GroupMissingTrainDataError, GroupDecompositionError):
            # Report which InChI failed; the message used to end right after
            # "the following InChI" without printing it or a newline.
            sys.stderr.write(
                "Cannot convert the following InChI to a pybel Molecule: %s\n"
                % inchi)
Exemplo n.º 4
0
    def run(self):
        """Compute the dissociation table of self.smiles and store it.

        While holding self.semaphore, calls Molecule._GetDissociationTable on
        the SMILES string (pKa range 0-14 around default_pH at default_T),
        then, while holding self.db_lock, inserts the result into
        self.options.table_name of the database at self.options.db_file.
        One row is written per entry of diss_table.ToDatabaseRow(); if no
        table was produced a single row padded with 10 NULLs is written.

        Both the semaphore and the lock are released even when an exception
        is raised, so a failing compound cannot block the other workers.
        """
        from toolbox.molecule import Molecule

        self.semaphore.acquire()
        try:
            start_time = time.time()

            logging.debug("SMILES: " + self.smiles)
            diss_table = Molecule._GetDissociationTable(self.smiles,
                                                        fmt='smiles',
                                                        mid_pH=default_pH,
                                                        min_pKa=0,
                                                        max_pKa=14,
                                                        T=default_T)
            # Only dereference the table when one was returned; otherwise the
            # "no table" branch below could never be reached.
            if diss_table is not None:
                logging.debug("Min charge: %d" % diss_table.min_charge)
                logging.debug("Min nH: %d" % diss_table.min_nH)

            elapsed_time = time.time() - start_time
            self.db_lock.acquire()
            try:
                db = SqliteDatabase(self.options.db_file)
                kegg = Kegg.getInstance()
                name = kegg.cid2name(self.cid)

                if diss_table is not None:
                    for row in diss_table.ToDatabaseRow():
                        db.Insert(self.options.table_name,
                                  [self.cid, name] + row)
                else:
                    db.Insert(self.options.table_name,
                              [self.cid, name] + [None] * 10)
                del db
            finally:
                self.db_lock.release()

            logging.info("Completed C%05d, elapsed time = %.1f sec" %
                         (self.cid, elapsed_time))
        finally:
            self.semaphore.release()
Exemplo n.º 5
0
    def run(self):
        """Compute the dissociation table of self.smiles and store it.

        While holding self.semaphore, calls Molecule._GetDissociationTable on
        the SMILES string (pKa range 0-14 around default_pH at default_T),
        then, while holding self.db_lock, inserts the result into
        self.options.table_name of the database at self.options.db_file.
        One row is written per entry of diss_table.ToDatabaseRow(); if no
        table was produced a single row padded with 10 NULLs is written.

        Both the semaphore and the lock are released even when an exception
        is raised, so a failing compound cannot block the other workers.
        """
        from toolbox.molecule import Molecule

        self.semaphore.acquire()
        try:
            start_time = time.time()

            logging.debug("SMILES: " + self.smiles)
            diss_table = Molecule._GetDissociationTable(self.smiles, fmt='smiles',
                mid_pH=default_pH, min_pKa=0, max_pKa=14, T=default_T)
            # Only dereference the table when one was returned; otherwise the
            # "no table" branch below could never be reached.
            if diss_table is not None:
                logging.debug("Min charge: %d" % diss_table.min_charge)
                logging.debug("Min nH: %d" % diss_table.min_nH)

            elapsed_time = time.time() - start_time
            self.db_lock.acquire()
            try:
                db = SqliteDatabase(self.options.db_file)
                kegg = Kegg.getInstance()
                name = kegg.cid2name(self.cid)

                if diss_table is not None:
                    for row in diss_table.ToDatabaseRow():
                        db.Insert(self.options.table_name,
                                  [self.cid, name] + row)
                else:
                    db.Insert(self.options.table_name,
                              [self.cid, name] + [None] * 10)
                del db
            finally:
                self.db_lock.release()

            logging.info("Completed C%05d, elapsed time = %.1f sec" %
                         (self.cid, elapsed_time))
        finally:
            self.semaphore.release()
Exemplo n.º 6
0
    def FromGroupsFile(fp, transformed=False):
        """Build a GroupsData object from an open CSV file of group definitions.

        Every row must provide the NAME, PROTONS, CHARGE, MAGNESIUMS, SMARTS,
        FOCAL_ATOMS and REMARK columns. Rows with a non-empty SKIP column are
        ignored (with a warning). Group ids are assigned consecutively from 0,
        in file order, counting only the rows that are kept.

        Args:
            fp: an open file object (its .name is used for logging).
            transformed: passed through unchanged to GroupsData.

        Raises:
            GroupsDataError: if a SMARTS string fails Molecule.VerifySmarts.
        """
        logging.info('Reading the list of groups from %s' % fp.name)

        groups = []
        for record in csv.DictReader(fp):
            if record.get('SKIP', False):
                logging.warning('Skipping group %s', record.get('NAME'))
                continue

            name = record['NAME']
            n_protons = int(record['PROTONS'])
            z = int(record['CHARGE'])
            n_mg = int(record['MAGNESIUMS'])
            pattern = record['SMARTS']
            focal = FocalSet(record['FOCAL_ATOMS'])
            # The remark is not stored, but the column is still required.
            _remark = record['REMARK']

            # Refuse to continue with a SMARTS pattern that cannot be parsed.
            if not Molecule.VerifySmarts(pattern):
                raise GroupsDataError('Cannot parse SMARTS: %s' % pattern)

            # The next id is simply the number of groups accepted so far.
            groups.append(
                Group(len(groups), name, n_protons, z, n_mg, str(pattern),
                      focal))

        logging.info('Done reading groups data.')

        return GroupsData(groups, transformed)
Exemplo n.º 7
0
def GetMolInput(dissociation):
    mols = [
    ]  # a list of pairs of Molecule objects and stoichiometric coefficients
    while mols == []:
        print 'KEGG ID or SMILES (or Enter to quit):',
        s_input = raw_input()
        if not s_input:
            return []
        elif re.findall('C\d\d\d\d\d', s_input) != []:
            try:
                cid = int(s_input[1:])
                mols = [(GetMostAbundantMol(cid, dissociation), 1)]
                print "Compound:", mols[0][0].ToInChI()
            except ValueError:
                print 'syntax error: KEGG compound ID is bad (%s), please try again' % s_input
        elif re.findall('R\d\d\d\d\d', s_input) != []:
            try:
                rid = int(s_input[1:])
                reaction = Kegg.getInstance().rid2reaction(rid)
                print "Reaction:", str(reaction)
                for cid, coeff in reaction.iteritems():
                    mols += [(GetMostAbundantMol(cid, dissociation), coeff)]
            except ValueError:
                print 'syntax error: KEGG reaction ID is bad (%s), please try again' % s_input
        else:
            try:
                mols = [(Molecule.FromSmiles(s_input), 1)]
                print "Compound:", mols[0][0].ToInChI()
            except Exception:
                print 'unable to parse SMILES string, please try again'

    return mols
Exemplo n.º 8
0
 def CreateEmptyGroupDecomposition(self):
     """Return a decomposition in which every group has no node sets.

     The empty molecule is decomposed (non-strict, ignoring protonations)
     to obtain one entry per group, and each entry's node-set list is then
     replaced by a fresh empty list.
     """
     decomposition = self.Decompose(Molecule.FromSmiles(""),
                                    ignore_protonations=True,
                                    strict=False)
     for idx, entry in enumerate(decomposition.groups):
         group, _unused_node_sets = entry
         decomposition.groups[idx] = (group, [])
     return decomposition
Exemplo n.º 9
0
 def SetInChI(self, inchi):
     """Set the InChI and refresh the fields that are derived from it.

     When inchi is None, the inchi, mol, formula and mass attributes are
     all cleared. Otherwise mol is built with Molecule.FromInChI and the
     formula and exact mass are taken from that molecule.
     """
     # Identity test: 'inchi == None' would invoke a custom __eq__ on the
     # argument, 'is None' never does.
     if inchi is None:
         self.inchi = None
         self.mol = None
         self.formula = None
         self.mass = None
     else:
         self.inchi = inchi
         self.mol = Molecule.FromInChI(inchi)
         self.formula = self.mol.GetFormula()
         self.mass = self.mol.GetExactMass()
Exemplo n.º 10
0
    def GetMol(self, nH=None, nMg=0):
        """Return the Molecule stored for the (nH, nMg) species.

        nH defaults to self.min_nH. Returns None when self.mol_dict has no
        entry for the pair. Otherwise the Molecule is parsed from the stored
        SMILES the first time it is requested and written back to
        self.mol_dict so that later calls reuse it.
        """
        from toolbox.molecule import Molecule

        key = (self.min_nH if nH is None else nH, nMg)
        if key not in self.mol_dict:
            return None

        smiles, molecule = self.mol_dict[key]
        if molecule is None:
            molecule = Molecule.FromSmiles(smiles)
        self.mol_dict[key] = (smiles, molecule)
        return molecule
    def EstimateInChI(self, inchi):
        """Estimate the formation energy of the compound given as an InChI.

        The molecule is decomposed strictly (protonations are not ignored)
        and the resulting group vector is passed to EstimateGroupVector.

        Returns:
            The tuple (dG0, nH, charge, nMg, ker), where nH, charge and nMg
            come from the decomposition and dG0 and ker from
            EstimateGroupVector.
        """
        # NOTE: hydrogens are left on the molecule (an earlier
        # RemoveHydrogens() call here is disabled).
        decomposition = self.group_decomposer.Decompose(
            Molecule.FromInChI(inchi), ignore_protonations=False, strict=True)

        n_hydrogens = decomposition.Hydrogens()
        net_charge = decomposition.NetCharge()
        n_magnesiums = decomposition.Magnesiums()
        dG0, ker = self.EstimateGroupVector(decomposition.AsVector())
        return dG0, n_hydrogens, net_charge, n_magnesiums, ker
Exemplo n.º 12
0
    def get_nH_and_charge(self):
        """Return the (number of hydrogens, charge) pair of this compound.

        If a molecule is available (self.mol, or one built from self.inchi
        and cached in self.mol), its GetHydrogensAndCharge() result is
        returned. Otherwise the formula is assumed to describe the neutral
        species: the 'H' count of the atom bag is returned with charge 0,
        or None when there is no atom bag.
        """
        molecule = self.mol
        if not molecule and self.inchi:
            molecule = self.mol = Molecule.FromInChI(self.inchi)

        if molecule:
            return molecule.GetHydrogensAndCharge()

        # No structure: fall back on the formula, assumed to be neutral.
        atom_bag = self.get_atom_bag()
        return (atom_bag.get('H', 0), 0) if atom_bag else None
Exemplo n.º 13
0
    def get_num_electrons(self):
        """Return the putative number of electrons in the molecule.

        Uses the molecule returned by self.GetMolecule() when it is truthy.
        Otherwise the formula is assumed to be correct and the charge to be
        0, so the electron count equals the total number of protons in the
        atom bag; None is returned when there is no atom bag.

        NOTE(review): if GetMolecule() raises instead of returning a falsy
        value for compounds without a structure, the formula fallback is
        never reached -- confirm against GetMolecule's behavior.
        """
        molecule = self.GetMolecule()
        if molecule:
            return molecule.GetNumElectrons()

        atom_bag = self.get_atom_bag()
        if not atom_bag:
            return None
        # Neutral species: one electron per proton.
        return sum(count * Molecule.GetAtomicNum(elem)
                   for elem, count in atom_bag.iteritems())
Exemplo n.º 14
0
    def GetMolecule(self):
        """Gets a Molecule for this compound if possible.

        A previously built molecule (self.mol) is returned as is. Otherwise
        the molecule is built from self.inchi, titled with self.name and
        cached in self.mol.

        Raises:
            kegg_errors.KeggParseException: if there is neither a cached
                molecule nor an InChI to build one from.
        """
        if not self.mol:
            if not self.inchi:
                raise kegg_errors.KeggParseException(
                    "C%05d (%s) doesn't have an explicit molecular structure" %
                    (self.cid, self.name))
            self.mol = Molecule.FromInChI(self.inchi)
            self.mol.SetTitle(self.name)
        return self.mol
Exemplo n.º 15
0
def test_dissociation_table(diss,
                            group_decomposer,
                            id,
                            ignore_missing_smiles=False):
    """Check a dissociation table against the group decomposition.

    Takes the most abundant pseudoisomer at default_pH, default_I, pMg=14
    and default_T, decomposes its SMILES strictly and logs a warning when
    the number of hydrogens of the decomposition differs from the one in the
    dissociation table. Nothing is returned; all findings are logged.

    The check stops early (with a warning, unless noted) when diss is None,
    when the most abundant species contains Mg, when the species has no
    SMILES (silent if ignore_missing_smiles is set) or when the molecule
    cannot be decomposed (silent).

    Args:
        diss: the dissociation table, or None.
        group_decomposer: object providing Decompose().
        id: label of the compound, used only in the log messages.
        ignore_missing_smiles: suppress the warning about a missing SMILES.
    """
    if diss is None:
        logging.warning('%s: does not appear in the dissociation table' % id)
        return

    major_nH, major_nMg = diss.GetMostAbundantPseudoisomer(
        pH=default_pH, I=default_I, pMg=14, T=default_T)
    if major_nMg != 0:
        logging.warning('%s: default species has nMg = %d' % (id, major_nMg))
        return

    smiles = diss.GetSmiles(nH=major_nH, nMg=0)
    if not smiles:
        if ignore_missing_smiles:
            return
        logging.warning(
            '%s: no SMILES in the dissociation table for nH = %d' %
            (id, major_nH))
        return

    logging.debug('%s: nH = %d, smiles = %s' % (id, major_nH, smiles))
    mol = Molecule.FromSmiles(smiles)

    try:
        decomposition = group_decomposer.Decompose(
            mol, ignore_protonations=False, strict=True)
    except GroupDecompositionError:
        # Compounds that cannot be decomposed are skipped without a message.
        return

    logging.debug("%s: decomposition = %s" % (id, decomposition.AsVector()))
    decomposed_nH = decomposition.Hydrogens()
    if major_nH != decomposed_nH:
        logging.warning(
            '%s: nH doesn\'t match: explicit = %d, decomposition = %d' %
            (id, major_nH, decomposed_nH))
Exemplo n.º 16
0
    def __init__(self, uid=None, name=None, all_names=None, mass=None,
                 formula=None, inchi=None, pubchem_id=None, cas=None, 
                 regulates=None, types=None, smiles=None):
        """Initialize the compound record.

        HTML tags are stripped from name and from every entry of all_names.
        Missing inchi, cas and smiles default to "", missing regulates and
        types to fresh empty lists. When a SMILES string is given without an
        InChI, the InChI is derived with Molecule.Smiles2InChI.

        The pubchem_id and cas arguments are stored; they used to be accepted
        but silently discarded. Callers that do not pass them get the same
        defaults as before (None and "").
        """
        self.uid = uid                      # UNIQUE-ID
        self.name = name                    # COMMON-NAME
        if self.name:
            self.name = re.sub('<.+?>', '', name)  # Removing HTML tags
        # SYNONYMS (HTML tags removed as well)
        self.all_names = [re.sub('<.+?>', '', s) for s in all_names or []]
        self.mass = mass                    # MOLECULAR-WEIGHT
        self.formula = formula              # CHEMICAL-FORMULA
        self.inchi = inchi if inchi is not None else ""          # INCHI
        self.pubchem_id = pubchem_id        # Parsed from DBLINKS
        self.cas = cas if cas is not None else ""                # Parsed from DBLINKS
        self.regulates = regulates if regulates is not None else []  # REGULATES
        self.types = types if types is not None else []          # TYPES
        self.smiles = smiles if smiles is not None else ""       # SMILES
        if smiles and not inchi:
            self.inchi = Molecule.Smiles2InChI(smiles)
Exemplo n.º 17
0
    def ConvertFormation2Reaction(self, output_fname):
        """Write all the formation energies as "formation reactions" to a CSV.

        The output uses the same columns as the TECRDB file. The file is
        closed when writing finishes or fails, so the rows already written
        are flushed to disk.

        Args:
            output_fname: path of the CSV file to (over)write.

        Raises:
            Exception: if an element of KeggObservation.ATOM2ELEMENT cannot
                be found in KEGG, or if a compound has more than one
                species in the formation energy table.
        """
        logging.info("Converting all formation energies to reactions")
        output_fp = open(output_fname, 'w')
        try:
            self._WriteFormationReactions(csv.writer(output_fp))
        finally:
            output_fp.close()

    def _WriteFormationReactions(self, output_csv):
        """Write the header and one formation-reaction row per compound."""
        # keep the format used for TECRDB
        output_csv.writerow(
            ('ref', 'ID', 'method', 'eval', 'EC', 'name', 'kegg_reaction',
             'reaction', 'dG0\'', 'T', 'I', 'pH', 'pMg'))

        # Map each atom symbol to the KEGG compound of its element and the
        # stoichiometric factor given in ATOM2ELEMENT.
        atom2cid = {}
        for atom, (name, stoich) in KeggObservation.ATOM2ELEMENT.iteritems():
            cid, _, _ = self.kegg.name2cid(name, 0)
            if cid is None:
                raise Exception(
                    "Cannot find the element %s in the KEGG database" % name)
            atom2cid[atom] = (cid, stoich)
            # Disabled: writing a zero-energy row for each element.
            #output_csv.writerow(('element',
            #                     'C%05d' % cid, 'formation', 'A', '',
            #                     'formation of %s' % self.kegg.cid2name(cid),
            #                     "C%05d" % cid,
            #                     name, 0, self.T, self.I, self.pH, self.pMg))

        for label in ['training', 'testing']:
            ptable = PsuedoisomerTableThermodynamics.FromCsvFile(
                self.FormationEnergyFileName, label=label)
            for cid in ptable.get_all_cids():
                pmatrix = ptable.cid2PseudoisomerMap(cid).ToMatrix()
                if len(pmatrix) != 1:
                    raise Exception("multiple training species for C%05d" %
                                    cid)
                nH, _charge, nMg, dG0 = pmatrix[0]
                diss_table = dissociation.GetDissociationTable(cid, False)
                if diss_table is None:
                    continue
                diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
                dG0_prime = diss_table.Transform(pH=self.pH,
                                                 I=self.I,
                                                 pMg=self.pMg,
                                                 T=self.T)
                ref = ptable.cid2SourceString(cid)

                atom_bag = self.kegg.cid2atom_bag(cid)
                if not atom_bag:
                    continue

                ne = self.kegg.cid2num_electrons(cid)
                elem_ne = 0
                sparse = {cid: 1}
                for elem, count in atom_bag.iteritems():
                    if elem == 'H':
                        continue
                    elem_ne += count * Molecule.GetAtomicNum(elem)
                    elem_cid, elem_coeff = atom2cid[elem]
                    sparse.setdefault(elem_cid, 0)
                    sparse[elem_cid] += -count * elem_coeff

                # use the H element to balance the electrons in the formation
                # reactions (we don't need to balance protons since this is
                # a biochemical reaction, so H+ are 'free').
                H_cid, H_coeff = atom2cid['H']
                sparse[H_cid] = (elem_ne - ne) * H_coeff

                reaction_name = "formation of %s" % self.kegg.cid2name(cid)
                reaction = Reaction(reaction_name, sparse)

                output_csv.writerow(
                    (ref, 'C%05d' % cid, 'formation', 'A', '',
                     reaction_name,
                     reaction.FullReactionString(),
                     reaction.FullReactionString(show_cids=False),
                     '%.2f' % dG0_prime, self.T, self.I, self.pH, self.pMg))
Exemplo n.º 18
0
def CalculateThermo():
    """Estimate formation energies for the molecules given on the command line.

    The molecules come from a single SMILES string, a single InChI, or a CSV
    file with an "ID" column and either an "InChI" or a "smiles" column. For
    each molecule the dissociation table is computed, the most abundant
    species (or any species) is decomposed into groups and its formation
    energy estimated by group contribution.

    With the 'biochemical' option one row with the transformed energy at the
    requested pH, I, pMg and T is produced per molecule; otherwise one row per
    pseudoisomer. Molecules that cannot be decomposed, or that use groups
    without training data, produce a row with an 'error' entry instead.

    The rows are written as CSV to the requested output file, or to stdout.
    """
    parser = MakeOpts()
    options, _ = parser.parse_args(sys.argv)
    pH, I, pMg, T = options.pH, options.I, options.pMg, options.T

    db = SqliteDatabase('../res/gibbs.sqlite')
    G = GroupContribution(db=db)
    G.init()
    ignore_protonations = False

    list_of_mols = []
    if options.smiles:
        list_of_mols.append({
            'id': options.smiles,
            'mol': options.smiles,
            'format': 'smiles'
        })
    elif options.inchi:
        list_of_mols.append({
            'id': options.inchi,
            'mol': options.inchi,
            'format': 'inchi'
        })
    elif options.csv_input_filename:
        # Close the input file as soon as all the rows have been read.
        with open(options.csv_input_filename, 'r') as in_fp:
            for row in csv.DictReader(in_fp):
                if "InChI" in row:
                    list_of_mols.append({
                        'id': row["ID"],
                        'mol': row["InChI"],
                        'format': 'inchi'
                    })
                elif "smiles" in row:
                    list_of_mols.append({
                        'id': row["ID"],
                        'mol': row["smiles"],
                        'format': 'smiles'
                    })
                else:
                    raise Exception(
                        "There must be one molecular ID column: InChI or smiles")
    else:
        parser.error("must use either -s or -c option")

    if options.biochemical:
        print(
            "Calculating biochemical formation energies for %s compounds"
            " at pH = %.1f, I = %.2f, pMg = %.1f, T = %.2f" %
            (len(list_of_mols), pH, I, pMg, T))
    else:
        print("Calculating chemical formation energies for %s compounds" %
              len(list_of_mols))

    rowdicts = []
    for mol_dict in list_of_mols:
        mol_id = mol_dict['id']
        # Reset for every molecule, so that the error handler below can never
        # report the group vector of a previous molecule (or hit an unbound
        # name on the first one).
        groupvec = None
        diss_table = Molecule._GetDissociationTable(mol_dict['mol'],
                                                    fmt=mol_dict['format'])
        try:
            mol = diss_table.GetMostAbundantMol(pH, I, pMg, T) or \
                  diss_table.GetAnyMol()
            if mol is None:
                raise Exception("Cannot convert input string to Molecule: " +
                                mol_dict['mol'])

            decomposition = G.Mol2Decomposition(
                mol, ignore_protonations=ignore_protonations)
            groupvec = decomposition.AsVector()
            dG0 = G.groupvec2val(groupvec)
            nH = decomposition.Hydrogens()
            nMg = decomposition.Magnesiums()
            diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
            pmap = diss_table.GetPseudoisomerMap()

            if options.biochemical:
                # NOTE(review): the argument order here is (pH, pMg, I, T),
                # unlike GetMostAbundantMol(pH, I, pMg, T) above -- confirm
                # against the signature of Transform.
                dG0_prime = pmap.Transform(pH, pMg, I, T)
                rowdicts.append({
                    'ID': mol_id,
                    'pH': pH,
                    'I': I,
                    'pMg': pMg,
                    'dG0\'': "%.1f" % dG0_prime,
                    'groupvec': str(groupvec)
                })
            else:
                for p_nH, p_z, p_nMg, p_dG0 in pmap.ToMatrix():
                    rowdicts.append({
                        'ID': mol_id,
                        'nH': p_nH,
                        'charge': p_z,
                        'nMg': p_nMg,
                        'dG0': "%.1f" % p_dG0,
                        'groupvec': str(groupvec)
                    })
        except GroupDecompositionError:
            rowdicts.append({'ID': mol_id, 'error': "cannot decompose"})
        except GroupMissingTrainDataError:
            rowdict = {'ID': mol_id, 'error': "missing training data"}
            if groupvec is not None:
                rowdict['groupvec'] = str(groupvec)
            rowdicts.append(rowdict)

    if options.csv_output_filename is not None:
        out_fp = open(options.csv_output_filename, 'w')
        print("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        if options.biochemical:
            titles = ['ID', 'error', 'pH', 'I', 'pMg', 'dG0\'', 'groupvec']
        else:
            titles = ['ID', 'error', 'nH', 'nMg', 'charge', 'dG0', 'groupvec']
        csv_writer = csv.DictWriter(out_fp, titles)
        csv_writer.writeheader()
        csv_writer.writerows(rowdicts)
    finally:
        # Close (and thereby flush) the output file, but never stdout.
        if out_fp is not sys.stdout:
            out_fp.close()
Exemplo n.º 19
0
import unittest
from pygibbs.groups_data import GroupsData
from pygibbs import group_decomposition
from toolbox.molecule import Molecule
import logging

# Molecules shared by the tests below, parsed once at import time.

# A single phosphate group.
PHOSPHATE = Molecule.FromSmiles('[O-]P([O-])(=O)O')

# Adenosine carrying a chain of three phosphate groups.
ATP = Molecule.FromSmiles(
    'C1=NC2=C(C(=N1)N)N=CN2C3C(C(C(O3)'
    'COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])O)O)O')

# The same adenosine moiety with a chain of four phosphate groups.
A4P = Molecule.FromSmiles(
    'C1=NC2=C(C(=N1)N)N=CN2C3C(C(C(O3)'
    'COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])OP(=O)([O-])O)O)O')


class GroupsDecompositionTest(unittest.TestCase):
    """Tests for GroupsDecomposition"""
    def setUp(self):
        """Build the group decomposer from the groups CSV file."""
        # Close the CSV handle once the decomposer has been built instead of
        # leaking one open file per test.
        # NOTE(review): assumes FromGroupsFile reads the whole file before
        # returning -- confirm.
        with open('../data/thermodynamics/groups_species.csv', 'r') as groups_fp:
            self.groups_decomposer = group_decomposition.GroupDecomposer.FromGroupsFile(
                groups_fp)

    def testFindPhosphateChains(self):
        """A lone phosphate must not yield any phosphate-chain node sets."""
        ps = group_decomposition.GroupDecomposer.FindPhosphateChains(
            PHOSPHATE, ignore_protonations=False)

        for unused_grp, l in ps:
            self.assertTrue(not l)

        # Helpers that turn a FindPhosphateChains() result into a dict, or
        # into a summary string of the non-empty entries.
        # NOTE(review): they are not used within the visible part of this
        # test -- confirm whether the remaining checks are missing.
        mk_ps_dict = lambda ps: dict((key, l) for key, l in ps)
        mk_ps_string = lambda ps: ', '.join(
            ["%s x %d" % (str(key), len(l)) for key, l in ps if l != []])
Exemplo n.º 20
0
                logging.error(e)
                continue

        return

    atp = 'C1=NC2=C(C(=N1)N)N=CN2C3C(C(C(O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O'
    coa = 'C1C=CN(C=C1C(=O)N)C2C(C(C(O2)COP(=O)(O)OP(=O)(O)OCC3C(C(C(O3)N4C=NC5=C4N=CN=C5N)O)O)O)O'
    glucose = 'C(C1C(C(C(C(O1)O)O)O)O)O'
    mgatp = 'C([C@@H]1[C@H]([C@H]([C@H](n2cnc3c(N)[nH+]cnc23)O1)O)O)OP(=O)([O-])OP(=O)([O-])OP(=O)([O-])[O-].[Mg+2].[Mg+2]'

    #smiless = [
    #           ('ATP', atp),
    #           ('CoA', coa), ('Glucose', glucose), ('MgAtp', mgatp),
    #           ]
    smiless = [('ATP', atp)]
    mols = [(name, Molecule.FromSmiles(s)) for name, s in smiless]

    for name, mol in mols:
        print name
        decomposition = decomposer.Decompose(mol)
        print decomposition.ToTableString()
        print 'Group count', decomposition.group_count
        print 'Net charge', decomposition.net_charge
        print 'Hydrogens', decomposition.hydrogens
        print 'Magnesiums', decomposition.magnesiums

        print 'Group Vector:'
        print decomposition.AsVector()

        print 'Pseudoisomer Vectors:'
        for v in decomposition.PseudoisomerVectors():
Exemplo n.º 21
0
def main():
    """Print the result of EnzymeMarketplace.React() for oxalic acid."""
    oxalic_acid = Molecule.FromSmiles('C(O)(=O)C(=O)O')
    marketplace = EnzymeMarketplace()
    products = marketplace.React(oxalic_acid)
    print(products)
Exemplo n.º 22
0
        sum_conc = 0
        for Ka_subset in itertools.combinations(Ka_list, i+1): # all choices of i values from the Ka list
            sum_conc += np.prod(Ka_subset)
        relative_conc.append(sum_conc)
        Ka_i = relative_conc[i+1] / relative_conc[i]
        transformed_pKas.append(-np.log10(Ka_i))
    
    return transformed_pKas

if __name__ == "__main__":
    # Demo 1: transform three identical pKa values and show both lists.
    diss_table_example = [4.0, 4.0, 4.0]
    new_diss_table = _TransformMultiples(diss_table_example)
    print(diss_table_example)
    print(new_diss_table)

    # Demo 2: for a few compounds, print the first field of every
    # dissociation-table entry without and with the transformation of
    # multiples, side by side.
    from toolbox.molecule import Molecule
    compound_list = [
        ('glycine', 'C(=O)(O)CN'),
        ('CO2', 'O=C=O'),
        ('ATP', 'Nc1ncnc2n(cnc12)C1OC(COP([O-])(=O)OP([O-])(=O)OP(O)([O-])=O)C(O)C1O'),
        ('3-Ketoarabinitol', 'OCC(O)C(C(O)CO)=O'),
    ]

    for name, smiles in compound_list:
        diss_table1, major_ms = GetDissociationConstants(
            smiles, transform_multiples=False)
        diss_table2, major_ms = GetDissociationConstants(
            smiles, transform_multiples=True)
        m = Molecule.FromSmiles(major_ms)
        print("%s %s" % (name, m.ToInChI()))
        for i, entry1 in enumerate(diss_table1):
            print("%.2f %.2f" % (entry1[0], diss_table2[i][0]))
Exemplo n.º 23
0
def CalculateThermo():
    """Estimate formation energies for all the molecules of an SDF file.

    Loads the unified group contribution data, then for every molecule of
    options.sdf_input_filename: removes the hydrogens, decomposes it strictly
    into groups, estimates dG0 from the group contributions and writes one CSV
    row per pseudoisomer (ID, error, nH, nMg, charge, dG0, kernel).

    Molecules that cannot be converted or decomposed, have more than 200
    atoms, or lack pKa data get a single row carrying the error message.
    The output goes to options.csv_output_filename, or to stdout.
    """
    options, _ = MakeOpts().parse_args(sys.argv)

    if options.csv_output_filename is not None:
        out_fp = open(options.csv_output_filename, 'w')
        print("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        csv_writer = csv.writer(out_fp)
        csv_writer.writerow(
            ['ID', 'error', 'nH', 'nMg', 'charge', 'dG0', 'kernel'])

        db = SqliteDatabase('../res/gibbs.sqlite', 'w')
        ugc = UnifiedGroupContribution(db)
        ugc.LoadGroups(True)
        ugc.LoadObservations(True)
        ugc.LoadGroupVectors(True)
        ugc.LoadData(True)

        result_dict = ugc._GetContributionData(ugc.S.copy(), ugc.cids,
                                               ugc.b.copy(), ugc.anchored)

        g_pgc = result_dict['group_contributions']
        P_L_pgc = result_dict['pgc_conservations']

        sdfile = pybel.readfile("sdf", options.sdf_input_filename)
        for m in sdfile:
            try:
                try:
                    mol = Molecule.FromOBMol(m.OBMol)
                except OpenBabelError:
                    raise UnknownReactionEnergyError(
                        "Cannot convert to OBMol object")

                mol.title = m.title
                mol.RemoveHydrogens()
                if mol.GetNumAtoms() > 200:
                    raise UnknownReactionEnergyError(
                        "Compound contains more than 200 atoms (n = %d)" %
                        mol.GetNumAtoms())

                try:
                    decomposition = ugc.group_decomposer.Decompose(
                        mol, ignore_protonations=False, strict=True)
                except GroupDecompositionError:
                    raise UnknownReactionEnergyError("cannot decompose")

                groupvec = decomposition.AsVector()
                gv = np.matrix(groupvec.Flatten())
                dG0 = float(g_pgc * gv.T)
                nH = decomposition.Hydrogens()
                nMg = decomposition.Magnesiums()
                ker = list((P_L_pgc * gv.T).round(10).flat)
                try:
                    diss_table = mol.GetDissociationTable()
                    diss_table.SetFormationEnergyByNumHydrogens(dG0=dG0,
                                                                nH=nH,
                                                                nMg=nMg)
                except MissingDissociationConstantError:
                    raise UnknownReactionEnergyError("missing pKa data")
                pmap = diss_table.GetPseudoisomerMap()
                for p_nH, p_z, p_nMg, p_dG0 in pmap.ToMatrix():
                    # Follow the header order (nH, nMg, charge); the charge
                    # and nMg values used to be written in each other's
                    # column.
                    csv_writer.writerow([
                        m.title, None, p_nH, p_nMg, p_z,
                        round(p_dG0, 1),
                        str(ker)
                    ])

            except UnknownReactionEnergyError as e:
                csv_writer.writerow(
                    [m.title, str(e), None, None, None, None, None])

            # Flush after every molecule so partial results are visible.
            out_fp.flush()
    finally:
        # Close the output file, but never stdout.
        if out_fp is not sys.stdout:
            out_fp.close()
Exemplo n.º 24
0
def CalculateThermo():
    """Estimate formation energies for the molecules given on the command line.

    The molecules come from a single SMILES string, a single InChI, or a CSV
    file with an "ID" column and either an "InChI" or a "smiles" column. For
    each molecule the dissociation table is computed, the most abundant
    species (or any species) is decomposed into groups and its formation
    energy estimated by group contribution.

    With the 'biochemical' option one row with the transformed energy at the
    requested pH, I, pMg and T is produced per molecule; otherwise one row per
    pseudoisomer. Molecules that cannot be decomposed, or that use groups
    without training data, produce a row with an 'error' entry instead.

    The rows are written as CSV to the requested output file, or to stdout.
    """
    parser = MakeOpts()
    options, _ = parser.parse_args(sys.argv)
    pH, I, pMg, T = options.pH, options.I, options.pMg, options.T

    db = SqliteDatabase('../res/gibbs.sqlite')
    G = GroupContribution(db=db)
    G.init()
    ignore_protonations = False

    list_of_mols = []
    if options.smiles:
        list_of_mols.append({'id':options.smiles, 'mol':options.smiles,
            'format':'smiles'})
    elif options.inchi:
        list_of_mols.append({'id':options.inchi, 'mol':options.inchi,
            'format':'inchi'})
    elif options.csv_input_filename:
        # Close the input file as soon as all the rows have been read.
        with open(options.csv_input_filename, 'r') as in_fp:
            for row in csv.DictReader(in_fp):
                if "InChI" in row:
                    list_of_mols.append({'id':row["ID"], 'mol':row["InChI"],
                                         'format':'inchi'})
                elif "smiles" in row:
                    list_of_mols.append({'id':row["ID"], 'mol':row["smiles"],
                                         'format':'smiles'})
                else:
                    raise Exception("There must be one molecular ID column: InChI or smiles")
    else:
        parser.error("must use either -s or -c option")

    if options.biochemical:
        print ("Calculating biochemical formation energies for %s compounds"
               " at pH = %.1f, I = %.2f, pMg = %.1f, T = %.2f" %
               (len(list_of_mols), pH, I, pMg, T))
    else:
        print ("Calculating chemical formation energies for %s compounds" %
               len(list_of_mols))

    rowdicts = []
    for mol_dict in list_of_mols:
        mol_id = mol_dict['id']
        # Reset for every molecule, so that the error handler below can never
        # report the group vector of a previous molecule (or hit an unbound
        # name on the first one).
        groupvec = None
        diss_table = Molecule._GetDissociationTable(mol_dict['mol'],
                                                    fmt=mol_dict['format'])
        try:
            mol = diss_table.GetMostAbundantMol(pH, I, pMg, T) or \
                  diss_table.GetAnyMol()
            if mol is None:
                raise Exception("Cannot convert input string to Molecule: " +
                                mol_dict['mol'])

            decomposition = G.Mol2Decomposition(mol,
                ignore_protonations=ignore_protonations)
            groupvec = decomposition.AsVector()
            dG0 = G.groupvec2val(groupvec)
            nH = decomposition.Hydrogens()
            nMg = decomposition.Magnesiums()
            diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
            pmap = diss_table.GetPseudoisomerMap()

            if options.biochemical:
                # NOTE(review): the argument order here is (pH, pMg, I, T),
                # unlike GetMostAbundantMol(pH, I, pMg, T) above -- confirm
                # against the signature of Transform.
                dG0_prime = pmap.Transform(pH, pMg, I, T)
                rowdicts.append({'ID':mol_id, 'pH':pH, 'I':I, 'pMg':pMg,
                                 'dG0\'':"%.1f" % dG0_prime, 'groupvec':str(groupvec)})
            else:
                for p_nH, p_z, p_nMg, p_dG0 in pmap.ToMatrix():
                    rowdicts.append({'ID':mol_id, 'nH':p_nH, 'charge':p_z, 'nMg':p_nMg,
                                     'dG0':"%.1f" % p_dG0, 'groupvec':str(groupvec)})
        except GroupDecompositionError:
            rowdicts.append({'ID':mol_id, 'error':"cannot decompose"})
        except GroupMissingTrainDataError:
            rowdict = {'ID':mol_id, 'error':"missing training data"}
            if groupvec is not None:
                rowdict['groupvec'] = str(groupvec)
            rowdicts.append(rowdict)

    if options.csv_output_filename is not None:
        out_fp = open(options.csv_output_filename, 'w')
        print ("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        if options.biochemical:
            titles = ['ID', 'error', 'pH', 'I', 'pMg', 'dG0\'', 'groupvec']
        else:
            titles = ['ID', 'error', 'nH', 'nMg', 'charge', 'dG0', 'groupvec']
        csv_writer = csv.DictWriter(out_fp, titles)
        csv_writer.writeheader()
        csv_writer.writerows(rowdicts)
    finally:
        # Close (and thereby flush) the output file, but never stdout.
        if out_fp is not sys.stdout:
            out_fp.close()