Пример #1
0
    def get_atom_vector(self):
        """
            Return a NumPy row array describing the number of atoms
            from each element (the column index is the atomic number
            of that element).
            The first column (index=0) contains the number of electrons.

            Returns None when the compound has no atom bag, when it
            contains a wildcard element ('R' or 'X'), or when
            Molecule.GetAtomicNum() gives no atomic number for one of
            its elements.
        """
        atom_bag = self.get_atom_bag()
        if not atom_bag:
            return None

        atom_vector = np.zeros((Molecule.GetNumberOfElements() + 1),
                               dtype='int')
        # dict.items() is available on both Python 2 and Python 3
        for elem, count in atom_bag.items():
            if elem in ('R', 'X'):
                return None  # wildcard compound!
            an = Molecule.GetAtomicNum(elem)
            if not an:
                # The values are passed as separate arguments: wrapping them
                # in one tuple would hand logging a single argument for two
                # placeholders, and the message would fail to format.
                logging.warning("Unsupported element in (C%05d): %s",
                                self.cid, elem)
                return None
            atom_vector[an] = count

        # column 0 has no element assigned to it, so it stores the electrons
        atom_vector[0] = self.get_num_electrons()
        return atom_vector
Пример #2
0
    def get_num_electrons(self):
        """Return the putative number of electrons in the molecule.

        If self.GetMolecule() yields a molecule, its own electron count is
        returned. Otherwise the atom bag is assumed to be correct and the
        charge is assumed to be 0, so the result is the total number of
        protons: the sum of (atom count * atomic number) over all elements.

        Returns None if there is no atom bag, or if Molecule.GetAtomicNum()
        gives no atomic number (a falsy value) for one of the elements.
        """
        mol = self.GetMolecule()
        if mol:
            return mol.GetNumElectrons()

        # if there is no InChI assume that self.formula is correct and that
        # the charge is 0.
        atom_bag = self.get_atom_bag()
        if not atom_bag:
            return None

        n_protons = 0
        # dict.items() is available on both Python 2 and Python 3
        for elem, count in atom_bag.items():
            an = Molecule.GetAtomicNum(elem)
            if not an:
                # Same convention as get_atom_vector: an element without an
                # atomic number means the count cannot be determined, so
                # report "unknown" instead of failing on the multiplication
                # or silently undercounting.
                return None
            n_protons += count * an
        return n_protons
Пример #3
0
    def ConvertFormation2Reaction(self, output_fname):
        """
            Convert all formation energies to formation reactions and write
            them to a CSV file, keeping the column layout used for TECRDB.

            For every compound in the 'training' and 'testing' tables read
            from self.FormationEnergyFileName, one row is written with the
            reaction that forms the compound from its elements and with the
            formation energy transformed to self.pH, self.I, self.pMg and
            self.T. Compounds that have no dissociation table or no atom
            bag are skipped.

            Arguments:
                output_fname - path of the CSV file to write (opened in 'w'
                               mode, so an existing file is overwritten).

            Raises:
                Exception - if an element listed in
                            KeggObservation.ATOM2ELEMENT cannot be found in
                            KEGG, or if a compound does not have exactly
                            one species in its pseudoisomer matrix.
        """
        logging.info("Converting all formation energies to reactions")

        # The 'with' block guarantees that the output file is flushed and
        # closed, also when one of the lookups below raises.
        with open(output_fname, 'w') as output_file:
            output_csv = csv.writer(output_file)

            # keep the format used for TECRDB
            output_csv.writerow(
                ('ref', 'ID', 'method', 'eval', 'EC', 'name', 'kegg_reaction',
                 'reaction', 'dG0\'', 'T', 'I', 'pH', 'pMg'))

            # map each atom symbol to (CID of its element compound, stoich)
            atom2cid = {}
            for atom, (name, stoich) in KeggObservation.ATOM2ELEMENT.items():
                cid, _, _ = self.kegg.name2cid(name, 0)
                if cid is None:
                    raise Exception(
                        "Cannot find the element %s in the KEGG database"
                        % name)
                atom2cid[atom] = (cid, stoich)

            for label in ['training', 'testing']:
                ptable = PsuedoisomerTableThermodynamics.FromCsvFile(
                    self.FormationEnergyFileName, label=label)
                for cid in ptable.get_all_cids():
                    pmatrix = ptable.cid2PseudoisomerMap(cid).ToMatrix()
                    if len(pmatrix) != 1:
                        raise Exception(
                            "multiple training species for C%05d" % cid)
                    nH, _charge, nMg, dG0 = pmatrix[0]
                    diss_table = dissociation.GetDissociationTable(cid, False)
                    if diss_table is None:
                        continue
                    diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
                    dG0_prime = diss_table.Transform(pH=self.pH,
                                                     I=self.I,
                                                     pMg=self.pMg,
                                                     T=self.T)
                    ref = ptable.cid2SourceString(cid)

                    atom_bag = self.kegg.cid2atom_bag(cid)
                    if not atom_bag:
                        continue

                    ne = self.kegg.cid2num_electrons(cid)
                    elem_ne = 0  # electrons supplied by the non-H elements
                    sparse = {cid: 1}
                    for elem, count in atom_bag.items():
                        if elem == 'H':
                            continue
                        elem_ne += count * Molecule.GetAtomicNum(elem)
                        elem_cid, elem_coeff = atom2cid[elem]
                        sparse[elem_cid] = (sparse.get(elem_cid, 0) -
                                            count * elem_coeff)

                    # use the H element to balance the electrons in the
                    # formation reactions (we don't need to balance protons
                    # since this is a biochemical reaction, so H+ are 'free').
                    H_cid, H_coeff = atom2cid['H']
                    sparse[H_cid] = (elem_ne - ne) * H_coeff

                    reaction_name = ("formation of %s" %
                                     self.kegg.cid2name(cid))
                    reaction = Reaction(reaction_name, sparse)

                    output_csv.writerow(
                        (ref, 'C%05d' % cid, 'formation', 'A', '',
                         reaction_name,
                         reaction.FullReactionString(),
                         reaction.FullReactionString(show_cids=False),
                         '%.2f' % dG0_prime,
                         self.T, self.I, self.pH, self.pMg))