def get_atom_vector(self):
    """
        Returns a 1-D NumPy integer array describing the number of atoms
        from each element (the index is the atomic number of that element).
        The first entry (index=0) contains the number of electrons.

        Returns None when:
          - the compound has no atom bag,
          - the atom bag contains a wildcard element ('R' or 'X'),
          - an element has no atomic number according to Molecule
            (a warning is logged in that case).
    """
    atom_bag = self.get_atom_bag()
    if not atom_bag:
        return None

    # one slot per element, plus slot 0 which is reserved for the electrons
    atom_vector = np.zeros((Molecule.GetNumberOfElements() + 1), dtype='int')
    for elem, count in atom_bag.iteritems():
        if elem in ['R', 'X']:
            return None  # wildcard compound!
        an = Molecule.GetAtomicNum(elem)
        if not an:
            # Pass the values as separate lazy-format arguments; wrapping
            # them in a single tuple makes '%05d' receive a tuple and the
            # logging module fails to format the record.
            logging.warning("Unsupported element in (C%05d): %s",
                            self.cid, elem)
            return None
        atom_vector[an] = count
    atom_vector[0] = self.get_num_electrons()
    return atom_vector
def get_num_electrons(self):
    """
        Return the putative number of electrons in the molecule.

        When a molecule object is available its own electron count is
        returned. Otherwise the count is derived from the atom bag, and
        None is returned if there is no atom bag either.
    """
    molecule = self.GetMolecule()
    if molecule:
        return molecule.GetNumElectrons()

    # Without an InChI, trust self.formula and assume a charge of 0, so the
    # number of electrons equals the total number of protons.
    bag = self.get_atom_bag()
    if not bag:
        return None
    return sum(n_atoms * Molecule.GetAtomicNum(symbol)
               for symbol, n_atoms in bag.iteritems())
def ConvertFormation2Reaction(self, output_fname):
    """
        Rewrite every formation energy as a formation reaction from the
        elements and write the result as a CSV file.

        For each label ('training', 'testing') the formation energies are
        read from self.FormationEnergyFileName. Every compound is turned
        into a reaction between the compound and the KEGG compounds that
        represent its elements (taken from KeggObservation.ATOM2ELEMENT),
        and one row holding the transformed energy (at self.pH, self.I,
        self.pMg, self.T) is written per compound. Compounds with no
        dissociation table or no atom bag are skipped.

        Arguments:
            output_fname - path of the CSV file to write (overwritten).

        Raises:
            Exception - if an element name cannot be found in KEGG, or if
                        a compound does not have exactly one pseudoisomer
                        row in the formation energy table.
    """
    logging.info("Converting all formation energies to reactions")

    # The context manager guarantees the file is flushed and closed, even
    # when one of the exceptions below aborts the conversion.
    with open(output_fname, 'w') as output_file:
        output_csv = csv.writer(output_file)

        # keep the format used for TECRDB
        output_csv.writerow(
            ('ref', 'ID', 'method', 'eval', 'EC', 'name', 'kegg_reaction',
             'reaction', 'dG0\'', 'T', 'I', 'pH', 'pMg'))

        # atom symbol -> (KEGG compound ID of the element, stoichiometry)
        atom2cid = {}
        for atom, (name, stoich) in KeggObservation.ATOM2ELEMENT.iteritems():
            cid, _, _ = self.kegg.name2cid(name, 0)
            if cid is None:
                raise Exception(
                    "Cannot find the element %s in the KEGG database" % name)
            atom2cid[atom] = (cid, stoich)
            #output_csv.writerow(('element',
            #    'C%05d' % cid, 'formation', 'A', '',
            #    'formation of %s' % self.kegg.cid2name(cid),
            #    "C%05d" % cid,
            #    name, 0, self.T, self.I, self.pH, self.pMg))

        for label in ['training', 'testing']:
            ptable = PsuedoisomerTableThermodynamics.FromCsvFile(
                self.FormationEnergyFileName, label=label)
            for cid in ptable.get_all_cids():
                pmatrix = ptable.cid2PseudoisomerMap(cid).ToMatrix()
                if len(pmatrix) != 1:
                    raise Exception(
                        "multiple training species for C%05d" % cid)
                nH, _charge, nMg, dG0 = pmatrix[0]

                diss_table = dissociation.GetDissociationTable(cid, False)
                if diss_table is None:
                    continue
                diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
                dG0_prime = diss_table.Transform(pH=self.pH, I=self.I,
                                                 pMg=self.pMg, T=self.T)
                ref = ptable.cid2SourceString(cid)

                atom_bag = self.kegg.cid2atom_bag(cid)
                if not atom_bag:
                    continue

                ne = self.kegg.cid2num_electrons(cid)
                elem_ne = 0  # electrons contributed by the non-H elements
                sparse = {cid: 1}
                for elem, count in atom_bag.iteritems():
                    if elem == 'H':
                        continue
                    elem_ne += count * Molecule.GetAtomicNum(elem)
                    elem_cid, elem_coeff = atom2cid[elem]
                    sparse[elem_cid] = (sparse.get(elem_cid, 0)
                                        - count * elem_coeff)

                # use the H element to balance the electrons in the
                # formation reactions (we don't need to balance protons
                # since this is a biochemical reaction, so H+ are 'free').
                H_cid, H_coeff = atom2cid['H']
                sparse[H_cid] = (elem_ne - ne) * H_coeff

                reaction_name = "formation of %s" % self.kegg.cid2name(cid)
                reaction = Reaction(reaction_name, sparse)

                output_csv.writerow(
                    (ref, 'C%05d' % cid, 'formation', 'A', '',
                     reaction_name,
                     reaction.FullReactionString(),
                     reaction.FullReactionString(show_cids=False),
                     '%.2f' % dG0_prime, self.T, self.I, self.pH, self.pMg))