def get_point_weight(mol, probe_rad=1.4):
    """Compute one weight per atom of a molecule for a weighted point set.

    Parameters:
        mol - rdkit.Chem.rdchem.Mol molecule; its conformer positions
              determine how many points there are
        probe_rad - probe radius, default of 1.4 Angstrom

    Returns a numpy array holding, for each atom index, the value
    (VDW radius + probe radius) squared.
    """
    positions = mol.GetConformer().GetPositions()
    table = GetPeriodicTable()
    n_points = positions.shape[0]
    weights = np.zeros(n_points)
    for idx in range(n_points):
        atomic_num = mol.GetAtomWithIdx(int(idx)).GetAtomicNum()
        expanded_radius = table.GetRvdw(atomic_num) + probe_rad
        weights[idx] = expanded_radius**2
    return weights
def ReadAtomSuffix(self, tree, atom):
    """Apply the atom suffix found in ``tree[0]`` and return any radical constraint.

    Supported suffixes:
        '+.' / '-.'  formal charge query of +1 / -1 added to ``atom`` and an
                     ``AtomRadical`` constraint built with ``ConstraintNumber('=1')``
        '+' / '-'    formal charge query of +1 / -1 added to ``atom``
        '.'          ``AtomRadical`` constraint with ``ConstraintNumber('=1')``
        ':'          ``AtomRadical`` constraint with ``ConstraintNumber('=2')``
        ':.'         ``AtomRadical`` constraint with ``ConstraintNumber('=3')``
        '*'          builds a query atom with the same atomic number, a total
                     valence of (default valence + 1) and a formal charge of +1
        '?'          accepted; nothing is done

    Parameters:
        tree - parsed suffix; only ``tree[0]`` is read
        atom - query atom that is expanded in place through ``ExpandQuery``

    Returns the ``AtomRadical`` constraint, or None when the suffix does not
    define one.

    Raises NotImplementedError for any other suffix.
    """
    suffix = tree[0]
    constraint = None
    if suffix == '+.':
        constraint = AtomRadical(False, ConstraintNumber('=1'))
        atom.ExpandQuery(rdqueries.FormalChargeEqualsQueryAtom(1))
    elif suffix == '-.':
        constraint = AtomRadical(False, ConstraintNumber('=1'))
        atom.ExpandQuery(rdqueries.FormalChargeEqualsQueryAtom(-1))
    elif suffix == '+':
        atom.ExpandQuery(rdqueries.FormalChargeEqualsQueryAtom(1))
    elif suffix == '-':
        atom.ExpandQuery(rdqueries.FormalChargeEqualsQueryAtom(-1))
    elif suffix == '.':
        constraint = AtomRadical(False, ConstraintNumber('=1'))
    elif suffix == ':':
        constraint = AtomRadical(False, ConstraintNumber('=2'))
    elif suffix == ':.':
        constraint = AtomRadical(False, ConstraintNumber('=3'))
    elif suffix == '*':
        from rdkit.Chem import GetPeriodicTable
        # NOTE(review): `atom` is rebound to a brand-new query atom here and
        # this method only returns `constraint`, so the object the caller
        # passed in is not modified in this branch - confirm this is intended.
        atomicnum = atom.GetAtomicNum()
        atom = rdqueries.AtomNumEqualsQueryAtom(atomicnum)
        valence = GetPeriodicTable().GetDefaultValence(atomicnum)
        atom.ExpandQuery(rdqueries.TotalValenceEqualsQueryAtom(valence + 1))
        atom.ExpandQuery(rdqueries.FormalChargeEqualsQueryAtom(1))
    elif suffix == '?':
        pass
    else:
        message = "Unsupported atom suffix: '" + suffix + "'"
        raise NotImplementedError(message)
    return constraint
def __call__(self,comb_mol,mapped_index):
    """Apply this object's stored radical count and formal charge to one atom.

    The atom is looked up in ``comb_mol`` at ``mapped_index[self.idx]``.
    When ``self.valence`` is non-zero, a query atom with the same atomic
    number is built instead, constrained to a total valence of
    (default valence + ``self.valence``) and a formal charge of
    ``self.charge``, and handed to ``comb_mol.ReplaceAtom``.

    Parameters:
        comb_mol - molecule providing ``GetAtomWithIdx`` and ``ReplaceAtom``
        mapped_index - indexable mapping from ``self.idx`` to an atom index
                       in ``comb_mol``
    """
    atom = comb_mol.GetAtomWithIdx(mapped_index[self.idx])
    if self.valence != 0:
        from rdkit.Chem import GetPeriodicTable
        atomicnum = atom.GetAtomicNum()
        # From here on `atom` refers to the new query atom, not the atom
        # fetched from comb_mol above.
        atom = rdqueries.AtomNumEqualsQueryAtom(atomicnum)
        valence = GetPeriodicTable().GetDefaultValence(atomicnum)
        atom.ExpandQuery(rdqueries.TotalValenceEqualsQueryAtom(valence+self.valence))
        atom.ExpandQuery(rdqueries.FormalChargeEqualsQueryAtom(self.charge))
        # NOTE(review): the atom was looked up with mapped_index[self.idx] but
        # is replaced at self.idx - confirm which index is intended.
        comb_mol.ReplaceAtom(self.idx,atom)
    # NOTE(review): after the branch above these setters act on the local
    # query atom; whether that also updates the atom stored in comb_mol
    # depends on ReplaceAtom's copy semantics - verify.
    atom.SetNumRadicalElectrons(self.radical)
    atom.SetFormalCharge(self.charge)
def toStringRep(v):
    """Generate a string representation of a molvector

    The vector is read in consecutive chunks of ``atom_size + bond_chunk_size``
    entries. Each chunk is unpacked into exactly eleven values: element,
    charge, hydrogen count and four (bond type, offset) pairs, so the chunk
    size must be 11 or the unpacking raises ``ValueError``. Trailing entries
    that do not fill a whole chunk are ignored.

    Per chunk the output contains the element symbol right-aligned to two
    characters with spaces replaced by ``_``, then ``str(c)``, ``str(h)``, and
    for each bond pair its ``bond_symbols`` entry followed by the offset
    formatted as four zero-padded digits.

    :param v: molvector
    :return: the concatenated string for all chunks
    """
    p = GetPeriodicTable()
    chunksize = atom_size + bond_chunk_size
    nchunks = len(v) // chunksize
    out = []
    for i in range(nchunks):
        start = i * chunksize
        el, c, h, b1, o1, b2, o2, b3, o3, b4, o4 = v[start:start + chunksize]
        out.append(("%2s" % p.GetElementSymbol(el)).replace(" ", "_"))
        # Charge and hydrogen count are written with str(), so values of 10
        # or more would no longer fit in a single character.
        assert c < 10
        out.append(str(c))
        assert h < 10
        out.append(str(h))
        for btype, o in ((b1, o1), (b2, o2), (b3, o3), (b4, o4)):
            out.append(bond_symbols[btype])
            out.append("%04d" % o)
    return "".join(out)
Element objects for each chemical element (1-112) have also been declared as module-level variables, using each element's symbol as its variable name. The :meth:`getElement` method can also be used to retrieve the :class:`Element` object associated with an atomic number or symbol. Generally applications will want to use these objects, both to conserve memory and to make for easy comparisons. """ import cython from rdkit.Chem import GetPeriodicTable from rmgpy.exceptions import ElementError ################################################################################ _rdkit_periodic_table = GetPeriodicTable() class Element: """ A chemical element. The attributes are: ============= =============== ================================================ Attribute Type Description ============= =============== ================================================ `number` ``int`` The atomic number of the element `symbol` ``str`` The symbol used for the element `name` ``str`` The IUPAC name of the element `mass` ``float`` The mass of the element in kg/mol `covRadius` ``float`` Covalent bond radius in Angstrom `isotope` ``int`` The isotope integer of the element
type=str, default=None, help='Path to molecules to use as seeds') # Parse the arguments args = arg_parser.parse_args() run_params = args.__dict__ # Get the list of elements # We want those where SMILES supports implicit valences mpnn_dir = os.path.join('notebooks', 'mpnn-training') with open(os.path.join(mpnn_dir, 'atom_types.json')) as fp: atom_types = json.load(fp) with open(os.path.join(mpnn_dir, 'bond_types.json')) as fp: bond_types = json.load(fp) pt = GetPeriodicTable() if len(args.elements) == 0: elements = [pt.GetElementSymbol(i) for i in atom_types] else: elements = args.elements elements = [e for e in elements if MolFromSmiles(e) is not None] logger.info(f'Using {len(elements)} elements: {elements}') # Prepare the one-shot model. We the molecules to compare against and the comparison model with open(os.path.join('seed-molecules', 'top_100_pIC50.json')) as fp: comparison_mols = [convert_smiles_to_nx(s) for s in json.load(fp)] oneshot_dir = 'similarity' oneshot_model = load_model(os.path.join(oneshot_dir, 'oneshot_model.h5'), custom_objects=custom_objects) with open(os.path.join(oneshot_dir, 'atom_types.json')) as fp: os_atom_types = json.load(fp)
def __init__(self):
    """Fetch the periodic table and keep it on the instance."""
    periodic_table = GetPeriodicTable()
    # The attribute is spelled `PeriodicaTable`; the name is kept as-is
    # because code outside this method may read it.
    self.PeriodicaTable = periodic_table
def getOriginalIdentifiers(mol,
                           prop=[
                               'AtomicNumber', 'AtomicMass',
                               'TotalConnections', 'HCount',
                               'HeavyNeighborCount', 'FormalCharge',
                               'DeltaMass', 'IsTerminalAtom', 'SolidAngle',
                               'SolidAngleValue', 'SolidAngleSign'
                           ],
                           sa_dict=None,
                           includeAtoms=None,
                           radius=2,
                           hash_type='str',
                           idf_power=64):
    """Compute the original identifiers for atoms in a molecule based on atomic properties.

    Note it only includes HEAVY atoms (atomic number > 1).

    Parameters:
        mol - rdkit.Chem.rdchem.Mol molecule
        prop - atomic property list (only read, never modified)
            'AtomicNumber': the atomic number of atom
            'AtomicMass': the mass of atom
            'TotalConnections': the degree of the atom (atom.GetDegree())
            'HCount': the number of explicit hydrogens (atom.GetNumExplicitHs())
            'HeavyNeighborCount': the number of heavy (non-hydrogen) neighbor atoms
            'FormalCharge': the formal charge of atom
            'DeltaMass': the difference between atomic mass and atomic weight
                (weighted average of atomic masses)
            'IsTerminalAtom': 1 if the atom has degree 1, else 0
            'SolidAngle': the solid angle of the atom on the molecule surface
                (> 0: convex, < 0: concave)
            'SolidAngleValue': the absolute solid angle of the atom on the molecule surface
            'SolidAngleSign': the sign of solid angle of the atom ('-', '0', '+')
            Only ONE solid-angle entry is appended per atom, with priority
            'SolidAngle' > 'SolidAngleValue' > 'SolidAngleSign'.
        sa_dict - a dictionary mapping atom indices to their solid angles;
            when None, every atom is treated as having no solid angle
            (value 0, sign '0')
        includeAtoms - atom indices for getting identifiers (default: all atoms)
        radius - ECFP radius, only calculates the identifiers of atoms in the
            neighborhoods (of radius) of included atoms (includeAtoms)
        hash_type - type for hash the properties, can be 'str' or 'vec'
        idf_power - power for the 'str' hash type (default 64-bit integers)

    Returns a dictionary mapping (atom index, 0) to an integer representing
    the atomic properties of that heavy atom. If hash_type is neither 'str'
    nor 'vec', a message is printed and the dictionary built so far is
    returned.
    """
    tbl = GetPeriodicTable()
    idf_dict = {}
    nAtoms = mol.GetNumAtoms()
    if includeAtoms is None:
        indices = range(nAtoms)
    else:
        indices = includeAtoms
    as_vec = hash_type == 'vec'
    uses_solid_angle = any('SolidAngle' in p for p in prop)
    for i in indices:
        index = int(i)
        env = list(
            Chem.FindAtomEnvironmentOfRadiusN(mol, radius, index, useHs=True))
        # Atoms touched by the bonds of the environment around `index`.
        env_aids = set(
            [mol.GetBondWithIdx(bid).GetBeginAtomIdx() for bid in env] +
            [mol.GetBondWithIdx(bid).GetEndAtomIdx() for bid in env])
        for aid in env_aids:
            if (aid, 0) in idf_dict:
                continue
            atom = mol.GetAtomWithIdx(aid)
            if atom.GetAtomicNum() <= 1:
                continue
            properties = []
            if 'AtomicNumber' in prop:
                properties.append(atom.GetAtomicNum())
            if 'AtomicMass' in prop:
                mass = atom.GetMass()
                properties.append(mass if as_vec else '%.2f' % mass)
            if 'TotalConnections' in prop:
                properties.append(atom.GetDegree())
            if 'HCount' in prop:
                properties.append(atom.GetNumExplicitHs())
            if 'HeavyNeighborCount' in prop:
                properties.append(
                    sum(1 for bond in atom.GetBonds()
                        if bond.GetOtherAtom(atom).GetAtomicNum() > 1))
            if 'FormalCharge' in prop:
                charge = atom.GetFormalCharge()
                properties.append(charge if as_vec else '%.2f' % charge)
            if 'DeltaMass' in prop:
                delta = atom.GetMass() - tbl.GetAtomicWeight(
                    atom.GetAtomicNum())
                properties.append(delta if as_vec else '%.2f' % delta)
            if 'IsTerminalAtom' in prop:
                properties.append(1 if atom.GetDegree() == 1 else 0)
            if uses_solid_angle:
                # Without a solid-angle dictionary every atom is handled
                # like an atom whose solid angle is None.
                sa = None if sa_dict is None else sa_dict[aid]
                solang = 0 if (sa is None) else sa
                if 'SolidAngle' in prop:
                    properties.append(solang if as_vec else '%.2f' % solang)
                elif 'SolidAngleValue' in prop:
                    properties.append(
                        abs(solang) if as_vec else '%.2f' % abs(solang))
                else:
                    solang_sign = '0' if (sa in [None, 0]) else (
                        '+' if sa > 0 else '-')
                    properties.append(solang_sign)
            if hash_type == 'str':
                idf = hash_ecfp(ecfp=','.join([str(p) for p in properties]),
                                power=idf_power)
            elif hash_type == 'vec':
                idf = hash(tuple(properties))
            else:
                print('Wrong hash type!')
                return idf_dict
            idf_dict[(aid, 0)] = idf
    return idf_dict
def get_atom_proplist(mol,
                      sa_dict=None,
                      aids=[],
                      base_prop=['AtomicMass'],
                      hash_type='str'):
    """
    Accumulate properties over a set of atoms in mol (indexed by aids).

    Parameters:
        mol - a rdkit.Chem.rdchem.Mol molecule
        sa_dict - a dictionary mapping atom indices to their solid angles;
            read whenever any entry of base_prop contains 'SolidAngle'
        aids - the indices of atoms
        base_prop - the property list for the computations
        hash_type - type for hash the properties, can be 'str' or 'vec'

    Returns the values of the requested properties, in the fixed order
    AtomicMass, TotalConnections, HCount, HeavyNeighborCount, FormalCharge,
    DeltaMass, SolidAngle, SolidAngleValue, SolidAngleSign:
        'str' - each numeric value is divided by len(aids) and formatted as
                '%.2f'; 'SolidAngleSign' stays '+', '-' or '0'
        'vec' - the accumulated sums, not divided by len(aids)
    An empty aids gives []. Any other hash_type prints a message and returns
    the full dictionary of accumulated values instead of a list.
    """
    table = GetPeriodicTable()
    if len(aids) == 0:
        return []

    totals = {
        'AtomicMass': 0,
        'TotalConnections': 0,
        'HCount': 0,
        'HeavyNeighborCount': 0,
        'FormalCharge': 0,
        'DeltaMass': 0,
        'SolidAngle': 0,
        'SolidAngleValue': 0,
        'SolidAngleSign': ''
    }
    # Any solid-angle flavour needs the summed raw solid angle.
    needs_solid_angle = any('SolidAngle' in name for name in base_prop)

    for aid in aids:
        atom = mol.GetAtomWithIdx(aid)
        if 'AtomicMass' in base_prop:
            totals['AtomicMass'] += atom.GetMass()
        if 'TotalConnections' in base_prop:
            totals['TotalConnections'] += atom.GetDegree()
        if 'HCount' in base_prop:
            totals['HCount'] += atom.GetNumExplicitHs()
        if 'HeavyNeighborCount' in base_prop:
            heavy_neighbors = [
                bond.GetOtherAtom(atom) for bond in atom.GetBonds()
                if bond.GetOtherAtom(atom).GetAtomicNum() > 1
            ]
            totals['HeavyNeighborCount'] += len(heavy_neighbors)
        if 'FormalCharge' in base_prop:
            totals['FormalCharge'] += atom.GetFormalCharge()
        if 'DeltaMass' in base_prop:
            reference_weight = table.GetAtomicWeight(atom.GetAtomicNum())
            totals['DeltaMass'] += (atom.GetMass() - reference_weight)
        if needs_solid_angle:
            angle = sa_dict[aid]
            totals['SolidAngle'] += 0 if (angle is None) else angle

    # Derived solid-angle entries are based on the summed solid angle.
    if 'SolidAngleValue' in base_prop:
        totals['SolidAngleValue'] = abs(totals['SolidAngle'])
    if 'SolidAngleSign' in base_prop:
        summed = totals['SolidAngle']
        if summed == 0:
            totals['SolidAngleSign'] = '0'
        else:
            totals['SolidAngleSign'] = '+' if summed > 0 else '-'

    # get str or vec for later hashing
    selected = {
        name: value
        for (name, value) in totals.items() if name in base_prop
    }
    if hash_type == 'str':
        for name in selected:
            if name != 'SolidAngleSign':
                selected[name] = '%.2f' % (selected[name] / len(aids))
    elif hash_type != 'vec':
        print('Wrong hash type!')
        return totals
    return list(selected.values())
#!/usr/bin/env python

from __future__ import division
import sys
import numpy as np
from rdkit.Chem import GetPeriodicTable

pt = GetPeriodicTable()


def skiplines(openfile, nlines=0):
    '''
    Advance an open file by nlines + 1 lines and return the last one read.
    With the default nlines=0 this simply returns the next line.

    Parameters
    ----------
    openfile: object.
        File object (or any iterator of lines) to advance.
    nlines: int.
        Number of lines to skip before the one that is returned.

    Returns
    -------
    line: string.
        The line reached after consuming nlines + 1 lines.
    '''

    remaining = nlines + 1
    while remaining > 0:
        line = next(openfile)
        remaining -= 1

    return line
import json import pickle import copy import math from rdkit.Chem import (AddHs, MolFromSmiles, inchi, GetPeriodicTable, Conformer, MolToSmiles) from rdkit.Chem.AllChem import (EmbedMultipleConfs, UFFGetMoleculeForceField, MMFFGetMoleculeForceField, MMFFGetMoleculeProperties, GetConformerRMS) from rdkit.Chem.rdmolops import RemoveHs, GetFormalCharge from nff.utils.misc import read_csv, tqdm_enum from nff.data.parallel import gen_parallel PERIODICTABLE = GetPeriodicTable() UFF_ELEMENTS = ['B', 'Al'] DEFAULT_GEOM_COMPARE_TIMEOUT = 300 XYZ_NAME = "{0}_Conf_{1}.xyz" MAX_CONFS = 10000 AU_TO_KCAL = 627.509 KB_KCAL = 0.001985875 def write_xyz(coords, filename, comment): ''' Write an xyz file from coords ''' with open(filename, "w") as f_p: f_p.write(str(len(coords)) + "\n")
def mol_to_dgl(mol):
    """Build a DGL graph with per-atom and per-bond feature tensors from an rdkit mol.

    Node features (stored in ``g.ndata["feat"]``), in order: one-hot atom
    type, one-hot chirality tag, explicit valence, formal charge, one-hot
    hybridization, degree, implicit valence, aromatic flag, explicit H count,
    implicit H count, radical electron count, ring flag, atomic weight and
    van der Waals radius.

    Edge features (stored in ``g.edata["feat"]``), in order: one-hot bond
    type, one-hot bond stereo, conjugation flag and ring flag. One edge is
    added per bond, from its begin atom to its end atom.

    Parameters
    ----------
    mol : rdkit mol

    Returns
    -------
    dgl.graph
    """

    def _one_hot(choices, value):
        # Raises ValueError when `value` is not one of `choices`.
        encoding = [0] * len(choices)
        encoding[choices.index(value)] = 1
        return encoding

    g = dgl.DGLGraph()
    g.add_nodes(mol.GetNumAtoms())
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)

    # Atom features
    table = GetPeriodicTable()
    node_rows = []
    for atom in mol.GetAtoms():
        row = []
        row += _one_hot(ATOM_TYPES, atom.GetSymbol())
        row += _one_hot(CHIRALITY, atom.GetChiralTag())
        row += [atom.GetExplicitValence(), atom.GetFormalCharge()]
        row += _one_hot(HYBRIDIZATION, atom.GetHybridization())
        row += [
            atom.GetDegree(),
            atom.GetImplicitValence(),
            int(atom.GetIsAromatic()),
            atom.GetNumExplicitHs(),
            atom.GetNumImplicitHs(),
            atom.GetNumRadicalElectrons(),
            int(atom.IsInRing()),
            table.GetAtomicWeight(atom.GetSymbol()),
            table.GetRvdw(atom.GetSymbol()),
        ]
        node_rows.append(row)

    for bond in mol.GetBonds():
        g.add_edge(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())

    g.ndata["feat"] = torch.FloatTensor(node_rows)

    # Bond features
    edge_rows = []
    for bond in mol.GetBonds():
        row = _one_hot(BOND_TYPES, bond.GetBondType())
        row += _one_hot(BOND_STEREO, bond.GetStereo())
        row += [float(bond.GetIsConjugated()), float(bond.IsInRing())]
        edge_rows.append(row)

    g.edata["feat"] = torch.FloatTensor(edge_rows)
    return g