def get_atom_proplist(mol,
                      sa_dict=None,
                      aids=[],
                      base_prop=['AtomicMass'],
                      hash_type='str'):
    """
    Accumulate the selected properties over a set of atoms in mol
    (indexed by aids).

    For hash_type 'str' every numeric total is divided by len(aids)
    (i.e. averaged) and formatted with '%.2f'. For hash_type 'vec' the
    un-averaged totals are returned as they are.

    Parameters:
        mol         - a rdkit.Chem.rdchem.Mol molecule
        sa_dict     - a dictionary mapping atom indices to their solid
                      angles; a None dictionary or a None entry counts as 0
        aids        - the indices of atoms
        base_prop   - the property list for the computations, any of
                      'AtomicMass'         sum of atom.GetMass()
                      'TotalConnections'   sum of atom.GetDegree()
                      'HCount'             sum of atom.GetNumExplicitHs()
                      'HeavyNeighborCount' number of bonded neighbours with
                                           atomic number > 1
                      'FormalCharge'       sum of atom.GetFormalCharge()
                      'DeltaMass'          sum of (atom mass - atomic weight
                                           from the periodic table)
                      'SolidAngle'         sum of the solid angles
                      'SolidAngleValue'    abs() of the summed solid angle
                      'SolidAngleSign'     '+', '-' or '0' for the summed
                                           solid angle
        hash_type   - type for hash the properties, can be 'str' or 'vec'

    Returns the computed property list. The values follow the fixed order
    listed above (restricted to the names present in base_prop), not the
    order of base_prop.
        'str' - ['xxx', 'xx.xx', ...] (numbers are recorded as %.2f,
                'SolidAngleSign' is kept as its one-character string)
        'vec' - the list of raw totals
    An empty aids returns []. Any other hash_type prints 'Wrong hash type!'
    and returns the complete dictionary of totals instead of a list.
    """
    # The list defaults in the signature are only read, never mutated.
    if len(aids) == 0:
        return []

    totals = {
        'AtomicMass': 0,
        'TotalConnections': 0,
        'HCount': 0,
        'HeavyNeighborCount': 0,
        'FormalCharge': 0,
        'DeltaMass': 0,
        'SolidAngle': 0,
        'SolidAngleValue': 0,
        'SolidAngleSign': ''
    }

    # Any of the three solid-angle properties needs the summed solid angle.
    needs_solid_angle = any('SolidAngle' in p for p in base_prop)
    # The periodic table is only required for DeltaMass, so it is not
    # fetched unless that property was asked for.
    tbl = GetPeriodicTable() if 'DeltaMass' in base_prop else None

    for aid in aids:
        atom = mol.GetAtomWithIdx(aid)
        if 'AtomicMass' in base_prop:
            totals['AtomicMass'] += atom.GetMass()
        if 'TotalConnections' in base_prop:
            totals['TotalConnections'] += atom.GetDegree()
        if 'HCount' in base_prop:
            totals['HCount'] += atom.GetNumExplicitHs()
        if 'HeavyNeighborCount' in base_prop:
            totals['HeavyNeighborCount'] += sum(
                1 for bond in atom.GetBonds()
                if bond.GetOtherAtom(atom).GetAtomicNum() > 1)
        if 'FormalCharge' in base_prop:
            totals['FormalCharge'] += atom.GetFormalCharge()
        if 'DeltaMass' in base_prop:
            totals['DeltaMass'] += (
                atom.GetMass() - tbl.GetAtomicWeight(atom.GetAtomicNum()))
        if needs_solid_angle:
            # A missing dictionary is handled like a None entry: it adds 0.
            sa = None if sa_dict is None else sa_dict[aid]
            totals['SolidAngle'] += 0 if sa is None else sa

    # Both derived values depend only on the final summed solid angle.
    if 'SolidAngleValue' in base_prop:
        totals['SolidAngleValue'] = abs(totals['SolidAngle'])
    if 'SolidAngleSign' in base_prop:
        ref = totals['SolidAngle']
        totals['SolidAngleSign'] = '0' if ref == 0 else (
            '+' if ref > 0 else '-')

    # get str or vec for later hashing
    selected = {k: v for (k, v) in totals.items() if k in base_prop}
    if hash_type == 'str':
        count = len(aids)
        return [
            v if k == 'SolidAngleSign' else '%.2f' % (v / count)
            for (k, v) in selected.items()
        ]
    if hash_type != 'vec':
        print('Wrong hash type!')
        return totals
    return list(selected.values())
def getOriginalIdentifiers(mol,
                           prop=[
                               'AtomicNumber', 'AtomicMass',
                               'TotalConnections', 'HCount',
                               'HeavyNeighborCount', 'FormalCharge',
                               'DeltaMass', 'IsTerminalAtom', 'SolidAngle',
                               'SolidAngleValue', 'SolidAngleSign'
                           ],
                           sa_dict=None,
                           includeAtoms=None,
                           radius=2,
                           hash_type='str',
                           idf_power=64):
    """Compute the original identifiers for atoms in a molecule based on
    atomic properties. Note it only includes HEAVY atoms.

    Parameters:
        mol             - rdkit.Chem.rdchem.Mol molecule
        prop            - atomic property list
            'AtomicNumber': the atomic number of atom
            'AtomicMass': the mass of atom
            'TotalConnections': the degree of the atom (atom.GetDegree())
            'HCount': the value of atom.GetNumExplicitHs()
            'HeavyNeighborCount': the number of heavy (non-hydrogen)
                neighbor atoms
            'FormalCharge': the formal charge of atom
            'DeltaMass': the difference between atomic mass and atomic
                weight (weighted average of atomic masses)
            'IsTerminalAtom': 1 if the atom has degree 1, otherwise 0
            'SolidAngle': the solid angle of the atom on the molecule
                surface (> 0: convex, < 0: concave)
            'SolidAngleValue': the absolute solid angle of the atom on the
                molecule surface
            'SolidAngleSign': the sign of solid angle of the atom
                ('+', '-' or '0')
            Only ONE solid-angle entry is appended per atom: 'SolidAngle'
            takes precedence over 'SolidAngleValue', which takes precedence
            over 'SolidAngleSign'.
        sa_dict         - a dictionary mapping atom indices to their solid
                          angles; a None dictionary or a None entry counts
                          as a solid angle of 0
        includeAtoms    - atom indices for getting identifiers (all atoms
                          of mol when None)
        radius          - ECFP radius, only calculates the identifiers of
                          atoms in the neighborhoods (of radius) of
                          included atoms (includeAtoms)
        hash_type       - type for hash the properties, can be 'str' or
                          'vec'
        idf_power       - power for the 'str' hash type (default 64-bit
                          integers)

    Returns a dictionary mapping (atom index, 0) of each visited heavy atom
    to an integer representing the atomic properties. For any other
    hash_type 'Wrong hash type!' is printed and an empty dictionary is
    returned.
    """
    idf_dict = {}

    # Reject an unsupported hash type before doing any work. Nothing is ever
    # stored for such a value, so the result is always the empty dictionary.
    if hash_type not in ('str', 'vec'):
        print('Wrong hash type!')
        return idf_dict

    as_vec = hash_type == 'vec'
    needs_solid_angle = any('SolidAngle' in p for p in prop)
    # The periodic table is only required for DeltaMass.
    tbl = GetPeriodicTable() if 'DeltaMass' in prop else None

    if includeAtoms is None:
        indices = range(mol.GetNumAtoms())
    else:
        indices = includeAtoms

    for i in indices:
        index = int(i)
        env = Chem.FindAtomEnvironmentOfRadiusN(
            mol, radius, index, useHs=True)

        # Atoms are collected from the end points of the environment bonds.
        # NOTE(review): when the environment contains no bonds, the centre
        # atom itself gets no identifier - confirm this is intended.
        env_aids = set()
        for bid in env:
            bond = mol.GetBondWithIdx(bid)
            env_aids.add(bond.GetBeginAtomIdx())
            env_aids.add(bond.GetEndAtomIdx())

        for aid in env_aids:
            if (aid, 0) in idf_dict:
                continue
            atom = mol.GetAtomWithIdx(aid)
            if atom.GetAtomicNum() <= 1:
                # skip hydrogens: identifiers cover heavy atoms only
                continue

            properties = []
            if 'AtomicNumber' in prop:
                properties.append(atom.GetAtomicNum())
            if 'AtomicMass' in prop:
                mass = atom.GetMass()
                properties.append(mass if as_vec else '%.2f' % mass)
            if 'TotalConnections' in prop:
                properties.append(atom.GetDegree())
            if 'HCount' in prop:
                properties.append(atom.GetNumExplicitHs())
            if 'HeavyNeighborCount' in prop:
                properties.append(
                    sum(1 for bond in atom.GetBonds()
                        if bond.GetOtherAtom(atom).GetAtomicNum() > 1))
            if 'FormalCharge' in prop:
                charge = atom.GetFormalCharge()
                properties.append(charge if as_vec else '%.2f' % charge)
            if 'DeltaMass' in prop:
                delta = atom.GetMass() - tbl.GetAtomicWeight(
                    atom.GetAtomicNum())
                # This append used to be called without an argument, which
                # raised TypeError whenever 'DeltaMass' was requested.
                properties.append(delta if as_vec else '%.2f' % delta)
            if 'IsTerminalAtom' in prop:
                properties.append(1 if atom.GetDegree() == 1 else 0)
            if needs_solid_angle:
                # A missing dictionary is handled like a None entry.
                sa = None if sa_dict is None else sa_dict[aid]
                solang = 0 if (sa is None) else sa
                if 'SolidAngle' in prop:
                    properties.append(
                        solang if as_vec else '%.2f' % solang)
                elif 'SolidAngleValue' in prop:
                    properties.append(
                        abs(solang) if as_vec else '%.2f' % abs(solang))
                else:
                    solang_sign = '0' if (sa in [None, 0]) else (
                        '+' if sa > 0 else '-')
                    properties.append(solang_sign)

            if as_vec:
                idf = hash(tuple(properties))
            else:
                idf = hash_ecfp(
                    ecfp=','.join([str(p) for p in properties]),
                    power=idf_power)
            idf_dict[(aid, 0)] = idf

    return idf_dict
def mol_to_dgl(mol):
    """Convert an rdkit mol into a DGL graph carrying atom and bond features.

    One node is created per atom and one edge per bond (added from the
    bond's begin atom to its end atom). Node features are stored under
    ``g.ndata["feat"]`` and edge features under ``g.edata["feat"]``, both
    as ``torch.FloatTensor``.

    Node feature layout, in order: one-hot atom symbol (``ATOM_TYPES``),
    one-hot chiral tag (``CHIRALITY``), explicit valence, formal charge,
    one-hot hybridization (``HYBRIDIZATION``), degree, implicit valence,
    aromatic flag, explicit H count, implicit H count, radical electron
    count, in-ring flag, atomic weight, van der Waals radius.

    Edge feature layout, in order: one-hot bond type (``BOND_TYPES``),
    one-hot bond stereo (``BOND_STEREO``), conjugation flag, in-ring flag.

    Parameters
    ----------
    mol : rdkit mol

    Returns
    -------
    dgl.graph

    Raises
    ------
    ValueError
        If an atom or bond attribute is not present in its lookup list.
    """

    def one_hot(choices, value):
        # list.index raises ValueError for a value outside ``choices``.
        encoded = [0] * len(choices)
        encoded[choices.index(value)] = 1
        return encoded

    graph = dgl.DGLGraph()
    graph.add_nodes(mol.GetNumAtoms())
    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.set_e_initializer(dgl.init.zero_initializer)

    # Atom features (Gasteiger partial charges are not part of the vector).
    periodic_table = GetPeriodicTable()
    node_rows = []
    for atom in mol.GetAtoms():
        node_rows.append([
            *one_hot(ATOM_TYPES, atom.GetSymbol()),
            *one_hot(CHIRALITY, atom.GetChiralTag()),
            atom.GetExplicitValence(),
            atom.GetFormalCharge(),
            *one_hot(HYBRIDIZATION, atom.GetHybridization()),
            atom.GetDegree(),
            atom.GetImplicitValence(),
            int(atom.GetIsAromatic()),
            atom.GetNumExplicitHs(),
            atom.GetNumImplicitHs(),
            atom.GetNumRadicalElectrons(),
            int(atom.IsInRing()),
            periodic_table.GetAtomicWeight(atom.GetSymbol()),
            periodic_table.GetRvdw(atom.GetSymbol()),
        ])

    # Edges are inserted in bond order so that row k of the edge features
    # built below describes the k-th edge added here.
    for bond in mol.GetBonds():
        graph.add_edge(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
    graph.ndata["feat"] = torch.FloatTensor(node_rows)

    # Bond features
    edge_rows = [
        one_hot(BOND_TYPES, bond.GetBondType())
        + one_hot(BOND_STEREO, bond.GetStereo())
        + [float(bond.GetIsConjugated()), float(bond.IsInRing())]
        for bond in mol.GetBonds()
    ]
    graph.edata["feat"] = torch.FloatTensor(edge_rows)
    return graph