def atom_level_descriptors(mol, include=('functional',), asOneHot=False, ORIGINAL_VERSION=False):
    '''
    Given an RDKit mol, returns an N_atom-long list of lists, each of which
    contains atom-level descriptors, together with the descriptor names.

    mol: RDKit molecule. When 'functional' is requested, Gasteiger charges
        are computed on it in place.
    include: container of descriptor families to compute; 'functional' and
        'structural' are recognised here.
    asOneHot, ORIGINAL_VERSION: forwarded unchanged to atom_structural.

    returns: (labels, attributes)
    '''
    attributes = [[] for _ in mol.GetAtoms()]
    labels = []

    if 'functional' in include:
        # Each Crippen entry is a (logp, mr) pair. Compute the list once and
        # append both values; the per-atom column order is unchanged.
        for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(mol)):
            attributes[i].append(contrib[0])
            attributes[i].append(contrib[1])
        labels.append('Crippen contribution to logp')
        labels.append('Crippen contribution to mr')

        for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(mol)):
            attributes[i].append(value)
        labels.append('TPSA contribution')

        for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(mol)[0]):
            attributes[i].append(value)
        labels.append('Labute ASA contribution')

        for i, value in enumerate(EState.EStateIndices(mol)):
            attributes[i].append(value)
        labels.append('EState Index')

        rdPartialCharges.ComputeGasteigerCharges(mol)
        gasteiger_columns = (
            ('_GasteigerCharge', 'Gasteiger partial charge'),
            ('_GasteigerHCharge', 'Gasteiger hydrogen partial charge'),
        )
        for prop_name, label in gasteiger_columns:
            for i, atom in enumerate(mol.GetAtoms()):
                charge = float(atom.GetProp(prop_name))
                # Gasteiger partial charges sometimes give NaN (or inf);
                # store 0.0 for any non-finite value.
                attributes[i].append(charge if np.isfinite(charge) else 0.0)
            labels.append(label)

    if 'structural' in include:
        for i, atom_attributes in enumerate(attributes):
            atom_attributes.extend(
                atom_structural(
                    mol.GetAtomWithIdx(i),
                    asOneHot=asOneHot,
                    ORIGINAL_VERSION=ORIGINAL_VERSION,
                )
            )
        labels.append('--many structural--')

    return (labels, attributes)
def testCrippen(self):
    """Check the Crippen contributions, atom-type ids and atom-type labels
    computed for the SMILES n1ccccc1CO."""
    mol = Chem.MolFromSmiles("n1ccccc1CO")
    n_atoms = mol.GetNumAtoms()

    contribs = rdMD._CalcCrippenContribs(mol)
    self.assertEqual(len(contribs), n_atoms)

    # atomTypes is an output argument: the pre-sized list is filled in place.
    ts = [0] * n_atoms
    rdMD._CalcCrippenContribs(mol, force=True, atomTypes=ts)
    self.assertEqual(ts, [59, 25, 25, 25, 25, 28, 17, 69])

    # atomTypeLabels works the same way, with string labels.
    ls = [''] * n_atoms
    rdMD._CalcCrippenContribs(mol, force=True, atomTypeLabels=ls)
    self.assertEqual(ls, ['N11', 'C18', 'C18', 'C18', 'C18', 'C21', 'C10', 'O2'])
def testCrippen(self):
    """Crippen contributions for n1ccccc1CO: one entry per atom, plus the
    expected atom-type ids and atom-type labels."""
    molecule = Chem.MolFromSmiles("n1ccccc1CO")

    per_atom = rdMD._CalcCrippenContribs(molecule)
    self.assertEqual(len(per_atom), molecule.GetNumAtoms())

    # The list passed as atomTypes is filled by the call.
    type_ids = [0 for _ in range(molecule.GetNumAtoms())]
    rdMD._CalcCrippenContribs(molecule, force=True, atomTypes=type_ids)
    expected_ids = [59, 25, 25, 25, 25, 28, 17, 69]
    self.assertEqual(type_ids, expected_ids)

    # The list passed as atomTypeLabels is filled by the call.
    type_labels = ['' for _ in range(molecule.GetNumAtoms())]
    rdMD._CalcCrippenContribs(molecule, force=True, atomTypeLabels=type_labels)
    expected_labels = ['N11', 'C18', 'C18', 'C18', 'C18', 'C21', 'C10', 'O2']
    self.assertEqual(type_labels, expected_labels)
def mol_to_nx(mol) -> nx.Graph:
    """Convert an RDKit molecule with a conformer into a networkx graph.

    Nodes are keyed by atom index and carry the atom position plus
    per-atom descriptors; edges carry the bond type and conjugation flag.
    The molecule is sanitized (SANITIZE_PROPERTIES is XOR-ed out of
    SANITIZE_ALL) and Gasteiger charges are computed on it in place.
    """
    graph = nx.Graph()
    conformer = mol.GetConformer()
    SanitizeMol(mol, SanitizeFlags.SANITIZE_ALL ^ SanitizeFlags.SANITIZE_PROPERTIES)
    ComputeGasteigerCharges(mol)
    rings = mol.GetRingInfo()
    crippen = rdMolDescriptors._CalcCrippenContribs(mol)
    tpsa = rdMolDescriptors._CalcTPSAContribs(mol)

    for atom in mol.GetAtoms():
        atom_idx = atom.GetIdx()
        # The formal charge is taken directly from RDKit; a valence-based
        # override for N and O was considered earlier and left disabled.
        charge = atom.GetFormalCharge()
        node_attrs = {
            'pos': conformer.GetAtomPosition(atom_idx),
            'formal_charge': charge,
            'chiral_tag': atom.GetChiralTag(),
            'hybridization': atom.GetHybridization(),
            'is_aromatic': atom.GetIsAromatic(),
            'num_atom_rings': rings.NumAtomRings(atom_idx),
            'is_in_ring_size3': atom.IsInRingSize(3),
            'is_in_ring_size4': atom.IsInRingSize(4),
            'is_in_ring_size5': atom.IsInRingSize(5),
            'is_in_ring_size6': atom.IsInRingSize(6),
            'symbol': atom.GetSymbol(),
            'total_valence': atom.GetTotalValence(),
            # Stored exactly as GetProp returns it (no float conversion).
            'gasteiger_charge': atom.GetProp('_GasteigerCharge'),
            'num_implicit_hs': atom.GetNumImplicitHs(),
            'total_degree': atom.GetTotalDegree(),
            'crippen_logp': crippen[atom_idx][0],
            'crippen_mr': crippen[atom_idx][1],
            'tpsa': tpsa[atom_idx],
        }
        graph.add_node(atom_idx, **node_attrs)

    for bond in mol.GetBonds():
        begin_idx = bond.GetBeginAtomIdx()
        end_idx = bond.GetEndAtomIdx()
        graph.add_edge(
            begin_idx,
            end_idx,
            bond_type=bond.GetBondType(),
            is_conjugated=bond.GetIsConjugated(),
        )

    return graph
def calculate_logP(self, mol):
    """Return the Crippen contributions of the ligand molecule.

    Takes:
        * mol * - molecule in the rdkit environment
    Returns:
        * contribs * - the result of rdMolDescriptors._CalcCrippenContribs:
        the Wildman-Crippen logP and MR (molar refractivity - measure of
        the volume occupied by a molecule of the substance) contributions
    """
    return rdMolDescriptors._CalcCrippenContribs(mol)
def calculate_logP(self, mol):
    """Compute the Crippen (logP / MR) contributions for a ligand.

    Takes:
        * mol * - rdkit molecule of the ligand
    Returns:
        * contribs * - Wildman-Crippen logP and MR (molar refractivity)
        contributions, exactly as returned by
        rdMolDescriptors._CalcCrippenContribs
    """
    crippen_contributions = rdMolDescriptors._CalcCrippenContribs(mol)
    return crippen_contributions
def test8CrippenO3A(self):
    """Align every molecule in ref_e2.sdf onto molecule 48 with Crippen O3A
    and check the cumulative score and the RMS of the alignment RMSDs."""
    sdf = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                       'MolAlign', 'test_data', 'ref_e2.sdf')
    # The aligned probes are not written out in this test (the SDWriter
    # lines targeting ref_e2_pyCrippenO3A.sdf were disabled).
    molS = Chem.SDMolSupplier(sdf, True, False)
    refNum = 48
    refMol = molS[refNum]
    cumScore = 0.0
    cumMsd = 0.0
    refList = rdMolDescriptors._CalcCrippenContribs(refMol, True)
    for prbMol in molS:
        prbList = rdMolDescriptors._CalcCrippenContribs(prbMol, True)
        pyO3A = rdMolAlign.GetCrippenO3A(prbMol, refMol, prbList, refList)
        cumScore += pyO3A.Score()
        rmsd = pyO3A.Align()
        cumMsd += rmsd * rmsd
    cumMsd /= len(molS)
    # assertAlmostEqual replaces the removed failUnlessAlmostEqual alias.
    self.assertAlmostEqual(cumScore, 4918, 0)
    self.assertAlmostEqual(math.sqrt(cumMsd), .304, 3)
def test10CrippenO3A(self):
    """Align every molecule in ref_e2.sdf onto molecule 48 with Crippen O3A,
    write the aligned probes to ref_e2_pyCrippenO3A.sdf, and check the
    cumulative score and the RMS of the alignment RMSDs."""
    sdf = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                       'MolAlign', 'test_data', 'ref_e2.sdf')
    alignedSdf = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                              'MolAlign', 'test_data', 'ref_e2_pyCrippenO3A.sdf')
    molS = Chem.SDMolSupplier(sdf, True, False)
    molW = Chem.SDWriter(alignedSdf)
    try:
        refNum = 48
        refMol = molS[refNum]
        cumScore = 0.0
        cumMsd = 0.0
        refList = rdMolDescriptors._CalcCrippenContribs(refMol, True)
        for prbMol in molS:
            prbList = rdMolDescriptors._CalcCrippenContribs(prbMol, True)
            pyO3A = rdMolAlign.GetCrippenO3A(prbMol, refMol, prbList, refList)
            cumScore += pyO3A.Score()
            rmsd = pyO3A.Align()
            cumMsd += rmsd * rmsd
            molW.write(prbMol)
    finally:
        # Always flush and release the output file, even if alignment fails.
        molW.close()
    cumMsd /= len(molS)
    self.assertAlmostEqual(cumScore, 4918, 0)
    self.assertAlmostEqual(math.sqrt(cumMsd), .304, 3)
def align_set_of_ligands(ligands: Sequence, n_conformers: int = 100) -> Tuple[List[Chem.Mol], List[float]]:
    """
    Align a set of ligands to each other.

    The first ligand is the reference. For every ligand (the reference
    included), each of its ``n_conformers`` conformers is aligned to
    conformer 0 of the reference with Crippen O3A, and the conformer with
    the highest score is kept.

    Parameters
    ----------
    ligands : list of rdkit.Chem.rdchem.Mol or rdkit.Chem.SmilesMolSupplier or rdkit.Chem.SDMolSupplier
        List of ligands. The input is deep-copied and not modified.
    n_conformers : int, optional
        Number of conformers requested from ``generate_conformers`` and
        scanned per ligand (conformer ids ``0 .. n_conformers - 1``).
        Defaults to 100.

    Returns
    ----------
    aligned_molecules : list of rdkit.Chem.rdchem.Mol
        List of aligned ligands. Empty if ``ligands`` is empty.
    crippen_score : list of float
        List with crippen scores calculated during the alignment.

    Raises
    ------
    ValueError
        If ``n_conformers`` is smaller than 1.
    """
    if n_conformers < 1:
        raise ValueError("n_conformers must be at least 1")

    if not isinstance(ligands, list):
        ligands = list(ligands)
    if not ligands:
        # Nothing to align; the original indexed element 0 and crashed here.
        return [], []

    molecules = copy.deepcopy(ligands)
    molecules = [generate_conformers(mol, n_conformers) for mol in molecules]
    crippen_contribs = [rdMolDescriptors._CalcCrippenContribs(mol) for mol in molecules]

    ref_mol = molecules[0]
    crippen_ref_contrib = crippen_contribs[0]

    crippen_score = []
    aligned_molecules = []
    for mol, mol_contrib in zip(molecules, crippen_contribs):
        tempscore = []
        for cid in range(n_conformers):
            # Trailing arguments: probe conformer id, reference conformer id.
            crippenO3A = rdMolAlign.GetCrippenO3A(mol, ref_mol, mol_contrib,
                                                  crippen_ref_contrib, cid, 0)
            crippenO3A.Align()
            tempscore.append(crippenO3A.Score())
        best = int(np.argmax(tempscore))
        # Round-trip through a mol block to keep only the best conformer.
        mol_string = Chem.MolToMolBlock(mol, confId=best)
        temp_mol = Chem.MolFromMolBlock(mol_string, removeHs=False)
        crippen_score.append(tempscore[best])
        aligned_molecules.append(temp_mol)

    return aligned_molecules, crippen_score
def assignProperties(mol):
    '''
    Store atom-level descriptors on the atoms of an RDKit mol so they can be
    used in featurization.

    Sets the double properties 'crippen_logp', 'crippen_mr', 'tpsa', 'asa'
    and 'estate' on every atom, then computes Gasteiger charges on the mol.
    '''
    atom_at = mol.GetAtomWithIdx

    def store(prop_name, values):
        # Write one descriptor value per atom, in atom-index order.
        for idx, value in enumerate(values):
            atom_at(idx).SetDoubleProp(prop_name, value)

    for idx, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(mol)):
        atom = atom_at(idx)
        atom.SetDoubleProp('crippen_logp', contrib[0])
        atom.SetDoubleProp('crippen_mr', contrib[1])

    store('tpsa', rdMolDescriptors._CalcTPSAContribs(mol))
    store('asa', rdMolDescriptors._CalcLabuteASAContribs(mol)[0])
    store('estate', EState.EStateIndices(mol))

    # Leaves '_GasteigerCharge' and '_GasteigerHCharge' on each atom.
    rdPartialCharges.ComputeGasteigerCharges(mol)
def get_molecular_attributes(rdmol):
    """
    Molecular attributes calculated per atom, in this order:
    [Crippen contribution to logp,
     Crippen contribution to mr,
     TPSA contribution,
     Labute ASA contribution,
     EState Index,
     Gasteiger partial charge,
     Gasteiger hydrogen partial charge]

    Parameters
    ----------
    rdmol : rdkit.Chem.rdchem.Mol
        rdkit molecule class; Gasteiger charges are computed on it in place.

    Returns
    -------
    attributes : list
        one feature vector (list of 7 values) per atom
    """
    attributes = [[] for _ in rdmol.GetAtoms()]

    for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(rdmol)):
        attributes[i].append(contrib[0])
        attributes[i].append(contrib[1])
    for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(rdmol)):
        attributes[i].append(value)
    for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
        attributes[i].append(value)
    for i, value in enumerate(EState.EStateIndices(rdmol)):
        attributes[i].append(value)

    rdPartialCharges.ComputeGasteigerCharges(rdmol)
    for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
        for i, atom in enumerate(rdmol.GetAtoms()):
            val = float(atom.GetProp(prop_name))
            # Replace NaN and both infinities with 0.0. The previous check
            # (val == val and val < np.inf) let -inf through.
            attributes[i].append(val if np.isfinite(val) else 0.0)

    return attributes
def molToGraph(rdmol):
    '''
    Converts an RDKit molecule to an attributed undirected graph

    Gasteiger charges are computed on rdmol in place.

    @param rdmol: RDKit molecule
    @return: Graph
    '''
    graph = Graph()

    # Calculate atom-level molecule descriptors
    nodesFeatures = [[] for _ in rdmol.GetAtoms()]

    # 6 (25) Crippen contribution to logp and 7 (26) Crippen contribution to
    # mr: each entry is a (logp, mr) pair, so compute the list only once.
    for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(rdmol)):
        nodesFeatures[i].append(contrib[0])
        nodesFeatures[i].append(contrib[1])
    # 8 (27) TPSA contribution
    for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(rdmol)):
        nodesFeatures[i].append(value)
    # 9 (28) Labute ASA contribution
    for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
        nodesFeatures[i].append(value)
    # 10 (29) EState Index
    for i, value in enumerate(EState.EStateIndices(rdmol)):
        nodesFeatures[i].append(value)

    # Calculate Gasteiger charges for features 30 and 31.
    # The computed charges are stored on each atom under the property names
    # _GasteigerCharge and _GasteigerHCharge. Values could be NaN.
    rdPartialCharges.ComputeGasteigerCharges(rdmol)
    # 11 (30) then 12 (31)
    for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
        for i, atom in enumerate(rdmol.GetAtoms()):
            charge = float(atom.GetProp(prop_name))
            # NaN or infinite charges are stored as 0.0.
            nodesFeatures[i].append(charge if np.isfinite(charge) else 0.0)

    # Add edges to graph
    for bond in rdmol.GetBonds():
        edge = Edge()
        edge.id = bond.GetIdx()
        edge.features = getBondFeatures(bond).astype('float32')
        edge.ends = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        graph.edges.append(edge)

    # Add nodes to graph
    for i, atom in enumerate(rdmol.GetAtoms()):
        node = Node()
        node.id = atom.GetIdx()
        node.features = getAtomFeatures(atom, nodesFeatures[i])
        # Each neighbor entry is (neighbor atom index, connecting bond index).
        for neighbor in atom.GetNeighbors():
            bond = rdmol.GetBondBetweenAtoms(atom.GetIdx(), neighbor.GetIdx())
            node.neighbors.append((neighbor.GetIdx(), bond.GetIdx()))
        graph.nodes.append(node)

    graph.nodeNum = len(graph.nodes)
    graph.nodeFeatureDim = len(graph.nodes[0].features)
    if len(graph.edges) > 0:
        graph.edgeFeatureDim = len(graph.edges[0].features)

    return graph
def molToGraph(rdmol, molecular_attributes=False):
    '''Converts an RDKit molecule to an attributed undirected graph

    rdmol: RDKit molecule. When molecular_attributes is true, Gasteiger
        charges are computed on it in place.
    molecular_attributes: when true, each atom additionally gets, in order,
        its Crippen logp and mr contributions, TPSA contribution, Labute ASA
        contribution, EState index, Gasteiger partial charge and Gasteiger
        hydrogen partial charge as extra attributes.

    returns: Graph with edges, nodes, num_edges and num_nodes filled in.
    '''
    # Initialize
    graph = Graph()
    graph.molecular_attributes = molecular_attributes

    # Calculate atom-level molecule descriptors
    attributes = [[] for _ in rdmol.GetAtoms()]
    if molecular_attributes:
        # Each Crippen entry is a (logp, mr) pair; compute the list once.
        for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(rdmol)):
            attributes[i].append(contrib[0])
            attributes[i].append(contrib[1])
        for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(rdmol)):
            attributes[i].append(value)
        for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
            attributes[i].append(value)
        for i, value in enumerate(EState.EStateIndices(rdmol)):
            attributes[i].append(value)

        rdPartialCharges.ComputeGasteigerCharges(rdmol)
        for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
            for i, atom in enumerate(rdmol.GetAtoms()):
                charge = float(atom.GetProp(prop_name))
                # Gasteiger partial charges sometimes give NaN or inf;
                # those are stored as 0.0.
                attributes[i].append(charge if np.isfinite(charge) else 0.0)

    # Add bonds
    for bond in rdmol.GetBonds():
        edge = Edge()
        edge.i = bond.GetIdx()
        edge.attributes = bondAttributes(bond)
        edge.connects = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        graph.edges.append(edge)

    # Add atoms
    for k, atom in enumerate(rdmol.GetAtoms()):
        node = Node()
        node.i = atom.GetIdx()
        node.attributes = atomAttributes(atom, extra_attributes=attributes[k])
        # Each neighbor entry is (neighbor atom index, connecting bond index).
        for neighbor in atom.GetNeighbors():
            bond = rdmol.GetBondBetweenAtoms(atom.GetIdx(), neighbor.GetIdx())
            node.neighbors.append((neighbor.GetIdx(), bond.GetIdx()))
        graph.nodes.append(node)

    # Add counts, for convenience
    graph.num_edges = len(graph.edges)
    graph.num_nodes = len(graph.nodes)

    return graph
def make_graph(name, gb_structure, gb_scalar_coupling):
    """Build the Graph (node, edge and coupling features) for one molecule.

    name: molecule name; used as the group key and to locate
        '../input/champs-scalar-coupling/structures/{name}.xyz'.
    gb_structure: grouped structure table exposing get_group(name)
        (presumably a pandas GroupBy - confirm against the caller); columns
        [molecule_name, atom_index, atom, x, y, z].
    gb_scalar_coupling: grouped coupling table exposing get_group(name);
        the code reads the columns id, atom_index_0, atom_index_1, type,
        scalar_coupling_constant, fc, sd, pso and dso.

    Returns a Graph holding the name, SMILES, [symbols, scaled xyz], the
    list of node feature arrays, [bond_type, distance, angle], edge_index
    and a Coupling object.

    Raises ValueError if a coupling pair is not a directed edge (i != j) of
    the molecule.
    """
    coupling_df = gb_scalar_coupling.get_group(name)

    df = gb_structure.get_group(name)
    df = df.sort_values(['atom_index'], ascending=True)
    a = df.atom.values.tolist()
    xyz = df[['x', 'y', 'z']].values

    mol = mol_from_axyz(a, xyz)
    mol_op = openbabel.OBMol()
    obConversion.ReadFile(mol_op, f'../input/champs-scalar-coupling/structures/{name}.xyz')

    factory = ChemicalFeatures.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    feature = factory.GetFeaturesForMol(mol)

    # ** node **
    num_atom = mol.GetNumAtoms()
    symbol = np.zeros((num_atom, len(SYMBOL)), np.uint8)  # category
    acceptor = np.zeros((num_atom, 1), np.uint8)
    donor = np.zeros((num_atom, 1), np.uint8)
    aromatic = np.zeros((num_atom, 1), np.uint8)
    hybridization = np.zeros((num_atom, len(HYBRIDIZATION)), np.uint8)
    num_h = np.zeros((num_atom, 1), np.float32)  # real
    atomic = np.zeros((num_atom, 1), np.float32)

    # new features
    degree = np.zeros((num_atom, 1), np.uint8)
    formalCharge = np.zeros((num_atom, 1), np.float32)
    chiral_tag = np.zeros((num_atom, 1), np.uint8)
    crippen_contribs = np.zeros((num_atom, 2), np.float32)
    tpsa = np.zeros((num_atom, 1), np.float32)
    labute_asac = np.zeros((num_atom, 1), np.float32)
    gasteiger_charges = np.zeros((num_atom, 1), np.float32)
    esataindices = np.zeros((num_atom, 1), np.float32)
    atomic_radiuss = np.zeros((num_atom, 1), np.float32)
    electronegate = np.zeros((num_atom, 1), np.float32)
    electronegate_sqre = np.zeros((num_atom, 1), np.float32)
    mass = np.zeros((num_atom, 1), np.float32)
    van = np.zeros((num_atom, 1), np.float32)
    cov = np.zeros((num_atom, 1), np.float32)
    ion = np.zeros((num_atom, 1), np.float32)

    # These descriptors are computed for the whole molecule at once, so
    # evaluate them a single time instead of once per atom.
    crippen_per_atom = rdMolDescriptors._CalcCrippenContribs(mol)
    tpsa_per_atom = rdMolDescriptors._CalcTPSAContribs(mol)
    labute_per_atom = rdMolDescriptors._CalcLabuteASAContribs(mol)[0]
    estate_per_atom = EState.EStateIndices(mol)

    for i in range(num_atom):
        atom = mol.GetAtomWithIdx(i)
        atom_op = mol_op.GetAtomById(i)
        element = atom.GetSymbol()

        symbol[i] = one_hot_encoding(element, SYMBOL)
        aromatic[i] = atom.GetIsAromatic()
        hybridization[i] = one_hot_encoding(atom.GetHybridization(), HYBRIDIZATION)
        num_h[i] = atom.GetTotalNumHs(includeNeighbors=True)
        atomic[i] = atom.GetAtomicNum()
        degree[i] = atom.GetTotalDegree()
        formalCharge[i] = atom.GetFormalCharge()
        chiral_tag[i] = int(atom.GetChiralTag())

        crippen_contribs[i] = crippen_per_atom[i]
        tpsa[i] = tpsa_per_atom[i]
        labute_asac[i] = labute_per_atom[i]
        # Partial charge comes from the Open Babel atom, not from RDKit.
        gasteiger_charges[i] = atom_op.GetPartialCharge()
        esataindices[i] = estate_per_atom[i]

        # Element-level lookup tables defined elsewhere in the module.
        atomic_radiuss[i] = atomic_radius[element]
        electronegate[i] = electronegativity[element]
        electronegate_sqre[i] = electronegativity_square[element]
        mass[i] = atomic_mass[element]
        van[i] = vanderwaalsradius[element]
        cov[i] = covalenzradius[element]
        ion[i] = ionization_energy[element]

    for feat in feature:
        family = feat.GetFamily()
        if family == 'Donor':
            for i in feat.GetAtomIds():
                donor[i] = 1
        elif family == 'Acceptor':
            for i in feat.GetAtomIds():
                acceptor[i] = 1

    # ** edge ** : every ordered pair (i, j) with i != j
    num_edge = num_atom * num_atom - num_atom
    edge_index = np.zeros((num_edge, 2), np.uint32)
    bond_type = np.zeros((num_edge, len(BOND_TYPE)), np.uint32)
    distance = np.zeros((num_edge, 1), np.float32)
    angle = np.zeros((num_edge, 1), np.float32)

    norm_xyz = preprocessing.normalize(xyz, norm='l2')

    ij = 0
    for i in range(num_atom):
        for j in range(num_atom):
            if i == j:
                continue
            edge_index[ij] = [i, j]

            bond = mol.GetBondBetweenAtoms(i, j)
            if bond is not None:
                bond_type[ij] = one_hot_encoding(bond.GetBondType(), BOND_TYPE)

            distance[ij] = np.linalg.norm(xyz[i] - xyz[j])
            angle[ij] = (norm_xyz[i] * norm_xyz[j]).sum()
            ij += 1

    # NOTE(review): 1.889726... looks like an Angstrom -> Bohr conversion;
    # confirm. Distances and angles above use the unscaled coordinates.
    xyz = xyz * 1.889726133921252
    system = System(symbols=a, positions=xyz)
    acsf = ACSF_GENERATOR.create(system)

    # ** coupling ** : map each (atom_index_0, atom_index_1) pair to its row
    # in edge_index using one dictionary instead of a linear search per pair.
    coupling_pairs = coupling_df[['atom_index_0', 'atom_index_1']].values
    edge_row = {tuple(pair): row for row, pair in enumerate(edge_index.tolist())}
    coupling_rows = []
    for pair in coupling_pairs.tolist():
        try:
            coupling_rows.append(edge_row[tuple(pair)])
        except KeyError:
            # Keep the exception type the list.index lookup used to raise.
            raise ValueError(f'{pair} is not in list') from None
    coupling_rows = np.array(coupling_rows)
    coupling_edge_index = np.concatenate(
        [coupling_pairs, coupling_rows.reshape(len(coupling_rows), 1)], axis=1)

    coupling = Coupling(
        coupling_df['id'].values,
        coupling_df[['fc', 'sd', 'pso', 'dso']].values,
        coupling_edge_index,
        np.array([COUPLING_TYPE.index(t) for t in coupling_df.type.values], np.int32),
        coupling_df['scalar_coupling_constant'].values,
    )

    graph = Graph(
        name,
        Chem.MolToSmiles(mol),
        [a, xyz],
        [acsf, symbol, acceptor, donor, aromatic, hybridization, num_h, atomic,
         degree, formalCharge, chiral_tag, crippen_contribs, tpsa, labute_asac,
         gasteiger_charges, esataindices, atomic_radiuss, electronegate,
         electronegate_sqre, mass, van, cov, ion],
        [bond_type, distance, angle, ],
        edge_index,
        coupling,
    )

    return graph
def molToGraph(rdmol, bondtype_list_order, atomtype_list_order, molecular_attributes=False):
    '''Converts an RDKit molecule to an attributed undirected graph

    rdmol: RDKit molecule. When molecular_attributes is true, Gasteiger
        charges are computed on it in place.
    bondtype_list_order: ordered bond-type keys ('<lowZ>_<highZ>' strings
        built from the atomic numbers of the two bonded atoms) used for the
        one-hot bond encoding.
    atomtype_list_order: ordered atomic numbers used for the one-hot atom
        encoding.
    molecular_attributes: when true, each atom additionally gets, in order,
        its Crippen logp and mr contributions, TPSA contribution, Labute ASA
        contribution, EState index, Gasteiger partial charge and Gasteiger
        hydrogen partial charge as extra attributes.

    returns: Graph with edges, nodes, num_edges and num_nodes filled in.
    '''
    # Initialize
    graph = Graph()
    graph.molecular_attributes = molecular_attributes
    graph.bondtype_list_order = bondtype_list_order
    graph.atomtype_list_order = atomtype_list_order

    # Calculate atom-level molecule descriptors
    attributes = [[] for _ in rdmol.GetAtoms()]
    if molecular_attributes:
        # Each Crippen entry is a (logp, mr) pair; compute the list once.
        for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(rdmol)):
            attributes[i].append(contrib[0])
            attributes[i].append(contrib[1])
        for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(rdmol)):
            attributes[i].append(value)
        for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
            attributes[i].append(value)
        for i, value in enumerate(EState.EStateIndices(rdmol)):
            attributes[i].append(value)

        rdPartialCharges.ComputeGasteigerCharges(rdmol)
        for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
            for i, atom in enumerate(rdmol.GetAtoms()):
                charge = float(atom.GetProp(prop_name))
                # Gasteiger partial charges sometimes give NaN or inf;
                # those are stored as 0.0.
                attributes[i].append(charge if np.isfinite(charge) else 0.0)

    # Add bonds
    for bond in rdmol.GetBonds():
        edge = Edge()
        edge.i = bond.GetIdx()
        edge.attributes = bondAttributes(bond)
        edge.orderAtt = list(oneHotVector(bond.GetBondTypeAsDouble(), [1.0, 1.5, 2.0, 3.0]))
        edge.aromAtt = list(oneHotVector(bond.GetIsAromatic(), [1.0, 0.0]))
        edge.conjAtt = list(oneHotVector(bond.GetIsConjugated(), [1.0, 0.0]))
        edge.ringAtt = list(oneHotVector(bond.IsInRing(), [1.0, 0.0]))

        # Bond-type key: the two atomic numbers, smaller first, joined by '_'.
        begin_z = bond.GetBeginAtom().GetAtomicNum()
        end_z = bond.GetEndAtom().GetAtomicNum()
        low_z, high_z = sorted((begin_z, end_z))
        bond_type = str(low_z) + '_' + str(high_z)
        edge.attributesAtt = np.array(
            list(oneHotVector(bond_type, bondtype_list_order)), dtype=att_dtype)

        edge.connects = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        graph.edges.append(edge)

    # Add atoms
    for k, atom in enumerate(rdmol.GetAtoms()):
        node = Node()
        node.i = atom.GetIdx()
        node.attributes = atomAttributes(atom, extra_attributes=attributes[k])
        node.attributesAtt = np.array(
            list(oneHotVector(atom.GetAtomicNum(), atomtype_list_order)), dtype=att_dtype)
        # Each neighbor entry is (neighbor atom index, connecting bond index).
        for neighbor in atom.GetNeighbors():
            bond = rdmol.GetBondBetweenAtoms(atom.GetIdx(), neighbor.GetIdx())
            node.neighbors.append((neighbor.GetIdx(), bond.GetIdx()))
        graph.nodes.append(node)

    # Add counts, for convenience
    graph.num_edges = len(graph.edges)
    graph.num_nodes = len(graph.nodes)

    return graph
def atom_level_descriptors(mol, include=('functional',), asOneHot=False, ORIGINAL_VERSION=False):
    """
    Given an RDKit mol, returns an N_atom-long list of lists, each of which
    contains atom-level descriptors, together with the descriptor names.

    mol: RDKit molecule. When 'functional' is requested, Gasteiger charges
        are computed on it in place.
    include: container of descriptor families to compute; 'functional',
        'structural' and 'dftb' are recognised here.
    asOneHot, ORIGINAL_VERSION: forwarded unchanged to atom_structural.

    Returns:
        (labels, attributes)
    """
    attributes = [[] for _ in mol.GetAtoms()]
    labels = []

    if 'functional' in include:
        # Each Crippen entry is a (logp, mr) pair. Compute the list once and
        # append both values; the per-atom column order is unchanged.
        for i, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(mol)):
            attributes[i].append(contrib[0])
            attributes[i].append(contrib[1])
        labels.append('Crippen contribution to logp')
        labels.append('Crippen contribution to mr')

        for i, value in enumerate(rdMolDescriptors._CalcTPSAContribs(mol)):
            attributes[i].append(value)
        labels.append('TPSA contribution')

        for i, value in enumerate(rdMolDescriptors._CalcLabuteASAContribs(mol)[0]):
            attributes[i].append(value)
        labels.append('Labute ASA contribution')

        for i, value in enumerate(EState.EStateIndices(mol)):
            attributes[i].append(value)
        labels.append('EState Index')

        rdPartialCharges.ComputeGasteigerCharges(mol)
        gasteiger_columns = (
            ('_GasteigerCharge', 'Gasteiger partial charge'),
            ('_GasteigerHCharge', 'Gasteiger hydrogen partial charge'),
        )
        for prop_name, label in gasteiger_columns:
            for i, atom in enumerate(mol.GetAtoms()):
                charge = float(atom.GetProp(prop_name))
                # Gasteiger partial charges sometimes give NaN (or inf);
                # store 0.0 for any non-finite value.
                attributes[i].append(charge if np.isfinite(charge) else 0.0)
            labels.append(label)

    if 'structural' in include:
        for i, atom_attributes in enumerate(attributes):
            atom_attributes.extend(
                atom_structural(
                    mol.GetAtomWithIdx(i),
                    asOneHot=asOneHot,
                    ORIGINAL_VERSION=ORIGINAL_VERSION,
                )
            )
        labels.append('--many structural--')

    if 'dftb' in include:
        try:
            dftb_atom_atts = atom_dftb(mol)
        except (ValueError, KeyError) as e:  # often, an invalid element
            # Best effort: report the problem and fall back to 18 zeros per atom.
            print(e)
            dftb_atom_atts = [[0] * 18 for _ in range(mol.GetNumAtoms())]
        for i in range(mol.GetNumAtoms()):
            attributes[i].extend(dftb_atom_atts[i])
        labels.append('--many DFTB--')

    return (labels, attributes)
# lis2 = df['fps'] d2f = d2f.reset_index(drop=True) lis = d2f['mol_blocks'] d2f.head(100) mol_list = [] for m in lis: m1 = Chem.MolFromMolBlock(m, removeHs=False) mol_list.append(m1) cdk2mol = [m for m in mol_list] cdk2mol2 = copy.deepcopy(cdk2mol) crippen_contribs = [ rdMolDescriptors._CalcCrippenContribs(mol) for mol in cdk2mol2 ] ref = cdk2mol_reference ref_contrib = rdMolDescriptors._CalcCrippenContribs(ref) targets = cdk2mol2[0:] targets_contrib = crippen_contribs[0:] for i, target in enumerate(targets): crippenO3A = rdMolAlign.GetCrippenO3A(target, ref, targets_contrib[i], ref_contrib) crippenO3A.Align() v.DeleteAll() v.ShowMol(ref, name='ref', showOnly=False) for i in range(len(targets)): name = f'probe_{i}'