def getChannels(mol, aromaticNitrogen=False, version=2, validitychecks=True):
    """Compute the channel array of a molecule.

    The function works on ``mol.copy()``; the copy (with any atom types and
    charges assigned along the way) is returned next to the channels.

    Parameters
    ----------
    mol : Molecule or SmallMol
        Molecule to featurize.
    aromaticNitrogen : bool
        Forwarded to ``getPDBQTAtomTypesAndCharges``. Only used for
        ``Molecule`` inputs with ``version == 2``.
    version : int
        Featurization scheme for ``Molecule`` inputs: ``1`` uses
        ``_getAtomtypePropertiesPDBQT``, ``2`` uses
        ``getPDBQTAtomTypesAndCharges`` followed by ``getFeatures``.
        Not consulted for ``SmallMol`` inputs.
    validitychecks : bool
        Forwarded to ``getPDBQTAtomTypesAndCharges`` (``version == 2`` only).

    Returns
    -------
    channels : np.ndarray
        2-D channel array. If the featurizer produced a boolean array, every
        row is multiplied by the matching entry of
        ``_getChannelRadii(mol.get("element"))`` and the result is float.
    mol : Molecule or SmallMol
        The copy of the input that was featurized.

    Raises
    ------
    ValueError
        If ``mol`` is a ``Molecule`` and ``version`` is neither 1 nor 2.
    TypeError
        If ``mol`` is neither a ``SmallMol`` nor a ``Molecule``.
    """
    from moleculekit.smallmol.smallmol import SmallMol
    from moleculekit.molecule import Molecule

    mol = mol.copy()
    if isinstance(mol, SmallMol):
        channels = _getPropertiesRDkit(mol)
    elif isinstance(mol, Molecule):
        if version == 1:
            channels = _getAtomtypePropertiesPDBQT(mol)
        elif version == 2:
            from moleculekit.tools.atomtyper import (
                getFeatures,
                getPDBQTAtomTypesAndCharges,
            )

            mol.atomtype, mol.charge = getPDBQTAtomTypesAndCharges(
                mol,
                aromaticNitrogen=aromaticNitrogen,
                validitychecks=validitychecks,
            )
            channels = getFeatures(mol)
        else:
            # Previously this fell through and crashed later with an
            # UnboundLocalError on `channels`.
            raise ValueError(
                f"Unsupported version {version!r}; expected 1 or 2."
            )
    else:
        # Same fall-through problem for unknown molecule types.
        raise TypeError(
            "mol must be a SmallMol or a Molecule, "
            f"got {type(mol).__name__}."
        )

    if channels.dtype == bool:
        # Turn the boolean membership mask into per-atom sigmas: each atom's
        # row is scaled by the radius looked up from its element.
        sigmas = _getChannelRadii(mol.get("element"))
        channels = sigmas[:, np.newaxis] * channels.astype(float)
    return channels, mol
def _encodeArpeggioAtomTypes(s_atoms, type_index):
    """Encode the Arpeggio atom types of each atom as a boolean matrix.

    Parameters
    ----------
    s_atoms : sequence
        Atoms returned by ``process_arpeggio``; each must expose an
        ``atom_types`` iterable of type names.
    type_index : mapping
        Maps a known atom-type name to its column index. Names missing from
        the mapping are ignored.

    Returns
    -------
    np.ndarray
        Boolean array with one row per atom and ``len(type_index)`` columns.
    """
    import logging

    log = logging.getLogger(__name__)
    encoded = np.zeros((len(s_atoms), len(type_index)), dtype=bool)
    for row, atom in zip(encoded, s_atoms):
        for type_name in atom.atom_types:
            if type_name in type_index:
                row[type_index[type_name]] = True
        if row[-1]:
            # Diagnostic kept from the original implementation (which printed
            # to stdout): report atoms that light up the last type column.
            log.debug(
                "Arpeggio atom types %s -> %s", sorted(atom.atom_types), row
            )
    return encoded


def getChannels(mol, aromaticNitrogen=False, version=2, validitychecks=True, protein=True):
    """Compute the channel array of a molecule, extended with extra features.

    The function works on ``mol.copy()``; the copy (with any atom types and
    charges assigned along the way) is returned next to the channels.

    For ``Molecule`` inputs with ``version == 2`` the columns of the result
    are, in order: the ``features_to_keep`` columns of ``getFeatures(mol)``,
    ``amino_acid_feature`` of each atom's residue name, the Arpeggio
    atom-type encoding, ``get_atomic_feature`` of each atom's element, and a
    single protein-flag column.

    Parameters
    ----------
    mol : Molecule or SmallMol
        Molecule to featurize.
    aromaticNitrogen : bool
        Forwarded to ``getPDBQTAtomTypesAndCharges`` (``version == 2`` only).
    version : int
        Featurization scheme for ``Molecule`` inputs (1 or 2). Not consulted
        for ``SmallMol`` inputs.
    validitychecks : bool
        Forwarded to ``getPDBQTAtomTypesAndCharges`` (``version == 2`` only).
    protein : bool
        Currently not read: the protein-flag column is always filled with
        ones. NOTE(review): the flag is described as "1 for protein, 0 for
        ligand", so this argument was presumably meant to drive it - confirm
        with the callers before wiring it in, since that changes the
        features produced for ``protein=False``.

    Returns
    -------
    channels : np.ndarray
        2-D channel array. It is scaled by the per-atom values of
        ``_getChannelRadii`` only when its dtype is boolean; after the
        ``version == 2`` concatenation that is the case only if every
        concatenated part is boolean.
    mol : Molecule or SmallMol
        The copy of the input that was featurized.

    Raises
    ------
    ValueError
        If ``mol`` is a ``Molecule`` and ``version`` is neither 1 nor 2.
    TypeError
        If ``mol`` is neither a ``SmallMol`` nor a ``Molecule``.
    """
    import tempfile

    mol = mol.copy()
    if isinstance(mol, SmallMol):
        channels = _getPropertiesRDkit(mol)
    elif isinstance(mol, Molecule):
        if version == 1:
            channels = _getAtomtypePropertiesPDBQT(mol)
        elif version == 2:
            mol.atomtype, mol.charge = getPDBQTAtomTypesAndCharges(
                mol,
                aromaticNitrogen=aromaticNitrogen,
                validitychecks=validitychecks,
            )

            # Per-atom residue-name and element features.
            amacid_features = np.stack(
                [amino_acid_feature(resname) for resname in mol.resname],
                axis=0,
            )
            els = np.stack(
                [get_atomic_feature(element) for element in mol.element],
                axis=0,
            )

            # Arpeggio reads from disk, so round-trip the molecule through a
            # temporary PDB file. mkstemp reserves the path atomically and the
            # finally clause removes the file even if writing or parsing fails.
            fd, pdb_path = tempfile.mkstemp(suffix=".pdb")
            os.close(fd)
            try:
                mol.write(pdb_path)
                s_atoms = process_arpeggio(pdb_path)
            finally:
                os.remove(pdb_path)

            # Keep only the selected base feature columns.
            channels = getFeatures(mol)[:, features_to_keep]

            # The rows of s_atoms are assumed to follow the same atom order as
            # `channels` (as stated by the original author).
            arpeggio_features = _encodeArpeggioAtomTypes(
                s_atoms, arpeggio_atomtypes_unique
            )

            # 1 for protein, 0 for ligand (always 1 here, see the docstring).
            protein_feature = np.ones(shape=(len(channels), 1), dtype=bool)

            channels = np.concatenate(
                [
                    channels,
                    amacid_features,
                    arpeggio_features,
                    els,
                    protein_feature,
                ],
                axis=-1,
            )
        else:
            # Previously this fell through and crashed later with an
            # UnboundLocalError on `channels`.
            raise ValueError(
                f"Unsupported version {version!r}; expected 1 or 2."
            )
    else:
        # Same fall-through problem for unknown molecule types.
        raise TypeError(
            "mol must be a SmallMol or a Molecule, "
            f"got {type(mol).__name__}."
        )

    if channels.dtype == bool:
        # Turn the boolean membership mask into per-atom sigmas: each atom's
        # row is scaled by the radius looked up from its element.
        sigmas = _getChannelRadii(mol.get('element'))
        channels = sigmas[:, np.newaxis] * channels.astype(float)
    return channels, mol