def calcDatasetVoxel(self, protPath, ligPath, number, altProtPath, altLigPath):
    """Voxelize one protein-ligand pair into a single stacked datapoint.

    The ligand is read from ``ligPath``; if reading or voxelizing it raises,
    ``altLigPath`` is tried instead (an error from the alternative file is
    propagated). The voxel box is centred on the mean ligand coordinate and
    the same centre is handed to ``self.calcProtVoxel`` for the protein.

    Args:
        protPath: Protein file, forwarded to ``self.calcProtVoxel``.
        ligPath: Primary ligand file.
        number: Identifier forwarded to ``self.calcProtVoxel``.
        altProtPath: Alternative protein file, forwarded to
            ``self.calcProtVoxel``.
        altLigPath: Ligand file in an alternative format, used only when
            ``ligPath`` cannot be processed.

    Returns:
        A 6-tuple of numpy arrays:
        ``(dataset, centers, feature_protein, feature_ligand,
        feature_protein_shaped, feature_ligand_shaped)`` where ``dataset``
        holds the single channel-first datapoint (protein and ligand
        features concatenated along the last axis, then transposed with
        ``[3, 0, 1, 2]``) and ``centers`` is the second value returned by
        ``self.calcProtVoxel``.
    """
    print(ligPath)

    def _voxelize_ligand(path):
        # Load the ligand and centre the voxel box on its mean coordinate.
        sm = SmallMol(path, force_reading=True, fixHs=False)
        coords = sm.get('coords')
        center = [np.mean(coords[:, axis]) for axis in range(3)]
        # The ligand voxel centres (second return value) are not needed here.
        fs, _, ns = voxeldescriptors.getVoxelDescriptors(
            sm, center=center, boxsize=self.boxsize)
        return center, fs, ns

    try:
        center, fs, ns = _voxelize_ligand(ligPath)
    except Exception:
        # If the normal file is broken, fall back to the alternative format.
        # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still abort.
        center, fs, ns = _voxelize_ligand(altLigPath)

    x, y, z = center
    f, c, n = self.calcProtVoxel(x, y, z, protPath, number, altProtPath)

    feature_protein = f
    feature_protein_shaped = f.reshape(n[0], n[1], n[2], f.shape[1])
    feature_ligand = fs
    feature_ligand_shaped = fs.reshape(ns[0], ns[1], ns[2], fs.shape[1])

    # Stack protein and ligand channels, then move channels to the front.
    datapoint = np.concatenate(
        (feature_protein_shaped, feature_ligand_shaped),
        axis=3).transpose([3, 0, 1, 2])

    return (np.array([datapoint]), np.array(c), np.array(feature_protein),
            np.array(feature_ligand), np.array(feature_protein_shaped),
            np.array(feature_ligand_shaped))
def test_getAtoms(self):
    """Check ``SmallMol.get`` for element, neighbors and bond-type queries.

    Builds a molecule from ``SMILE_SMI`` and compares the element of the
    atom selected by ``"idx 1"``, and the neighbors and bond types of the
    atom selected by ``"element O"``, with the ``PHENOL_*`` reference values.
    """
    smi = SMILE_SMI
    sm = SmallMol(smi)

    element_idx_1 = sm.get("element", "idx 1")[0]
    neighbors_element_O = sm.get("neighbors", "element O")[0]
    btypes_element_O = sm.get("bondtype", "element O", convertType=False)[0]

    # Failure messages list the reference value first, then the value
    # actually obtained, matching their "Expected"/"Now" labels.
    self.assertEqual(
        element_idx_1,
        PHENOL_ELEMENT_IDX_1,
        "Element of the first atom does not correspond. "
        "Expected: {}; Now: {}".format(PHENOL_ELEMENT_IDX_1, element_idx_1),
    )
    self.assertListEqual(
        neighbors_element_O,
        PHENOL_ELEMENT_NEIGHBORS_OX,
        "Neighbors atoms of the oxygen atom do not correspond. "
        "Expected: {}; Now: {}".format(PHENOL_ELEMENT_NEIGHBORS_OX, neighbors_element_O),
    )
    self.assertListEqual(
        btypes_element_O,
        PHENOL_BTYPES_OX,
        "Bondtypes of the oxygen atom do not correspond. "
        "Expected: {}; Now: {}".format(PHENOL_BTYPES_OX, btypes_element_O),
    )
def test_copy(self):
    """Check that ``SmallMol.copy`` yields a molecule with equal coordinates.

    Runs the same comparison twice: once with default constructor options
    and once with ``removeHs=True, fixHs=False`` to ensure no hydrogens are
    added in the copy method.
    """
    constructor_options = (
        {},
        # Ensure no hydrogens are added in the copy method
        {"removeHs": True, "fixHs": False},
    )
    for options in constructor_options:
        original = SmallMol(self.benzamidine_mol2, **options)
        duplicate = original.copy()
        assert np.array_equal(original.get("coords"), duplicate.get("coords"))
def test_getAtoms(self):
    """Check ``SmallMol.get`` for element, neighbors and bond-type queries.

    Builds a molecule from ``SMILE_SMI`` and compares the element of the
    atom selected by ``'idx 1'``, and the neighbors and bond types of the
    atom selected by ``'element O'``, with the ``PHENOL_*`` reference values.
    """
    smi = SMILE_SMI
    sm = SmallMol(smi)

    element_idx_1 = sm.get('element', 'idx 1')[0]
    neighbors_element_O = sm.get('neighbors', 'element O')[0]
    btypes_element_O = sm.get('bondtype', 'element O', convertType=False)[0]

    # Failure messages list the reference value first, then the value
    # actually obtained, matching their 'Expected'/'Now' labels.
    self.assertEqual(element_idx_1, PHENOL_ELEMENT_IDX_1,
                     'Element of the first atom does not correspond. '
                     'Expected: {}; Now: {}'.format(PHENOL_ELEMENT_IDX_1, element_idx_1))
    self.assertListEqual(neighbors_element_O, PHENOL_ELEMENT_NEIGHBORS_OX,
                         'Neighbors atoms of the oxygen atom do not correspond. '
                         'Expected: {}; Now: {}'.format(PHENOL_ELEMENT_NEIGHBORS_OX, neighbors_element_O))
    self.assertListEqual(btypes_element_O, PHENOL_BTYPES_OX,
                         'Bondtypes of the oxygen atom do not correspond. '
                         'Expected: {}; Now: {}'.format(PHENOL_BTYPES_OX, btypes_element_O))
def _load_ligand_channels(path):
    """Load a ligand file and return ``(center, smallChannels, sm)``.

    ``center`` is the per-axis mean of the ligand coordinates.
    """
    sm = SmallMol(path, force_reading=True, fixHs=False)
    coords = sm.get('coords')
    center = tuple(np.mean(coords[:, axis]) for axis in range(3))
    smallChannels, sm = voxeldescriptors.getChannels(sm)
    return center, smallChannels, sm


def _crop_large_protein(prot, center, boxsize, max_atoms=50000, margin=2.5):
    """Filter a protein with more than ``max_atoms`` atoms, in place.

    Keeps only atoms within ``margin * boxsize`` of ``center`` along each
    axis. Smaller proteins are left untouched.
    """
    if prot.numAtoms <= max_atoms:
        return
    x, y, z = center
    # Filters are applied in z, x, y order.
    slabs = (('z', z, boxsize[2] * margin),
             ('x', x, boxsize[0] * margin),
             ('y', y, boxsize[1] * margin))
    for axis, mid, half in slabs:
        prot.filter('{0} < {1} and {0} > {2}'.format(
            axis, format(mid + half), format(mid - half)))


def _prepare_for_atomtyping(prot):
    """Keep the protein selection, assign bonds and prepare for atom typing."""
    prot.filter('protein')
    prot.bonds = prot._getBonds()
    prot = prepareProteinForAtomtyping(prot)
    prot.set(value='Se', field='element', sel='name SE')
    return prot


def _rebuild_with_charmm(prot):
    """Last-resort preparation: proteinPrepare, autoSegment, charmm.build."""
    prot.filter('protein')
    prot.filter('not resname 3EB')
    prot = proteinPrepare(prot)
    prot = autoSegment(prot)
    # Residues are not supported, so mutate them to their standard
    # counterparts. Best effort: a failing mutation is deliberately ignored.
    for resname, replacement in (('TPO', 'THR'), ('MSE', 'MET'), ('SEP', 'SER')):
        try:
            prot.mutateResidue('resname ' + resname, replacement)
        except Exception:
            pass
    return charmm.build(prot, ionize=False)


def calcFeatures(number, ligPath, altLigPath, protPath, altProtPath, boxsize, targetpath):
    """Compute channel features for a ligand and its protein.

    The ligand is read from ``ligPath``, falling back to ``altLigPath`` if
    that raises (an error from the alternative file is propagated). The
    protein is then processed with up to three attempts, stopping at the
    first that succeeds:

    1. ``protPath`` prepared with ``prepareProteinForAtomtyping``;
    2. ``altProtPath`` prepared the same way;
    3. ``protPath`` rebuilt through ``proteinPrepare``, ``autoSegment`` and
       ``charmm.build``.

    Proteins with more than 50000 atoms are first cropped to
    ``2.5 * boxsize`` around the mean ligand coordinate. If every attempt
    fails, a line is appended to ``../../Data/prep_log.txt``.

    Args:
        number: Protein number; only used in the log message.
        ligPath: Primary ligand file.
        altLigPath: Alternative ligand file.
        protPath: Primary protein file.
        altProtPath: Alternative protein file.
        boxsize: Indexable with three entries (x, y, z) used for cropping.
        targetpath: Unused; kept for interface compatibility.

    Returns:
        dict with keys ``'smallChannels'``, ``'sm'``, ``'protChannels'`` and
        ``'prot'``. When all protein attempts fail, ``'protChannels'`` is
        ``None`` and ``'prot'`` is the last molecule that could be loaded
        (possibly only partially processed), or ``None`` if no protein file
        could be read at all.
    """
    features = {}

    try:
        center, smallChannels, sm = _load_ligand_channels(ligPath)
    except Exception:
        center, smallChannels, sm = _load_ligand_channels(altLigPath)
    features['smallChannels'] = smallChannels
    features['sm'] = sm

    # Pre-bind so that a failure while loading never leaves ``prot`` undefined.
    prot = None
    protChannels = None
    attempts = (
        (protPath, _prepare_for_atomtyping),
        (altProtPath, _prepare_for_atomtyping),
        (protPath, _rebuild_with_charmm),
    )
    for path, prepare in attempts:
        try:
            prot = Molecule(path)
            _crop_large_protein(prot, center, boxsize)
            prot = prepare(prot)
            protChannels, prot = voxeldescriptors.getChannels(prot)
            break
        except Exception:
            # Fall through to the next, more permissive attempt.
            continue
    else:
        with open("../../Data/prep_log.txt", "a") as log:
            log.write('Protein ' + protPath + ' leads to errors! Proteinnumber: ' + str(number) + '\n')

    features['protChannels'] = protChannels
    features['prot'] = prot
    return features