def test_combined(self):
    """Check output shapes of the combined feature groups.

    Three configurations are featurized on the same ligand/protein pair:
    'voxel_combined' kept as a grid, 'flat_combined', and 'all_combined'
    with sanitize=False, for which the aromatic features ('pi_stack' and
    'cation_pi') must not be present in the featurizer.
    """
    ecfp_power = 5
    splif_power = 5
    complex_files = (self.ligand_file, self.protein_file)

    # Voxel features: box_width=20.0 with voxel_width=1.0 is expected to
    # give a 20 x 20 x 20 grid.
    featurizer = rgf.RdkitGridFeaturizer(
        voxel_width=1.0,
        box_width=20.0,
        feature_types=['voxel_combined'],
        ecfp_power=ecfp_power,
        splif_power=splif_power,
        flatten=False,
        sanitize=True)
    feature_tensor = featurizer.featurize([complex_files])
    self.assertIsInstance(feature_tensor, np.ndarray)
    # NOTE(review): the trailing 5 presumably counts the remaining
    # single-channel voxel features -- confirm against the featurizer.
    voxel_total_len = (
        2**ecfp_power +
        len(featurizer.cutoffs['splif_contact_bins']) * 2**splif_power +
        len(featurizer.cutoffs['hbond_dist_bins']) + 5)
    self.assertEqual(feature_tensor.shape,
                     (1, 20, 20, 20, voxel_total_len))

    # Flat features: one row per complex.
    featurizer = rgf.RdkitGridFeaturizer(
        voxel_width=1.0,
        feature_types=['flat_combined'],
        ecfp_power=ecfp_power,
        splif_power=splif_power,
        sanitize=True)
    feature_tensor = featurizer.featurize([complex_files])
    self.assertIsInstance(feature_tensor, np.ndarray)
    flat_total_len = (
        3 * 2**ecfp_power +
        len(featurizer.cutoffs['splif_contact_bins']) * 2**splif_power +
        len(featurizer.cutoffs['hbond_dist_bins']))
    self.assertEqual(feature_tensor.shape, (1, flat_total_len))

    # Aromatic features must be ignored if sanitize=False.
    featurizer = rgf.RdkitGridFeaturizer(
        voxel_width=16.0,
        feature_types=['all_combined'],
        ecfp_power=ecfp_power,
        splif_power=splif_power,
        flatten=True,
        sanitize=False)
    # assertNotIn reports the offending member and container on failure.
    self.assertNotIn('pi_stack', featurizer.feature_types)
    self.assertNotIn('cation_pi', featurizer.feature_types)
    feature_tensor = featurizer.featurize([complex_files])
    self.assertIsInstance(feature_tensor, np.ndarray)
    # NOTE(review): the subtracted terms presumably account for the
    # dropped aromatic channels and an ECFP block shared between the
    # voxel and flat groups -- confirm against the featurizer.
    total_len = voxel_total_len + flat_total_len - 3 - 2**ecfp_power
    self.assertEqual(feature_tensor.shape, (1, total_len))
def test_default_featurizer(self):
    """Smoke-test a featurizer constructed with no arguments."""
    default_featurizer = rgf.RdkitGridFeaturizer()
    self.assertIsInstance(default_featurizer, rgf.RdkitGridFeaturizer)

    # Featurizing a single ligand/protein pair must yield a numpy array.
    complexes = [(self.ligand_file, self.protein_file)]
    features = default_featurizer.featurize(complexes)
    self.assertIsInstance(features, np.ndarray)
def test_rotations(self):
    """Featurize with nb_rotations=3 and check the resulting shape."""
    rotating_featurizer = rgf.RdkitGridFeaturizer(
        nb_rotations=3,
        feature_types=['voxel_combined'],
        flatten=False,
        sanitize=True)
    complexes = [(self.ligand_file, self.protein_file)]
    rotated_features = rotating_featurizer.featurize(complexes)

    # NOTE(review): the second axis of length 4 is presumably the
    # unrotated pose plus the 3 requested rotations; 16 and 40 presumably
    # follow from the featurizer defaults -- confirm against featurizer.
    expected_shape = (1, 4, 16, 16, 16, 40)
    self.assertEqual(rotated_features.shape, expected_shape)
def test_force_flatten(self):
    """Check that flat features ('ecfp_hashed') give a flattened output."""
    flat_featurizer = rgf.RdkitGridFeaturizer(
        feature_types=['ecfp_hashed'], flatten=False)
    # False should be ignored with ecfp_hashed.
    # NOTE(review): flatten is overwritten by hand below, so the shape
    # check does not depend on how the constructor treated flatten=False.
    flat_featurizer.flatten = True

    complexes = [(self.ligand_file, self.protein_file)]
    features = flat_featurizer.featurize(complexes)
    self.assertIsInstance(features, np.ndarray)

    expected_width = 2 * 2**flat_featurizer.ecfp_power
    self.assertEqual(features.shape, (1, expected_width))
def test_example_featurizer(self):
    """Check that the configuration used in the examples still works."""
    example_settings = {
        'voxel_width': 16.0,
        'feature_types': ['ecfp', 'splif', 'hbond', 'salt_bridge'],
        'ecfp_power': 9,
        'splif_power': 9,
        'flatten': True,
    }
    example_featurizer = rgf.RdkitGridFeaturizer(**example_settings)

    complexes = [(self.ligand_file, self.protein_file)]
    features = example_featurizer.featurize(complexes)
    self.assertIsInstance(features, np.ndarray)
def test_custom_cutoffs(self):
    """Check that cutoffs passed as keyword arguments are stored as given."""
    requested_cutoffs = {
        'hbond_dist_bins': [(2., 3.), (3., 3.5)],
        'hbond_angle_cutoffs': [5, 90],
        'splif_contact_bins': [(0, 3.5), (3.5, 6.0)],
        'ecfp_cutoff': 5.0,
        'sybyl_cutoff': 3.0,
        'salt_bridges_cutoff': 4.0,
        'pi_stack_dist_cutoff': 5.0,
        'pi_stack_angle_cutoff': 15.0,
        'cation_pi_dist_cutoff': 5.5,
        'cation_pi_angle_cutoff': 20.0,
    }
    featurizer = rgf.RdkitGridFeaturizer(**requested_cutoffs)

    # The featurizer's cutoffs dict must equal exactly what was passed in.
    self.assertEqual(featurizer.cutoffs, requested_cutoffs)
def test_voxelize(self):
    """Voxelize hashed ECFP features of the protein and of the ligand.

    Both tensors must have shape (box_w, box_w, box_w, 2**f_power). The
    protein tensor is expected to hold fewer features than the protein has
    atoms, while the ligand tensor must hold exactly one per ligand atom.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)

    # Shift both coordinate sets by the centroid computed from the ligand.
    ligand_centroid = rgf.compute_centroid(ligand_coords)
    protein_coords = rgf.subtract_centroid(protein_coords, ligand_centroid)
    ligand_coords = rgf.subtract_centroid(ligand_coords, ligand_centroid)

    protein_ecfp, ligand_ecfp = rgf.featurize_binding_pocket_ecfp(
        protein_coords, protein_mol, ligand_coords, ligand_mol)

    box_w = 20
    f_power = 5
    expected_shape = (box_w, box_w, box_w, 2**f_power)
    grid_featurizer = rgf.RdkitGridFeaturizer(
        box_width=box_w,
        ecfp_power=f_power,
        feature_types=['all_combined'],
        flatten=True,
        sanitize=True)

    protein_grid = grid_featurizer._voxelize(
        rgf.convert_atom_to_voxel,
        rgf.hash_ecfp,
        protein_coords,
        feature_dict=protein_ecfp,
        channel_power=f_power)
    self.assertEqual(protein_grid.shape, expected_shape)
    protein_feature_count = protein_grid.sum()
    # protein is too big for the box, some features should be missing
    self.assertGreater(protein_feature_count, 0)
    self.assertLess(protein_feature_count, protein_mol.GetNumAtoms())

    ligand_grid = grid_featurizer._voxelize(
        rgf.convert_atom_to_voxel,
        rgf.hash_ecfp,
        ligand_coords,
        feature_dict=ligand_ecfp,
        channel_power=f_power)
    self.assertEqual(ligand_grid.shape, expected_shape)
    ligand_feature_count = ligand_grid.sum()
    # whole ligand should fit in the box
    self.assertEqual(ligand_feature_count, ligand_mol.GetNumAtoms())