Exemplo n.º 1
0
  def test_combined(self):
    """Check output shapes for the combined feature sets.

    Builds three featurizers in turn ('voxel_combined', 'flat_combined' and
    'all_combined' with sanitize=False), runs each one on the ligand/protein
    pair stored on the test case, and compares the shape of the returned
    array with a length derived from that featurizer's cutoffs.
    """
    ecfp_power = 5
    splif_power = 5
    # A tuple is immutable, so it can safely be reused for every call below.
    complex_pair = (self.ligand_file, self.protein_file)

    # test voxel features
    featurizer = rgf.RdkitGridFeaturizer(
        voxel_width=1.0,
        box_width=20.0,
        feature_types=['voxel_combined'],
        ecfp_power=ecfp_power,
        splif_power=splif_power,
        flatten=False,
        sanitize=True)
    feature_tensor = featurizer.featurize([complex_pair])
    self.assertIsInstance(feature_tensor, np.ndarray)
    splif_len = len(featurizer.cutoffs['splif_contact_bins']) * 2**splif_power
    hbond_len = len(featurizer.cutoffs['hbond_dist_bins'])
    # NOTE(review): the constant 5 presumably counts the remaining
    # single-channel voxel features -- confirm against the featurizer.
    voxel_total_len = 2**ecfp_power + splif_len + hbond_len + 5
    # The test expects a 20x20x20 grid for box_width=20.0, voxel_width=1.0.
    self.assertEqual(feature_tensor.shape, (1, 20, 20, 20, voxel_total_len))

    # test flat features
    featurizer = rgf.RdkitGridFeaturizer(
        voxel_width=1.0,
        feature_types=['flat_combined'],
        ecfp_power=ecfp_power,
        splif_power=splif_power,
        sanitize=True)
    feature_tensor = featurizer.featurize([complex_pair])
    self.assertIsInstance(feature_tensor, np.ndarray)
    # Cutoffs are re-read from the featurizer that was just created.
    splif_len = len(featurizer.cutoffs['splif_contact_bins']) * 2**splif_power
    hbond_len = len(featurizer.cutoffs['hbond_dist_bins'])
    flat_total_len = 3 * 2**ecfp_power + splif_len + hbond_len
    self.assertEqual(feature_tensor.shape, (1, flat_total_len))

    # check if aromatic features are ignored if sanitize=False
    featurizer = rgf.RdkitGridFeaturizer(
        voxel_width=16.0,
        feature_types=['all_combined'],
        ecfp_power=ecfp_power,
        splif_power=splif_power,
        flatten=True,
        sanitize=False)

    # assertNotIn reports the container contents when the check fails,
    # unlike assertTrue(x not in y), which only reports "False is not true".
    self.assertNotIn('pi_stack', featurizer.feature_types)
    self.assertNotIn('cation_pi', featurizer.feature_types)
    feature_tensor = featurizer.featurize([complex_pair])
    self.assertIsInstance(feature_tensor, np.ndarray)
    total_len = voxel_total_len + flat_total_len - 3 - 2**ecfp_power
    self.assertEqual(feature_tensor.shape, (1, total_len))
Exemplo n.º 2
0
 def test_default_featurizer(self):
   """Smoke test: a featurizer built without arguments can featurize."""
   default_featurizer = rgf.RdkitGridFeaturizer()
   self.assertIsInstance(default_featurizer, rgf.RdkitGridFeaturizer)

   # One (ligand, protein) pair taken from the test case attributes.
   complexes = [(self.ligand_file, self.protein_file)]
   features = default_featurizer.featurize(complexes)
   self.assertIsInstance(features, np.ndarray)
Exemplo n.º 3
0
 def test_rotations(self):
   """Check the shape of the output when nb_rotations=3 is requested."""
   n_rotations = 3
   rotating_featurizer = rgf.RdkitGridFeaturizer(
       nb_rotations=n_rotations,
       feature_types=['voxel_combined'],
       flatten=False,
       sanitize=True)

   complexes = [(self.ligand_file, self.protein_file)]
   feature_tensors = rotating_featurizer.featurize(complexes)

   # Second axis is expected to have n_rotations + 1 entries -- presumably
   # the unrotated complex plus one entry per rotation; confirm.
   expected_shape = (1, n_rotations + 1, 16, 16, 16, 40)
   self.assertEqual(feature_tensors.shape, expected_shape)
Exemplo n.º 4
0
 def test_force_flatten(self):
   """Check that 'ecfp_hashed' features come back as a flat array.

   The featurizer is created with flatten=False, its flatten attribute is
   then set to True, and the result is expected to have shape
   (1, 2 * 2**ecfp_power).
   """
   featurizer = rgf.RdkitGridFeaturizer(
       flatten=False, feature_types=['ecfp_hashed'])
   # False should be ignored with ecfp_hashed
   # NOTE(review): because flatten is overwritten here, this test passes
   # even if the constructor does not force flattening on its own --
   # confirm whether the override is intentional.
   featurizer.flatten = True

   complexes = [(self.ligand_file, self.protein_file)]
   feature_tensor = featurizer.featurize(complexes)
   self.assertIsInstance(feature_tensor, np.ndarray)

   expected_len = 2 * 2**featurizer.ecfp_power
   self.assertEqual(feature_tensor.shape, (1, expected_len))
Exemplo n.º 5
0
 def test_example_featurizer(self):
   """Smoke test for the parameter set used in the examples."""
   example_kwargs = dict(
       voxel_width=16.0,
       feature_types=['ecfp', 'splif', 'hbond', 'salt_bridge'],
       ecfp_power=9,
       splif_power=9,
       flatten=True)
   example_featurizer = rgf.RdkitGridFeaturizer(**example_kwargs)

   complexes = [(self.ligand_file, self.protein_file)]
   features = example_featurizer.featurize(complexes)
   self.assertIsInstance(features, np.ndarray)
Exemplo n.º 6
0
 def test_custom_cutoffs(self):
   """Check that cutoffs passed as keyword arguments end up in `cutoffs`."""
   requested = dict(
       hbond_dist_bins=[(2., 3.), (3., 3.5)],
       hbond_angle_cutoffs=[5, 90],
       splif_contact_bins=[(0, 3.5), (3.5, 6.0)],
       ecfp_cutoff=5.0,
       sybyl_cutoff=3.0,
       salt_bridges_cutoff=4.0,
       pi_stack_dist_cutoff=5.0,
       pi_stack_angle_cutoff=15.0,
       cation_pi_dist_cutoff=5.5,
       cation_pi_angle_cutoff=20.0,
   )
   # Every cutoff is forwarded to the constructor as a keyword argument.
   configured = rgf.RdkitGridFeaturizer(**requested)
   self.assertEqual(configured.cutoffs, requested)
Exemplo n.º 7
0
  def test_voxelize(self):
    """Voxelize hashed ECFP features of the protein and the ligand.

    Both tensors must have shape (box, box, box, 2**power). The protein
    tensor is expected to hold fewer features than the protein has atoms,
    while the ligand tensor is expected to hold exactly one per atom.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)

    # Subtract the ligand's centroid from both coordinate sets.
    ligand_center = rgf.compute_centroid(ligand_coords)
    protein_coords = rgf.subtract_centroid(protein_coords, ligand_center)
    ligand_coords = rgf.subtract_centroid(ligand_coords, ligand_center)

    protein_ecfp, ligand_ecfp = rgf.featurize_binding_pocket_ecfp(
        protein_coords, protein_mol, ligand_coords, ligand_mol)

    grid_size = 20
    channel_power = 5
    expected_shape = (grid_size, grid_size, grid_size, 2**channel_power)

    grid_featurizer = rgf.RdkitGridFeaturizer(
        box_width=grid_size,
        ecfp_power=channel_power,
        feature_types=['all_combined'],
        flatten=True,
        sanitize=True)

    def voxelize(coords, ecfp_dict):
      # Same _voxelize call for both molecules; only the inputs differ.
      return grid_featurizer._voxelize(
          rgf.convert_atom_to_voxel,
          rgf.hash_ecfp,
          coords,
          feature_dict=ecfp_dict,
          channel_power=channel_power)

    protein_tensor = voxelize(protein_coords, protein_ecfp)
    self.assertEqual(protein_tensor.shape, expected_shape)
    protein_feature_count = protein_tensor.sum()
    # The protein does not fit in the box, so part of it must be missing.
    self.assertGreater(protein_feature_count, 0)
    self.assertLess(protein_feature_count, protein_mol.GetNumAtoms())

    ligand_tensor = voxelize(ligand_coords, ligand_ecfp)
    self.assertEqual(ligand_tensor.shape, expected_shape)
    ligand_feature_count = ligand_tensor.sum()
    # The entire ligand is expected to lie inside the box.
    self.assertEqual(ligand_feature_count, ligand_mol.GetNumAtoms())