def test_compute_splif_features_in_range(self):
    """Check the structure of compute_splif_features_in_range results.

    For the bins (0, 2) and (2, 3) the result must be a dict. Each key must
    unpack into a (protein index, ligand index) pair of integers lying within
    the atom counts of the loaded molecules. Each element of a value must be
    a string with exactly one comma whose leading part parses as a
    non-negative integer.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
    n_protein_atoms = protein_mol.GetNumAtoms()
    n_ligand_atoms = ligand_mol.GetNumAtoms()
    pairwise = rgf.compute_pairwise_distances(
        protein_xyz=protein_coords, ligand_xyz=ligand_coords)

    index_types = (int, np.int64)
    for contact_bin in ((0, 2), (2, 3)):
      features = rgf.compute_splif_features_in_range(
          protein_mol, ligand_mol, pairwise, contact_bin)
      self.assertIsInstance(features, dict)

      for atom_pair, fragments in features.items():
        protein_atom, ligand_atom = atom_pair

        # Type checks come first, then the bounds of each index.
        self.assertIsInstance(protein_atom, index_types)
        self.assertIsInstance(ligand_atom, index_types)
        self.assertGreaterEqual(protein_atom, 0)
        self.assertLess(protein_atom, n_protein_atoms)
        self.assertGreaterEqual(ligand_atom, 0)
        self.assertLess(ligand_atom, n_ligand_atoms)

        for entry in fragments:
          # Unpacking into two names requires exactly one comma.
          leading, _ = entry.split(',')
          self.assertGreaterEqual(int(leading), 0)
Exemplo n.º 2
0
    def test_featurize_splif(self):
        """Compare featurize_splif with per-bin compute_splif_features_in_range.

        The value returned for the contact bins [(1, 2), (2, 3)] must be a
        list equal to the list of results obtained by calling
        compute_splif_features_in_range once per bin with the same molecules,
        distances and ecfp_degree.
        """
        protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
        ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
        pairwise = rgf.compute_pairwise_distances(protein_xyz=protein_coords,
                                                  ligand_xyz=ligand_coords)

        contact_bins = [(1, 2), (2, 3)]

        actual = rgf.featurize_splif(
            protein_coords,
            protein_mol,
            ligand_coords,
            ligand_mol,
            contact_bins=contact_bins,
            pairwise_distances=pairwise,
            ecfp_degree=2,
        )

        # Reference value: one entry per bin, in the same order as the bins.
        reference = []
        for single_bin in contact_bins:
            reference.append(
                rgf.compute_splif_features_in_range(protein_mol,
                                                    ligand_mol,
                                                    pairwise,
                                                    single_bin,
                                                    ecfp_degree=2))

        self.assertIsInstance(actual, list)
        self.assertEqual(actual, reference)
Exemplo n.º 3
0
    def test_compute_splif_features_in_range(self):
        """Validate keys and values of the per-bin splif feature dicts.

        Runs compute_splif_features_in_range for the bins (0, 2) and (2, 3)
        and asserts that the result is a dict, that every key is a pair of
        integer indices bounded by the protein and ligand atom counts, and
        that every value element splits on ',' into exactly two parts, the
        first of which is a non-negative integer.
        """
        protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
        ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
        protein_limit = protein_mol.GetNumAtoms()
        ligand_limit = ligand_mol.GetNumAtoms()
        pairwise = rgf.compute_pairwise_distances(protein_xyz=protein_coords,
                                                  ligand_xyz=ligand_coords)

        for contact_bin in ((0, 2), (2, 3)):
            features = rgf.compute_splif_features_in_range(
                protein_mol, ligand_mol, pairwise, contact_bin)
            self.assertIsInstance(features, dict)

            for (protein_atom, ligand_atom), fragments in features.items():
                self.assertIsInstance(protein_atom, (int, np.int64))
                self.assertIsInstance(ligand_atom, (int, np.int64))

                # Bounds: protein index first, then ligand index.
                bounded = ((protein_atom, protein_limit),
                           (ligand_atom, ligand_limit))
                for atom_index, upper in bounded:
                    self.assertGreaterEqual(atom_index, 0)
                    self.assertLess(atom_index, upper)

                for entry in fragments:
                    number_part, _ = entry.split(',')
                    number_part = int(number_part)
                    self.assertGreaterEqual(number_part, 0)
  def test_featurize_binding_pocket_ecfp(self):
    """Exercise featurize_binding_pocket_ecfp with different keyword options.

    Asserts that both returned values are dicts and that passing precomputed
    pairwise distances gives the same result as the default call. With
    cutoff=2.0 the protein dict must be smaller than the default one and the
    ligand dict no larger; with cutoff=6.0 the protein dict must be larger
    and the ligand dict no smaller. With ecfp_degree=3 both dicts must differ
    from the default ones.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
    pairwise = rgf.compute_pairwise_distances(
        protein_xyz=protein_coords, ligand_xyz=ligand_coords)

    def pocket_ecfp(**options):
      # Every call shares the same molecules; only keyword options vary.
      return rgf.featurize_binding_pocket_ecfp(
          protein_coords, protein_mol, ligand_coords, ligand_mol, **options)

    # Default call versus a call with precomputed distances.
    base_protein, base_ligand = pocket_ecfp()
    dist_protein, dist_ligand = pocket_ecfp(pairwise_distances=pairwise)

    # Make sure two dicts came back before comparing them.
    self.assertIsInstance(base_protein, dict)
    self.assertIsInstance(base_ligand, dict)

    self.assertEqual(base_protein, dist_protein)
    self.assertEqual(base_ligand, dist_ligand)

    # A smaller cutoff should give fewer features, a larger one more.
    near_protein, near_ligand = pocket_ecfp(cutoff=2.0)
    far_protein, far_ligand = pocket_ecfp(cutoff=6.0)

    self.assertLess(len(near_protein), len(base_protein))
    # ligands are typically small so all atoms might be present
    self.assertLessEqual(len(near_ligand), len(base_ligand))
    self.assertGreater(len(far_protein), len(base_protein))
    self.assertGreaterEqual(len(far_ligand), len(base_ligand))

    # A different ecfp_degree should change both dicts.
    deg3_protein, deg3_ligand = pocket_ecfp(ecfp_degree=3)
    self.assertNotEqual(deg3_protein, base_protein)
    self.assertNotEqual(deg3_ligand, base_ligand)
Exemplo n.º 5
0
    def test_featurize_binding_pocket_ecfp(self):
        """Check featurize_binding_pocket_ecfp under several keyword settings.

        Verifies that the default call returns two dicts, that supplying
        precomputed pairwise distances does not change them, that the dict
        sizes move in the expected direction for cutoff=2.0 and cutoff=6.0,
        and that ecfp_degree=3 produces dicts different from the default.
        """
        protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
        ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
        pairwise = rgf.compute_pairwise_distances(protein_xyz=protein_coords,
                                                  ligand_xyz=ligand_coords)

        # Positional arguments shared by every featurizer call below.
        complex_args = (protein_coords, protein_mol, ligand_coords,
                        ligand_mol)

        # Results must match whether or not distances are precomputed.
        base_protein, base_ligand = rgf.featurize_binding_pocket_ecfp(
            *complex_args)
        dist_protein, dist_ligand = rgf.featurize_binding_pocket_ecfp(
            *complex_args, pairwise_distances=pairwise)

        # Confirm the return types before comparing contents.
        self.assertIsInstance(base_protein, dict)
        self.assertIsInstance(base_ligand, dict)

        self.assertEqual(base_protein, dist_protein)
        self.assertEqual(base_ligand, dist_ligand)

        # Tighter cutoff: fewer features. Looser cutoff: more features.
        near_protein, near_ligand = rgf.featurize_binding_pocket_ecfp(
            *complex_args, cutoff=2.0)
        far_protein, far_ligand = rgf.featurize_binding_pocket_ecfp(
            *complex_args, cutoff=6.0)

        default_protein_size = len(base_protein)
        default_ligand_size = len(base_ligand)
        self.assertLess(len(near_protein), default_protein_size)
        # ligands are typically small so all atoms might be present
        self.assertLessEqual(len(near_ligand), default_ligand_size)
        self.assertGreater(len(far_protein), default_protein_size)
        self.assertGreaterEqual(len(far_ligand), default_ligand_size)

        # Changing ecfp_degree must alter both dicts.
        deg3_protein, deg3_ligand = rgf.featurize_binding_pocket_ecfp(
            *complex_args, ecfp_degree=3)
        self.assertNotEqual(deg3_protein, base_protein)
        self.assertNotEqual(deg3_ligand, base_ligand)
Exemplo n.º 6
0
 def test_compute_charge_dictionary(self):
     """Check compute_charge_dictionary for the ligand and protein files.

     After ComputeGasteigerCharges is applied to the loaded molecule, the
     charge dictionary must have one entry per atom, keyed by every index in
     range(num_atoms), with each value a float or an int.
     """
     for path in (self.ligand_file, self.protein_file):
         _, molecule = rgf.load_molecule(path)
         ComputeGasteigerCharges(molecule)
         charges = rgf.compute_charge_dictionary(molecule)

         atom_count = molecule.GetNumAtoms()
         self.assertEqual(len(charges), atom_count)
         for atom_index in range(atom_count):
             self.assertIn(atom_index, charges)
             self.assertIsInstance(charges[atom_index], (float, int))
Exemplo n.º 7
0
 def test_compute_charge_dictionary(self):
   """Verify the size, keys and value types of the charge dictionary.

   Runs for both the ligand file and the protein file. ComputeGasteigerCharges
   is called on each molecule before compute_charge_dictionary.
   """
   numeric_types = (float, int)
   for source_file in (self.ligand_file, self.protein_file):
     _, molecule = rgf.load_molecule(source_file)
     ComputeGasteigerCharges(molecule)
     per_atom_charge = rgf.compute_charge_dictionary(molecule)
     # One entry is expected for every atom index.
     self.assertEqual(len(per_atom_charge), molecule.GetNumAtoms())
     for atom_index in range(molecule.GetNumAtoms()):
       self.assertIn(atom_index, per_atom_charge)
       self.assertIsInstance(per_atom_charge[atom_index], numeric_types)
  def test_voxelize(self):
    """Check shape and totals of tensors produced by _voxelize.

    Protein and ligand coordinates are shifted by the ligand centroid before
    being voxelized with box_width=20 and a channel power of 5. Both tensors
    must have shape (20, 20, 20, 2**5). The protein tensor's sum must be
    positive but below the protein atom count, and the ligand tensor's sum
    must equal the ligand atom count.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)

    # Shift both coordinate sets by the ligand centroid.
    ligand_center = rgf.compute_centroid(ligand_coords)
    protein_coords = rgf.subtract_centroid(protein_coords, ligand_center)
    ligand_coords = rgf.subtract_centroid(ligand_coords, ligand_center)

    protein_ecfp, ligand_ecfp = rgf.featurize_binding_pocket_ecfp(
        protein_coords, protein_mol, ligand_coords, ligand_mol)

    box_width = 20
    channel_power = 5

    featurizer = rgf.RdkitGridFeaturizer(
        box_width=box_width,
        ecfp_power=channel_power,
        feature_types=['all_combined'],
        flatten=True,
        sanitize=True)

    def voxelize(coords, ecfp_dict):
      # Same voxel and hash functions for both molecules.
      return featurizer._voxelize(
          rgf.convert_atom_to_voxel,
          rgf.hash_ecfp,
          coords,
          feature_dict=ecfp_dict,
          channel_power=channel_power)

    expected_shape = (box_width, box_width, box_width, 2**channel_power)

    protein_grid = voxelize(protein_coords, protein_ecfp)
    self.assertEqual(protein_grid.shape, expected_shape)
    protein_total = protein_grid.sum()
    # protein is too big for the box, some features should be missing
    self.assertGreater(protein_total, 0)
    self.assertLess(protein_total, protein_mol.GetNumAtoms())

    ligand_grid = voxelize(ligand_coords, ligand_ecfp)
    self.assertEqual(ligand_grid.shape, expected_shape)
    ligand_total = ligand_grid.sum()
    # whole ligand should fit in the box
    self.assertEqual(ligand_total, ligand_mol.GetNumAtoms())
Exemplo n.º 9
0
 def test_load_molecule(self):
   """Check load_molecule output for every hydrogen/charge flag combination.

   For each (add_hydrogens, calc_charges) pair the ligand file must load as
   an ndarray of shape (num_atoms, 3) together with a Mol instance.
   """
   # adding hydrogens and charges is tested in dc.utils
   switches = (True, False)
   combos = [(hyd, chg) for hyd in switches for chg in switches]
   for hydrogens_on, charges_on in combos:
     coords, molecule = rgf.load_molecule(self.ligand_file, hydrogens_on,
                                          charges_on)
     atom_count = molecule.GetNumAtoms()
     self.assertIsInstance(coords, np.ndarray)
     self.assertIsInstance(molecule, Mol)
     self.assertEqual(coords.shape, (atom_count, 3))
Exemplo n.º 10
0
  def setUp(self):
    """Build the molecules used by the tests.

    Sets self.cycle4 from the SMILES 'C1CCC1' with 2D coordinates computed,
    and loads self.prot and self.lig from files located next to this test
    module, with hydrogens and charge calculation disabled and sanitization
    enabled.
    """
    here = os.path.dirname(os.path.realpath(__file__))

    # simple flat ring
    self.cycle4 = MolFromSmiles('C1CCC1')
    self.cycle4.Compute2DCoords()

    # load and sanitize two real molecules
    load_options = dict(add_hydrogens=False, calc_charges=False, sanitize=True)
    protein_path = os.path.join(here, '3ws9_protein_fixer_rdkit.pdb')
    ligand_path = os.path.join(here, '3ws9_ligand.sdf')

    _, self.prot = rgf.load_molecule(protein_path, **load_options)
    _, self.lig = rgf.load_molecule(ligand_path, **load_options)
Exemplo n.º 11
0
    def setUp(self):
        """Prepare a four-membered ring and two molecules loaded from disk.

        self.cycle4 is built from the SMILES 'C1CCC1' and given 2D
        coordinates. self.prot and self.lig are loaded from files in this
        module's directory with add_hydrogens=False, calc_charges=False and
        sanitize=True.
        """
        module_dir = os.path.dirname(os.path.realpath(__file__))

        # simple flat ring
        ring = MolFromSmiles('C1CCC1')
        self.cycle4 = ring
        self.cycle4.Compute2DCoords()

        # load and sanitize two real molecules
        protein_path = os.path.join(module_dir,
                                    '3ws9_protein_fixer_rdkit.pdb')
        _, self.prot = rgf.load_molecule(protein_path,
                                         add_hydrogens=False,
                                         calc_charges=False,
                                         sanitize=True)

        ligand_path = os.path.join(module_dir, '3ws9_ligand.sdf')
        _, self.lig = rgf.load_molecule(ligand_path,
                                        add_hydrogens=False,
                                        calc_charges=False,
                                        sanitize=True)
Exemplo n.º 12
0
  def test_voxelize(self):
    """Voxelize protein and ligand ECFP features and check the tensors.

    Coordinates are first shifted by the ligand centroid. Each tensor must
    have three spatial axes of length 20 and a last axis of length 2**5.
    The protein tensor must sum to a positive value smaller than the protein
    atom count; the ligand tensor must sum to exactly the ligand atom count.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)

    origin = rgf.compute_centroid(ligand_coords)
    protein_coords = rgf.subtract_centroid(protein_coords, origin)
    ligand_coords = rgf.subtract_centroid(ligand_coords, origin)

    protein_features, ligand_features = rgf.featurize_binding_pocket_ecfp(
        protein_coords, protein_mol, ligand_coords, ligand_mol)

    side = 20
    power = 5
    grid_shape = tuple([side] * 3 + [2**power])

    featurizer = rgf.RdkitGridFeaturizer(
        box_width=side,
        ecfp_power=power,
        feature_types=['all_combined'],
        flatten=True,
        sanitize=True)

    # Protein: only part of it is expected to land inside the box.
    protein_grid = featurizer._voxelize(
        rgf.convert_atom_to_voxel,
        rgf.hash_ecfp,
        protein_coords,
        feature_dict=protein_features,
        channel_power=power)
    self.assertEqual(protein_grid.shape, grid_shape)
    protein_sum = protein_grid.sum()
    # protein is too big for the box, some features should be missing
    self.assertGreater(protein_sum, 0)
    self.assertLess(protein_sum, protein_mol.GetNumAtoms())

    # Ligand: every atom is expected to land inside the box.
    ligand_grid = featurizer._voxelize(
        rgf.convert_atom_to_voxel,
        rgf.hash_ecfp,
        ligand_coords,
        feature_dict=ligand_features,
        channel_power=power)
    self.assertEqual(ligand_grid.shape, grid_shape)
    ligand_sum = ligand_grid.sum()
    # whole ligand should fit in the box
    self.assertEqual(ligand_sum, ligand_mol.GetNumAtoms())
Exemplo n.º 13
0
 def test_load_molecule(self):
   """Load the ligand with each flag combination and check the outputs.

   The coordinates must be an ndarray of shape (num_atoms, 3) and the
   molecule a Mol instance, whatever add_hydrogens and calc_charges are.
   """
   # adding hydrogens and charges is tested in dc.utils
   flag_values = (True, False)
   for with_hydrogens in flag_values:
     for with_charges in flag_values:
       loaded = rgf.load_molecule(self.ligand_file, with_hydrogens,
                                  with_charges)
       coords, molecule = loaded
       expected_shape = (molecule.GetNumAtoms(), 3)
       self.assertIsInstance(coords, np.ndarray)
       self.assertIsInstance(molecule, Mol)
       self.assertEqual(coords.shape, expected_shape)
Exemplo n.º 14
0
  def test_featurize_splif(self):
    """Check that featurize_splif matches the per-bin helper.

    featurize_splif is called with contact bins [(1, 2), (2, 3)], precomputed
    pairwise distances and ecfp_degree=2. The result must be a list equal to
    the results of compute_splif_features_in_range collected bin by bin.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
    pairwise = rgf.compute_pairwise_distances(
        protein_xyz=protein_coords, ligand_xyz=ligand_coords)

    contact_bins = [(1, 2), (2, 3)]

    actual = rgf.featurize_splif(
        protein_coords,
        protein_mol,
        ligand_coords,
        ligand_mol,
        contact_bins=contact_bins,
        pairwise_distances=pairwise,
        ecfp_degree=2)

    # Build the reference one bin at a time, preserving bin order.
    reference = []
    for single_bin in contact_bins:
      per_bin = rgf.compute_splif_features_in_range(
          protein_mol, ligand_mol, pairwise, single_bin, ecfp_degree=2)
      reference.append(per_bin)

    self.assertIsInstance(actual, list)
    self.assertEqual(actual, reference)
Exemplo n.º 15
0
  def test_compute_all_ecfp(self):
    """Check compute_all_ecfp for all atoms and for a random atom subset.

    For degrees 1 to 3, the call without indices must return a dict whose
    keys are exactly 0..num_atoms-1 in that order. The call with a randomly
    drawn list of distinct indices must return a dict keyed by exactly those
    indices.
    """
    _, ligand = rgf.load_molecule(self.ligand_file)
    atom_count = ligand.GetNumAtoms()
    for ecfp_degree in range(1, 4):
      # TODO test if dict contains smiles

      full = rgf.compute_all_ecfp(ligand, degree=ecfp_degree)
      self.assertIsInstance(full, dict)
      self.assertEqual(len(full), atom_count)
      self.assertEqual(list(full.keys()), list(range(atom_count)))

      # Draw a subset size in [1, atom_count), then that many distinct atoms.
      subset_size = np.random.choice(range(1, atom_count))
      subset = list(np.random.choice(atom_count, subset_size, replace=False))

      partial = rgf.compute_all_ecfp(
          ligand, indices=subset, degree=ecfp_degree)
      self.assertIsInstance(partial, dict)
      self.assertEqual(len(partial), subset_size)
      self.assertEqual(sorted(partial.keys()), sorted(subset))
Exemplo n.º 16
0
  def test_compute_all_ecfp(self):
    """Verify the keys of compute_all_ecfp results with and without indices.

    Repeated for degree 1, 2 and 3 on the ligand molecule. Without indices
    the dict must have one key per atom, listed as 0..num_atoms-1. With a
    random selection of distinct indices, the dict keys must match that
    selection.
    """
    _, ligand = rgf.load_molecule(self.ligand_file)
    total_atoms = ligand.GetNumAtoms()
    for radius in range(1, 4):
      # TODO test if dict contains smiles

      everything = rgf.compute_all_ecfp(ligand, degree=radius)
      self.assertIsInstance(everything, dict)
      self.assertEqual(len(everything), total_atoms)
      all_keys = list(everything.keys())
      self.assertEqual(all_keys, list(range(total_atoms)))

      how_many = np.random.choice(range(1, total_atoms))
      chosen = list(np.random.choice(total_atoms, how_many, replace=False))

      selected = rgf.compute_all_ecfp(ligand, indices=chosen, degree=radius)
      self.assertIsInstance(selected, dict)
      self.assertEqual(len(selected), how_many)
      selected_keys = sorted(selected.keys())
      self.assertEqual(selected_keys, sorted(chosen))