Пример #1
0
    def test_pdbqt_to_pdb(self):
        """Round-trip check: PDBQT converted back to PDB keeps its atoms.

        The protein is written out as PDB and PDBQT, the PDBQT file is
        converted back to a PDB block, and the parsed block is compared
        (atom count and atomic numbers) with the molecule obtained by
        loading the PDBQT file directly.
        """
        from rdkit import Chem

        _, mol = rdkit_util.load_molecule(self.protein_file,
                                          calc_charges=False,
                                          add_hydrogens=False)
        with tempfile.TemporaryDirectory() as workdir:
            pdb_path = os.path.join(workdir, "mol.pdb")
            pdbqt_path = os.path.join(workdir, "mol.pdbqt")

            rdkit_util.write_molecule(mol, pdb_path, is_protein=True)
            rdkit_util.write_molecule(mol, pdbqt_path, is_protein=True)

            converted_block = pdbqt_utils.pdbqt_to_pdb(pdbqt_path)
            pdb_mol = Chem.MolFromPDBBlock(converted_block,
                                           sanitize=False,
                                           removeHs=False)

            _, pdbqt_mol = rdkit_util.load_molecule(pdbqt_path,
                                                    add_hydrogens=False,
                                                    calc_charges=False)

        # Both molecules are fully in memory, so the comparison can happen
        # after the temporary directory has been removed.
        assert pdb_mol.GetNumAtoms() == pdbqt_mol.GetNumAtoms()
        for from_pdb, from_pdbqt in zip(pdb_mol.GetAtoms(),
                                        pdbqt_mol.GetAtoms()):
            assert from_pdb.GetAtomicNum() == from_pdbqt.GetAtomicNum()
Пример #2
0
def extract_active_site(protein_file, ligand_file, cutoff=4):
  """Extracts a box for the active site.

  Protein atoms lying strictly closer than `cutoff` to at least one ligand
  atom form the pocket; the returned box is the smallest integer-aligned,
  axis-parallel box that contains all of them.

  Parameters
  ----------
  protein_file: str
    Path of the protein file (loaded without adding hydrogens).
  ligand_file: str
    Path of the ligand file (loaded with hydrogens and charges).
  cutoff: float, optional
    Distance threshold, in the units of the loaded coordinates.

  Returns
  -------
  tuple
    `(((x_min, x_max), (y_min, y_max), (z_min, z_max)), pocket_atoms,
    pocket_coords)` where `pocket_atoms` is a list of protein atom indices
    in ascending order and `pocket_coords` is an array of shape
    `(n_pocket_atoms, 3)` with the matching coordinates.

  Raises
  ------
  ValueError
    If no protein atom lies within `cutoff` of the ligand.
  """
  protein_coords = np.asarray(
      rdkit_util.load_molecule(protein_file, add_hydrogens=False)[0],
      dtype=float).reshape(-1, 3)
  ligand_coords = np.asarray(
      rdkit_util.load_molecule(
          ligand_file, add_hydrogens=True, calc_charges=True)[0],
      dtype=float).reshape(-1, 3)

  # One vectorized sweep over all protein atoms per ligand atom keeps the
  # memory footprint at O(n_protein) while avoiding the Python-level
  # double loop.
  in_pocket = np.zeros(len(protein_coords), dtype=bool)
  for lig_atom in ligand_coords:
    in_pocket |= np.linalg.norm(protein_coords - lig_atom, axis=1) < cutoff

  # Ascending indices give a deterministic ordering of the result.
  pocket_atoms = np.flatnonzero(in_pocket).tolist()
  if not pocket_atoms:
    raise ValueError(
        "No protein atoms found within cutoff %s of the ligand" % (cutoff,))
  pocket_coords = protein_coords[pocket_atoms]

  # Round outwards so that the integer box still encloses every atom.
  lower = np.floor(pocket_coords.min(axis=0))
  upper = np.ceil(pocket_coords.max(axis=0))
  box = tuple((int(lo), int(hi)) for lo, hi in zip(lower, upper))
  return (box, pocket_atoms, pocket_coords)
Пример #3
0
def extract_active_site(protein_file, ligand_file, cutoff=4):
  """Extracts a box for the active site.

  Protein atoms lying strictly closer than `cutoff` to at least one ligand
  atom form the pocket; the returned box is the smallest integer-aligned,
  axis-parallel box that contains all of them.

  Parameters
  ----------
  protein_file: str
    Path of the protein file (loaded without adding hydrogens).
  ligand_file: str
    Path of the ligand file (loaded with hydrogens and charges).
  cutoff: float, optional
    Distance threshold, in the units of the loaded coordinates.

  Returns
  -------
  tuple
    `(((x_min, x_max), (y_min, y_max), (z_min, z_max)), pocket_atoms,
    pocket_coords)` where `pocket_atoms` is a list of protein atom indices
    in ascending order and `pocket_coords` is an array of shape
    `(n_pocket_atoms, 3)` with the matching coordinates.

  Raises
  ------
  ValueError
    If no protein atom lies within `cutoff` of the ligand.
  """
  protein_coords = np.asarray(
      rdkit_util.load_molecule(protein_file, add_hydrogens=False)[0],
      dtype=float).reshape(-1, 3)
  ligand_coords = np.asarray(
      rdkit_util.load_molecule(
          ligand_file, add_hydrogens=True, calc_charges=True)[0],
      dtype=float).reshape(-1, 3)

  # One vectorized sweep over all protein atoms per ligand atom keeps the
  # memory footprint at O(n_protein) while avoiding the Python-level
  # double loop.
  in_pocket = np.zeros(len(protein_coords), dtype=bool)
  for lig_atom in ligand_coords:
    in_pocket |= np.linalg.norm(protein_coords - lig_atom, axis=1) < cutoff

  # Ascending indices give a deterministic ordering of the result.
  pocket_atoms = np.flatnonzero(in_pocket).tolist()
  if not pocket_atoms:
    raise ValueError(
        "No protein atoms found within cutoff %s of the ligand" % (cutoff,))
  pocket_coords = protein_coords[pocket_atoms]

  # Round outwards so that the integer box still encloses every atom.
  lower = np.floor(pocket_coords.min(axis=0))
  upper = np.ceil(pocket_coords.max(axis=0))
  box = tuple((int(lo), int(hi)) for lo, hi in zip(lower, upper))
  return (box, pocket_atoms, pocket_coords)
Пример #4
0
  def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """Featurize two fragments and the complex obtained by merging them.

    Parameters
    ----------
    mol_pdb_file: str
      File of the first fragment; loaded with `is_protein=False`.
    protein_pdb_file: str
      File of the second fragment; loaded with `is_protein=True`.

    Returns
    -------
    tuple or None
      Coordinates, neighbor list and `z` for fragment 1, fragment 2 and
      the merged system (nine entries in that order), or `None` when a
      molecule cannot be loaded or `featurize_mol` raises `ValueError`.
    """
    shared_load_args = {"sanitize": True, "add_hydrogens": False}
    try:
      frag1_xyz, frag1 = rdkit_util.load_molecule(
          mol_pdb_file, is_protein=False, **shared_load_args)
      frag2_xyz, frag2 = rdkit_util.load_molecule(
          protein_pdb_file, is_protein=True, **shared_load_args)
    except MoleculeLoadException:
      # Currently handles loading failures by returning None
      # TODO: Is there a better handling procedure?
      logging.warning("Some molecules cannot be loaded by Rdkit. Skipping")
      return None

    merged = rdkit_util.merge_molecules([frag1, frag2])
    merged_xyz = rdkit_util.get_xyz_from_mol(merged)

    # Hydrogens are stripped only after the merged system has been built.
    frag1_xyz, frag1 = self._strip_hydrogens(frag1_xyz, frag1)
    frag2_xyz, frag2 = self._strip_hydrogens(frag2_xyz, frag2)
    merged_xyz, merged = self._strip_hydrogens(merged_xyz, merged)

    try:
      frag1_xyz, frag1_nbrs, frag1_z = self.featurize_mol(
          frag1_xyz, frag1, self.frag1_num_atoms)
      frag2_xyz, frag2_nbrs, frag2_z = self.featurize_mol(
          frag2_xyz, frag2, self.frag2_num_atoms)
      merged_xyz, merged_nbrs, merged_z = self.featurize_mol(
          merged_xyz, merged, self.complex_num_atoms)
    except ValueError:
      logging.warning(
          "max_atoms was set too low. Some complexes too large and skipped")
      return None

    return (frag1_xyz, frag1_nbrs, frag1_z, frag2_xyz, frag2_nbrs, frag2_z,
            merged_xyz, merged_nbrs, merged_z)
Пример #5
0
def extract_active_site(protein_file, ligand_file, cutoff=4):
  """Build the bounding box of the protein atoms in contact with a ligand.

  Parameters
  ----------
  protein_file: str
    Location of protein PDB
  ligand_file: str
    Location of ligand input file
  cutoff: int, optional
    Contact cutoff forwarded to `get_contact_atom_indices`.

  Returns
  -------
  A tuple of `(CoordinateBox, np.ndarray)` where the second entry is
  of shape `(N, 3)` with `N` the number of atoms in the active site.
  """
  protein = rdkit_util.load_molecule(protein_file, add_hydrogens=False)
  ligand = rdkit_util.load_molecule(
      ligand_file, add_hydrogens=True, calc_charges=True)
  # Only the protein-side contact indices are needed for the box.
  protein_contacts, _ = get_contact_atom_indices(
      [protein, ligand], cutoff=cutoff)
  pocket_coords = protein[0][protein_contacts]

  # Round each axis range outwards to integers: floor for the lower bound,
  # ceil for the upper bound.
  axis_ranges = []
  for axis in range(3):
    along_axis = pocket_coords[:, axis]
    low = int(np.floor(np.amin(along_axis)))
    high = int(np.ceil(np.amax(along_axis)))
    axis_ranges.append((low, high))

  x_range, y_range, z_range = axis_ranges
  box = box_utils.CoordinateBox(x_range, y_range, z_range)
  return (box, pocket_coords)
Пример #6
0
    def test_pdbqt_to_pdb(self):
        """Check that a PDBQT file converts back to PDB with the same atoms.

        The 1jld protein is written as PDB and PDBQT; the PDBQT file is
        converted back to a PDB block and compared (atom count and atomic
        numbers) with the molecule loaded directly from the PDBQT file.
        """
        import shutil
        import tempfile
        from rdkit import Chem

        current_dir = os.path.dirname(os.path.realpath(__file__))
        protein_file = os.path.join(current_dir,
                                    "../../dock/tests/1jld_protein.pdb")
        _, mol = rdkit_util.load_molecule(protein_file,
                                          calc_charges=False,
                                          add_hydrogens=False)

        # Use a private scratch directory rather than fixed /tmp paths so
        # that concurrent runs cannot clobber each other, and remove it
        # even when a step below raises.
        scratch_dir = tempfile.mkdtemp()
        try:
            out_pdb = os.path.join(scratch_dir, "mol.pdb")
            out_pdbqt = os.path.join(scratch_dir, "mol.pdbqt")

            rdkit_util.write_molecule(mol, out_pdb)
            rdkit_util.write_molecule(mol, out_pdbqt, is_protein=True)

            pdb_block = rdkit_util.pdbqt_to_pdb(out_pdbqt)
            pdb_mol = Chem.MolFromPDBBlock(pdb_block,
                                           sanitize=False,
                                           removeHs=False)

            _, pdbqt_mol = rdkit_util.load_molecule(out_pdbqt,
                                                    add_hydrogens=False,
                                                    calc_charges=False)
        finally:
            shutil.rmtree(scratch_dir, ignore_errors=True)

        assert_equal(pdb_mol.GetNumAtoms(), pdbqt_mol.GetNumAtoms())
        for atom1, atom2 in zip(pdb_mol.GetAtoms(), pdbqt_mol.GetAtoms()):
            assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
Пример #7
0
  def test_pdbqt_to_pdb(self):
    """Check that a PDBQT file converts back to PDB with the same atoms.

    The 1jld protein is written as PDB and PDBQT; the PDBQT file is
    converted back to a PDB block and compared (atom count and atomic
    numbers) with the molecule loaded directly from the PDBQT file.
    """
    import shutil
    import tempfile

    current_dir = os.path.dirname(os.path.realpath(__file__))
    protein_file = os.path.join(current_dir,
                                "../../dock/tests/1jld_protein.pdb")
    _, mol = rdkit_util.load_molecule(
        protein_file, calc_charges=False, add_hydrogens=False)

    # Use a private scratch directory rather than fixed /tmp paths so that
    # concurrent runs cannot clobber each other, and remove it even when a
    # step below raises.
    scratch_dir = tempfile.mkdtemp()
    try:
      out_pdb = os.path.join(scratch_dir, "mol.pdb")
      out_pdbqt = os.path.join(scratch_dir, "mol.pdbqt")

      rdkit_util.write_molecule(mol, out_pdb)
      rdkit_util.write_molecule(mol, out_pdbqt, is_protein=True)

      pdb_block = rdkit_util.pdbqt_to_pdb(out_pdbqt)
      pdb_mol = Chem.MolFromPDBBlock(pdb_block, sanitize=False, removeHs=False)

      _, pdbqt_mol = rdkit_util.load_molecule(
          out_pdbqt, add_hydrogens=False, calc_charges=False)
    finally:
      shutil.rmtree(scratch_dir, ignore_errors=True)

    assert_equal(pdb_mol.GetNumAtoms(), pdbqt_mol.GetNumAtoms())
    for atom1, atom2 in zip(pdb_mol.GetAtoms(), pdbqt_mol.GetAtoms()):
      assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
Пример #8
0
 def test_load_molecule(self):
   """Loading the 1jld ligand SDF yields both coordinates and a molecule."""
   here = os.path.dirname(os.path.realpath(__file__))
   sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
   loaded = rdkit_util.load_molecule(
       sdf_path, calc_charges=False, add_hydrogens=False)
   coords, molecule = loaded
   assert_true(coords is not None)
   assert_true(molecule is not None)
Пример #9
0
 def test_merge_molecules(self):
     """Merging two ligands gives a molecule with the summed atom count."""
     tests_dir = os.path.dirname(os.path.realpath(__file__))
     first_path = os.path.join(tests_dir,
                               "../../dock/tests/1jld_ligand.sdf")
     load_opts = dict(calc_charges=False, add_hydrogens=False)

     _, first = rdkit_util.load_molecule(first_path, **load_opts)
     first_size = first.GetNumAtoms()
     # self.ligand_file is for 3ws9_ligand.sdf
     _, second = rdkit_util.load_molecule(self.ligand_file, **load_opts)
     second_size = second.GetNumAtoms()

     combined = rdkit_util.merge_molecules([first, second])
     assert combined.GetNumAtoms() == first_size + second_size
Пример #10
0
 def find_pockets(self, protein_file, ligand_file):
   """Find list of suitable binding pockets on protein.

   Returns a tuple `(pockets, pocket_atoms_map, pocket_coords)`; the last
   entry holds, for every pocket, an `(n_atoms, 3)` array with the
   coordinates of the protein atoms mapped to that pocket.
   """
   plain_load = dict(add_hydrogens=False, calc_charges=False)
   protein_coords = rdkit_util.load_molecule(protein_file, **plain_load)[0]
   # The ligand is still loaded (so an unreadable ligand file fails here),
   # but its coordinates are not used by the steps below.
   _ = rdkit_util.load_molecule(ligand_file, **plain_load)[0]

   candidate_boxes = get_all_boxes(protein_coords, self.pad)
   atoms_by_box = boxes_to_atoms(protein_coords, candidate_boxes)
   pockets, pocket_atoms_map = merge_overlapping_boxes(atoms_by_box,
                                                       candidate_boxes)

   pocket_coords = []
   for pocket in pockets:
     members = pocket_atoms_map[pocket]
     member_xyz = np.zeros((len(members), 3))
     for row, atom in enumerate(members):
       member_xyz[row] = protein_coords[atom]
     pocket_coords.append(member_xyz)
   return pockets, pocket_atoms_map, pocket_coords
Пример #11
0
 def find_pockets(self, protein_file, ligand_file):
   """Find list of suitable binding pockets on protein.

   Returns a tuple `(pockets, pocket_atoms_map, pocket_coords)`; the last
   entry holds, for every pocket, an `(n_atoms, 3)` array with the
   coordinates of the protein atoms mapped to that pocket.
   """
   protein_coords = rdkit_util.load_molecule(
       protein_file, add_hydrogens=False, calc_charges=False)[0]
   # The ligand is still loaded (so an unreadable ligand file fails here),
   # but its coordinates are not used by the steps below.
   _ = rdkit_util.load_molecule(
       ligand_file, add_hydrogens=False, calc_charges=False)[0]

   def gather_xyz(atom_indices):
     # Copy the coordinates of the given protein atoms into a fresh array.
     gathered = np.zeros((len(atom_indices), 3))
     for row, atom in enumerate(atom_indices):
       gathered[row] = protein_coords[atom]
     return gathered

   all_boxes = get_all_boxes(protein_coords, self.pad)
   box_to_atoms = boxes_to_atoms(protein_coords, all_boxes)
   pockets, pocket_atoms_map = merge_overlapping_boxes(box_to_atoms,
                                                       all_boxes)
   pocket_coords = []
   for pocket in pockets:
     pocket_coords.append(gather_xyz(pocket_atoms_map[pocket]))
   return pockets, pocket_atoms_map, pocket_coords
Пример #12
0
 def test_load_molecule2(self):
     """Loading the 1jld ligand SDF yields coordinates and a molecule."""
     here = os.path.dirname(os.path.realpath(__file__))
     sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
     loaded = rdkit_util.load_molecule(sdf_path,
                                       calc_charges=False,
                                       add_hydrogens=False)
     coords, molecule = loaded
     assert coords is not None
     assert molecule is not None
Пример #13
0
  def test_write_molecule(self):
    """A molecule written to SDF and reloaded keeps its atoms.

    The 1jld ligand is written to a scratch file and read back; the
    reloaded molecule must have the same atom count and, atom by atom, the
    same atomic numbers as the original.
    """
    import shutil
    import tempfile

    current_dir = os.path.dirname(os.path.realpath(__file__))
    ligand_file = os.path.join(current_dir, "../../dock/tests/1jld_ligand.sdf")
    _, mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=False, add_hydrogens=False)

    # Use a private scratch directory rather than a fixed /tmp path, and
    # remove it even when a step below raises.
    scratch_dir = tempfile.mkdtemp()
    try:
      outfile = os.path.join(scratch_dir, "mol.sdf")
      rdkit_util.write_molecule(mol, outfile)
      _, mol2 = rdkit_util.load_molecule(
          outfile, calc_charges=False, add_hydrogens=False)
    finally:
      shutil.rmtree(scratch_dir, ignore_errors=True)

    assert_equal(mol.GetNumAtoms(), mol2.GetNumAtoms())
    # Compare the original against the reloaded molecule (mol2), not
    # against itself.
    for atom1, atom2 in zip(mol.GetAtoms(), mol2.GetAtoms()):
      assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
Пример #14
0
  def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """
    Compute neighbor list for complex.

    Parameters
    ----------
    mol_pdb_file: str
      File of the ligand, passed to `rdkit_util.load_molecule`.
    protein_pdb_file: str
      File of the protein, passed to `rdkit_util.load_molecule`.

    Returns
    -------
    tuple
      `(system_coords, system_neighbor_list)` for the merged coordinates
      of ligand and protein.
    """
    # Only the coordinates are needed; the molecule objects are discarded.
    ligand_xyz, _ = rdkit_util.load_molecule(mol_pdb_file)
    protein_xyz, _ = rdkit_util.load_molecule(protein_pdb_file)
    system_coords = rdkit_util.merge_molecules_xyz(ligand_xyz, protein_xyz)

    neighbors = compute_neighbor_list(system_coords, self.neighbor_cutoff,
                                      self.max_num_neighbors, None)
    return (system_coords, neighbors)
Пример #15
0
 def find_all_pockets(self, protein_file):
   """Find list of binding pockets on protein.

   Parameters
   ----------
   protein_file: str
     Protein to load in.

   Returns
   -------
   The result of `box_utils.get_face_boxes` for the protein coordinates
   and `self.pad`.
   """
   loaded = rdkit_util.load_molecule(protein_file)
   protein_xyz, _ = loaded
   return box_utils.get_face_boxes(protein_xyz, self.pad)
Пример #16
0
  def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """
    Compute neighbor list for complex.

    Parameters
    ----------
    mol_pdb_file: str
      Filename for ligand pdb file.
    protein_pdb_file: str
      Filename for protein pdb file.

    Returns
    -------
    tuple
      `(system_coords, system_neighbor_list)` for the merged coordinates
      of ligand and protein.
    """
    # Only the coordinates are needed; the molecule objects are discarded.
    ligand_xyz, _ = rdkit_util.load_molecule(mol_pdb_file)
    protein_xyz, _ = rdkit_util.load_molecule(protein_pdb_file)
    system_coords = rdkit_util.merge_molecules_xyz([ligand_xyz, protein_xyz])

    neighbors = compute_neighbor_list(system_coords, self.neighbor_cutoff,
                                      self.max_num_neighbors, None)
    return (system_coords, neighbors)
Пример #17
0
  def test_get_xyz_from_mol(self):
    """get_xyz_from_mol reproduces the coordinates returned on loading."""
    here = os.path.dirname(os.path.realpath(__file__))
    sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")

    loaded_xyz, molecule = rdkit_util.load_molecule(
        sdf_path, calc_charges=False, add_hydrogens=False)
    extracted_xyz = rdkit_util.get_xyz_from_mol(molecule)

    assert_true(np.all(loaded_xyz == extracted_xyz))
Пример #18
0
 def test_convert_protein_to_pdbqt(self):
     """Test a protein in a PDB can be converted to PDBQT.

     The protein is written in PDB format to `outfile`, converted with
     `convert_protein_to_pdbqt` on that same path and loaded back; the
     reloaded molecule must match the source molecule in atom count and
     atomic numbers.
     """
     from rdkit import Chem
     _, mol = rdkit_util.load_molecule(self.protein_file,
                                       calc_charges=False,
                                       add_hydrogens=False)
     with tempfile.TemporaryDirectory() as tmp:
         outfile = os.path.join(tmp, "mol.pdbqt")
         writer = Chem.PDBWriter(outfile)
         writer.write(mol)
         writer.close()
         pdbqt_utils.convert_protein_to_pdbqt(mol, outfile)
         _, pdbqt_mol = rdkit_util.load_molecule(outfile,
                                                 add_hydrogens=False,
                                                 calc_charges=False)
     # Compare against the source molecule; comparing pdbqt_mol with
     # itself could never fail.
     assert mol.GetNumAtoms() == pdbqt_mol.GetNumAtoms()
     for atom1, atom2 in zip(mol.GetAtoms(), pdbqt_mol.GetAtoms()):
         assert atom1.GetAtomicNum() == atom2.GetAtomicNum()
Пример #19
0
  def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """
    Compute neighbor list for complex.

    Parameters
    ----------
    mol_pdb_file: str
      File of the ligand, passed to `rdkit_util.load_molecule`.
    protein_pdb_file: str
      File of the protein, passed to `rdkit_util.load_molecule`.

    Returns
    -------
    tuple
      `(system_coords, system_neighbor_list)` for the merged coordinates
      of ligand and protein.
    """
    # Only the coordinates are needed; the molecule objects are discarded.
    ligand_xyz, _ = rdkit_util.load_molecule(mol_pdb_file)
    protein_xyz, _ = rdkit_util.load_molecule(protein_pdb_file)
    system_coords = rdkit_util.merge_molecules_xyz(ligand_xyz, protein_xyz)

    neighbors = compute_neighbor_list(system_coords, self.neighbor_cutoff,
                                      self.max_num_neighbors, None)
    return (system_coords, neighbors)
Пример #20
0
    def _featurize_complex(self, frag1_pdb_file, frag2_pdb_file):
        """Featurize fragments and complex.

        Parameters
        ----------
        frag1_pdb_file: string
          Location of frag1_pdb_file.
        frag2_pdb_file: string
          Location of frag2_pdb_file.

        Returns
        -------
        retval: tuple
          Tuple containing coordinates, neighbor list, and atomic number for
          fragment 1, fragment 2, and complex. When either fragment cannot
          be loaded, a tuple of nine `None` values is returned instead.
        """
        try:
            frag1_mol = rdkit_util.load_molecule(frag1_pdb_file,
                                                 add_hydrogens=False,
                                                 calc_charges=False)[1]
            frag2_mol = rdkit_util.load_molecule(frag2_pdb_file,
                                                 add_hydrogens=False,
                                                 calc_charges=False)[1]
        except Exception:
            # Loading is best-effort, but KeyboardInterrupt/SystemExit must
            # still propagate, hence `Exception` rather than a bare except.
            frag1_mol = None
            frag2_mol = None

        if not (frag1_mol and frag2_mol):
            print("failed to featurize")
            return (None, None, None, None, None, None, None, None, None)

        frag1_coords, frag1_neighbor_list, frag1_z = (
            self.frag1_featurizer._featurize(frag1_mol))
        frag2_coords, frag2_neighbor_list, frag2_z = (
            self.frag2_featurizer._featurize(frag2_mol))
        # The complex is featurized from the combination of both fragments.
        complex_mol = Chem.rdmolops.CombineMols(frag1_mol, frag2_mol)
        complex_coords, complex_neighbor_list, complex_z = (
            self.complex_featurizer._featurize(complex_mol))
        return (frag1_coords, frag1_neighbor_list, frag1_z, frag2_coords,
                frag2_neighbor_list, frag2_z, complex_coords,
                complex_neighbor_list, complex_z)
Пример #21
0
 def test_merge_molecules_xyz(self):
   """Merging coordinates with themselves stacks two identical copies."""
   here = os.path.dirname(os.path.realpath(__file__))
   sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
   xyz, _ = rdkit_util.load_molecule(
       sdf_path, calc_charges=False, add_hydrogens=False)
   merged = rdkit_util.merge_molecules_xyz(xyz, xyz)
   n_atoms = len(xyz)
   for i in range(n_atoms):
     # Row i must appear both in the first and in the second copy.
     matches_first_copy = np.all(xyz[i] == merged[i])
     matches_second_copy = np.all(xyz[i] == merged[i + n_atoms])
     assert_true(matches_first_copy)
     assert_true(matches_second_copy)
Пример #22
0
 def test_load_molecule(self):
     """load_molecule returns an (N, 3) array and a Mol for all flag combos."""
     # adding hydrogens and charges is tested in dc.utils
     from rdkit.Chem.AllChem import Mol
     flag_combinations = [(hydrogens, charges)
                          for hydrogens in (True, False)
                          for charges in (True, False)]
     for add_hydrogens, calc_charges in flag_combinations:
         mol_xyz, mol_rdk = rdkit_util.load_molecule(
             self.ligand_file, add_hydrogens, calc_charges)
         expected_shape = (mol_rdk.GetNumAtoms(), 3)
         self.assertIsInstance(mol_xyz, np.ndarray)
         self.assertIsInstance(mol_rdk, Mol)
         self.assertEqual(mol_xyz.shape, expected_shape)
Пример #23
0
  def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """Featurize two fragments and the system obtained by merging them.

    Returns coordinates, neighbor list and `z` for fragment 1, fragment 2
    and the merged system, as a flat tuple of nine entries in that order.
    """
    frag1_xyz, frag1 = rdkit_util.load_molecule(mol_pdb_file)
    frag2_xyz, frag2 = rdkit_util.load_molecule(protein_pdb_file)
    merged = rdkit_util.merge_molecules(frag1, frag2)
    merged_xyz = rdkit_util.get_xyz_from_mol(merged)

    # Hydrogens are stripped only after the merged system has been built.
    frag1_xyz, frag1 = self._strip_hydrogens(frag1_xyz, frag1)
    frag2_xyz, frag2 = self._strip_hydrogens(frag2_xyz, frag2)
    merged_xyz, merged = self._strip_hydrogens(merged_xyz, merged)

    frag1_xyz, frag1_nbrs, frag1_z = self.featurize_mol(
        frag1_xyz, frag1, self.frag1_num_atoms)
    frag2_xyz, frag2_nbrs, frag2_z = self.featurize_mol(
        frag2_xyz, frag2, self.frag2_num_atoms)
    merged_xyz, merged_nbrs, merged_z = self.featurize_mol(
        merged_xyz, merged, self.complex_num_atoms)

    return (frag1_xyz, frag1_nbrs, frag1_z, frag2_xyz, frag2_nbrs, frag2_z,
            merged_xyz, merged_nbrs, merged_z)
Пример #24
0
    def test_get_face_boxes_for_protein(self):
        """get_face_boxes returns a list of CoordinateBox objects."""
        here = os.path.dirname(os.path.realpath(__file__))
        protein_path = os.path.join(here, "1jld_protein.pdb")
        protein_xyz = rdkit_util.load_molecule(protein_path)[0]

        face_boxes = box_utils.get_face_boxes(protein_xyz)
        assert isinstance(face_boxes, list)
        # Every entry must be a CoordinateBox instance.
        for candidate in face_boxes:
            assert isinstance(candidate, box_utils.CoordinateBox)
Пример #25
0
  def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """Featurize two fragments and the system obtained by merging them.

    Returns coordinates, neighbor list and `z` for fragment 1, fragment 2
    and the merged system, as a flat tuple of nine entries in that order.
    """
    frag1_xyz, frag1 = rdkit_util.load_molecule(mol_pdb_file)
    frag2_xyz, frag2 = rdkit_util.load_molecule(protein_pdb_file)
    merged = rdkit_util.merge_molecules(frag1, frag2)
    merged_xyz = rdkit_util.get_xyz_from_mol(merged)

    # Hydrogens are stripped only after the merged system has been built.
    frag1_xyz, frag1 = self._strip_hydrogens(frag1_xyz, frag1)
    frag2_xyz, frag2 = self._strip_hydrogens(frag2_xyz, frag2)
    merged_xyz, merged = self._strip_hydrogens(merged_xyz, merged)

    frag1_xyz, frag1_nbrs, frag1_z = self.featurize_mol(
        frag1_xyz, frag1, self.frag1_num_atoms)
    frag2_xyz, frag2_nbrs, frag2_z = self.featurize_mol(
        frag2_xyz, frag2, self.frag2_num_atoms)
    merged_xyz, merged_nbrs, merged_z = self.featurize_mol(
        merged_xyz, merged, self.complex_num_atoms)

    return (frag1_xyz, frag1_nbrs, frag1_z, frag2_xyz, frag2_nbrs, frag2_z,
            merged_xyz, merged_nbrs, merged_z)
Пример #26
0
    def test_write_molecule(self):
        """A molecule written to SDF and reloaded keeps its atoms.

        The 1jld ligand is written to a scratch file and read back; the
        reloaded molecule must have the same atom count and, atom by atom,
        the same atomic numbers as the original.
        """
        import shutil
        import tempfile

        current_dir = os.path.dirname(os.path.realpath(__file__))
        ligand_file = os.path.join(current_dir,
                                   "../../dock/tests/1jld_ligand.sdf")
        _, mol = rdkit_util.load_molecule(ligand_file,
                                          calc_charges=False,
                                          add_hydrogens=False)

        # Use a private scratch directory rather than a fixed /tmp path,
        # and remove it even when a step below raises.
        scratch_dir = tempfile.mkdtemp()
        try:
            outfile = os.path.join(scratch_dir, "mol.sdf")
            rdkit_util.write_molecule(mol, outfile)
            _, mol2 = rdkit_util.load_molecule(outfile,
                                               calc_charges=False,
                                               add_hydrogens=False)
        finally:
            shutil.rmtree(scratch_dir, ignore_errors=True)

        assert_equal(mol.GetNumAtoms(), mol2.GetNumAtoms())
        # Compare the original against the reloaded molecule (mol2), not
        # against itself.
        for atom1, atom2 in zip(mol.GetAtoms(), mol2.GetAtoms()):
            assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
Пример #27
0
    def test_get_xyz_from_mol(self):
        """get_xyz_from_mol reproduces the coordinates returned on loading."""
        here = os.path.dirname(os.path.realpath(__file__))
        sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")

        loaded_xyz, molecule = rdkit_util.load_molecule(sdf_path,
                                                        calc_charges=False,
                                                        add_hydrogens=False)
        extracted_xyz = rdkit_util.get_xyz_from_mol(molecule)

        assert np.all(loaded_xyz == extracted_xyz)
Пример #28
0
    def test_write_molecule(self):
        """A molecule written to SDF and reloaded keeps its atoms.

        The 1jld ligand is written to a temporary file and read back; the
        reloaded molecule must have the same atom count and, atom by atom,
        the same atomic numbers as the original.
        """
        current_dir = os.path.dirname(os.path.realpath(__file__))
        ligand_file = os.path.join(current_dir,
                                   "../../dock/tests/1jld_ligand.sdf")
        _, mol = rdkit_util.load_molecule(ligand_file,
                                          calc_charges=False,
                                          add_hydrogens=False)

        with tempfile.TemporaryDirectory() as tmp:
            outfile = os.path.join(tmp, "mol.sdf")
            rdkit_util.write_molecule(mol, outfile)

            _, mol2 = rdkit_util.load_molecule(outfile,
                                               calc_charges=False,
                                               add_hydrogens=False)

        assert mol.GetNumAtoms() == mol2.GetNumAtoms()
        # Compare the original against the reloaded molecule (mol2), not
        # against itself.
        for atom1, atom2 in zip(mol.GetAtoms(), mol2.GetAtoms()):
            assert atom1.GetAtomicNum() == atom2.GetAtomicNum()
Пример #29
0
 def test_merge_molecules_xyz(self):
     """Merging coordinates with themselves stacks two identical copies."""
     here = os.path.dirname(os.path.realpath(__file__))
     sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
     xyz, _ = rdkit_util.load_molecule(sdf_path,
                                       calc_charges=False,
                                       add_hydrogens=False)
     merged = rdkit_util.merge_molecules_xyz([xyz, xyz])
     n_atoms = len(xyz)
     for i in range(n_atoms):
         # Row i must appear both in the first and in the second copy.
         matches_first_copy = np.all(xyz[i] == merged[i])
         matches_second_copy = np.all(xyz[i] == merged[i + n_atoms])
         assert matches_first_copy
         assert matches_second_copy
Пример #30
0
  def test_compute_charges(self):
    """compute_charges stores a non-zero `_GasteigerCharge` on some atom.

    Every atom must carry the `_GasteigerCharge` property after
    `compute_charges`, and at least one of the values must be non-zero.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    ligand_file = os.path.join(current_dir, "../../dock/tests/1jld_ligand.sdf")
    _, mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=False, add_hydrogens=True)
    rdkit_util.compute_charges(mol)

    # GetProp returns the property as a string, so it has to be parsed
    # before being compared with 0; comparing the raw string with 0 is
    # always "not equal" and would make the test vacuous.
    charges = [
        float(atom.GetProp(str("_GasteigerCharge")))
        for atom in mol.GetAtoms()
    ]
    has_a_charge = any(charge != 0 for charge in charges)
    assert_true(has_a_charge)
Пример #31
0
  def test_boxes_to_atoms(self, postfix_directory):
    """Every atom mapped to a box must lie inside that box."""
    protein_path = os.path.join(postfix_directory, "PfATP4.pdb")
    protein_xyz = rdkit_util.load_molecule(protein_path)[0]
    all_boxes = dc.dock.binding_pocket.get_all_boxes(protein_xyz)

    box_to_atoms = dc.dock.binding_pocket.boxes_to_atoms(protein_xyz,
                                                         all_boxes)
    assert isinstance(box_to_atoms, dict)
    for box, members in box_to_atoms.items():
      (x_min, x_max), (y_min, y_max), (z_min, z_max) = box
      for atom_ind in members:
        x, y, z = (protein_xyz[atom_ind][axis] for axis in range(3))
        assert x_min <= x <= x_max
        assert y_min <= y <= y_max
        assert z_min <= z <= z_max
Пример #32
0
    def test_compute_charges(self):
        """compute_charges stores a non-zero `_GasteigerCharge` on some atom.

        Every atom must carry the `_GasteigerCharge` property after
        `compute_charges`, and at least one of the values must be non-zero.
        """
        current_dir = os.path.dirname(os.path.realpath(__file__))
        ligand_file = os.path.join(current_dir,
                                   "../../dock/tests/1jld_ligand.sdf")
        _, mol = rdkit_util.load_molecule(ligand_file,
                                          calc_charges=False,
                                          add_hydrogens=True)
        rdkit_util.compute_charges(mol)

        # GetProp returns the property as a string, so it has to be parsed
        # before being compared with 0; comparing the raw string with 0 is
        # always "not equal" and would make the test vacuous.
        charges = [
            float(atom.GetProp(str("_GasteigerCharge")))
            for atom in mol.GetAtoms()
        ]
        has_a_charge = any(charge != 0 for charge in charges)
        assert has_a_charge
Пример #33
0
    def test_boxes_to_atoms(self):
        """Every atom mapped to a box must lie inside that box."""
        here = os.path.dirname(os.path.realpath(__file__))
        protein_path = os.path.join(here, "1jld_protein.pdb")
        protein_xyz = rdkit_util.load_molecule(protein_path)[0]
        all_boxes = dc.dock.binding_pocket.get_all_boxes(protein_xyz)

        box_to_atoms = dc.dock.binding_pocket.boxes_to_atoms(
            protein_xyz, all_boxes)
        assert isinstance(box_to_atoms, dict)
        for box, members in box_to_atoms.items():
            (x_min, x_max), (y_min, y_max), (z_min, z_max) = box
            for atom_ind in members:
                x, y, z = (protein_xyz[atom_ind][axis] for axis in range(3))
                assert x_min <= x <= x_max
                assert y_min <= y <= y_max
                assert z_min <= z <= z_max
Пример #34
0
  def test_boxes_to_atoms(self):
    """Every atom mapped to a box must lie inside that box."""
    here = os.path.dirname(os.path.realpath(__file__))
    protein_path = os.path.join(here, "1jld_protein.pdb")
    protein_xyz = rdkit_util.load_molecule(protein_path)[0]
    all_boxes = dc.dock.binding_pocket.get_all_boxes(protein_xyz)

    box_to_atoms = dc.dock.binding_pocket.boxes_to_atoms(protein_xyz,
                                                         all_boxes)
    assert isinstance(box_to_atoms, dict)
    for box, members in box_to_atoms.items():
      (x_min, x_max), (y_min, y_max), (z_min, z_max) = box
      for atom_ind in members:
        x, y, z = (protein_xyz[atom_ind][axis] for axis in range(3))
        assert x_min <= x <= x_max
        assert y_min <= y <= y_max
        assert z_min <= z <= z_max
Пример #35
0
def hydrogenate_and_compute_partial_charges(input_file,
                                            input_format,
                                            hyd_output=None,
                                            pdbqt_output=None,
                                            protein=True,
                                            verbose=True):
    """Write a hydrogenated molecule and a PDBQT version of it.

    The molecule in `input_file` is loaded with hydrogens added and charges
    computed, then written twice: once to `hyd_output` and once to
    `pdbqt_output`.

    Parameters
    ----------
    input_file: String
        Path to input file.
    input_format: String
        Name of input format. Not consulted by this function.
    hyd_output: String, optional
        Destination of the hydrogenated molecule.
    pdbqt_output: String, optional
        Destination of the PDBQT file.
    protein: bool, optional
        Forwarded to `write_molecule` as `is_protein`; when true the PDBQT
        file is additionally read back and rewritten.
    verbose: bool, optional
        Whether to print progress messages.
    """

    # TODO(rbharath): Can do a bit of refactoring between this function and
    # pdbqt_to_pdb.

    def announce(message):
        if verbose:
            print(message)

    mol = rdkit_util.load_molecule(input_file,
                                   add_hydrogens=True,
                                   calc_charges=True)[1]
    announce("Create pdb with hydrogens added")
    rdkit_util.write_molecule(mol, str(hyd_output), is_protein=protein)
    announce("Create a pdbqt file from the hydrogenated pdb above.")
    rdkit_util.write_molecule(mol, str(pdbqt_output), is_protein=protein)

    if not protein:
        return

    print("Removing ROOT/ENDROOT/TORSDOF")
    with open(pdbqt_output) as f:
        # NOTE(review): despite the message above, no line is dropped here;
        # the file is rewritten with exactly the lines that were read.
        # Confirm whether a ROOT/ENDROOT/TORSDOF filter is still needed.
        filtered_lines = list(f.readlines())
    with open(pdbqt_output, "w") as f:
        f.writelines(filtered_lines)
Пример #36
0
  def test_add_hydrogens_to_mol(self):
    """add_hydrogens_to_mol never reduces the number of hydrogen atoms."""

    def hydrogen_count(molecule):
      # Hydrogens are the atoms with atomic number 1.
      return sum(1 for atom in molecule.GetAtoms()
                 if atom.GetAtomicNum() == 1)

    here = os.path.dirname(os.path.realpath(__file__))
    sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
    _, mol = rdkit_util.load_molecule(
        sdf_path, calc_charges=False, add_hydrogens=False)
    hydrogens_before = hydrogen_count(mol)

    mol = rdkit_util.add_hydrogens_to_mol(mol)
    hydrogens_after = hydrogen_count(mol)
    assert_true(hydrogens_after >= hydrogens_before)
Пример #37
0
def hydrogenate_and_compute_partial_charges(input_file,
                                            input_format,
                                            hyd_output=None,
                                            pdbqt_output=None,
                                            protein=True,
                                            verbose=True):
  """Outputs a hydrogenated pdb and a pdbqt with partial charges.

  Loads `input_file` with hydrogens added and charges computed, then writes
  the resulting molecule twice:

  -) A pdb file that contains a hydrogenated (at pH 7.4) version of
     original compound.
  -) A pdbqt file that has computed Gasteiger partial charges. This pdbqt
     file is built from the same hydrogenated molecule.

  When `protein` is true the pdbqt file is rewritten without its
  ROOT/ENDROOT/TORSDOF records.

  TODO(rbharath): Can do a bit of refactoring between this function and
  pdbqt_to_pdb.

  Parameters
  ----------
  input_file: String
    Path to input file.
  input_format: String
    Name of input format. Currently unused by this function.
  hyd_output: String
    Path the hydrogenated molecule is written to (passed through `str`).
  pdbqt_output: String
    Path the pdbqt file is written to (passed through `str`).
  protein: bool
    Forwarded to `write_molecule` as `is_protein`; also enables the
    ROOT/ENDROOT/TORSDOF clean-up of the pdbqt file.
  verbose: bool
    If true, log a message before each file is written.
  """
  # NOTE(review): the output paths default to None, and str(None) is the
  # literal file name "None" -- callers are expected to pass real paths.
  mol = rdkit_util.load_molecule(
      input_file, add_hydrogens=True, calc_charges=True)[1]
  if verbose:
    logging.info("Create pdb with hydrogens added")
  rdkit_util.write_molecule(mol, str(hyd_output), is_protein=protein)
  if verbose:
    logging.info("Create a pdbqt file from the hydrogenated pdb above.")
  rdkit_util.write_molecule(mol, str(pdbqt_output), is_protein=protein)

  if protein:
    logging.info("Removing ROOT/ENDROOT/TORSDOF")
    with open(pdbqt_output) as f:
      pdbqt_lines = f.readlines()
    # Match on the record name at the start of the line so that only the
    # torsion-tree records are dropped, never atom or remark lines.
    torsion_records = ("ROOT", "ENDROOT", "TORSDOF")
    filtered_lines = [
        line for line in pdbqt_lines
        if not line.lstrip().startswith(torsion_records)
    ]
    with open(pdbqt_output, "w") as f:
      f.writelines(filtered_lines)
Пример #38
0
  def test_get_all_boxes(self):
    """Tests that binding pockets are detected."""
    test_dir = os.path.dirname(os.path.realpath(__file__))
    protein_file = os.path.join(test_dir, "1jld_protein.pdb")
    ligand_file = os.path.join(test_dir, "1jld_ligand.sdf")
    protein_coords = rdkit_util.load_molecule(protein_file)[0]

    boxes = dc.dock.binding_pocket.get_all_boxes(protein_coords)
    assert isinstance(boxes, list)
    # Each pocket is ((x_min, x_max), (y_min, y_max), (z_min, z_max)).
    for pocket in boxes:
      assert len(pocket) == 3
      for axis in range(3):
        assert len(pocket[axis]) == 2
      # Along every axis the lower bound must lie strictly below the upper.
      for lower, upper in pocket:
        assert lower < upper
Пример #39
0
    def test_get_all_boxes(self):
        """Tests that binding pockets are detected."""
        test_dir = os.path.dirname(os.path.realpath(__file__))
        protein_file = os.path.join(test_dir, "1jld_protein.pdb")
        ligand_file = os.path.join(test_dir, "1jld_ligand.sdf")
        protein_coords = rdkit_util.load_molecule(protein_file)[0]

        boxes = dc.dock.binding_pocket.get_all_boxes(protein_coords)
        assert isinstance(boxes, list)
        # Each pocket is ((x_min, x_max), (y_min, y_max), (z_min, z_max)).
        for pocket in boxes:
            assert len(pocket) == 3
            for axis in range(3):
                assert len(pocket[axis]) == 2
            # Along every axis the lower bound must lie strictly below the
            # upper bound.
            for lower, upper in pocket:
                assert lower < upper
Пример #40
0
  def test_get_all_boxes(self, postfix_directory):
    """Tests that binding pockets are detected.

    The protein and ligand files are looked up inside `postfix_directory`.
    """
    print("Test_All_Boxes")
    protein_file = os.path.join(postfix_directory, "PfATP4.pdb")
    ligand_file = os.path.join(postfix_directory, "SJ733.pdb")
    protein_coords = rdkit_util.load_molecule(protein_file)[0]

    boxes = dc.dock.binding_pocket.get_all_boxes(protein_coords)
    assert isinstance(boxes, list)
    # Each pocket is ((x_min, x_max), (y_min, y_max), (z_min, z_max)).
    for pocket in boxes:
      assert len(pocket) == 3
      for axis in range(3):
        assert len(pocket[axis]) == 2
      # Along every axis the lower bound must lie strictly below the upper.
      for lower, upper in pocket:
        assert lower < upper
Пример #41
0
    def test_add_hydrogens_to_mol(self):
        """Check that add_hydrogens_to_mol never lowers the hydrogen count."""
        test_dir = os.path.dirname(os.path.realpath(__file__))
        sdf_path = os.path.join(test_dir, "../../dock/tests/1jld_ligand.sdf")
        _, mol = rdkit_util.load_molecule(sdf_path,
                                          calc_charges=False,
                                          add_hydrogens=False)

        def count_hydrogens(molecule):
            # Hydrogen atoms are the ones whose atomic number is 1.
            return sum(1 for atom in molecule.GetAtoms()
                       if atom.GetAtomicNum() == 1)

        hydrogens_before = count_hydrogens(mol)
        mol = rdkit_util.add_hydrogens_to_mol(mol)
        hydrogens_after = count_hydrogens(mol)
        assert_true(hydrogens_after >= hydrogens_before)
Пример #42
0
  def find_pockets(self, macromolecule_file):
    """Propose binding pockets for a macromolecule.

    The macromolecule is loaded without added hydrogens or computed
    charges, and only its coordinates are used. Boxes are built from the
    faces of the coordinates' convex hull (via `box_utils.get_face_boxes`,
    using `self.pad`) and overlapping boxes are then merged.

    Params
    ------
    macromolecule_file: str
      Location of the macromolecule file to load

    Returns
    -------
    List of pockets. Each pocket is a `CoordinateBox`
    """
    loaded = rdkit_util.load_molecule(
        macromolecule_file, add_hydrogens=False, calc_charges=False)
    atom_coords = loaded[0]
    face_boxes = box_utils.get_face_boxes(atom_coords, self.pad)
    return box_utils.merge_overlapping_boxes(face_boxes)
Пример #43
0
  def featurize(self, protein_file, pockets):
    """
    Count, per pocket, how many atoms belong to each known residue type.

    Atoms are assigned to pockets with `boxes_to_atoms`. Each atom's residue
    is read from the first three characters of its MDTraj atom label and
    looked up in `BindingPocketFeaturizer.residues`; atoms whose residue is
    not in that list are logged and skipped.

    Params
    ------
    protein_file: str
      Location of PDB file. Loaded both through `rdkit_util.load_molecule`
      (for coordinates) and through MDTraj (for atom labels).
    pockets: list[CoordinateBox]
      List of `dc.utils.CoordinateBox` objects.

    Returns
    -------
    A numpy array of shape `(len(pockets), n_residues)`, where `n_residues`
    is `len(BindingPocketFeaturizer.residues)`.
    """
    import mdtraj
    protein_coords = rdkit_util.load_molecule(
        protein_file, add_hydrogens=False, calc_charges=False)[0]
    mapping = boxes_to_atoms(protein_coords, pockets)
    protein = mdtraj.load(protein_file)
    n_pockets = len(pockets)
    n_residues = len(BindingPocketFeaturizer.residues)
    # Column index of every known residue name in the feature matrix.
    res_map = dict(zip(BindingPocketFeaturizer.residues, range(n_residues)))
    all_features = np.zeros((n_pockets, n_residues))
    for pocket_num, pocket in enumerate(pockets):
      for atom in mapping[pocket]:
        # atom_name is of format RESX-ATOMTYPE
        # where X is a 1 to 4 digit number
        atom_name = str(protein.top.atom(atom))
        residue = atom_name[:3]
        if residue not in res_map:
          logger.info("Warning: Non-standard residue in PDB file")
          continue
        all_features[pocket_num, res_map[residue]] += 1
    return all_features
Пример #44
0
    def test_apply_pdbfixer(self):
        """Check that apply_pdbfixer with hydrogenate=True keeps a molecule
        and does not lower its hydrogen count."""
        test_dir = os.path.dirname(os.path.realpath(__file__))
        sdf_path = os.path.join(test_dir, "../../dock/tests/1jld_ligand.sdf")
        _, mol = rdkit_util.load_molecule(sdf_path,
                                          calc_charges=False,
                                          add_hydrogens=False)
        # Hydrogen atoms are the ones whose atomic number is 1.
        hydrogens_before = sum(1 for atom in mol.GetAtoms()
                               if atom.GetAtomicNum() == 1)

        assert mol is not None
        mol = rdkit_util.apply_pdbfixer(mol,
                                        hydrogenate=True,
                                        is_protein=False)
        assert mol is not None
        hydrogens_after = sum(1 for atom in mol.GetAtoms()
                              if atom.GetAtomicNum() == 1)
        assert hydrogens_after >= hydrogens_before
Пример #45
0
  def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """Computes grid featurization of protein/ligand complex.

    Takes as input filenames pdb of the protein, pdb of the ligand.

    Both molecules are loaded, the ligand centroid is subtracted from the
    coordinates of protein and ligand, and `self.nb_rotations` additional
    rotated copies of the centred system are generated. Every feature in
    `self.feature_types` is then computed for every system and the results
    are concatenated per system.

    Parameters
    ----------
    mol_pdb_file: Str
      Filename for ligand pdb file.
    protein_pdb_file: Str
      Filename for protein pdb file.

    Returns
    -------
    The per-system feature arrays stacked into one numpy array and passed
    through `np.squeeze`, or None if either molecule fails to load.
    """
    try:
      time1 = time.time()
      protein_xyz, protein_rdk = load_molecule(
          protein_pdb_file, calc_charges=True, sanitize=self.sanitize)
      time2 = time.time()
      log("TIMING: Loading protein coordinates took %0.3f s" % (time2 - time1),
          self.verbose)

      time1 = time.time()
      ligand_xyz, ligand_rdk = load_molecule(
          mol_pdb_file, calc_charges=True, sanitize=self.sanitize)
      time2 = time.time()
      log("TIMING: Loading ligand coordinates took %0.3f s" % (time2 - time1),
          self.verbose)
    except MoleculeLoadException:
      logging.warning("Some molecules cannot be loaded by Rdkit. Skipping")
      return None

    # Centre both molecules on the ligand centroid.
    time1 = time.time()
    centroid = compute_centroid(ligand_xyz)
    ligand_xyz = subtract_centroid(ligand_xyz, centroid)
    protein_xyz = subtract_centroid(protein_xyz, centroid)
    time2 = time.time()
    log("TIMING: Centroid processing took %0.3f s" % (time2 - time1),
        self.verbose)

    # Computed once from the unrotated coordinates and reused for every
    # system below.
    pairwise_distances = compute_pairwise_distances(protein_xyz, ligand_xyz)

    # Key (0, 0) is the unrotated system; (i + 1, 0) is the i-th rotation.
    transformed_systems = {}
    transformed_systems[(0, 0)] = [protein_xyz, ligand_xyz]

    for i in range(self.nb_rotations):
      rotated_system = rotate_molecules([protein_xyz, ligand_xyz])
      transformed_systems[(i + 1, 0)] = rotated_system

    features_dict = {}
    for system_id, (system_protein_xyz,
                    system_ligand_xyz) in transformed_systems.items():
      feature_arrays = []
      computed_any = False
      for _is_flat, function_name in self.feature_types:
        feature_arrays += self._compute_feature(
            function_name,
            system_protein_xyz,
            protein_rdk,
            system_ligand_xyz,
            ligand_rdk,
            pairwise_distances,
        )
        computed_any = True

      # With no feature types configured there is nothing to store for this
      # system; the entry is simply left out of features_dict.
      if not computed_any:
        continue

      # Concatenate once per system, after all features are collected,
      # instead of re-concatenating the growing list on every feature.
      if self.flatten:
        features_dict[system_id] = np.concatenate(
            [feature_array.flatten() for feature_array in feature_arrays])
      else:
        features_dict[system_id] = np.concatenate(feature_arrays, axis=-1)

    # TODO(rbharath): Is this squeeze OK?
    features = np.squeeze(np.array(list(features_dict.values())))
    return features
Пример #46
0
 def find_all_pockets(self, protein_file):
   """Return the boxes `get_all_boxes` finds for the protein's coordinates.

   Only the first element returned by `rdkit_util.load_molecule` (the
   coordinates) is used; `self.pad` is forwarded to `get_all_boxes`.
   """
   loaded = rdkit_util.load_molecule(protein_file)
   # protein_coords is (N, 3) tensor
   protein_coords = loaded[0]
   return get_all_boxes(protein_coords, self.pad)
Пример #47
0
  def _transform(self, protein_pdb, ligand_file):
    """Computes featurization of protein/ligand complex.

    Takes as input files (strings) for pdb of the protein and for the ligand.

    The molecules are loaded with charges computed. The ligand centroid is
    then subtracted from the ligand coordinates and, unless
    `self.ligand_only` is set, from the protein coordinates. What happens
    next depends on `self.feature_types`, checked in this order:

    - "ecfp": returns `{(0, 0): ecfp_array}` for the ligand (before any
      centring is done).
    - "splif": returns `{(0, 0): splif_array}`.
    - "flat_combined": returns the result of `_compute_flat_features`.
    - "voxel_combined": computes the intermediate features named in
      `self.voxel_feature_types`, builds rotated/reflected copies of the
      system, voxelizes every copy and returns a dict mapping
      `(rotation_id, reflection_id)` to the concatenated feature tensor.

    If none of these entries is present the function falls off the end and
    returns None.

    NOTE(review): `protein_xyz` and `protein_ob` are only bound when
    `self.ligand_only` is false, yet every path past the "ecfp" early return
    reads them -- confirm `ligand_only` is only combined with "ecfp".
    """
    time1 = time.time()
    if not self.ligand_only:
      protein_xyz, protein_ob = rdkit_util.load_molecule(
          protein_pdb, calc_charges=True)
    time2 = time.time()
    log("TIMING: Loading protein coordinates took %0.3f s" % (time2 - time1),
        self.verbose)

    time1 = time.time()
    ligand_xyz, ligand_ob = rdkit_util.load_molecule(
        ligand_file, calc_charges=True)
    time2 = time.time()
    log("TIMING: Loading ligand coordinates took %0.3f s" % (time2 - time1),
        self.verbose)

    if "ecfp" in self.feature_types:
      ecfp_array = compute_ecfp_features(ligand_ob, self.ecfp_degree,
                                         self.ecfp_power)
      return ({(0, 0): ecfp_array})

    # Centre the system on the ligand centroid.
    time1 = time.time()
    centroid = compute_centroid(ligand_xyz)
    ligand_xyz = subtract_centroid(ligand_xyz, centroid)
    if not self.ligand_only:
      protein_xyz = subtract_centroid(protein_xyz, centroid)
    time2 = time.time()
    log("TIMING: Centroid processing took %0.3f s" % (time2 - time1),
        self.verbose)

    if "splif" in self.feature_types:
      splif_array = self._featurize_splif(protein_xyz, protein_ob, ligand_xyz,
                                          ligand_ob)
      return ({(0, 0): splif_array})

    if "flat_combined" in self.feature_types:
      return (self._compute_flat_features(protein_xyz, protein_ob, ligand_xyz,
                                          ligand_ob))

    # Intermediate per-feature data, computed once from the centred,
    # unrotated system and reused for every transformed copy below.
    pairwise_distances = compute_pairwise_distances(protein_xyz, ligand_xyz)
    if "ecfp" in self.voxel_feature_types:
      time1 = time.time()
      protein_ecfp_dict, ligand_ecfp_dict = (featurize_binding_pocket_ecfp(
          protein_xyz,
          protein_ob,
          ligand_xyz,
          ligand_ob,
          pairwise_distances,
          cutoff=4.5,
          ecfp_degree=self.ecfp_degree))
      time2 = time.time()
      log("TIMING: ecfp voxel computataion took %0.3f s" % (time2 - time1),
          self.verbose)

    if "splif" in self.voxel_feature_types:
      time1 = time.time()
      splif_dicts = featurize_splif(protein_xyz, protein_ob, ligand_xyz,
                                    ligand_ob, self.contact_bins,
                                    pairwise_distances, self.ecfp_degree)
      time2 = time.time()
      log("TIMING: splif voxel computataion took %0.3f s" % (time2 - time1),
          self.verbose)

    if "hbond" in self.voxel_feature_types:
      time1 = time.time()
      hbond_list = compute_hydrogen_bonds(
          protein_xyz, protein_ob, ligand_xyz, ligand_ob, pairwise_distances,
          self.hbond_dist_bins, self.hbond_angle_cutoffs, self.ecfp_degree)
      time2 = time.time()
      log("TIMING: hbond voxel computataion took %0.3f s" % (time2 - time1),
          self.verbose)

    if "sybyl" in self.voxel_feature_types:
      time1 = time.time()
      protein_sybyl_dict, ligand_sybyl_dict = featurize_binding_pocket_sybyl(
          protein_xyz,
          protein_ob,
          ligand_xyz,
          ligand_ob,
          pairwise_distances,
          cutoff=7.0)
      time2 = time.time()
      log("TIMING: sybyl voxel computataion took %0.3f s" % (time2 - time1),
          self.verbose)

    if "pi_stack" in self.voxel_feature_types:
      time1 = time.time()
      protein_pi_t, protein_pi_parallel, ligand_pi_t, ligand_pi_parallel = (
          compute_pi_stack(protein_xyz, protein_ob, ligand_xyz, ligand_ob,
                           pairwise_distances))
      time2 = time.time()
      log("TIMING: pi_stack voxel computataion took %0.3f s" % (time2 - time1),
          self.verbose)

    if "cation_pi" in self.voxel_feature_types:
      time1 = time.time()
      protein_cation_pi, ligand_cation_pi = (compute_binding_pocket_cation_pi(
          protein_xyz, protein_ob, ligand_xyz, ligand_ob))
      time2 = time.time()
      log("TIMING: cation_pi voxel computataion took %0.3f s" % (time2 - time1),
          self.verbose)

    if "salt_bridge" in self.voxel_feature_types:
      time1 = time.time()
      salt_bridge_list = compute_salt_bridges(
          protein_xyz, protein_ob, ligand_xyz, ligand_ob, pairwise_distances)
      time2 = time.time()
      log("TIMING: salt_bridge voxel computataion took %0.3f s" %
          (time2 - time1), self.verbose)

    if "charge" in self.voxel_feature_types:
      time1 = time.time()
      protein_charge_dictionary = compute_charge_dictionary(protein_ob)
      ligand_charge_dictionary = compute_charge_dictionary(ligand_ob)
      time2 = time.time()
      log("TIMING: charge voxel computataion took %0.3f s" % (time2 - time1),
          self.verbose)

    # Key (0, 0) is the untransformed system, (i + 1, 0) the i-th rotation
    # and (i + 1, j + 1) the j-th reflection of that rotation.
    transformed_systems = {}
    transformed_systems[(0, 0)] = [protein_xyz, ligand_xyz]

    for i in range(0, int(self.nb_rotations)):
      rotated_system = rotate_molecules([protein_xyz, ligand_xyz])
      transformed_systems[(i + 1, 0)] = rotated_system
      for j in range(0, int(self.nb_reflections)):
        reflected_system = self._reflect_molecule(rotated_system)
        transformed_systems[(i + 1, j + 1)] = reflected_system

    if "voxel_combined" in self.feature_types:
      features = {}
      for system_id, system in transformed_systems.items():
        protein_xyz = system[0]
        ligand_xyz = system[1]
        feature_tensors = []
        if "ecfp" in self.voxel_feature_types:
          ecfp_tensor = self._voxelize(
              convert_atom_to_voxel,
              hash_ecfp,
              protein_xyz,
              feature_dict=protein_ecfp_dict,
              channel_power=self.ecfp_power)
          ecfp_tensor += self._voxelize(
              convert_atom_to_voxel,
              hash_ecfp,
              ligand_xyz,
              feature_dict=ligand_ecfp_dict,
              channel_power=self.ecfp_power)
          feature_tensors.append(ecfp_tensor)
          print("Completed ecfp tensor")

        if "splif" in self.voxel_feature_types:
          feature_tensors += [
              self._voxelize(
                  convert_atom_pair_to_voxel,
                  hash_ecfp_pair, (protein_xyz, ligand_xyz),
                  feature_dict=splif_dict,
                  channel_power=self.splif_power) for splif_dict in splif_dicts
          ]
          print("Completed splif tensor")

        if "hbond" in self.voxel_feature_types:
          feature_tensors += [
              self._voxelize(
                  convert_atom_pair_to_voxel,
                  None, (protein_xyz, ligand_xyz),
                  feature_list=hbond,
                  channel_power=0) for hbond in hbond_list
          ]
          print("Completed hbond tensor")

        if "sybyl" in self.voxel_feature_types:
          # NOTE(review): sybyl_partial is built but the bare hash_sybyl is
          # what gets passed to _voxelize below -- confirm which is intended.
          sybyl_partial = partial(hash_sybyl, sybyl_types=self.sybyl_types)
          sybyl_tensor = self._voxelize(
              convert_atom_to_voxel,
              hash_sybyl,
              protein_xyz,
              feature_dict=protein_sybyl_dict,
              nb_channel=len(self.sybyl_types))
          sybyl_tensor += self._voxelize(
              convert_atom_to_voxel,
              hash_sybyl,
              ligand_xyz,
              feature_dict=ligand_sybyl_dict,
              nb_channel=len(self.sybyl_types))
          feature_tensors.append(sybyl_tensor)
          print("Completed sybyl tensor")

        if "pi_stack" in self.voxel_feature_types:
          pi_parallel_tensor = self._voxelize(
              convert_atom_to_voxel,
              None,
              protein_xyz,
              feature_dict=protein_pi_parallel,
              nb_channel=1)
          pi_parallel_tensor += self._voxelize(
              convert_atom_to_voxel,
              None,
              ligand_xyz,
              feature_dict=ligand_pi_parallel,
              nb_channel=1)
          feature_tensors.append(pi_parallel_tensor)

          pi_t_tensor = self._voxelize(
              convert_atom_to_voxel,
              None,
              protein_xyz,
              feature_dict=protein_pi_t,
              nb_channel=1)
          pi_t_tensor += self._voxelize(
              convert_atom_to_voxel,
              None,
              ligand_xyz,
              feature_dict=ligand_pi_t,
              nb_channel=1)
          feature_tensors.append(pi_t_tensor)
          print("Completed pi_stack tensor")

        if "cation_pi" in self.voxel_feature_types:
          cation_pi_tensor = self._voxelize(
              convert_atom_to_voxel,
              None,
              protein_xyz,
              feature_dict=protein_cation_pi,
              nb_channel=1)
          cation_pi_tensor += self._voxelize(
              convert_atom_to_voxel,
              None,
              ligand_xyz,
              feature_dict=ligand_cation_pi,
              nb_channel=1)
          feature_tensors.append(cation_pi_tensor)
          print("Completed cation_pi tensor.")

        if "salt_bridge" in self.voxel_feature_types:
          salt_bridge_tensor = self._voxelize(
              convert_atom_pair_to_voxel,
              None, (protein_xyz, ligand_xyz),
              feature_list=salt_bridge_list,
              nb_channel=1)
          feature_tensors.append(salt_bridge_tensor)

          print("Completed salt_bridge tensor.")

        if "charge" in self.voxel_feature_types:
          charge_tensor = self._voxelize(
              convert_atom_to_voxel,
              None,
              protein_xyz,
              feature_dict=protein_charge_dictionary,
              nb_channel=1,
              dtype="np.float16")
          charge_tensor += self._voxelize(
              convert_atom_to_voxel,
              None,
              ligand_xyz,
              feature_dict=ligand_charge_dictionary,
              nb_channel=1,
              dtype="np.float16")
          feature_tensors.append(charge_tensor)

          print("Completed charge tensor.")

        # The combined tensor is cast to float16 when charges are included,
        # and to int8 otherwise.
        if "charge" in self.voxel_feature_types:
          feature_tensor = np.concatenate(
              feature_tensors, axis=3).astype(np.float16)
        else:
          feature_tensor = np.concatenate(
              feature_tensors, axis=3).astype(np.int8)

        if self.flatten:
          feature_tensor = np.squeeze(feature_tensor)

        features[system_id] = feature_tensor

      return (features)
Пример #48
0
def dock_ligands_to_receptors(docking_dir,
                              worker_pool=None,
                              exhaustiveness=None,
                              chosen_receptor=None,
                              restrict_box=True):
  """Dock every prepared ligand found under each receptor directory.

  Each subdirectory of `docking_dir` is treated as one receptor named after
  the directory; it must contain `<name>.pdbqt` (otherwise it is skipped)
  and `<name>.pdb`. The ligands are the `*_prepared.pdbqt` files in that
  directory, processed in sorted order.

  Parameters
  ----------
  docking_dir: str
    Directory whose subdirectories each hold one receptor and its ligands.
  worker_pool: object, optional
    If given, its `map` method is used to dock the ligands; otherwise the
    ligands are docked one by one and each docking is timed.
  exhaustiveness: optional
    Forwarded unchanged to `dock_ligand_to_receptor`.
  chosen_receptor: str, optional
    If given, only the receptor directory with this name is processed.
  restrict_box: bool
    If true, the first ligand whose path contains "CHEM" is docked into the
    whole-protein box first, and the centroid of its docked pose becomes
    the centre of a 20 x 20 x 20 box used for all ligands.
  """
  for receptor_dir in glob.glob(os.path.join(docking_dir, '*/')):
    receptor_dir = receptor_dir.rstrip('/')
    receptor_name = os.path.basename(receptor_dir)
    if not (chosen_receptor is None or chosen_receptor == receptor_name):
      continue
    print("receptor name = %s" % receptor_name)
    receptor_filename = os.path.join(receptor_dir, "%s.pdbqt" % receptor_name)
    if not os.path.exists(receptor_filename):
      continue

    print("Examining %s" % receptor_filename)

    receptor_xyz = rdkit_util.load_molecule(
        os.path.join(receptor_dir, "%s.pdb" % receptor_name))[0]
    protein_centroid = mol_xyz_util.get_molecule_centroid(receptor_xyz)
    # Default search box: the protein's extent plus a 5.0 margin.
    box_dims = mol_xyz_util.get_molecule_range(receptor_xyz) + 5.0

    ligands = sorted(glob.glob(os.path.join(receptor_dir, '*_prepared.pdbqt')))
    print("Num ligands = %d" % len(ligands))

    # Keyword arguments shared by both docking configurations below.
    shared_kwargs = dict(
        receptor_filename=receptor_filename,
        subdir=receptor_dir,
        exhaustiveness=exhaustiveness)
    dock_one = partial(
        dock_ligand_to_receptor,
        protein_centroid=protein_centroid,
        box_dims=box_dims,
        **shared_kwargs)

    if restrict_box:
      # First ligand whose path contains "CHEM"; empty string if none does.
      active_ligand = next((path for path in ligands if "CHEM" in path), "")

      print("Docking to %s first to ascertain centroid and box dimensions" %
            active_ligand)

      out_pdb_qt = dock_one(active_ligand)
      docked_xyz = rdkit_util.load_molecule(out_pdb_qt)[0]
      ligand_centroid = mol_xyz_util.get_molecule_centroid(docked_xyz)
      print("Protein centroid = %s" % (str(protein_centroid)))
      print("Ligand centroid = %s" % (str(ligand_centroid)))
      # Re-centre on the docked ligand with a fixed-size box.
      dock_one = partial(
          dock_ligand_to_receptor,
          protein_centroid=ligand_centroid,
          box_dims=np.array([20., 20., 20.]),
          **shared_kwargs)

      print("Finished docking to %s, docking to remainder of ligands now." %
            active_ligand)

    if worker_pool is not None:
      print("parallelizing docking over worker pool")

      worker_pool.map(dock_one, ligands)
    else:
      for ligand_file in ligands:
        started = time.time()
        dock_one(ligand_file)
        print("took %f seconds to dock single ligand." %
              (time.time() - started))
Пример #49
0
def load_molecule(molecule_file, add_hydrogens=True, calc_charges=False):
  """Load a molecule by delegating to `rdkit_util.load_molecule`.

  The three arguments are forwarded positionally and unchanged; the result of
  the underlying call is returned as-is. Note that this wrapper defaults
  `calc_charges` to False.
  """
  loaded = rdkit_util.load_molecule(molecule_file, add_hydrogens, calc_charges)
  return loaded
Пример #50
0
  def generate_poses(self,
                     protein_file,
                     ligand_file,
                     centroid=None,
                     box_dims=None,
                     dry_run=False,
                     out_dir=None):
    """Generates the docked complex and outputs files for docked complex.

    Writes hydrogenated PDB and PDBQT files for the receptor and the ligand
    into `out_dir`, writes a Vina configuration file ("conf.txt") there, and,
    unless `dry_run` is set, runs `self.vina_cmd` on that configuration.

    Parameters
    ----------
    protein_file: str
      Path to the receptor file. Handed to
      `hydrogenate_and_compute_partial_charges` with "pdb" as its second
      argument.
    ligand_file: str
      Path to the ligand file. Handed to
      `hydrogenate_and_compute_partial_charges` with "sdf" as its second
      argument.
    centroid: optional
      Center of the docking box. Only honored when `box_dims` is also given;
      otherwise both values are computed here.
    box_dims: optional
      Dimensions of the docking box. Only honored when `centroid` is also
      given; otherwise both values are computed here.
    dry_run: bool, optional
      If True, all input files are prepared but Vina is not invoked.
    out_dir: str, optional
      Directory that receives every generated file. When None, a new
      temporary directory is created with `tempfile.mkdtemp()`.

    Returns
    -------
    tuple
      `(protein_hyd, out_pdbqt)`: path of the hydrogenated receptor PDB file
      and path passed to Vina as its `--out` target. With `dry_run=True` this
      method does not create the second file.
    """
    if out_dir is None:
      out_dir = tempfile.mkdtemp()

    # Prepare receptor
    receptor_name = os.path.basename(protein_file).split(".")[0]
    protein_hyd = os.path.join(out_dir, "%s.pdb" % receptor_name)
    protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % receptor_name)
    hydrogenate_and_compute_partial_charges(
        protein_file,
        "pdb",
        hyd_output=protein_hyd,
        pdbqt_output=protein_pdbqt,
        protein=True)
    # Get protein centroid and range
    # TODO(rbharath): Need to add some way to identify binding pocket, or this is
    # going to be extremely slow!
    if centroid is not None and box_dims is not None:
      # Caller supplied the full box; use it unchanged.
      protein_centroid = centroid
    else:
      # A partially specified box (only one of centroid/box_dims) is ignored
      # and both values are recomputed below.
      if not self.detect_pockets:
        # Dock against the whole receptor: box is the receptor's extent plus
        # a fixed 5.0 padding.
        receptor_mol = rdkit_util.load_molecule(
            protein_hyd, calc_charges=False, add_hydrogens=False)
        protein_centroid = mol_xyz_util.get_molecule_centroid(receptor_mol[0])
        protein_range = mol_xyz_util.get_molecule_range(receptor_mol[0])
        box_dims = protein_range + 5.0
      else:
        logger.info("About to find putative binding pockets")
        pockets, _, pocket_coords = self.pocket_finder.find_pockets(
            protein_file, ligand_file)
        # TODO(rbharath): Handle multiple pockets instead of arbitrarily selecting
        # first pocket.
        logger.info("Computing centroid and size of proposed pocket.")
        pocket_coord = pocket_coords[0]
        # Average over atoms to obtain a single (x, y, z) centroid. This
        # assumes pocket_coord is laid out as (num_atoms, 3), like the other
        # coordinate arrays in this file -- TODO confirm against
        # pocket_finder.find_pockets. The previous axis=1 averaged each atom's
        # own x/y/z together and produced one value per atom.
        protein_centroid = np.mean(pocket_coord, axis=0)
        pocket = pockets[0]
        (x_min, x_max), (y_min, y_max), (z_min, z_max) = pocket
        # NOTE(review): these are half the pocket extents, whereas the
        # whole-receptor branch above passes a full range; confirm which
        # convention write_conf / Vina expects.
        x_box = (x_max - x_min) / 2.
        y_box = (y_max - y_min) / 2.
        z_box = (z_max - z_min) / 2.
        box_dims = (x_box, y_box, z_box)

    # Prepare ligand
    ligand_name = os.path.basename(ligand_file).split(".")[0]
    ligand_hyd = os.path.join(out_dir, "%s.pdb" % ligand_name)
    ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)

    # TODO(rbharath): Generalize this so can support mol2 files as well.
    hydrogenate_and_compute_partial_charges(
        ligand_file,
        "sdf",
        hyd_output=ligand_hyd,
        pdbqt_output=ligand_pdbqt,
        protein=False)
    # Write Vina conf file
    conf_file = os.path.join(out_dir, "conf.txt")
    write_conf(
        protein_pdbqt,
        ligand_pdbqt,
        protein_centroid,
        box_dims,
        conf_file,
        exhaustiveness=self.exhaustiveness)

    # Define locations of log and output files
    log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
    out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)
    # TODO(rbharath): Let user specify the number of poses required.
    if not dry_run:
      logger.info("About to call Vina")
      # NOTE(review): the command is a shell string, so paths containing
      # spaces or shell metacharacters will be misparsed; the exit status of
      # Vina is also not checked.
      call(
          "%s --config %s --log %s --out %s" % (self.vina_cmd, conf_file,
                                                log_file, out_pdbqt),
          shell=True)
    # TODO(rbharath): Convert the output pdbqt to a pdb file.

    # Return docked files
    return protein_hyd, out_pdbqt