示例#1
0
    def _featurize(self, datapoint, **kwargs):  # -> Optional[np.ndarray]:
        """
        Compute hydrogen-bond voxel features for a single mol/protein complex.

        Parameters
        ----------
        datapoint: Tuple[str, str]
          Filenames for molecule and protein.
        **kwargs
          Only the deprecated ``complex`` keyword is inspected. When present
          it replaces ``datapoint`` and a ``DeprecationWarning`` is issued.

        Returns
        -------
        The per-fragment-pair voxel grids concatenated on the last axis, or
        ``None`` when the complex cannot be loaded.
        """
        if 'complex' in kwargs:
            # Imported locally so the method does not rely on a module-level
            # import that may not exist in this file.
            import warnings

            datapoint = kwargs.get("complex")
            # Warn (rather than raise) so that callers still using the old
            # keyword keep working while being told to migrate.
            warnings.warn(
                'Complex is being phased out as a parameter, please pass "datapoint" instead.',
                DeprecationWarning,
                stacklevel=2)
        try:
            fragments = rdkit_utils.load_complex(datapoint,
                                                 add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        # The centroid is computed on the full complex, before any optional
        # reduction to contact atoms.
        centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
        if self.reduce_to_contacts:
            fragments = reduce_molecular_complex_to_contacts(
                fragments, self.cutoff)

        pairwise_features = []
        # We compute pairwise contact fingerprints
        for frag1, frag2 in itertools.combinations(fragments, 2):
            distances = compute_pairwise_distances(frag1[0], frag2[0])
            xyzs = [
                subtract_centroid(frag1[0], centroid),
                subtract_centroid(frag2[0], centroid),
            ]
            hbond_grids = []
            for hbond_list in compute_hydrogen_bonds(frag1, frag2, distances,
                                                     self.distance_bins,
                                                     self.angle_cutoffs):
                # One single-channel grid per hydrogen-bond list: the grids
                # voxelized from each fragment's coordinates are summed.
                hbond_grids.append(
                    sum([
                        voxelize(convert_atom_pair_to_voxel,
                                 hash_function=None,
                                 box_width=self.box_width,
                                 voxel_width=self.voxel_width,
                                 coordinates=xyz,
                                 feature_list=hbond_list,
                                 nb_channel=1) for xyz in xyzs
                    ]))
            pairwise_features.append(np.concatenate(hbond_grids, axis=-1))
        # Features are of shape (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1) so we should concatenate on the last axis.
        return np.concatenate(pairwise_features, axis=-1)
示例#2
0
    def _featurize(self, mol_pdb: str, protein_pdb: str):
        """
        Build ECFP-based contact voxel features for a ligand/protein pair.

        Parameters
        ----------
        mol_pdb: str
          Filename for ligand molecule
        protein_pdb: str
          Filename for protein molecule

        Returns
        -------
        ``None`` if the complex cannot be loaded. Otherwise the features of
        every fragment pair, either flattened and joined into one vector
        (``self.flatten``) or concatenated along the last axis.
        """
        try:
            fragments = load_complex((mol_pdb, protein_pdb),
                                     add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
        pairwise_features: List[np.ndarray] = []
        # One summed voxel grid per unordered pair of fragments.
        for first, second in itertools.combinations(fragments, 2):
            pair_distances = compute_pairwise_distances(first[0], second[0])
            centered = [
                subtract_centroid(first[0], centroid),
                subtract_centroid(second[0], centroid),
            ]
            contact_dicts = featurize_contacts_ecfp(first,
                                                    second,
                                                    pair_distances,
                                                    cutoff=self.cutoff,
                                                    ecfp_degree=self.radius)
            grids = [
                voxelize(convert_atom_to_voxel,
                         coords,
                         self.box_width,
                         self.voxel_width,
                         hash_function=hash_ecfp,
                         feature_dict=contact_dict,
                         nb_channel=self.size)
                for coords, contact_dict in zip(centered, contact_dicts)
            ]
            pairwise_features.append(sum(grids))

        if not self.flatten:
            # Features are of shape (voxels_per_edge, voxels_per_edge,
            # voxels_per_edge, num_feat) so we should concatenate on the last
            # axis.
            return np.concatenate(pairwise_features, axis=-1)
        return np.concatenate(
            [grid.flatten() for grid in pairwise_features])
  def _featurize(self, mol_pdb: str, protein_pdb: str) -> np.ndarray:
    """
    Build hydrogen-bond voxel features for a ligand/protein pair.

    Parameters
    ----------
    mol_pdb: str
      Filename for ligand molecule
    protein_pdb: str
      Filename for protein molecule

    Returns
    -------
    The voxel grids of every fragment pair concatenated along the last axis.
    ``None`` is returned instead when the complex cannot be loaded.
    """
    try:
      fragments = rdkit_utils.load_complex(
          (mol_pdb, protein_pdb), add_hydrogens=False)
    except MoleculeLoadException:
      logger.warning("This molecule cannot be loaded by Rdkit. Returning None")
      return None

    # The centroid comes from the full complex, before the optional
    # reduction to contact atoms.
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    if self.reduce_to_contacts:
      fragments = reduce_molecular_complex_to_contacts(fragments, self.cutoff)

    pairwise_features = []
    for first, second in itertools.combinations(fragments, 2):
      pair_distances = compute_pairwise_distances(first[0], second[0])
      centered = [
          subtract_centroid(first[0], centroid),
          subtract_centroid(second[0], centroid),
      ]
      channel_grids = []
      for hbond_list in compute_hydrogen_bonds(first, second, pair_distances,
                                               self.distance_bins,
                                               self.angle_cutoffs):
        # Sum the single-channel grids voxelized from each fragment.
        per_fragment = [
            voxelize(
                convert_atom_pair_to_voxel,
                hash_function=None,
                box_width=self.box_width,
                voxel_width=self.voxel_width,
                coordinates=coords,
                feature_list=hbond_list,
                nb_channel=1) for coords in centered
        ]
        channel_grids.append(sum(per_fragment))
      pairwise_features.append(np.concatenate(channel_grids, axis=-1))
    # Each grid is (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1), so
    # the pairs are joined on the last axis.
    return np.concatenate(pairwise_features, axis=-1)
示例#4
0
    def _featurize(self, complex: Tuple[str, str]) -> Optional[np.ndarray]:
        """
        Build cation-pi voxel features for a single mol/protein complex.

        Parameters
        ----------
        complex: Tuple[str, str]
          Filenames for molecule and protein.

        Returns
        -------
        The voxel grids of every fragment pair concatenated along the last
        axis, or ``None`` when the complex cannot be loaded.
        """
        try:
            fragments = rdkit_utils.load_complex(complex, add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
        pairwise_features = []
        # One summed single-channel grid per unordered pair of fragments.
        for first, second in itertools.combinations(fragments, 2):
            centered = [
                subtract_centroid(first[0], centroid),
                subtract_centroid(second[0], centroid),
            ]
            cation_pi_dicts = compute_binding_pocket_cation_pi(
                first[1],
                second[1],
                dist_cutoff=self.cutoff,
                angle_cutoff=self.angle_cutoff,
            )
            grids = [
                voxelize(convert_atom_to_voxel,
                         hash_function=None,
                         box_width=self.box_width,
                         voxel_width=self.voxel_width,
                         coordinates=coords,
                         feature_dict=pi_dict,
                         nb_channel=1)
                for coords, pi_dict in zip(centered, cation_pi_dicts)
            ]
            pairwise_features.append(sum(grids))
        # Each grid is (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1),
        # so the pairs are joined on the last axis.
        return np.concatenate(pairwise_features, axis=-1)
示例#5
0
    def _featurize(self, datapoint, **kwargs):
        """
        Compute SPLIF voxel features for a molecular complex.

        Parameters
        ----------
        datapoint: Tuple[str, str]
          Filenames for molecule and protein.
        **kwargs
          Only the deprecated ``complex`` keyword is inspected. When present
          it replaces ``datapoint`` and a ``DeprecationWarning`` is issued.

        Returns
        -------
        The per-fragment-pair voxel grids concatenated on the last axis, or
        ``None`` when the complex cannot be loaded.
        """
        if 'complex' in kwargs:
            # Imported locally so the method does not rely on a module-level
            # import that may not exist in this file.
            import warnings

            datapoint = kwargs.get("complex")
            # Warn (rather than raise) so that callers still using the old
            # keyword keep working while being told to migrate.
            warnings.warn(
                'Complex is being phased out as a parameter, please pass "datapoint" instead.',
                DeprecationWarning,
                stacklevel=2)

        try:
            fragments = load_complex(datapoint, add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        pairwise_features = []
        # We compute pairwise contact fingerprints
        centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
        for frag1, frag2 in itertools.combinations(fragments, 2):
            distances = compute_pairwise_distances(frag1[0], frag2[0])
            # Both fragments' centred coordinates are handed to voxelize
            # together, since each SPLIF entry is voxelized as an atom pair.
            xyzs = [
                subtract_centroid(frag1[0], centroid),
                subtract_centroid(frag2[0], centroid),
            ]
            splif_grids = [
                voxelize(convert_atom_pair_to_voxel,
                         hash_function=hash_ecfp_pair,
                         coordinates=xyzs,
                         box_width=self.box_width,
                         voxel_width=self.voxel_width,
                         feature_dict=splif_dict,
                         nb_channel=self.size)
                for splif_dict in featurize_splif(
                    frag1, frag2, self.contact_bins, distances, self.radius)
            ]
            pairwise_features.append(np.concatenate(splif_grids, axis=-1))
        # Features are of shape (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1) so we should concatenate on the last axis.
        return np.concatenate(pairwise_features, axis=-1)
示例#6
0
 def test_subract_centroid(self):
     """Check that centring random coordinates yields a zero centroid."""
     n_points = 10
     points = np.random.rand(n_points, 3)
     original_centroid = geometry_utils.compute_centroid(points)
     centered = geometry_utils.subtract_centroid(points, original_centroid)
     # Centring must not change the number or dimensionality of points.
     assert centered.shape == (n_points, 3)
     recomputed = geometry_utils.compute_centroid(centered)
     assert recomputed.shape == (3, )
     expected_origin = np.zeros_like(recomputed)
     np.testing.assert_almost_equal(recomputed, expected_origin)
    def _featurize(self, mol_pdb: str, complex_pdb: str):
        """
        Build SPLIF voxel features for a ligand/protein pair.

        Parameters
        ----------
        mol_pdb: str
          Filename for ligand molecule
        complex_pdb: str
          Filename for protein molecule

        Returns
        -------
        The voxel grids of every fragment pair concatenated along the last
        axis, or ``None`` when the complex cannot be loaded.
        """
        try:
            fragments = load_complex((mol_pdb, complex_pdb),
                                     add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
        pairwise_features = []
        for first, second in itertools.combinations(fragments, 2):
            pair_distances = compute_pairwise_distances(first[0], second[0])
            # Both centred coordinate sets are passed to voxelize together.
            centered = [
                subtract_centroid(first[0], centroid),
                subtract_centroid(second[0], centroid),
            ]
            splif_grids = []
            for splif_dict in featurize_splif(first, second, self.contact_bins,
                                              pair_distances, self.radius):
                splif_grids.append(
                    voxelize(convert_atom_pair_to_voxel,
                             hash_function=hash_ecfp_pair,
                             coordinates=centered,
                             box_width=self.box_width,
                             voxel_width=self.voxel_width,
                             feature_dict=splif_dict,
                             nb_channel=self.size))
            pairwise_features.append(np.concatenate(splif_grids, axis=-1))
        # The grids of all pairs are joined on the last axis.
        return np.concatenate(pairwise_features, axis=-1)
示例#8
0
    def _featurize(self, complex):
        """Computes grid featurization of protein/ligand complex.

        Loads the protein and the ligand, translates both so that the ligand
        centroid sits at the origin, and computes the protein/ligand pairwise
        distances. Features are then computed for the centred system and for
        ``self.nb_rotations`` additional systems returned by
        ``rotate_molecules``, using every entry of ``self.feature_types``.

        Parameters
        ----------
        complex: Tuple[str, str]
          Filenames for molecule and protein.

        Returns
        -------
        ``None`` if either molecule cannot be loaded. Otherwise the features
        of all systems stacked into one array and squeezed.
        """
        try:
            mol_pdb_file, protein_pdb_file = complex
            time1 = time.time()

            protein_xyz, protein_rdk = load_molecule(protein_pdb_file,
                                                     calc_charges=True,
                                                     sanitize=self.sanitize)
            time2 = time.time()
            # Lazy %-style arguments: logging formats the message itself, so
            # no extra positional arguments may be passed.
            logger.info("TIMING: Loading protein coordinates took %0.3f s",
                        time2 - time1)
            time1 = time.time()
            ligand_xyz, ligand_rdk = load_molecule(mol_pdb_file,
                                                   calc_charges=True,
                                                   sanitize=self.sanitize)
            time2 = time.time()
            logger.info("TIMING: Loading ligand coordinates took %0.3f s",
                        time2 - time1)
        except MoleculeLoadException:
            logger.warning(
                "Some molecules cannot be loaded by Rdkit. Skipping")
            return None

        time1 = time.time()
        # Both molecules are shifted by the ligand's centroid.
        centroid = compute_centroid(ligand_xyz)
        ligand_xyz = subtract_centroid(ligand_xyz, centroid)
        protein_xyz = subtract_centroid(protein_xyz, centroid)
        time2 = time.time()
        logger.info("TIMING: Centroid processing took %0.3f s", time2 - time1)

        # Computed once from the centred coordinates and reused for every
        # transformed system below.
        pairwise_distances = compute_pairwise_distances(
            protein_xyz, ligand_xyz)

        # Key (0, 0) is the centred system; (i + 1, 0) is the i-th rotation.
        transformed_systems = {}
        transformed_systems[(0, 0)] = [protein_xyz, ligand_xyz]

        for i in range(self.nb_rotations):
            rotated_system = rotate_molecules([protein_xyz, ligand_xyz])
            transformed_systems[(i + 1, 0)] = rotated_system

        feature_types = list(self.feature_types)
        features_dict = {}
        for system_id, (system_protein_xyz,
                        system_ligand_xyz) in transformed_systems.items():
            feature_arrays = []
            for _is_flat, function_name in feature_types:
                feature_arrays += self._compute_feature(
                    function_name,
                    system_protein_xyz,
                    protein_rdk,
                    system_ligand_xyz,
                    ligand_rdk,
                    pairwise_distances,
                )

            if not feature_types:
                # No feature type requested: leave this system without an
                # entry instead of concatenating an empty list.
                continue

            # Concatenate once per system, after all feature types have been
            # collected, rather than after every feature type.
            if self.flatten:
                features_dict[system_id] = np.concatenate([
                    feature_array.flatten()
                    for feature_array in feature_arrays
                ])
            else:
                features_dict[system_id] = np.concatenate(feature_arrays,
                                                          axis=-1)

        # TODO(rbharath): Is this squeeze OK?
        features = np.squeeze(np.array(list(features_dict.values())))
        return features