示例#1
0
    def _featurize(self, datapoint, **kwargs):  # -> Optional[np.ndarray]:
        """Compute hydrogen-bond voxel features for a single mol/protein complex.

        Parameters
        ----------
        datapoint: Tuple[str, str]
            Filenames for molecule and protein.
        **kwargs
            Only the deprecated ``complex`` keyword is read. When present it
            replaces ``datapoint`` and a ``DeprecationWarning`` is issued.

        Returns
        -------
        Optional[np.ndarray]
            The per-fragment-pair features concatenated on the last axis, or
            ``None`` when loading the complex raises ``MoleculeLoadException``.
        """
        if 'complex' in kwargs:
            # Imported locally because the file-level imports are not visible
            # from this block.
            import warnings

            datapoint = kwargs.get("complex")
            # Warn rather than raise: raising here made the reassignment above
            # dead code and turned a deprecation notice into a hard failure.
            warnings.warn(
                'Complex is being phased out as a parameter, please pass "datapoint" instead.',
                DeprecationWarning,
                stacklevel=2)
        try:
            fragments = rdkit_utils.load_complex(datapoint,
                                                 add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        pairwise_features = []
        # The centroid is computed on the full complex, before the optional
        # reduction to contact atoms below.
        centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
        if self.reduce_to_contacts:
            fragments = reduce_molecular_complex_to_contacts(
                fragments, self.cutoff)

        # One feature array per unordered pair of fragments.
        for frag1, frag2 in itertools.combinations(fragments, 2):
            distances = compute_pairwise_distances(frag1[0], frag2[0])
            xyzs = [
                subtract_centroid(frag1[0], centroid),
                subtract_centroid(frag2[0], centroid),
            ]
            channels = []
            for hbond_list in compute_hydrogen_bonds(frag1, frag2, distances,
                                                     self.distance_bins,
                                                     self.angle_cutoffs):
                # Voxelize the same hydrogen-bond list against each fragment's
                # centered coordinates and add the two grids together.
                channels.append(
                    sum([
                        voxelize(convert_atom_pair_to_voxel,
                                 hash_function=None,
                                 box_width=self.box_width,
                                 voxel_width=self.voxel_width,
                                 coordinates=xyz,
                                 feature_list=hbond_list,
                                 nb_channel=1) for xyz in xyzs
                    ]))
            pairwise_features.append(np.concatenate(channels, axis=-1))
        # Each grid is requested with nb_channel=1, so per the original
        # author's note the channel axis is last; concatenate along it.
        return np.concatenate(pairwise_features, axis=-1)
  def _featurize(self, mol_pdb: str, protein_pdb: str) -> np.ndarray:
    """
    Build voxelized hydrogen-bond features for one ligand/protein pair.

    Parameters
    ----------
    mol_pdb: str
      Filename for ligand molecule
    protein_pdb: str
      Filename for protein molecule

    Returns
    -------
    The per-fragment-pair features concatenated on the last axis, or None
    when loading the complex raises MoleculeLoadException.
    """
    try:
      fragments = rdkit_utils.load_complex(
          (mol_pdb, protein_pdb), add_hydrogens=False)
    except MoleculeLoadException:
      logger.warning("This molecule cannot be loaded by Rdkit. Returning None")
      return None

    # The centroid comes from the full complex, before the optional reduction
    # to contact atoms.
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    if self.reduce_to_contacts:
      fragments = reduce_molecular_complex_to_contacts(fragments, self.cutoff)

    per_pair = []
    # Visit every unordered pair of fragments.
    for first, second in itertools.combinations(fragments, 2):
      pair_distances = compute_pairwise_distances(first[0], second[0])
      centered = [
          subtract_centroid(first[0], centroid),
          subtract_centroid(second[0], centroid),
      ]
      channels = []
      for hbond_list in compute_hydrogen_bonds(first, second, pair_distances,
                                               self.distance_bins,
                                               self.angle_cutoffs):
        # Same hydrogen-bond list voxelized against each fragment's centered
        # coordinates; the two grids are summed.
        grids = [
            voxelize(
                convert_atom_pair_to_voxel,
                hash_function=None,
                box_width=self.box_width,
                voxel_width=self.voxel_width,
                coordinates=coords,
                feature_list=hbond_list,
                nb_channel=1) for coords in centered
        ]
        channels.append(sum(grids))
      per_pair.append(np.concatenate(channels, axis=-1))
    # Grids are requested with nb_channel=1; join everything on the last axis.
    return np.concatenate(per_pair, axis=-1)
示例#3
0
    def _featurize(self, complex: Tuple[str, str]) -> Optional[np.ndarray]:
        """Compute voxelized salt-bridge features for a single mol/protein complex.

        Parameters
        ----------
        complex: Tuple[str, str]
            Filenames for molecule and protein.

        Returns
        -------
        Optional[np.ndarray]
            The per-fragment-pair grids concatenated on the last axis, or
            ``None`` when loading the complex raises ``MoleculeLoadException``.
        """
        try:
            fragments = rdkit_utils.load_complex(complex, add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        pairwise_features = []
        # The centroid is computed on the full complex, before the optional
        # reduction to contact atoms below.
        centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
        if self.reduce_to_contacts:
            fragments = reduce_molecular_complex_to_contacts(
                fragments, self.cutoff)

        # One grid per unordered pair of fragments.
        for frag1, frag2 in itertools.combinations(fragments, 2):
            distances = compute_pairwise_distances(frag1[0], frag2[0])
            xyzs = [
                subtract_centroid(frag1[0], centroid),
                subtract_centroid(frag2[0], centroid),
            ]
            # The salt bridges do not depend on which coordinate set is being
            # voxelized, so compute them once per pair instead of once per
            # coordinate set.
            # NOTE(review): assumes compute_salt_bridges returns a reusable
            # collection (not a one-shot iterator) - confirm.
            salt_bridges = compute_salt_bridges(frag1[1],
                                                frag2[1],
                                                distances,
                                                cutoff=self.cutoff)
            pairwise_features.append(
                sum([
                    voxelize(convert_atom_pair_to_voxel,
                             hash_function=None,
                             coordinates=xyz,
                             box_width=self.box_width,
                             voxel_width=self.voxel_width,
                             feature_list=salt_bridges,
                             nb_channel=1) for xyz in xyzs
                ]))
        # Each grid is requested with nb_channel=1, so per the original
        # author's note the channel axis is last; concatenate along it.
        return np.concatenate(pairwise_features, axis=-1)
示例#4
0
    def _featurize(self, complex: Tuple[str, str]) -> Optional[np.ndarray]:
        """Count hydrogen bonds for every fragment pair of a mol/protein complex.

        Parameters
        ----------
        complex: Tuple[str, str]
            Filenames for molecule and protein.

        Returns
        -------
        Optional[np.ndarray]
            A 1-D array holding, for each fragment pair in turn, the length of
            every list produced by ``compute_hydrogen_bonds``; ``None`` when
            loading the complex raises ``MoleculeLoadException``.
        """
        try:
            fragments = rdkit_utils.load_complex(complex, add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        if self.reduce_to_contacts:
            fragments = reduce_molecular_complex_to_contacts(
                fragments, self.cutoff)

        counts_per_pair = []
        # Visit every unordered pair of fragments.
        for first, second in itertools.combinations(fragments, 2):
            pair_distances = compute_pairwise_distances(first[0], second[0])
            hbond_lists = compute_hydrogen_bonds(first, second, pair_distances,
                                                 self.distance_bins,
                                                 self.angle_cutoffs)
            # Each list contributes a single-element array with its length.
            counts = [np.array([len(hbonds)]) for hbonds in hbond_lists]
            counts_per_pair.append(np.concatenate(counts, axis=-1))
        # Every entry is 1-D, so this yields one flat vector of counts.
        return np.concatenate(counts_per_pair, axis=-1)