Exemplo n.º 1
0
    def _initialize_coordinates_and_PdbCreator(self):
        """Make sure coordinates exist and lazily create ``self.pdb_creator``.

        Calls ``self.build()`` when ``self.coords`` is empty, then, if no
        ``pdb_creator`` has been set yet, instantiates a ``PdbBuilder`` from
        the sequence and a NumPy view of the coordinates.
        """
        if len(self.coords) == 0:
            self.build()

        if not self.pdb_creator:
            from sidechainnet.structure.PdbBuilder import PdbBuilder
            if self.coord_type == 'numpy':
                coords = self.coords
            else:
                # Tensor.numpy() raises for tensors that require grad or that
                # live on a non-CPU device, so detach and move to CPU first.
                coords = self.coords.detach().cpu().numpy()
            self.pdb_creator = PdbBuilder(self.seq, coords)
Exemplo n.º 2
0
    def _initialize_coordinates_and_PdbCreator(self):
        """Make sure coordinates exist and lazily create ``self.pdb_creator``.

        Calls ``self.build()`` when ``self.coords`` is ``None`` or empty, then,
        if no ``pdb_creator`` has been set yet, instantiates a ``PdbBuilder``
        from ``self.seq_as_str`` and a NumPy version of the coordinates.
        """
        if self.coords is None or len(self.coords) == 0:
            self.build()

        if not self.pdb_creator:
            from sidechainnet.structure.PdbBuilder import PdbBuilder
            if self.data_type == 'numpy':
                coords = self.coords
            else:
                # detach() drops autograd tracking; cpu() is a no-op for CPU
                # tensors and is required before numpy() for any other device.
                coords = self.coords.detach().cpu().numpy()
            self.pdb_creator = PdbBuilder(self.seq_as_str, coords)
Exemplo n.º 3
0
class StructureBuilder(object):
    """Reconstruct a protein's structure given its sequence and angles or coordinates.

    The hydroxyl-oxygen of terminal residues is not placed because this would
    mean that the number of coordinates per residue would not be constant, or
    cause other complications (i.e. what if the last atom of a structure is not
    really a terminal atom because its tail is masked out?).
    """

    def __init__(self, seq, ang=None, crd=None, device=torch.device("cpu")):
        """Initialize a StructureBuilder for a single protein. Does not build coordinates.

        To generate coordinates after initialization, see build().
        To create PDB/GLTF files or to generate a py3Dmol visualization, see
        to_{pdb,gltf,3Dmol}.

        Args:
            seq: An integer tensor or a string of length L that represents the protein's
                amino acid sequence.
            ang: A float tensor (L X NUM_PREDICTED_ANGLES) that contains all of the
                protein's interior angles.
            crd: A float tensor ((L X NUM_COORDS_PER_RES) X 3) that contains all of the
                protein's atomic coordinates. Each residue must contain the same number
                of coordinates, with empty coordinate entries padded with 0-vectors.
            device: An optional torch device on which to build the structure.

        Raises:
            ValueError: If neither or both of ``ang`` and ``crd`` are given, if
                their shapes fail the checks below, or if ``ang`` contains
                all-zero rows (missing residues).
        """
        # Validate input data: exactly one of ang / crd must be supplied.
        if (ang is None) == (crd is None):
            raise ValueError(
                "You must provide exactly one of either coordinates (crd) "
                "or angles (ang).")
        # Perhaps the user mistakenly passed coordinates for the angle arguments
        if ang is not None and ang.shape[-1] == 3:
            # torch tensors expose clone() rather than copy(); numpy arrays
            # expose copy(). Handle both so this recovery path never raises.
            crd = ang.clone() if isinstance(ang, torch.Tensor) else ang.copy()
            ang = None
        if ang is not None and ang.shape[-1] != NUM_ANGLES:
            raise ValueError(
                f"Angle matrix dimensions must match (L x {NUM_ANGLES}). "
                f"You have provided {tuple(ang.shape)}.")
        if (crd is not None and crd.shape[-1] != 3):
            raise ValueError(
                f"Coordinate matrix dimensions must match (L x 3). "
                f"You have provided {tuple(crd.shape)}.")
        # NOTE(review): floor division means a trailing partial residue is not
        # rejected by this check -- confirm whether that is intended.
        if (crd is not None
                and (crd.shape[0] // NUM_COORDS_PER_RES) != len(seq)):
            raise ValueError(
                f"The length of the coordinate matrix must match the sequence length "
                f"times {NUM_COORDS_PER_RES}. You have provided {crd.shape[0]} // "
                f"{NUM_COORDS_PER_RES} = {crd.shape[0] // NUM_COORDS_PER_RES}."
            )
        # An all-zero angle row marks a missing residue, which cannot be built.
        if ang is not None and np.any(np.all(ang == 0, axis=1)):
            missing_loc = np.where(np.all(ang == 0, axis=1))
            raise ValueError(
                f"Building atomic coordinates from angles is not supported "
                f"for structures with missing residues. Missing residues = "
                f"{list(missing_loc[0])}. Protein structures with missing "
                "residues are only supported if built directly from "
                "coordinates (also supported by StructureBuilder).")
        if crd is not None:
            self.coords = crd
            self.coord_type = "numpy" if isinstance(crd, np.ndarray) else 'torch'
        else:
            # Coordinates are produced later by build().
            self.coords = []
            self.coord_type = "numpy" if isinstance(ang, np.ndarray) else 'torch'

        self.seq = seq
        self.ang = ang
        self.device = device

        self.prev_ang = None
        self.prev_bb = None
        self.next_bb = None

        # Created lazily by _initialize_coordinates_and_PdbCreator().
        self.pdb_creator = None
        self.integer_coded_seq = np.asarray([VOCAB._char2int[s] for s in seq])

    def __len__(self):
        """Return length of the protein sequence.

        Returns:
            int: Integer sequence length.
        """
        return len(self.seq)

    @staticmethod
    def _as_numpy(coords):
        """Return ``coords`` as a NumPy array.

        NumPy arrays are returned unchanged. Anything else is treated as a
        torch tensor and is detached from autograd and moved to the CPU before
        conversion, because ``Tensor.numpy()`` raises for tensors that require
        grad or that live on another device.
        """
        if isinstance(coords, np.ndarray):
            return coords
        return coords.detach().cpu().numpy()

    def _iter_resname_angs(self, start=0):
        """Yield (integer residue code, angles) pairs from index ``start`` on."""
        for resname, angles in zip(self.integer_coded_seq[start:],
                                   self.ang[start:]):
            yield resname, angles

    def _build_first_two_residues(self):
        """Construct the first two residues of the protein.

        Returns:
            tuple: The built (first, second) ResidueBuilder objects.

        Raises:
            ValueError: If fewer than two (residue, angles) pairs are available.
        """
        resname_ang_iter = self._iter_resname_angs()
        try:
            first_resname, first_ang = next(resname_ang_iter)
            second_resname, second_ang = next(resname_ang_iter)
        except StopIteration:
            # Surface a descriptive error instead of a bare StopIteration.
            raise ValueError(
                "Building a structure from angles requires at least two "
                "residues.") from None
        first_res = ResidueBuilder(first_resname,
                                   first_ang,
                                   prev_res=None,
                                   next_res=None)
        second_res = ResidueBuilder(second_resname,
                                    second_ang,
                                    prev_res=first_res,
                                    next_res=None)

        # After building both backbones use the second residue's N to build the first's CB
        first_res.build_bb()
        second_res.build()
        first_res.next_res = second_res
        first_res.build_sc()

        return first_res, second_res

    def build(self):
        """Construct the coordinates of every residue in the protein.

        Special care must be taken for the first residue in the sequence in
        order to place its CB, if present.

        Returns:
            (numpy.ndarray, torch.Tensor): An array or tensor of the generated coordinates
            with shape ((L X NUM_COORDS_PER_RES) X 3).
        """
        # If a StructureBuilder does not have angles, build returns its coordinates
        if self.ang is None:
            return self.coords

        # Build the first and second residues, a special case
        first, second = self._build_first_two_residues()

        # Start the running list of coordinates with the first two residues
        self.coords = first._stack_coords() + second._stack_coords()

        # Build the rest of the structure, chaining each residue to the previous
        prev_res = second
        for i, (resname, ang) in enumerate(self._iter_resname_angs(start=2)):
            res = ResidueBuilder(resname,
                                 ang,
                                 prev_res=prev_res,
                                 next_res=None,
                                 is_last_res=i + 2 == len(self.seq) - 1)
            self.coords += res.build()
            prev_res = res

        # Stack the accumulated coordinates into one array/tensor that
        # matches the type of the input angles.
        if self.coord_type == 'torch':
            self.coords = torch.stack(self.coords)
        else:
            self.coords = np.stack(self.coords)

        return self.coords

    def _initialize_coordinates_and_PdbCreator(self):
        """Build coordinates if needed and lazily create ``self.pdb_creator``."""
        if len(self.coords) == 0:
            self.build()

        if not self.pdb_creator:
            from sidechainnet.structure.PdbBuilder import PdbBuilder
            self.pdb_creator = PdbBuilder(self.seq,
                                          self._as_numpy(self.coords))

    def to_pdb(self, path, title="pred"):
        """Save protein structure as a PDB file to given path.

        Args:
            path (str): Path to save PDB file.
            title (str, optional): Title of structure for PDB file. Defaults to "pred".
        """
        self._initialize_coordinates_and_PdbCreator()
        self.pdb_creator.save_pdb(path, title)

    def to_gltf(self, path, title="pred"):
        """Save protein structure as a GLTF (3D-object) file to given path.

        Args:
            path (str): Path to save GLTF file.
            title (str, optional): Title of structure for GLTF file. Defaults to "pred".
        """
        self._initialize_coordinates_and_PdbCreator()
        self.pdb_creator.save_gltf(path, title)

    def to_3Dmol(self, style=None, **kwargs):
        """Generate protein structure & return interactive py3Dmol.view for visualization.

        Args:
            style (dict, optional): Style specification passed to py3Dmol's
                ``setStyle``. When falsy, a cartoon + stick style is used.
            **kwargs: Forwarded to ``py3Dmol.view``.

        Returns:
            py3Dmol.view object: A view object that is interactive in iPython notebook
                settings.
        """
        import py3Dmol
        if not style:
            style = {
                'cartoon': {
                    'color': 'spectrum'
                },
                'stick': {
                    'radius': .15
                }
            }
        self._initialize_coordinates_and_PdbCreator()

        view = py3Dmol.view(**kwargs)
        view.addModel(self.pdb_creator.get_pdb_string(), 'pdb')
        # style is always truthy here because of the default assigned above.
        view.setStyle(style)
        view.zoomTo()
        return view