def _initialize_coordinates_and_PdbCreator(self):
    """Ensure coordinates exist and that ``self.pdb_creator`` has been created.

    ``self.build()`` is called when ``self.coords`` is ``None`` or empty. If
    ``self.pdb_creator`` is falsy, a ``PdbBuilder`` is then constructed from
    ``self.seq`` and the coordinates (converted to a numpy array when
    ``self.coord_type`` is not ``'numpy'``).
    """
    # `len(None)` raises TypeError, so the None case is checked first.
    if self.coords is None or len(self.coords) == 0:
        self.build()

    if self.pdb_creator:
        return

    # Imported at call time, so the module is only needed once a PdbBuilder
    # actually has to be created.
    from sidechainnet.structure.PdbBuilder import PdbBuilder

    if self.coord_type == 'numpy':
        coords = self.coords
    else:
        # Tensor.numpy() raises for tensors that require grad or that are not
        # on the CPU; detach() and cpu() are no-ops when neither applies.
        coords = self.coords.detach().cpu().numpy()
    self.pdb_creator = PdbBuilder(self.seq, coords)
def _initialize_coordinates_and_PdbCreator(self):
    """Ensure coordinates exist and that ``self.pdb_creator`` has been created.

    ``self.build()`` is called when ``self.coords`` is ``None`` or empty. If
    ``self.pdb_creator`` is falsy, a ``PdbBuilder`` is then constructed from
    ``self.seq_as_str`` and the coordinates (converted to a numpy array when
    ``self.data_type`` is not ``'numpy'``).
    """
    if self.coords is None or len(self.coords) == 0:
        self.build()

    if self.pdb_creator:
        return

    # Imported at call time, so the module is only needed once a PdbBuilder
    # actually has to be created.
    from sidechainnet.structure.PdbBuilder import PdbBuilder

    if self.data_type == 'numpy':
        coords = self.coords
    else:
        # Tensor.numpy() only works for CPU tensors; cpu() is a no-op when the
        # tensor already lives on the CPU.
        coords = self.coords.detach().cpu().numpy()
    self.pdb_creator = PdbBuilder(self.seq_as_str, coords)
class StructureBuilder(object):
    """Reconstruct a protein's structure given its sequence and angles or coordinates.

    The hydroxyl-oxygen of terminal residues is not placed because this would mean
    that the number of coordinates per residue would not be constant, or cause other
    complications (i.e. what if the last atom of a structure is not really a terminal
    atom because its tail is masked out?).
    """

    def __init__(self, seq, ang=None, crd=None, device=torch.device("cpu")):
        """Initialize a StructureBuilder for a single protein. Does not build coordinates.

        To generate coordinates after initialization, see build().
        To create PDB/GLTF files or to generate a py3Dmol visualization, see
        to_{pdb,gltf,3Dmol}.

        Args:
            seq: An integer tensor or a string of length L that represents the protein's
                amino acid sequence.
                NOTE(review): every element of seq is looked up in VOCAB._char2int, so
                a sequence of characters is expected here -- confirm that integer
                input is really supported.
            ang: A float tensor (L X NUM_PREDICTED_ANGLES) that contains all of the
                protein's interior angles.
            crd: A float tensor ((L X NUM_COORDS_PER_RES) X 3) that contains all of the
                protein's atomic coordinates. Each residue must contain the same number
                of coordinates, with empty coordinate entries padded with 0-vectors.
            device: An optional torch device on which to build the structure.

        Raises:
            ValueError: If neither or both of ang/crd are given, if their shapes do not
                match the expected dimensions, or if ang contains all-zero rows
                (missing residues).
        """
        # Validate input data
        if (ang is None and crd is None) or (ang is not None and crd is not None):
            raise ValueError(
                "You must provide exactly one of either coordinates (crd) "
                "or angles (ang).")
        # Perhaps the user mistakenly passed coordinates for the angle arguments
        if ang is not None and crd is None and ang.shape[-1] == 3:
            # torch tensors have no .copy(); .clone() is their copying method.
            crd = ang.clone() if isinstance(ang, torch.Tensor) else ang.copy()
            ang = None
        if ang is not None and ang.shape[-1] != NUM_ANGLES:
            raise ValueError(
                f"Angle matrix dimensions must match (L x {NUM_ANGLES}). "
                f"You have provided {tuple(ang.shape)}.")
        if (crd is not None and crd.shape[-1] != 3):
            raise ValueError(
                f"Coordinate matrix dimensions must match (L x 3). "
                f"You have provided {tuple(crd.shape)}.")
        if (crd is not None and (crd.shape[0] // NUM_COORDS_PER_RES) != len(seq)):
            raise ValueError(
                f"The length of the coordinate matrix must match the sequence length "
                f"times {NUM_COORDS_PER_RES}. You have provided {crd.shape[0]} // "
                f"{NUM_COORDS_PER_RES} = {crd.shape[0] // NUM_COORDS_PER_RES}."
            )
        # A row of all-zero angles is treated as a missing residue.
        if ang is not None and np.any(np.all(ang == 0, axis=1)):
            missing_loc = np.where(np.all(ang == 0, axis=1))
            raise ValueError(
                f"Building atomic coordinates from angles is not supported "
                f"for structures with missing residues. Missing residues = "
                f"{list(missing_loc[0])}. Protein structures with missing "
                "residues are only supported if built directly from "
                "coordinates (also supported by StructureBuilder).")

        # coord_type records which array library the input came from so that build()
        # stacks its output with the matching library.
        if crd is not None:
            self.coords = crd
            self.coord_type = "numpy" if isinstance(crd, np.ndarray) else 'torch'
        else:
            # Empty until build() is called.
            self.coords = []
            self.coord_type = "numpy" if isinstance(ang, np.ndarray) else 'torch'

        self.seq = seq
        self.ang = ang
        self.device = device

        self.prev_ang = None
        self.prev_bb = None
        self.next_bb = None

        self.pdb_creator = None
        self.integer_coded_seq = np.asarray([VOCAB._char2int[s] for s in seq])

    def __len__(self):
        """Return length of the protein sequence.

        Returns:
            int: Integer sequence length.
        """
        return len(self.seq)

    def _iter_resname_angs(self, start=0):
        """Yield (integer-coded residue, angles) pairs, beginning at index start."""
        yield from zip(self.integer_coded_seq[start:], self.ang[start:])

    def _build_first_two_residues(self):
        """Construct the first two residues of the protein."""
        resname_ang_iter = self._iter_resname_angs()
        first_resname, first_ang = next(resname_ang_iter)
        second_resname, second_ang = next(resname_ang_iter)
        first_res = ResidueBuilder(first_resname, first_ang, prev_res=None, next_res=None)
        second_res = ResidueBuilder(second_resname,
                                    second_ang,
                                    prev_res=first_res,
                                    next_res=None)

        # After building both backbones use the second residue's N to build the first's CB
        first_res.build_bb()
        second_res.build()
        first_res.next_res = second_res
        first_res.build_sc()

        return first_res, second_res

    def build(self):
        """Construct all of the atoms of the protein from its angles.

        Special care must be taken for the first residue in the sequence in
        order to place its CB, if present.

        Returns:
            (numpy.ndarray, torch.Tensor): An array or tensor of the generated coordinates
            with shape ((L X NUM_COORDS_PER_RES) X 3).
        """
        # If a StructureBuilder does not have angles, build returns its coordinates
        if self.ang is None:
            return self.coords

        # Build the first and second residues, a special case
        first, second = self._build_first_two_residues()

        # Combine the coordinates and build the rest of the protein
        self.coords = first._stack_coords() + second._stack_coords()

        # Build the rest of the structure
        prev_res = second
        for i, (resname, ang) in enumerate(self._iter_resname_angs(start=2)):
            # i counts from the third residue, so i + 2 is the index into the sequence.
            res = ResidueBuilder(resname,
                                 ang,
                                 prev_res=prev_res,
                                 next_res=None,
                                 is_last_res=i + 2 == len(self.seq) - 1)
            self.coords += res.build()
            prev_res = res

        if self.coord_type == 'torch':
            self.coords = torch.stack(self.coords)
        else:
            self.coords = np.stack(self.coords)

        return self.coords

    def _coords_as_numpy(self):
        """Return self.coords as a numpy array, converting from torch if necessary."""
        if self.coord_type == 'numpy':
            return self.coords
        # Tensor.numpy() raises for tensors that require grad or that are not on the
        # CPU; detach() and cpu() are no-ops when neither applies.
        return self.coords.detach().cpu().numpy()

    def _initialize_coordinates_and_PdbCreator(self):
        """Build coordinates if they are missing and create self.pdb_creator if unset."""
        if self.coords is None or len(self.coords) == 0:
            self.build()

        if not self.pdb_creator:
            # Imported at call time, so the module is only needed once a PdbBuilder
            # actually has to be created.
            from sidechainnet.structure.PdbBuilder import PdbBuilder
            self.pdb_creator = PdbBuilder(self.seq, self._coords_as_numpy())

    def to_pdb(self, path, title="pred"):
        """Save protein structure as a PDB file to given path.

        Args:
            path (str): Path to save PDB file.
            title (str, optional): Title of structure for PDB file. Defaults to "pred".
        """
        self._initialize_coordinates_and_PdbCreator()
        self.pdb_creator.save_pdb(path, title)

    def to_gltf(self, path, title="pred"):
        """Save protein structure as a GLTF (3D-object) file to given path.

        Args:
            path (str): Path to save GLTF file.
            title (str, optional): Title of structure for GLTF file. Defaults to "pred".
        """
        self._initialize_coordinates_and_PdbCreator()
        self.pdb_creator.save_gltf(path, title)

    def to_3Dmol(self, style=None, **kwargs):
        """Generate protein structure & return interactive py3Dmol.view for visualization.

        Args:
            style (str, optional): Style string to be passed to py3Dmol for
                visualization. Defaults to None, in which case a spectrum-colored
                cartoon with thin sticks is used.
            **kwargs: Forwarded unchanged to py3Dmol.view().

        Returns:
            py3Dmol.view object: A view object that is interactive in iPython notebook
                settings.
        """
        import py3Dmol
        if not style:
            style = {'cartoon': {'color': 'spectrum'}, 'stick': {'radius': .15}}

        self._initialize_coordinates_and_PdbCreator()

        view = py3Dmol.view(**kwargs)
        view.addModel(self.pdb_creator.get_pdb_string(), 'pdb')
        # style is always truthy at this point because of the default above.
        view.setStyle(style)
        view.zoomTo()
        return view