def test_dihedral_backbone_result(file_name):
    """
    Compare the backbone dihedral angles from
    :func:`struc.dihedral_backbone()` with the angles MDTraj computes
    for the same chain.

    Only the first model and only amino acid residues of the MMTF file
    `file_name` are considered.
    The test ends silently if the structure contains no protein or as
    soon as a discontinuous chain is encountered.
    """
    import mdtraj

    mmtf_file = mmtf.MMTFFile.read(file_name)
    array = mmtf.get_structure(mmtf_file, model=1)
    array = array[struc.filter_amino_acids(array)]
    if array.array_length() == 0:
        # Structure contains no protein
        # -> determination of backbone angles makes no sense
        return

    for chain in struc.chain_iter(array):
        print("Chain: ", chain.chain_id[0])
        if len(struc.check_res_id_continuity(chain)) != 0:
            # Do not test discontinuous chains
            return

        test_phi, test_psi, test_ome = struc.dihedral_backbone(chain)

        # The context manager closes the temporary file even if
        # writing or loading the structure raises an exception
        with NamedTemporaryFile("w+", suffix=".pdb") as temp:
            strucio.save_structure(temp.name, chain)
            traj = mdtraj.load(temp.name)

        _, ref_phi = mdtraj.compute_phi(traj)
        _, ref_psi = mdtraj.compute_psi(traj)
        _, ref_ome = mdtraj.compute_omega(traj)
        # The trajectory consists of a single frame
        ref_phi, ref_psi, ref_ome = ref_phi[0], ref_psi[0], ref_ome[0]

        # The first phi and the last psi/omega value of the test
        # arrays have no counterpart in the reference arrays
        assert test_phi[1:] == pytest.approx(ref_phi, abs=1e-5, rel=5e-3)
        assert test_psi[:-1] == pytest.approx(ref_psi, abs=1e-5, rel=5e-3)
        assert test_ome[:-1] == pytest.approx(ref_ome, abs=1e-5, rel=5e-3)
def test_get_assembly(single_model):
    """
    Check that :func:`get_assembly()` yields as many peptide chains as
    stated in the ``_pdbx_struct_assembly.oligomeric_count`` field and
    that the returned object has the type matching `single_model`.
    """
    if single_model:
        model = 1
        expected_type = struc.AtomArray
    else:
        model = None
        expected_type = struc.AtomArrayStack

    cif_file = pdbx.PDBxFile()
    cif_file.read(join(data_dir, "1f2n.cif"))
    category = cif_file.get_category(
        "pdbx_struct_assembly", expect_looped=True
    )

    # Every assembly listed in the file is tested
    listed_assemblies = zip(category["id"], category["oligomeric_count"])
    for assembly_id, expected_count in listed_assemblies:
        assembly = pdbx.get_assembly(
            cif_file, assembly_id=assembly_id, model=model
        )
        peptide_mask = struc.filter_amino_acids(assembly)
        chain_count = struc.get_chain_count(assembly[..., peptide_mask])

        assert isinstance(assembly, expected_type)
        assert chain_count == int(expected_count)
def test_hbond_structure(pdb_id):
    """
    Compare the hydrogen bonds found by :func:`struc.hbond()` with the
    ones found by MDTraj's ``baker_hubbard()`` for the structure
    `pdb_id`.
    """
    import mdtraj

    file_name = join(data_dir("structure"), pdb_id + ".mmtf")
    array = load_structure(file_name)
    # Only consider amino acids for consistency
    # with bonded hydrogen detection in MDTraj
    array = array[..., struc.filter_amino_acids(array)]
    # For consistency with MDTraj 'S' cannot be acceptor element
    # https://github.com/mdtraj/mdtraj/blob/master/mdtraj/geometry/hbond.py#L365
    if isinstance(array, struc.AtomArrayStack):
        # For a stack a (triplets, mask) tuple is returned,
        # the mask is not needed here
        triplets, _ = struc.hbond(array, acceptor_elements=("O", "N"))
    else:
        triplets = struc.hbond(array, acceptor_elements=("O", "N"))

    # Save to new pdb file for consistent treatment of inscode/altloc
    # in MDTraj
    # The context manager closes the temporary file even if
    # writing or loading the structure raises an exception
    with NamedTemporaryFile("w+", suffix=".pdb") as temp:
        save_structure(temp.name, array)
        traj = mdtraj.load(temp.name)
    triplets_ref = mdtraj.baker_hubbard(traj, freq=0, periodic=False)

    # Both packages may use different order
    # -> use set for comparison
    triplets_set = {tuple(tri) for tri in triplets}
    triplets_ref_set = {tuple(tri) for tri in triplets_ref}
    assert triplets_set == triplets_ref_set
def test_dihedral_backbone_result(file_name):
    """
    Check the backbone dihedral angles of each peptide chain in the
    first model of `file_name` against the values computed by MDTraj.
    The test ends at the first discontinuous chain.
    """
    import mdtraj

    source = mmtf.MMTFFile()
    source.read(file_name)
    atoms = mmtf.get_structure(source, model=1)
    protein = atoms[struc.filter_amino_acids(atoms)]

    for peptide in struc.chain_iter(protein):
        print("Chain: ", peptide.chain_id[0])
        discontinuities = struc.check_id_continuity(peptide)
        if len(discontinuities) != 0:
            # Discontinuous chains are not tested
            return

        phi, psi, omega = struc.dihedral_backbone(peptide)

        # MDTraj reads the chain from an intermediate PDB file
        pdb_path = biotite.temp_file("pdb")
        strucio.save_structure(pdb_path, peptide)
        traj = mdtraj.load(pdb_path)
        # Keep only the angles ([1]) of the single frame ([0])
        expected_phi = mdtraj.compute_phi(traj)[1][0]
        expected_psi = mdtraj.compute_psi(traj)[1][0]
        expected_omega = mdtraj.compute_omega(traj)[1][0]

        assert phi[1:] == pytest.approx(expected_phi, abs=1e-5, rel=5e-3)
        assert psi[:-1] == pytest.approx(expected_psi, abs=1e-5, rel=5e-3)
        assert omega[:-1] == pytest.approx(
            expected_omega, abs=1e-5, rel=5e-3
        )
def psea_sec(file):
    """
    Annotate the secondary structure elements of chain ``"A"`` via
    :func:`struc.annotate_sse()`.

    `file` is passed to :func:`mmtf.get_structure()`, only the first
    model is used.
    Returns the value of :func:`struc.annotate_sse()` unchanged.
    """
    array = mmtf.get_structure(file, model=1)
    # The annotation runs on the complete model,
    # the chain is selected via the 'chain_id' parameter
    return struc.annotate_sse(array, chain_id="A")
def dssp_sec(file):
    """
    Annotate the secondary structure of chain ``"A"`` with DSSP and
    translate each DSSP code via the `dssp_to_abc` mapping.

    Returns a NumPy array with dtype ``"U1"``.
    """
    structure = mmtf.get_structure(file, model=1)
    peptides = structure[struc.filter_amino_acids(structure)]
    monomer = peptides[peptides.chain_id == "A"]
    dssp_codes = dssp.DsspApp.annotate_sse(monomer)
    translated = [dssp_to_abc[code] for code in dssp_codes]
    return np.array(translated, dtype="U1")
def test_outside_location():
    """
    A query position far away from all atoms must yield no atoms
    from the cell list.
    """
    structure = strucio.load_structure(join(data_dir, "3o5r.mmtf"))
    protein = structure[struc.filter_amino_acids(structure)]
    cells = struc.CellList(protein, cell_size=5)
    # 100 below the minimum coordinate in each dimension
    remote_position = protein.coord.min(axis=0) - 100
    neighbors = cells.get_atoms(remote_position, 5)
    # An empty result is expected
    assert len(neighbors) == 0
def test_remove_pbc_selections(multi_model):
    """
    Smoke test for the `selection` parameter of `remove_pbc()`:
    Valid selections must not raise an exception, while selecting an
    atom more than once must raise a :class:`ValueError`.
    """
    structure = load_structure(join(data_dir("structure"), "3o5r.mmtf"))
    if multi_model:
        structure = struc.stack([structure, structure])

    # No selection and selection by a chain ID
    struc.remove_pbc(structure)
    struc.remove_pbc(structure, structure.chain_id[0])

    # Selection by a single mask and by a list of masks
    peptide_mask = struc.filter_amino_acids(structure)
    ligand_mask = structure.res_name == "FK5"
    struc.remove_pbc(structure, peptide_mask)
    struc.remove_pbc(structure, [peptide_mask, ligand_mask])

    # The CA atoms are already part of the amino acid selection
    # -> atoms are selected multiple times
    ca_mask = structure.atom_name == "CA"
    with pytest.raises(ValueError):
        struc.remove_pbc(structure, [peptide_mask, ca_mask])
def mmtf_sec(file):
    """
    Read the secondary structure of chain ``"A"`` from the
    ``secStructList`` field of the MMTF file and translate it via the
    `sec_struct_codes` and `dssp_to_abc` mappings.

    Returns a NumPy array with dtype ``"U1"``.
    """
    array = mmtf.get_structure(file, model=1)
    tk_dimer = array[struc.filter_amino_acids(array)]
    # The chain ID corresponding to each amino acid residue
    # The residue start indices refer to the filtered array,
    # hence its 'chain_id' annotation must be indexed,
    # not the one of the unfiltered array
    chain_id_per_res = tk_dimer.chain_id[struc.get_residue_starts(tk_dimer)]
    sse = file["secStructList"]
    # Remove the entries without secondary structure (-1)
    sse = sse[sse != -1]
    sse = sse[: len(chain_id_per_res)][chain_id_per_res == "A"]
    sse = np.array([sec_struct_codes[code] for code in sse], dtype="U1")
    sse = np.array([dssp_to_abc[e] for e in sse], dtype="U1")
    return sse
def get_diameter(pdb_id):
    """
    Fetch the structure `pdb_id` from the RCSB PDB and return the
    largest distance between any two amino acid atoms.

    All pairwise difference vectors are held in memory at once.
    """
    path = rcsb.fetch(pdb_id, "mmtf", gettempdir())
    structure = strucio.load_structure(path)
    # Only atoms from amino acids are taken into account
    positions = structure[struc.filter_amino_acids(structure)].coord
    # Difference vector between each pair of atoms
    deltas = positions[:, np.newaxis, :] - positions[np.newaxis, :, :]
    squared_distances = np.sum(deltas * deltas, axis=-1)
    # The diameter is the largest pairwise distance
    return np.sqrt(np.max(squared_distances))
def test_protOr_radii():
    """
    Assert that ProtOr VdW radii (except hydrogen) can be calculated for
    all atoms in the given structure, since the structure (1GYA) does
    only contain standard amino acids after the removal of
    glycosylation.
    This means, that none of the resulting radii should be ``None``.
    """
    array = load_structure(join(data_dir("structure"), "1gya.mmtf"))
    array = array[..., array.element != "H"]
    array = array[..., struc.filter_amino_acids(array)]
    for res_name, atom_name in zip(array.res_name, array.atom_name):
        radius = strucinfo.vdw_radius_protor(res_name, atom_name)
        # Check for 'None' first, so that a missing radius is reported
        # as such and not as a mere type mismatch
        assert radius is not None
        assert isinstance(radius, float)
def Diameter(TheFile):
    '''
    Approximate the diameter of a protein structure as twice the
    largest distance of an amino acid atom from the geometric center.
    This needs far less memory than a pairwise comparison of all atoms.
    The result is rounded to 3 decimals.
    '''
    structure = strucio.load_structure(TheFile)
    # Keep only the atoms that belong to amino acids
    peptide_mask = struc.filter_amino_acids(structure)
    positions = structure[peptide_mask].coord
    # Geometric center of the remaining atoms
    centroid = np.mean(positions, axis=0)
    offsets = positions - centroid
    center_distances = np.sqrt(np.sum(offsets**2, axis=-1))
    # Twice the largest distance from the center -> diameter
    extent = 2 * center_distances.max()
    return round(extent, 3)
def analyze_chirality(array):
    """
    Determine the enantiomer of each amino acid residue in `array`.

    Returns an integer NumPy array with one element per residue:
    the value of ``get_enantiomer()`` for residues with exactly four
    backbone/CB atoms and ``0`` for all other residues.
    """
    # Filter backbone + CB
    array = array[struc.filter_amino_acids(array)]
    array = array[(array.atom_name == "CB") | (struc.filter_backbone(array))]
    # Delimit each residue by its start index instead of comparing
    # residue IDs: IDs may be repeated in different chains, which would
    # merge the atoms of unrelated residues
    starts = struc.get_residue_starts(array)
    stops = np.append(starts[1:], array.array_length())
    # Residues without chirality keep the initial value 0
    enantiomers = np.zeros(len(starts), dtype=int)
    for i, (start, stop) in enumerate(zip(starts, stops)):
        coord = array.coord[start:stop]
        if len(coord) == 4:
            # Any other atom count (e.g. glycine, which lacks a CB)
            # -> no chirality is determined
            first, second, third, fourth = coord
            enantiomers[i] = get_enantiomer(first, second, third, fourth)
    return enantiomers
def test_adjacency_matrix(cell_size, threshold):
    """
    The adjacency matrix created by a :class:`CellList` must be equal
    to the one derived from a brute-force distance matrix.
    """
    structure = strucio.load_structure(join(data_dir, "3o5r.mmtf"))
    protein = structure[struc.filter_amino_acids(structure)]
    cells = struc.CellList(protein, cell_size=cell_size)
    test_matrix = cells.create_adjacency_matrix(threshold)

    # Brute-force distance matrix
    positions = protein.coord
    deltas = positions[:, np.newaxis, :] - positions[np.newaxis, :, :]
    # Convert to float64 to avoid erroneous warning
    # https://github.com/ContinuumIO/anaconda-issues/issues/9129
    deltas = deltas.astype(np.float64)
    distances = np.sqrt(np.sum(deltas**2, axis=-1))
    reference_matrix = distances <= threshold

    # Both approaches should give the same result
    assert test_matrix.tolist() == reference_matrix.tolist()
def test_get_assembly(model):
    """
    Check that :func:`get_assembly()` yields as many peptide chains as
    stated in the ``_pdbx_struct_assembly.oligomeric_count`` field.
    In addition, the atom count of the whole assembly must be a
    multiple of the atom count of a monomer.
    """
    cif_path = join(data_dir("structure"), "1f2n.cif")
    cif_file = pdbx.PDBxFile.read(cif_path)
    category = cif_file.get_category(
        "pdbx_struct_assembly", expect_looped=True
    )

    # Every assembly listed in the file is tested
    listed_assemblies = zip(category["id"], category["oligomeric_count"])
    for assembly_id, expected_count in listed_assemblies:
        print("Assembly ID:", assembly_id)
        try:
            assembly = pdbx.get_assembly(
                cif_file, assembly_id=assembly_id, model=model
            )
        except biotite.InvalidFileError:
            if model is not None:
                raise
            # The file cannot be parsed into an AtomArrayStack,
            # as the models contain different numbers of atoms
            # -> skip this test case
            return

        peptide_mask = struc.filter_amino_acids(assembly)
        chain_count = struc.get_chain_count(assembly[..., peptide_mask])

        if model is None:
            assert isinstance(assembly, struc.AtomArrayStack)
        else:
            assert isinstance(assembly, struc.AtomArray)
        assert chain_count == int(expected_count)

        # The assembly is expected to consist of complete monomers
        atoms_per_monomer = pdbx.get_structure(cif_file).array_length()
        assert assembly.array_length() % atoms_per_monomer == 0
def interacting_pairs(structure_path: str, distance_threshold: float,
                      atom_name: str = 'CA',
                      positions: t.Optional[t.Iterable[int]] = None
                      ) -> np.ndarray:
    """
    Finds residues in structure within distance threshold.

    The residue IDs of each pair are taken from the very atoms the
    distance was measured between.
    Consequently, the result stays consistent even if residue IDs are
    unsorted or appear multiple times (e.g. in different chains).

    :param structure_path: path to a structure file
    :param distance_threshold: max distance between elements
        (non-inclusive)
    :param atom_name: filter atoms to this names (CA, CB, and so on)
    :param positions: filter positions to the ones in this list
    :return: numpy array with shape (N, 2)
        where N is a number of interacting pairs
    """
    st = io.load_structure(structure_path)
    atoms = st[(st.atom_name == atom_name) & bst.filter_amino_acids(st)]
    if positions is not None:
        atoms = atoms[np.isin(atoms.res_id, list(positions))]

    # All index pairs (i, j) with i < j, in the same order
    # 'itertools.combinations()' would generate them
    n_atoms = len(atoms.res_id)
    pairs_idx = np.column_stack(np.triu_indices(n_atoms, k=1))
    if len(pairs_idx) == 0:
        # Less than two atoms selected -> no pair can be formed
        return np.empty((0, 2), dtype=atoms.res_id.dtype)

    dist = bst.index_distance(atoms, pairs_idx)
    # Map each atom index pair to the corresponding residue IDs
    pairs = atoms.res_id[pairs_idx]
    return pairs[dist < distance_threshold]
def test_coarse_grained(pdb_id):
    """
    Compare the per-residue SASA obtained with the ``"ProtOr"`` radii
    set with the one obtained with the ``"Single"`` radii set.
    """
    path = join(data_dir("structure"), pdb_id + ".mmtf")
    structure = mmtf.get_structure(mmtf.MMTFFile.read(path), model=1)
    protein = structure[struc.filter_amino_acids(structure)]

    def residue_sasa(radii_set):
        # Sum up the atom SASA values of each residue, ignoring NaN
        atom_sasa = struc.sasa(protein, vdw_radii=radii_set)
        return struc.apply_residue_wise(protein, atom_sasa, np.nansum)

    test_sasa = residue_sasa("ProtOr")
    ref_sasa = residue_sasa("Single")
    n_values = len(test_sasa)

    # More than 90% of the values
    # must have less than 10% SASA difference
    close_10 = np.isclose(test_sasa, ref_sasa, rtol=1e-1, atol=1)
    assert np.count_nonzero(close_10) / n_values > 0.9
    # More than 98% of the values
    # must have less than 40% SASA difference
    close_40 = np.isclose(test_sasa, ref_sasa, rtol=4e-1, atol=1)
    assert np.count_nonzero(close_40) / n_values > 0.98
def DiameterA(TheFile):
    '''
    Find the exact diameter of a protein's structure, i.e. the largest
    distance between any two amino acid atoms.
    The result is rounded to 3 decimals.

    The coordinate array is broadcast against itself to calculate all
    pairwise difference vectors at once.
    Hence, the memory consumption grows with the square of the number
    of atoms, which makes this function unsuitable for big structures.
    '''
    # Get atom coordinates
    atom_array = strucio.load_structure(TheFile)
    # Remove all non-amino acids atoms
    atom_array = atom_array[struc.filter_amino_acids(atom_array)]
    # Coordinates as a NumPy array
    coord = atom_array.coord
    # Calculate all pairwise difference vectors
    diff = coord[:, np.newaxis, :] - coord[np.newaxis, :, :]
    # Squared length of each difference vector -> square distances
    sq_dist = np.sum(diff * diff, axis=-1)
    # The square root is only required for the maximum value
    max_sq_dist = np.max(sq_dist)
    # Maximum distance is diameter
    diameter = np.sqrt(max_sq_dist)
    return round(diameter, 3)
def _segment_pattern(onehot, n_classes):
    """
    Derive the pattern array from a one-hot encoded secondary structure
    using the differences between consecutive rows of `onehot`.
    """
    delta = onehot[1:] - onehot[:-1]
    cls = to_onehot(np.where(delta == -1)[1], (None, n_classes)).T
    bbox = np.array(
        [np.where(delta == 1)[0], np.where(delta == -1)[0], *cls]
    ).T
    return np.argmax(bbox[:, 2:], 1)


def build_patterns(structfam, folder):
    """
    Find the most frequent 3-state and 8-state secondary structure
    pattern among the given structures and store it via ``push()``
    in ``{folder}/data.pt``.

    Each element of `structfam` is unpacked into a PDB ID, a chain ID
    and the first and last (inclusive) index into the secondary
    structure of that chain.

    If `structfam` yields no element, a tuple of four ``None`` values
    is returned.
    Otherwise a 6-tuple is returned: the 3-state pattern as string and
    as list, the 8-state pattern as string and as list and the
    occurrence counts of all 3-state and 8-state patterns.
    NOTE(review): the differing tuple lengths are kept for backward
    compatibility -- confirm that the callers handle both.
    """
    patterns = []
    for pdb, c, start, end in tqdm(structfam):
        file_name = rcsb.fetch(pdb, "mmtf", biotite.temp_dir())
        mmtf_file = mmtf.MMTFFile()
        mmtf_file.read(file_name)
        array = mmtf.get_structure(mmtf_file, model=1)
        tk_dimer = array[struc.filter_amino_acids(array)]
        # The chain ID corresponding to each residue
        # The residue start indices refer to the filtered array,
        # hence its 'chain_id' annotation must be indexed
        chain_id_per_res = tk_dimer.chain_id[
            struc.get_residue_starts(tk_dimer)
        ]
        sse = mmtf_file["secStructList"]
        sse = sse[:chain_id_per_res.shape[0]][chain_id_per_res == c]
        sse = np.array(sse[start:end + 1])
        # The modulo maps the code -1 to the last code (7)
        sse = np.array(
            [sec_struct_codes[code % 8] for code in sse], dtype="U1"
        )

        sse8 = to_onehot([dssp_codes[x] for x in sse], (None, 8))
        pat8 = _segment_pattern(sse8, 8)
        sse3 = to_onehot(
            [abc_codes[dssp_to_abc[x]] for x in sse], (None, 3)
        )
        pat3 = _segment_pattern(sse3, 3)
        patterns.append((pat3, pat8))

    if not patterns:
        print("No pattern find")
        return None, None, None, None

    occ_sum8 = {}
    occ_sum3 = {}
    correspondings8 = {}
    correspondings3 = {}
    for pat3, pat8 in patterns:
        c8 = "".join([sec_struct_codes[x] for x in pat8])
        n8 = list(pat8)
        c3 = "".join(["abc"[x] for x in pat3])
        n3 = list(pat3)
        if len(c3) == 0:
            continue
        # Make sure that each pattern starts and ends
        # with the 'c'/'C' state (numeric code 2 or 7, respectively)
        if c3[0] != "c":
            c3 = "c" + c3
            n3 = [2] + n3
        if c3[-1] != "c":
            c3 = c3 + "c"
            n3 = n3 + [2]
        if c8[0] != "C":
            c8 = "C" + c8
            n8 = [7] + n8
        if c8[-1] != "C":
            c8 = c8 + "C"
            n8 = n8 + [7]
        # Count the occurrences and remember the string and list
        # representation of the first occurrence of each pattern
        correspondings8.setdefault(c8, (c8, n8))
        occ_sum8[c8] = occ_sum8.get(c8, 0) + 1
        correspondings3.setdefault(c3, (c3, n3))
        occ_sum3[c3] = occ_sum3.get(c3, 0) + 1

    # Select the most frequent patterns
    c_pattern8, n_pattern8 = correspondings8[max(occ_sum8, key=occ_sum8.get)]
    c_pattern3, n_pattern3 = correspondings3[max(occ_sum3, key=occ_sum3.get)]
    push(
        f"{folder}/data.pt", "pattern",
        (c_pattern3, n_pattern3, c_pattern8, n_pattern8)
    )
    return c_pattern3, n_pattern3, c_pattern8, n_pattern8, occ_sum3, occ_sum8
from matplotlib.lines import Line2D
from matplotlib import colors
import matplotlib as mpl
import biotite.structure as struc
import biotite.structure.io as strucio
import biotite.structure.io.xtc as xtc
from biotite.application.dssp import DsspApp

# Put here the path of the downloaded files
templ_file_path = "../../download/lysozyme_md.pdb"
traj_file_path = "../../download/lysozyme_md.xtc"

# Load the trajectory, using the template file for the structure
xtc_file = xtc.XTCFile.read(traj_file_path)
traj = xtc_file.get_structure(template=strucio.load_structure(templ_file_path))
time = xtc_file.get_time()
# Keep only the atoms from amino acids in each frame
traj = traj[:, struc.filter_amino_acids(traj)]

# DSSP does not assign an SSE to the last residue -> -1
sse = np.empty((traj.shape[0], struc.get_residue_count(traj) - 1), dtype='U1')
# Run DSSP separately for each frame of the trajectory
for idx, frame in enumerate(traj):
    app = DsspApp(traj[idx])
    app.start()
    app.join()
    sse[idx] = app.get_sse()

# Matplotlib needs numbers to assign colors correctly
def sse_to_num(sse):
    num = np.empty(sse.shape, dtype=int)
    num[sse == 'C'] = 0
    num[sse == 'E'] = 1
def test_docking(flexible):
    """
    Test :class:`VinaApp` for the case of docking biotin to
    streptavidin.
    The output binding pose should be very similar to the pose in the
    PDB structure.
    If `flexible` is true, two residues of the receptor are declared
    as flexible.
    """
    # A structure of a streptavidin-biotin complex
    mmtf_file = mmtf.MMTFFile.read(join(data_dir("application"), "2rtg.mmtf"))
    structure = mmtf.get_structure(
        mmtf_file, model=1, extra_fields=["charge"], include_bonds=True
    )
    structure = structure[structure.chain_id == "B"]
    receptor = structure[struc.filter_amino_acids(structure)]
    ref_ligand = structure[structure.res_name == "BTN"]
    ref_ligand_coord = ref_ligand.coord

    ligand = info.residue("BTN")
    # Remove hydrogen atom that is missing in ref_ligand
    ligand = ligand[ligand.atom_name != "HO2"]

    if flexible:
        # Two residues within the binding pocket: ASN23, SER88
        flexible_mask = np.isin(receptor.res_id, (23, 88))
    else:
        flexible_mask = None

    app = VinaApp(
        ligand, receptor, struc.centroid(ref_ligand), [20, 20, 20],
        flexible=flexible_mask
    )
    app.set_seed(0)
    app.start()
    app.join()

    test_ligand_coord = app.get_ligand_coord()
    test_receptor_coord = app.get_receptor_coord()
    energies = app.get_energies()
    # One energy value per model
    assert len(test_ligand_coord) == len(energies)
    assert len(test_receptor_coord) == len(energies)

    assert np.all(energies < 0)

    # Select best binding pose
    test_ligand_coord = test_ligand_coord[0]
    not_nan_mask = ~np.isnan(test_ligand_coord).any(axis=-1)
    ref_ligand_coord = ref_ligand_coord[not_nan_mask]
    test_ligand_coord = test_ligand_coord[not_nan_mask]
    # Check if at least one atom is preserved
    # The atoms are in the first dimension,
    # the second dimension always has the length 3
    assert test_ligand_coord.shape[0] > 0
    rmsd = struc.rmsd(ref_ligand_coord, test_ligand_coord)
    # The deviation of the best pose from the real conformation
    # should be less than 1 Å
    assert rmsd < 1.0

    if flexible:
        # Select best binding pose
        test_receptor_coord = test_receptor_coord[0]
        not_nan_mask = ~np.isnan(test_receptor_coord).any(axis=-1)
        ref_receptor_coord = receptor.coord[not_nan_mask]
        test_receptor_coord = test_receptor_coord[not_nan_mask]
        # Check if at least one atom is preserved
        assert test_receptor_coord.shape[0] > 0
        # The flexible residues should have a maximum deviation of 1 Å
        # from the original conformation
        assert np.max(
            struc.distance(test_receptor_coord, ref_receptor_coord)
        ) < 1.0
    else:
        # A rigid receptor must be returned unaltered for each model
        ref_receptor_coord = receptor.coord
        for model_coord in test_receptor_coord:
            assert np.array_equal(model_coord, ref_receptor_coord)
# Fetch animal lysoyzme structures lyso_files = rcsb.fetch(["1REX", "1AKI", "1DKJ", "1GD6"], format="mmtf", target_path=biotite.temp_dir()) organisms = ["H. sapiens", "G. gallus", "C. viginianus", "B. mori"] # Create a PB sequence from each structure pb_seqs = [] for file_name in lyso_files: file = mmtf.MMTFFile() file.read(file_name) # Take only the first model into account array = mmtf.get_structure(file, model=1) # Remove everything but the first protein chain array = array[struc.filter_amino_acids(array)] array = array[array.chain_id == array.chain_id[0]] # Calculate backbone dihedral angles, # as the PBs are determined from them phi, psi, omega = struc.dihedral_backbone(array) # A PB requires the 8 phi/psi angles of 5 amino acids, # centered on the amino acid to calculate the PB for # Hence, the PBs are not defined for the two amino acids # at each terminus pb_angles = np.full((len(phi) - 4, 8), np.nan) pb_angles[:, 0] = psi[:-4] pb_angles[:, 1] = phi[1:-3] pb_angles[:, 2] = psi[1:-3] pb_angles[:, 3] = phi[2:-2] pb_angles[:, 4] = psi[2:-2]
def test_amino_acid_filter(sample_array):
    """
    The amino acid filter must select exactly 982 atoms
    from the sample structure.
    """
    peptide_mask = struc.filter_amino_acids(sample_array)
    peptide_atoms = sample_array[peptide_mask]
    assert len(peptide_atoms) == 982
# The normal mode to be visualized # '-1' is the last (and most significant) one MODE = -1 # The amount of frames (models) per oscillation FRAMES = 60 # The maximum oscillation amplitude for an atom # (The length of the ANM's eigenvectors make only sense when compared # relative to each other, the absolute values have no significance) MAX_AMPLITUDE = 5 # Load structure mmtf_file = mmtf.MMTFFile.read(rcsb.fetch(PDB_ID, "mmtf")) structure = mmtf.get_structure(mmtf_file, model=1) # Filter first peptide chain protein_chain = structure[struc.filter_amino_acids(structure) & (structure.chain_id == structure.chain_id[0])] # Filter CA atoms ca = protein_chain[protein_chain.atom_name == "CA"] # Load eigenvectors for CA atoms # The first axis indicates the mode, # the second axis indicates the vector component vectors = np.loadtxt(VECTOR_FILE, delimiter=",").transpose() # Discard the last 6 modes, as these are movements of the entire system: # A system with N atoms has only 3N - 6 degrees of freedom # ^^^ vectors = vectors[:-6] # Extract vectors for given mode and reshape to (n,3) array mode_vectors = vectors[MODE].reshape((-1, 3)) # Rescale, so that the largest vector has the length 'MAX_AMPLITUDE'
import biotite.sequence as seq import biotite.sequence.io.fasta as fasta import biotite.sequence.align as align import biotite.sequence.graphics as graphics import biotite.application.blast as blast import biotite.application.clustalo as clustalo import biotite.database.rcsb as rcsb import biotite.database.entrez as entrez # Get structure and sequence pdbx_file = pdbx.PDBxFile.read(rcsb.fetch("1GUU", "mmcif")) sequence = pdbx.get_sequence(pdbx_file)[0] # 'use_author_fields' is set to false, # to ensure that values in the 'res_id' annotation point to the sequence structure = pdbx.get_structure(pdbx_file, model=1, use_author_fields=False) structure = structure[struc.filter_amino_acids(structure)] # Identity threshold for a sequence to be counted as homologous sequence IDENTITY_THESHOLD = 0.4 # Find homologous proteins in SwissProt via BLAST app = blast.BlastWebApp("blastp", sequence, database="swissprot") app.start() app.join() alignments = app.get_alignments() hit_seqs = [sequence] hit_ids = ["Query"] hit_starts = [1] for ali in alignments: identity = align.get_sequence_identity(ali) # Do not include the exact same sequence -> identity < 1.0 if identity > IDENTITY_THESHOLD and identity < 1.0:
import biotite.database.rcsb as rcsb
import biotite.application.autodock as autodock

# Get the receptor structure
# and the original 'correct' conformation of the ligand
mmtf_file = mmtf.MMTFFile.read(rcsb.fetch("2RTG", "mmtf"))
structure = mmtf.get_structure(
    # Include formal charge for accurate partial charge calculation
    mmtf_file, model=1, include_bonds=True, extra_fields=["charge"]
)
# The asymmetric unit describes a streptavidin homodimer
# However, we are only interested in a single monomer
structure = structure[structure.chain_id == "B"]
# The receptor comprises the amino acids of the monomer,
# the reference ligand is the biotin residue ('BTN')
receptor = structure[struc.filter_amino_acids(structure)]
ref_ligand = structure[structure.res_name == "BTN"]
ref_ligand_center = struc.centroid(ref_ligand)

# Independently, get the ligand without optimized conformation
# from the chemical components dictionary
ligand = info.residue("BTN")

# Search for a binding mode in a 20 Å radius
# of the original ligand position
app = autodock.VinaApp(ligand, receptor, ref_ligand_center, [20, 20, 20])
# For reproducibility
app.set_seed(0)
# This is the maximum number:
# Vina may find less interesting binding modes
import biotite.database.rcsb as rcsb
import ammolite


# Size that is passed to 'ammolite.show()'
PNG_SIZE = (800, 800)
BACKBONE_ATOMS = ["N", "C", "O", "HA"]

########################################################################

mmtf_file = mmtf.MMTFFile.read(rcsb.fetch("2RTG", "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1, include_bonds=True)
# Select one monomer of the dimer
structure = structure[structure.chain_id == structure.chain_id[0]]
# Remove water and ions
structure = structure[
    ~struc.filter_solvent(structure)
    & ~struc.filter_monoatomic_ions(structure)
]
# Masks for the amino acid atoms and the biotin residue ('BTN')
strep_mask = struc.filter_amino_acids(structure)
biotin_mask = (structure.res_name == "BTN")

# Transfer the structure to PyMOL and style its representation
pymol_obj = ammolite.PyMOLObject.from_structure(structure)
pymol_obj.cartoon("loop", strep_mask)
pymol_obj.set("cartoon_transparency", 0.5)
# Only the carbon atoms get a molecule-specific color
pymol_obj.color("salmon", strep_mask & (structure.element == "C"))
pymol_obj.color("forest", biotin_mask & (structure.element == "C"))
pymol_obj.zoom(biotin_mask, buffer=5.0)
ammolite.show(PNG_SIZE)

########################################################################

# Hydrogen bonds between the amino acid atoms and biotin
bonds = struc.hbond(structure, strep_mask, biotin_mask)

res_ids = []
def _plot_series(x, y, xlim, ylim, xlabel, ylabel):
    """Draw a single line plot with fixed axis limits in a new figure."""
    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(x, y, color=biotite.colors["dimorange"])
    ax.set_xlim(*xlim)
    ax.set_ylim(*ylim)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    figure.tight_layout()


def rmsf_plot(topology, xtc_traj, start_frame=None, stop_frame=None,
              write_dat_files=None):
    """
    Plot the RMSD, the radius of gyration and the per-residue RMSF
    of the protein in a trajectory.

    Parameters
    ----------
    topology
        The structure file used as template for the trajectory.
    xtc_traj
        The XTC trajectory file.
    start_frame, stop_frame
        The first and the last (inclusive) frame to be read.
        If ``None``, the respective limit is not applied.
    write_dat_files
        If true, the RMSD values are written into ``rmsd.dat``
        and the RMSF values are written into ``rmsf.dat``.
    """
    # Gromacs does not set the element symbol in its PDB files,
    # but Biotite guesses the element names from the atom names,
    # emitting a warning
    template = strucio.load_structure(topology)
    # The structure still has water and ions, that are not needed for
    # our calculations, we are only interested in the protein itself
    # These are removed for the sake of computational speed using a
    # boolean mask
    protein_mask = struc.filter_amino_acids(template)
    template = template[protein_mask]
    residue_names = struc.get_residues(template)[1]

    # 'stop' is exclusive, while 'stop_frame' is inclusive
    # 'None' must be passed on as it is to read until the end
    stop = None if stop_frame is None else stop_frame + 1
    xtc_file = XTCFile()
    xtc_file.read(
        xtc_traj, atom_i=np.where(protein_mask)[0],
        start=start_frame, stop=stop
    )
    trajectory = xtc_file.get_structure(template)
    # Get simulation time for plotting purposes
    time = xtc_file.get_time()

    trajectory = struc.remove_pbc(trajectory)
    # The first frame is the reference for superimposition and RMSD
    trajectory, transform = struc.superimpose(trajectory[0], trajectory)
    rmsd = struc.rmsd(trajectory[0], trajectory)
    _plot_series(
        time, rmsd, (time[0], time[-1]), (0, 2),
        "Time (ps)", "RMSD (Å)"
    )

    radius = struc.gyration_radius(trajectory)
    _plot_series(
        time, radius, (time[0], time[-1]), (14.0, 14.5),
        "Time (ps)", "Radius of gyration (Å)"
    )

    # In all models, mask the CA atoms
    ca_trajectory = trajectory[:, trajectory.atom_name == "CA"]
    rmsf = struc.rmsf(struc.average(ca_trajectory), ca_trajectory)
    res_count = struc.get_residue_count(trajectory)
    _plot_series(
        np.arange(1, res_count + 1), rmsf, (1, res_count), (0, 1.5),
        "Residue", "RMSF (Å)"
    )

    if write_dat_files:
        # Write RMSD *.dat file
        # One frame number per RMSD value,
        # this also works if no frame limits were given
        first_frame = 0 if start_frame is None else start_frame - 1
        frames = np.arange(first_frame, first_frame + len(rmsd), dtype=int)
        frames[0] = 0
        df = pd.DataFrame(data=rmsd, index=frames, columns=["RMSD Values"])
        df.index.name = 'Frames'
        df.to_csv('rmsd.dat', header=True, index=True, sep='\t', mode='w')
        # Write RMSF *.dat file
        df1 = pd.DataFrame(
            data=rmsf, index=residue_names, columns=["RMSF Values"]
        )
        df1.index.name = 'Residues'
        df1.to_csv('rmsf.dat', header=True, index=True, sep='\t', mode='w')

    plt.show()
template_model = strucio.load_structure(templ_file_path) #templ_file_path = "output/npt.gro" #traj_file_path = "output/kinase_dimer_md_center.xtc" #traj_file_path = "output/kinase_dimer_nowater_fit.xtc" #traj_file_path = "output/kinase_dimer_md.xtc" traj_file_path = "output/dimer_nopbc_cluster_fit.xtc" # Gromacs does not set the element symbol in its PDB files, # but Biotite guesses the element names from the atom names, # emitting a warning protein = strucio.load_structure(templ_file_path) # The structure still has water and ions, that are not needed for our # calculations, we are only interested in the protein itself # These are removed for the sake of computational speed using a boolean # mask protein_mask = struc.filter_amino_acids(protein) template = protein[protein_mask] # We could have loaded the trajectory also with # 'strucio.load_structure()', but in this case we only want to load # those coordinates that belong to the already selected atoms of the # template structure. # Hence, we use the 'XTCFile' class directly to load the trajectory # This gives us the additional option that allows us to select the # coordinates belonging to the amino acids. print(" .. loading trajectory ...") xtc_file = xtc.XTCFile() #xtc_file.read(traj_file_path, 0, 10, atom_i=np.where(protein_mask)[0]) xtc_file.read(traj_file_path, atom_i=np.where(protein_mask)[0]) #xtc_file.read(traj_file_path)
"S": "c", "H": "a", "E": "b", "G": "c", "B": "b", "T": "c", "C": "c" } # Fetch and load structure file_name = rcsb.fetch("1QGD", "mmtf", biotite.temp_dir()) mmtf_file = mmtf.MMTFFile() mmtf_file.read(file_name) array = mmtf.get_structure(mmtf_file, model=1) # Transketolase homodimer tk_dimer = array[struc.filter_amino_acids(array)] # Transketolase monomer tk_mono = tk_dimer[tk_dimer.chain_id == "A"] # The chain ID corresponding to each residue chain_id_per_res = array.chain_id[struc.get_residue_starts(tk_dimer)] sse = mmtf_file["secStructList"] sse = sse[sse != -1] sse = sse[chain_id_per_res == "A"] sse = np.array([sec_struct_codes[code] for code in sse if code != -1], dtype="U1") sse = np.array([dssp_to_abc[e] for e in sse], dtype="U1") # Helper function to convert secondary structure array to annotation # and visualize it