# The radius merely fluctuates within a range of approx. 0.5 Angstrom
# during the entire simulation.
#
# Let's have a look at single amino acids:
# Which residues fluctuate most?
# To answer this question we calculate the RMSF
# (root mean square fluctuation). It is similar to the RMSD, but instead
# of averaging over the atoms and looking at each time step, we
# average over the time and look at each residue. Usually the average
# model is taken as reference (compared to the starting model for RMSD).
#
# Since side chain atoms fluctuate quite a lot, they are not suitable
# for evaluating the residue flexibility. Therefore, we consider only
# CA atoms.

# Restrict every model of the trajectory to its CA atoms
ca_mask = trajectory.atom_name == "CA"
ca_trajectory = trajectory[:, ca_mask]
# The averaged CA model is the reference for the fluctuation
average_model = struc.average(ca_trajectory)
rmsf = struc.rmsf(average_model, ca_trajectory)

# The numbers 1..20 serve as x values, tick positions and tick labels
residue_numbers = np.arange(1, 21)
figure, ax = plt.subplots(figsize=(6, 3))
ax.plot(residue_numbers, rmsf, color=biotite.colors["dimorange"])
ax.set_xlabel("Residue")
ax.set_ylabel("RMSF (Angstrom)")
ax.set_xlim(1, 20)
ax.set_xticks(residue_numbers)
ax.set_xticklabels(residue_numbers)
figure.tight_layout()

plt.show()
# Anchor the centre-left point of the legend box at (legend_x, legend_y)
legend_x = 1
legend_y = 0.5
plt.legend(loc="center left", bbox_to_anchor=(legend_x, legend_y))
# plt.legend(loc="best")
plt.tight_layout()
plt.savefig("radius_two.png", dpi=600)
# Clear the current figure before the RMSF plots are drawn
plt.clf()

# Reduce both kinase trajectories to their CA atoms
ca_mask_kinase_left = trajectory_kinase_left.atom_name == "CA"
ca_trajectory_kinase_left = trajectory_kinase_left[:, ca_mask_kinase_left]
ca_mask_kinase_right = trajectory_kinase_right.atom_name == "CA"
ca_trajectory_kinase_right = trajectory_kinase_right[:, ca_mask_kinase_right]

# RMSF of each trajectory relative to its own average model;
# the "upper" values are the respective maxima plus a 10 % margin
average_kinase_left = struc.average(ca_trajectory_kinase_left)
rmsf_kinase_left = struc.rmsf(average_kinase_left, ca_trajectory_kinase_left)
rmsf_upper_kinase_left = rmsf_kinase_left.max() * 1.1

average_kinase_right = struc.average(ca_trajectory_kinase_right)
rmsf_kinase_right = struc.rmsf(
    average_kinase_right, ca_trajectory_kinase_right
)
rmsf_upper_kinase_right = rmsf_kinase_right.max() * 1.1

# Residue positions 1..res_count, shifted by 2801 for the x-axis
res_count = struc.get_residue_count(trajectory_kinase_left)
residue_numbers_kinase_left = np.arange(1, res_count + 1) + 2801

# Two vertically stacked axes; the upper one shows the left kinase
fig, (ax1, ax2) = plt.subplots(2, 1)
ax1.plot(
    residue_numbers_kinase_left,
    rmsf_kinase_left,
    color=biotite.colors["dimorange"],
)
ax1.set_title("Kinase Left")
# ax1.axvline(3828, ls="--", color="k")
# ax1.axvline(3838, ls="--", color="k")
def test_rmsf(stack, as_coord):
    """
    Check the RMSF of `stack` with respect to its own average.

    If `as_coord` is true, the calculation runs on ``stack.coord``
    instead of on `stack` itself. In both cases five values of
    approximately 21.21320344 are expected.
    """
    # NOTE(review): `stack` and `as_coord` are presumably supplied by a
    # fixture / parametrization defined outside this block - confirm.
    subject = stack.coord if as_coord else stack
    reference = struc.average(subject)
    fluctuations = struc.rmsf(reference, subject).tolist()
    expected = [21.21320344] * 5
    assert fluctuations == pytest.approx(expected)
def test_rmsf(stack):
    """
    Check the RMSF of `stack` with respect to its own average.

    Five values of approximately 21.21320344 are expected.
    """
    # NOTE(review): `stack` is presumably supplied by a fixture defined
    # outside this block - confirm.
    reference = struc.average(stack)
    fluctuations = struc.rmsf(reference, stack).tolist()
    expected = [21.21320344] * 5
    assert fluctuations == pytest.approx(expected)
def _plot_series(x_values, y_values, x_label, y_label, y_limits):
    """
    Draw one line plot on a new figure of size 6 x 3 and return it.

    The x-axis spans from the first to the last entry of `x_values`,
    the y-axis is fixed to the ``(low, high)`` pair `y_limits`.
    The figure is not shown here; that is left to the caller.
    """
    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(x_values, y_values, color=biotite.colors["dimorange"])
    ax.set_xlim(x_values[0], x_values[-1])
    ax.set_ylim(*y_limits)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    figure.tight_layout()
    return figure


def rmsf_plot(topology, xtc_traj, start_frame=None, stop_frame=None,
              write_dat_files=None):
    """
    Plot RMSD, radius of gyration and CA RMSF of a trajectory.

    The topology is loaded as template, reduced to its amino acid
    atoms and combined with the selected frames of the XTC file.
    After PBC removal, all frames are superimposed onto the first
    frame that was read. Three figures are created (RMSD over time,
    radius of gyration over time, RMSF per residue) and displayed with
    ``plt.show()``.

    Parameters
    ----------
    topology
        Structure file passed to ``strucio.load_structure()``.
    xtc_traj
        Trajectory file passed to ``XTCFile.read()``.
    start_frame : int, optional
        Forwarded as `start` to ``XTCFile.read()``.
        ``None`` leaves the start unrestricted.
    stop_frame : int, optional
        Last frame to read; ``stop_frame + 1`` is forwarded as `stop`
        to ``XTCFile.read()``.
        ``None`` leaves the end unrestricted.
    write_dat_files : bool, optional
        If true, the RMSD and RMSF values are additionally written as
        tab-separated tables to ``rmsd.dat`` and ``rmsf.dat``
        (relative paths).
    """
    # Gromacs does not set the element symbol in its PDB files,
    # but Biotite guesses the element names from the atom names,
    # emitting a warning
    template = strucio.load_structure(topology)
    # The structure still has water and ions, that are not needed for
    # our calculations, we are only interested in the protein itself
    # These are removed for the sake of computational speed using a
    # boolean mask
    protein_mask = struc.filter_amino_acids(template)
    template = template[protein_mask]
    residue_names = struc.get_residues(template)[1]

    # Only shift the stop index if one was given:
    # 'None + 1' would raise a TypeError for the default arguments
    stop = None if stop_frame is None else stop_frame + 1
    xtc_file = XTCFile()
    xtc_file.read(
        xtc_traj, atom_i=np.where(protein_mask)[0],
        start=start_frame, stop=stop
    )
    trajectory = xtc_file.get_structure(template)
    # Simulation time for plotting purposes
    sim_time = xtc_file.get_time()

    trajectory = struc.remove_pbc(trajectory)
    # The transformation itself is not needed
    trajectory, _ = struc.superimpose(trajectory[0], trajectory)

    rmsd = struc.rmsd(trajectory[0], trajectory)
    _plot_series(sim_time, rmsd, "Time (ps)", "RMSD (Å)", (0, 2))

    radius = struc.gyration_radius(trajectory)
    _plot_series(
        sim_time, radius, "Time (ps)", "Radius of gyration (Å)",
        (14.0, 14.5)
    )

    # In all models, mask the CA atoms
    ca_trajectory = trajectory[:, trajectory.atom_name == "CA"]
    rmsf = struc.rmsf(struc.average(ca_trajectory), ca_trajectory)
    res_count = struc.get_residue_count(trajectory)
    _plot_series(
        np.arange(1, res_count + 1), rmsf, "Residue", "RMSF (Å)",
        (0, 1.5)
    )

    if write_dat_files:
        # Write RMSD *.dat file
        # The index starts at 'start_frame - 1' (0 if no start frame
        # was given) and gets one entry per RMSD value, so it always
        # matches the number of frames that were actually read
        first_frame = 0 if start_frame is None else start_frame - 1
        frames = np.arange(first_frame, first_frame + len(rmsd), dtype=int)
        # Kept from the previous output format:
        # the first row is always labelled 0
        frames[0] = 0
        rmsd_table = pd.DataFrame(
            data=rmsd, index=frames, columns=["RMSD Values"]
        )
        rmsd_table.index.name = 'Frames'
        rmsd_table.to_csv(
            'rmsd.dat', header=True, index=True, sep='\t', mode='w'
        )

        # Write RMSF *.dat file
        rmsf_table = pd.DataFrame(
            data=rmsf, index=residue_names, columns=["RMSF Values"]
        )
        rmsf_table.index.name = 'Residues'
        rmsf_table.to_csv(
            'rmsf.dat', header=True, index=True, sep='\t', mode='w'
        )

    plt.show()
########################################################################
# Fetch and load the human CD2 NMR structure and remove the glycan

structure_file_path = rcsb.fetch("1gya", "mmtf")
mmtf_file = mmtf.MMTFFile.read(structure_file_path)
cd2 = mmtf.get_structure(mmtf_file, include_bonds=True)
amino_acid_mask = struc.filter_amino_acids(cd2)
cd2 = cd2[..., amino_acid_mask]

# Transfer the first model to PyMOL
first_model = cd2[0]
pymol_cd2 = ammolite.PyMOLObject.from_structure(first_model)
ammolite.show(PNG_SIZE)

########################################################################
# Use the RMSF between the NMR models as measure of flexibility

average_model = struc.average(cd2)
rmsf = struc.rmsf(average_model, cd2)
# Switch to a logarithmic scale
log_rmsf = np.log(rmsf)
# Normalize, so that the largest value of a CA atom becomes 1.0
ca_mask = cd2.atom_name == "CA"
flexibility = log_rmsf / np.max(log_rmsf[ca_mask])

# Color by flexibility via a Matplotlib color map,
# using discrete color 'steps'
N_COLORS = 20
cmap = plt.get_cmap("Reds")
# The thresholds descend from 1.0 to 0.0, so each pass recolors the
# atoms at or below the current threshold
thresholds = np.linspace(1.0, 0.0, N_COLORS)
for threshold in thresholds:
    # Keep only the first three channels, i.e. drop the alpha channel
    rgb = cmap(threshold)[:3]
    pymol_cd2.color(rgb, flexibility <= threshold)
ammolite.show(PNG_SIZE)
# sphinx_gallery_thumbnail_number = 2
# to a reference model, which is usually the averaged structure.
# Since we are only interested in the backbone flexibility, we consider
# only CA atoms.
# Before we can calculate a reasonable RMSF, we have to superimpose each
# model on a reference model (we choose the first model),
# which minimizes the *root mean square deviation* (RMSD).

stack = strucio.load_structure(file_path)
# Keep only the CA atoms of every model
ca_mask = stack.atom_name == "CA"
stack = stack[:, ca_mask]
# Superimpose all models of the structure onto the first model
stack, transformation_tuple = struc.superimpose(stack[0], stack)
print("RMSD for each model to first model:")
rmsd_to_first_model = struc.rmsd(stack[0], stack)
print(rmsd_to_first_model)
# The RMSF is calculated relative to the average of all models
average_model = struc.average(stack)
rmsf = struc.rmsf(average_model, stack)
# Plot the RMSF against the numbers 1..20, with one tick per number
residue_numbers = np.arange(1, 21)
plt.plot(residue_numbers, rmsf)
plt.xlabel("Residue")
plt.ylabel("RMSF")
plt.xlim(0, 20)
plt.xticks(residue_numbers)
plt.show()

########################################################################
# As you can see, both terminal residues are most flexible.
#
# Calculating accessible surface area
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Another interesting value for a protein structure is the