def test_masked_superimposition(seed):
    """
    Take two models of the same structure and superimpose based on a
    single, randomly chosen atom.
    Since two atoms can be superimposed perfectly, the distance between
    the atom in both models should be 0.
    The transformation returned by the superimposition is additionally
    applied separately and must give the same result.
    """
    path = join(data_dir, "1l2y.mmtf")
    fixed = strucio.load_structure(path, model=1)
    mobile = strucio.load_structure(path, model=2)

    # Create random mask for a single atom
    np.random.seed(seed)
    mask = np.full(fixed.array_length(), False)
    mask[np.random.randint(fixed.array_length())] = True

    # The distance between the atom in both models should not be
    # already 0 prior to superimposition
    assert struc.distance(fixed[mask], mobile[mask])[0] \
        != pytest.approx(0, abs=5e-4)

    fitted, transformation = struc.superimpose(fixed, mobile, mask)

    assert struc.distance(fixed[mask], fitted[mask])[0] \
        == pytest.approx(0, abs=5e-4)

    fitted = struc.superimpose_apply(mobile, transformation)

    # This comparison was previously a bare expression statement, i.e.
    # its result was discarded and the check could never fail
    assert struc.distance(fixed[mask], fitted[mask])[0] \
        == pytest.approx(0, abs=5e-4)
def test_superimposition_stack(ca_only):
    """
    Load a structure with multiple models, superimpose all models but
    the first one onto the first one and expect a lower RMSD than the
    one obtained from the coordinates as stored in the file.
    If `ca_only` is set, only the CA atoms are used for the fit.
    """
    models = strucio.load_structure(join(data_dir, "1l2y.mmtf"))
    reference, others = models[0], models[1:]
    ca_mask = (others.atom_name == "CA") if ca_only else None

    superimposed, _ = struc.superimpose(reference, others, ca_mask)

    rmsd_after = struc.rmsd(reference, superimposed)
    rmsd_before = struc.rmsd(reference, others)
    if not ca_only:
        # Every single model must be fitted better than in the file
        assert (rmsd_after < rmsd_before).all()
    else:
        # With the CA-restricted fit the improvement holds for most,
        # but not necessarily all models -> compare the averages
        assert np.mean(rmsd_after) < np.mean(rmsd_before)
def test_superimposition_array(path):
    """
    Read the first model from a PDBx file, displace a copy of it by a
    rotation followed by a translation and superimpose the copy back
    onto the original using the CA atoms.
    The RMSD is expected to vanish, both for the fitted structure and
    for the separately applied transformation.
    """
    cif = pdbx.PDBxFile()
    cif.read(path)
    reference = pdbx.get_structure(cif, model=1)

    # Rotate first, then translate
    displaced = struc.translate(
        struc.rotate(reference.copy(), (1, 2, 3)), (1, 2, 3)
    )
    ca_mask = (displaced.atom_name == "CA")

    superimposed, transform = struc.superimpose(
        reference, displaced, ca_mask
    )
    assert struc.rmsd(reference, superimposed) == pytest.approx(0)

    reapplied = struc.superimpose_apply(displaced, transform)
    assert struc.rmsd(reference, reapplied) == pytest.approx(0)
def test_superimposition_array(path):
    """
    Displace a copy of a structure by a rotation followed by a
    translation, so that both structures are not superimposed anymore.
    Then superimpose these structures onto each other and expect an
    almost perfect match, both for the fitted structure and for the
    separately applied transformation.
    """
    reference = strucio.load_structure(path, model=1)
    # Rotate first, then translate
    displaced = struc.translate(
        struc.rotate(reference.copy(), (1, 2, 3)), (1, 2, 3)
    )

    superimposed, transform = struc.superimpose(reference, displaced)
    assert struc.rmsd(reference, superimposed) \
        == pytest.approx(0, abs=6e-4)

    reapplied = struc.superimpose_apply(displaced, transform)
    assert struc.rmsd(reference, reapplied) \
        == pytest.approx(0, abs=6e-4)
def test_superimposition_stack(ca_only):
    """
    Read all models from a PDBx file, superimpose all models but the
    first one onto the first one and expect a lower RMSD than the one
    obtained from the coordinates as stored in the file.
    If `ca_only` is set, only the CA atoms are used for the fit.
    """
    cif = pdbx.PDBxFile()
    cif.read(join(data_dir, "1l2y.cif"))
    models = pdbx.get_structure(cif)
    reference, others = models[0], models[1:]
    ca_mask = (others.atom_name == "CA") if ca_only else None

    superimposed, _ = struc.superimpose(reference, others, ca_mask)

    rmsd_after = struc.rmsd(reference, superimposed)
    rmsd_before = struc.rmsd(reference, others)
    if not ca_only:
        # Every single model must be fitted better than in the file
        assert (rmsd_after < rmsd_before).all()
    else:
        # With the CA-restricted fit the improvement holds for most,
        # but not necessarily all models -> compare the averages
        assert np.mean(rmsd_after) < np.mean(rmsd_before)
# Write frame 1 twice: once as 'frame_1' and once as the trajectory
# model itself
save_structure("frame_1_coord.pdb", frame_1)
save_structure("frame_1.pdb", trajectory[1])
print(" ... done ... ")
print(" ... writing end frame ...")
# Same for the last frame: a copy of the template model that carries
# the coordinates of the last frame, and the last frame itself
frame_end = template_model.copy()
frame_end.coord = trajectory[-1].coord
save_structure("frame_end_coord.pdb", frame_end)
save_structure("frame_end.pdb", trajectory[-1])
print(" ... done ... ")

# RMSD to the first frame and radius of gyration for the complete
# trajectory; note that no superimposition is performed in the visible
# code before this RMSD is taken
rmsd_overall = struc.rmsd(trajectory[0], trajectory)
radius_overall = struc.gyration_radius(trajectory)

# kinase left
# The sub-trajectory is superimposed onto its own first frame and
# replaced by the fitted version, so the RMSD below refers to the
# fitted coordinates
trajectory_kinase_left, transform = struc.superimpose(
    trajectory_kinase_left[0], trajectory_kinase_left)
rmsd_kinase_left = struc.rmsd(trajectory_kinase_left[0],
                              trajectory_kinase_left)
radius_kinase_left = struc.gyration_radius(trajectory_kinase_left)

# kinase right
# Same procedure; 'transform' is overwritten here
trajectory_kinase_right, transform = struc.superimpose(
    trajectory_kinase_right[0], trajectory_kinase_right)
rmsd_kinase_right = struc.rmsd(trajectory_kinase_right[0],
                               trajectory_kinase_right)
radius_kinase_right = struc.gyration_radius(trajectory_kinase_right)

# Two vertically stacked axes; the upper one shows the RMSD of the
# left kinase over time
figure, (ax1, ax2) = plt.subplots(2, 1)
ax1.plot(time, rmsd_kinase_left, color=biotite.colors["dimorange"])
ax1.set_xlim(time[0], time[-1])
coord[np.newaxis, :]) clashed = distances < vdw_radii_mean for clash_atom1, clash_atom2 in zip(*np.where(clashed)): if clash_atom1 == clash_atom2: # Ignore distance of an atom to itself continue if (clash_atom1, clash_atom2) not in bond_list: # Nonbonded atoms clash # -> structure is not accepted accepted = False rotamer_coord[i] = coord rotamers = struc.from_template(residue, rotamer_coord) ### Superimpose backbone onto first model for better visualization ### rotamers, _ = struc.superimpose(rotamers[0], rotamers, atom_mask=struc.filter_backbone(rotamers)) ### Visualize rotamers ### colors = np.zeros((residue.array_length(), 3)) colors[residue.element == "H"] = (0.8, 0.8, 0.8) # gray colors[residue.element == "C"] = (0.0, 0.8, 0.0) # green colors[residue.element == "N"] = (0.0, 0.0, 0.8) # blue colors[residue.element == "O"] = (0.8, 0.0, 0.0) # red # For consistency, each subplot has the same box size coord = rotamers.coord size = np.array([ coord[:, :, 0].max() - coord[:, :, 0].min(), coord[:, :, 1].max() - coord[:, :, 1].min(), coord[:, :, 2].max() - coord[:, :, 2].min() ]).max() * 0.5
# Gromacs does not set the element symbol in its PDB files # Therefore we simply determine the symbol # from the first character in the atom name # Since hydrogens may have leading numbers we simply ignore numbers for i in range(template.array_length()): template.element[i] = re.sub(r"\d", "", template.atom_name[i])[0] trajectory = strucio.load_structure(traj_file_path, template=template) ######################################################################## # At first we want to see if the simulation converged. # For this purpose we take the RMSD of a frame compared to the starting # structure as measure. In order to calculate the RMSD we must # superimpose all models onto a reference, in this case we choose the # starting structure. trajectory, transform = struc.superimpose(template, trajectory) rmsd = struc.rmsd(template, trajectory) # Simulation was 1000 ps long time = np.linspace(0, 1000, len(trajectory)) figure = plt.figure(figsize=(6,3)) ax = figure.add_subplot(111) ax.plot(time, rmsd, color=biotite.colors["dimorange"]) ax.set_xlim(0,1000) ax.set_xlabel("Time (ps)") ax.set_ylabel("RMSD (Angstrom)") figure.tight_layout() ######################################################################## # As we can see the simulation seems to converge already in the
ku_file = biotite.temp_file("ku.cif") # Download and parse structure files file = rcsb.fetch("1JEY", "mmtf", biotite.temp_dir()) ku_dna = strucio.load_structure(file) file = rcsb.fetch("1JEQ", "mmtf", biotite.temp_dir()) ku = strucio.load_structure(file) # Remove DNA and water ku_dna = ku_dna[(ku_dna.chain_id == "A") | (ku_dna.chain_id == "B")] ku_dna = ku_dna[~struc.filter_solvent(ku_dna)] ku = ku[~struc.filter_solvent(ku)] # The structures have a differing amount of atoms missing # at the the start and end of the structure # -> Find common structure ku_dna_common = ku_dna[struc.filter_intersection(ku_dna, ku)] ku_common = ku[struc.filter_intersection(ku, ku_dna)] # Superimpose ku_superimposed, transformation = struc.superimpose( ku_dna_common, ku_common, (ku_common.atom_name == "CA")) # We do not want the cropped structures # -> apply superimposition on structures before intersection filtering ku_superimposed = struc.superimpose_apply(ku, transformation) # Write PDBx files as input for PyMOL cif_file = pdbx.PDBxFile() pdbx.set_structure(cif_file, ku_dna, data_block="ku_dna") cif_file.write(ku_dna_file) cif_file = pdbx.PDBxFile() pdbx.set_structure(cif_file, ku_superimposed, data_block="ku") cif_file.write(ku_file) # Visualization with PyMOL... # biotite_static_image = ku_superimposition.png
def rmsf_plot(topology, xtc_traj, start_frame=None, stop_frame=None,
              write_dat_files=None):
    """
    Plot the RMSD, the radius of gyration and the RMSF of the CA atoms
    for the protein part of a trajectory and show the figures.

    Parameters
    ----------
    topology : str
        Structure file that is loaded as template for the trajectory.
    xtc_traj : str
        XTC trajectory file.
    start_frame, stop_frame : int, optional
        Frame range to read. `stop_frame` is inclusive, i.e.
        ``stop_frame + 1`` is given to the reader as exclusive stop.
        If a value is omitted, ``None`` is forwarded to the reader
        (presumably meaning 'no limit' on that side -- confirm against
        the ``XTCFile.read()`` documentation).
    write_dat_files : bool, optional
        If true, the RMSD values are written to ``rmsd.dat`` and the
        RMSF values to ``rmsf.dat`` in the current working directory.
    """
    # Gromacs does not set the element symbol in its PDB files,
    # but Biotite guesses the element names from the atom names,
    # emitting a warning
    template = strucio.load_structure(topology)
    # The structure still has water and ions, that are not needed for
    # our calculations, we are only interested in the protein itself
    # These are removed for the sake of computational speed using a
    # boolean mask
    protein_mask = struc.filter_amino_acids(template)
    template = template[protein_mask]
    residue_names = struc.get_residues(template)[1]

    xtc_file = XTCFile()
    xtc_file.read(
        xtc_traj, atom_i=np.where(protein_mask)[0],
        start=start_frame,
        # The defaults are None: 'None + 1' would raise a TypeError
        stop=None if stop_frame is None else stop_frame + 1
    )
    trajectory = xtc_file.get_structure(template)
    # Get simulation time for plotting purposes
    time = xtc_file.get_time()

    trajectory = struc.remove_pbc(trajectory)

    # RMSD of each frame to the first frame after superimposition
    trajectory, transform = struc.superimpose(trajectory[0], trajectory)
    rmsd = struc.rmsd(trajectory[0], trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(time, rmsd, color=biotite.colors["dimorange"])
    ax.set_xlim(time[0], time[-1])
    ax.set_ylim(0, 2)
    ax.set_xlabel("Time (ps)")
    ax.set_ylabel("RMSD (Å)")
    figure.tight_layout()

    radius = struc.gyration_radius(trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(time, radius, color=biotite.colors["dimorange"])
    ax.set_xlim(time[0], time[-1])
    ax.set_ylim(14.0, 14.5)
    ax.set_xlabel("Time (ps)")
    ax.set_ylabel("Radius of gyration (Å)")
    figure.tight_layout()

    # In all models, mask the CA atoms
    ca_trajectory = trajectory[:, trajectory.atom_name == "CA"]
    # RMSF relative to the average structure of the CA atoms
    rmsf = struc.rmsf(struc.average(ca_trajectory), ca_trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    res_count = struc.get_residue_count(trajectory)
    ax.plot(np.arange(1, res_count + 1), rmsf,
            color=biotite.colors["dimorange"])
    ax.set_xlim(1, res_count)
    ax.set_ylim(0, 1.5)
    ax.set_xlabel("Residue")
    ax.set_ylabel("RMSF (Å)")
    figure.tight_layout()

    if write_dat_files:
        # Write RMSD *.dat file
        # The row labels start at 'start_frame - 1' (0 if no start frame
        # is given); one label per frame that was actually read
        first_label = 0 if start_frame is None else start_frame - 1
        frames = np.arange(
            first_label, first_label + len(rmsd), dtype=int
        )
        # NOTE(review): kept from the previous implementation, the first
        # row is always labelled 0, even if 'start_frame' is larger
        # than 1 -- confirm that this is intended
        frames[0] = 0
        df = pd.DataFrame(data=rmsd, index=frames, columns=["RMSD Values"])
        df.index.name = 'Frames'
        df.to_csv('rmsd.dat', header=True, index=True, sep='\t', mode='w')

        # Write RMSF *.dat file
        df1 = pd.DataFrame(data=rmsf, index=residue_names,
                           columns=["RMSF Values"])
        df1.index.name = 'Residues'
        df1.to_csv('rmsf.dat', header=True, index=True, sep='\t', mode='w')

    plt.show()
def assemble_peptide(sequence):
    """
    Build a peptide with all atoms and bonds from a sequence of
    one-letter amino acid codes.

    For each residue a backbone (N, CA, C, O, H) is constructed at the
    origin, the side chain is taken from the residue of the same name
    in the dataset (`info.residue()`), superimposed onto this backbone,
    and the assembled residue is attached to the chain built so far.
    Finally a second hydrogen at the N-terminus and a hydroxyl group at
    the C-terminus are added.

    Parameters
    ----------
    sequence : iterable of str
        The one-letter codes of the residues.
        Must contain at least one residue: the terminal groups are
        attached to residue 1 and to the last residue, and the
        C-terminal geometry reuses the connection angle of the last
        loop iteration.

    Returns
    -------
    peptide : AtomArray
        The assembled peptide in chain ``"A"`` with residue IDs
        starting at 1, including its bonds.
    """
    res_names = [seq.ProteinSequence.convert_letter_1to3(r)
                 for r in sequence]
    peptide = struc.AtomArray(length=0)

    # The connection angle alternates between consecutive residues
    for res_id, res_name, connect_angle in zip(
            np.arange(1, len(res_names) + 1),
            res_names,
            itertools.cycle([120, -120])):
        # Create backbone
        atom_n = struc.Atom(
            [0.0, 0.0, 0.0], atom_name="N", element="N"
        )
        atom_ca = struc.Atom(
            [0.0, N_CA_LENGTH, 0.0], atom_name="CA", element="C"
        )
        coord_c = calculate_atom_coord_by_z_rotation(
            atom_ca.coord, atom_n.coord, 120, CA_C_LENGTH
        )
        atom_c = struc.Atom(coord_c, atom_name="C", element="C")
        coord_o = calculate_atom_coord_by_z_rotation(
            atom_c.coord, atom_ca.coord, 120, C_O_DOUBLE_LENGTH
        )
        atom_o = struc.Atom(coord_o, atom_name="O", element="O")
        coord_h = calculate_atom_coord_by_z_rotation(
            atom_n.coord, atom_ca.coord, -120, N_H_LENGTH
        )
        atom_h = struc.Atom(coord_h, atom_name="H", element="H")

        backbone = struc.array([atom_n, atom_ca, atom_c, atom_o, atom_h])
        backbone.res_id[:] = res_id
        backbone.res_name[:] = res_name

        # Add bonds between backbone atoms
        bonds = struc.BondList(backbone.array_length())
        bonds.add_bond(0, 1, struc.BondType.SINGLE)  # N-CA
        bonds.add_bond(1, 2, struc.BondType.SINGLE)  # CA-C
        bonds.add_bond(2, 3, struc.BondType.DOUBLE)  # C-O
        bonds.add_bond(0, 4, struc.BondType.SINGLE)  # N-H
        backbone.bonds = bonds

        # Get residue from dataset
        residue = info.residue(res_name)
        # Superimpose backbone of residue
        # with backbone created previously
        _, transformation = struc.superimpose(
            backbone[struc.filter_backbone(backbone)],
            residue[struc.filter_backbone(residue)]
        )
        residue = struc.superimpose_apply(residue, transformation)
        # Remove backbone atoms from residue because they are already
        # existing in the backbone created previously
        side_chain = residue[~np.isin(
            residue.atom_name,
            ["N", "CA", "C", "O", "OXT", "H", "H2", "H3", "HXT"]
        )]

        # Assemble backbone with side chain (including HA)
        # and set annotation arrays
        residue = backbone + side_chain
        # Bond CA to its side chain neighbours.
        # Not every residue has both 'CB' and 'HA': glycine carries the
        # hydrogen atoms 'HA2' and 'HA3' at CA instead (naming of the
        # Chemical Component Dictionary), so an unconditional lookup of
        # 'CB'/'HA' would fail with an IndexError for it
        # -> only bond the atoms that are actually present
        index_ca = np.where(residue.atom_name == "CA")[0][0]
        for neighbour_name in ("CB", "HA", "HA2", "HA3"):
            neighbour_indices = np.where(
                residue.atom_name == neighbour_name
            )[0]
            if len(neighbour_indices) > 0:
                residue.bonds.add_bond(
                    index_ca, neighbour_indices[0], struc.BondType.SINGLE
                )
        residue.chain_id[:] = "A"
        residue.res_id[:] = res_id
        residue.res_name[:] = res_name
        peptide += residue

        # Connect current residue to existing residues in the chain
        if res_id > 1:
            index_prev_ca = np.where(
                (peptide.res_id == res_id - 1) &
                (peptide.atom_name == "CA")
            )[0][0]
            index_prev_c = np.where(
                (peptide.res_id == res_id - 1) &
                (peptide.atom_name == "C")
            )[0][0]
            index_curr_n = np.where(
                (peptide.res_id == res_id) &
                (peptide.atom_name == "N")
            )[0][0]
            index_curr_c = np.where(
                (peptide.res_id == res_id) &
                (peptide.atom_name == "C")
            )[0][0]
            curr_residue_mask = peptide.res_id == res_id

            # Adjust geometry:
            # move the residue so that its N lies at the position
            # calculated from the previous C and CA
            curr_coord_n = calculate_atom_coord_by_z_rotation(
                peptide.coord[index_prev_c], peptide.coord[index_prev_ca],
                connect_angle, C_N_LENGTH
            )
            peptide.coord[curr_residue_mask] -= peptide.coord[index_curr_n]
            peptide.coord[curr_residue_mask] += curr_coord_n
            # Adjacent residues should show in opposing directions
            # -> rotate residues with even residue ID by 180 degrees
            if res_id % 2 == 0:
                coord_n = peptide.coord[index_curr_n]
                coord_c = peptide.coord[index_curr_c]
                peptide.coord[curr_residue_mask] = struc.rotate_about_axis(
                    atoms=peptide.coord[curr_residue_mask],
                    axis=coord_c - coord_n,
                    angle=np.deg2rad(180),
                    support=coord_n
                )

            # Add bond between previous C and current N
            peptide.bonds.add_bond(
                index_prev_c, index_curr_n, struc.BondType.SINGLE
            )

    # Add N-terminal hydrogen
    atom_n = peptide[(peptide.res_id == 1) & (peptide.atom_name == "N")][0]
    atom_h = peptide[(peptide.res_id == 1) & (peptide.atom_name == "H")][0]
    coord_h2 = calculate_atom_coord_by_z_rotation(
        atom_n.coord, atom_h.coord, -120, N_H_LENGTH
    )
    atom_h2 = struc.Atom(
        coord_h2,
        chain_id="A", res_id=1, res_name=atom_h.res_name,
        atom_name="H2", element="H"
    )
    # H2 is prepended: it gets index 0, the N of residue 1 gets index 1
    peptide = struc.array([atom_h2]) + peptide
    peptide.bonds.add_bond(0, 1, struc.BondType.SINGLE)  # H2-N

    # Add C-terminal hydroxyl group
    # ('connect_angle' still holds the value of the last residue)
    last_id = len(sequence)
    index_c = np.where(
        (peptide.res_id == last_id) & (peptide.atom_name == "C")
    )[0][0]
    index_o = np.where(
        (peptide.res_id == last_id) & (peptide.atom_name == "O")
    )[0][0]
    coord_oxt = calculate_atom_coord_by_z_rotation(
        peptide.coord[index_c], peptide.coord[index_o],
        connect_angle, C_O_LENGTH
    )
    coord_hxt = calculate_atom_coord_by_z_rotation(
        coord_oxt, peptide.coord[index_c],
        connect_angle, O_H_LENGTH
    )
    atom_oxt = struc.Atom(
        coord_oxt,
        chain_id="A", res_id=last_id, res_name=peptide[index_c].res_name,
        atom_name="OXT", element="O"
    )
    atom_hxt = struc.Atom(
        coord_hxt,
        chain_id="A", res_id=last_id, res_name=peptide[index_c].res_name,
        atom_name="HXT", element="H"
    )
    # OXT and HXT are appended: they get the indices -2 and -1
    peptide = peptide + struc.array([atom_oxt, atom_hxt])
    peptide.bonds.add_bond(index_c, -2, struc.BondType.SINGLE)  # C-OXT
    peptide.bonds.add_bond(-2, -1, struc.BondType.SINGLE)       # OXT-HXT

    return peptide
# For further analysis we need to reassemble the protein chain into a # whole molecule, without periodic boundaries. # in *Gromacs* we could have used ``gmx trjconv`` for this, but this # problem can be handled in *Biotite*, too. trajectory = struc.remove_pbc(trajectory) ######################################################################## # Now our trajectory is ready for some analysis! # At first we want to see if the simulation converged. # For this purpose we take the RMSD of a frame compared to the initial # model as measure. In order to calculate the RMSD we must # superimpose all models onto a reference, in this case we also choose # the initial structure. trajectory, transform = struc.superimpose(trajectory[0], trajectory) rmsd = struc.rmsd(trajectory[0], trajectory) figure = plt.figure(figsize=(6, 3)) ax = figure.add_subplot(111) ax.plot(time, rmsd, color=biotite.colors["dimorange"]) ax.set_xlim(time[0], time[-1]) ax.set_ylim(0, 2) ax.set_xlabel("Time (ps)") ax.set_ylabel("RMSD (Å)") figure.tight_layout() ######################################################################## # As we can see the simulation seems to converge already early in the # simulation. # After a about 200 ps the RMSD stays in a range of approx. 2 - 3 Å.
# Now we want to calculate a measure of flexibility for each residue in # *TC5b*. The *root mean square fluctuation* (RMSF) is a good value for # that. # It represents the deviation for each atom in all models relative # to a reference model, which is usually the averaged structure. # Since we are only interested in the backbone flexibility, we consider # only CA atoms. # Before we can calculate a reasonable RMSF, we have to superimpose each # model on a reference model (we choose the first model), # which minimizes the *root mean square deviation* (RMSD). stack = strucio.load_structure(file_path) # We consider only CA atoms stack = stack[:, stack.atom_name == "CA"] # Superimposing all models of the structure onto the first model stack, transformation_tuple = struc.superimpose(stack[0], stack) print("RMSD for each model to first model:") print(struc.rmsd(stack[0], stack)) # Calculate the RMSF relative to average of all models rmsf = struc.rmsf(struc.average(stack), stack) # Plotting stuff plt.plot(np.arange(1, 21), rmsf) plt.xlim(0, 20) plt.xticks(np.arange(1, 21)) plt.xlabel("Residue") plt.ylabel("RMSF") plt.show() ######################################################################## # As you can see, both terminal residues are most flexible. #
template_dimer = strucio.load_structure( "dimer_refined/pk_mono_sur_di_0001_000001_0001.pdb") print(" ... loading XTC files ... ") xtc_dimer = xtc.XTCFile() xtc_dimer.read("dimers_ordered_by_cleaned.xtc") #, 1, 10) print(" ... done ... ") print("") print("") trajectory_dimer = xtc_dimer.get_structure(template_dimer) pkcs_start = trajectory_dimer[0][trajectory_dimer[0].chain_id == 'A'] pkcs_start = pkcs_start[pkcs_start.atom_name == "CA"] trajectory_dimer, transform = struc.superimpose( trajectory_dimer[0], trajectory_dimer, (trajectory_dimer[0].chain_id == 'A') & (trajectory_dimer[0].atom_name == 'CA')) trajectory_dimer_ca = trajectory_dimer[:, (trajectory_dimer.atom_name == "CA") & ((trajectory_dimer.res_id < 3206) | (trajectory_dimer.res_id > 3226))] trajectory_dimer_activesite = trajectory_dimer[:, ( trajectory_dimer.res_id >= 3747) & (trajectory_dimer. res_id <= 4015)] trajectory_dimer_survivin_1 = trajectory_dimer[:, trajectory_dimer.chain_id == 'B'] trajectory_dimer_survivin_2 = trajectory_dimer[:,