def phi_iter(atoms):
    """
    Iterate over the atoms that define the phi angle of each residue.

    The first item is ``None``, as the first residue has no preceding
    residue.
    Each following item is the concatenation of the atoms named ``"C"``
    from the preceding residue and the atoms of the current residue
    selected by ``struct.filter_backbone()``.
    """
    current_residues = struct.residue_iter(atoms)
    preceding_residues = struct.residue_iter(atoms)
    # Advance by one residue, so that each residue is paired with
    # its predecessor
    next(current_residues)
    # No phi angle for the first residue
    yield None
    for current, preceding in zip(current_residues, preceding_residues):
        carbonyl_carbon = preceding[..., preceding.atom_name == "C"]
        backbone = current[..., struct.filter_backbone(current)]
        yield carbonyl_carbon + backbone
def psi_iter(atoms):
    """
    Iterate over the atoms that define the psi angle of each residue.

    Each item, except the last one, is the concatenation of the atoms
    of the current residue selected by ``struct.filter_backbone()``
    and the atoms named ``"N"`` from the following residue.
    The last item is ``None``, as the last residue has no following
    residue.
    """
    current_residues = struct.residue_iter(atoms)
    following_residues = struct.residue_iter(atoms)
    # Advance by one residue, so that each residue is paired with
    # its successor
    next(following_residues)
    for current, following in zip(current_residues, following_residues):
        backbone = current[..., struct.filter_backbone(current)]
        amide_nitrogen = following[..., following.atom_name == "N"]
        yield backbone + amide_nitrogen
    # No psi angle for the last residue
    yield None
def test_standardize_order(multi_model, seed):
    """
    Shuffle the atoms within each residue of a structure and check
    that applying the indices from ``standardize_order()`` brings the
    non-hydrogen atoms back into the order of the loaded structure.
    """
    reference = load_structure(join(data_dir("structure"), "1l2y.mmtf"))
    if not multi_model:
        reference = reference[0]
    # Concatenating atom arrays does not preserve the box:
    # Remove it up front to keep the equality check below simple
    reference.box = None

    # Shuffle the atoms residue by residue
    np.random.seed(seed)
    shuffled = (
        struc.AtomArrayStack(reference.stack_depth(), 0)
        if multi_model
        else struc.AtomArray(0)
    )
    for residue in struc.residue_iter(reference):
        atom_count = residue.array_length()
        permutation = np.random.choice(
            np.arange(atom_count), atom_count, replace=False
        )
        shuffled += residue[..., permutation]

    # Bring the atoms back into the PDB standard order
    standard_order = strucinfo.standardize_order(shuffled)
    restored = shuffled[..., standard_order]

    assert restored.shape == reference.shape
    # Only the non-hydrogen atoms are compared
    restored_heavy = restored[..., restored.element != "H"]
    reference_heavy = reference[..., reference.element != "H"]
    assert restored_heavy == reference_heavy
def test_residue_iter(array):
    """
    Check that the centroids of the residues yielded by
    ``residue_iter()`` are equal to the residue-wise coordinate
    averages computed by ``apply_residue_wise()``.
    """
    residue_centroids = map(struc.centroid, struc.residue_iter(array))
    test_centroids = [centroid.tolist() for centroid in residue_centroids]

    expected_centroids = struc.apply_residue_wise(
        array, array.coord, np.average, axis=0
    )

    assert test_centroids == expected_centroids.tolist()
def plot_rna(pdb_id, axes):
    """
    Fetch a structure and plot the secondary structure of its
    nucleotides.

    Parameters
    ----------
    pdb_id
        The ID of the entry to fetch via ``rcsb.fetch()``.
        It is also used as title of the plot.
    axes
        The axes to draw on.
        NOTE(review): presumably a Matplotlib ``Axes`` object - confirm
        against the caller.
    """
    # Fetch the PDB file and keep the nucleotides of the first model
    file_path = rcsb.fetch(pdb_id, "pdb", gettempdir())
    first_model = pdb.get_structure(pdb.PDBFile.read(file_path))[0]
    nucleotides = first_model[struc.filter_nucleotides(first_model)]

    # Find the base pairs and express them as residue positions
    # instead of atom indices
    atom_pairs = struc.base_pairs(nucleotides)
    base_pairs = struc.get_residue_positions(
        nucleotides, atom_pairs.flatten()
    ).reshape(atom_pairs.shape)
    pseudoknot_order = struc.pseudoknots(base_pairs)[0]
    pair_count = base_pairs.shape[0]

    # The linestyle of a bond depends on its pseudoknot order
    linestyles = np.full(pair_count, '-', dtype=object)
    linestyles[pseudoknot_order == 1] = '--'
    linestyles[pseudoknot_order == 2] = ':'

    # Canonical nucleotides get an upper case one-letter-code,
    # non-canonical nucleotides get a lower case one-letter-code
    base_labels = []
    for nucleotide in struc.residue_iter(nucleotides):
        code, is_canonical = struc.map_nucleotide(nucleotide)
        base_labels.append(code if is_canonical else code.lower())

    # Canonical Watson-Crick base pairs get a darker orange,
    # all other base pairs keep the lighter orange
    watson_crick_pairs = [["A", "U"], ["C", "G"]]
    colors = np.full(pair_count, biotite.colors['brightorange'])
    for pair_index, (pos1, pos2) in enumerate(base_pairs):
        pair_labels = sorted([base_labels[pos1], base_labels[pos2]])
        if pair_labels in watson_crick_pairs:
            colors[pair_index] = biotite.colors["dimorange"]

    # Draw the secondary structure
    graphics.plot_nucleotide_secondary_structure(
        axes,
        base_labels,
        base_pairs,
        struc.get_residue_count(nucleotides),
        pseudoknot_order=pseudoknot_order,
        bond_linestyle=linestyles,
        bond_color=colors,
        # Margin to compensate for reduced axis limits in shared axis
        border=0.13,
    )

    # Label the plot with the PDB ID
    axes.set_title(pdb_id, loc="left")
def test_base_pairs_reverse_no_hydrogen(nuc_sample_array, basepairs):
    """
    Test ``base_pairs()`` on the sample structure after its hydrogen
    atoms have been removed and the order of its residues has been
    reversed.
    """
    heavy_atoms = nuc_sample_array[nuc_sample_array.element != "H"]

    # Rebuild the structure residue by residue, starting from the
    # last residue
    reversed_array = struc.AtomArray(0)
    for residue in reversed_iterator(struc.residue_iter(heavy_atoms)):
        reversed_array = reversed_array + residue

    found_pairs = base_pairs(reversed_array)
    found_res_ids = reversed_array[found_pairs].res_id
    check_output(found_res_ids, basepairs)
def test_base_pairs_reverse(nuc_sample_array, basepairs, unique_bool):
    """
    Test ``base_pairs()`` on the sample structure after the order of
    its residues has been reversed.
    """
    # Rebuild the structure residue by residue, starting from the
    # last residue
    reversed_array = struc.AtomArray(0)
    for residue in reversed_iterator(struc.residue_iter(nuc_sample_array)):
        reversed_array = reversed_array + residue

    found_pairs = base_pairs(reversed_array, unique=unique_bool)
    found_res_ids = reversed_array[found_pairs].res_id
    check_output(found_res_ids, basepairs)
def test_base_pairs_reordered(nuc_sample_array, seed):
    """
    Check that ``base_pairs()`` gives the same result for a structure
    whose atoms are randomly reordered within each residue as for the
    unmodified structure.
    """
    np.random.seed(seed)

    # Shuffle the atoms residue by residue
    shuffled_array = struc.AtomArray(0)
    for residue in struc.residue_iter(nuc_sample_array):
        atom_count = residue.array_length()
        permutation = np.random.choice(
            np.arange(atom_count), atom_count, replace=False
        )
        shuffled_array += residue[..., permutation]

    reference_pairs = struc.base_pairs(nuc_sample_array)
    shuffled_pairs = struc.base_pairs(shuffled_array)
    assert np.all(reference_pairs == shuffled_pairs)
def test_mass():
    """
    Check that the mass looked up by residue name agrees with the mass
    computed from the atoms of the residue, apart from a small number
    of hydrogen atoms.
    """
    array = load_structure(join(data_dir, "1l2y.mmtf"))[0]
    _, res_names = struc.get_residues(array)

    hydrogen_mass = strucinfo.mass("H")
    water_mass = hydrogen_mass * 2 + strucinfo.mass("O")

    # The mass of water must be subtracted from the mass of each
    # residue name
    name_masses = [
        strucinfo.mass(res_name) - water_mass for res_name in res_names
    ]
    # C-terminus normally has additional oxygen atom
    name_masses[-1] += strucinfo.mass("O")

    atom_masses = [
        strucinfo.mass(residue) for residue in struc.residue_iter(array)
    ]

    # Up to three additional/missing hydrogens are allowed
    # (protonation state)
    mass_diff = np.abs(np.array(name_masses) - np.array(atom_masses))
    hydrogen_count = mass_diff // hydrogen_mass
    remainder = mass_diff % hydrogen_mass
    assert (hydrogen_count <= 3).all()
    assert np.allclose(remainder, 0, atol=5e-3)
# Download the PDB file and read the structure pdb_file_path = rcsb.fetch("6ZYB", "pdb", gettempdir()) pdb_file = pdb.PDBFile.read(pdb_file_path) atom_array = pdb.get_structure(pdb_file)[0] nucleotides = atom_array[struc.filter_nucleotides(atom_array)] # Compute the base pairs and the Leontis-Westhof nomenclature base_pairs = struc.base_pairs(nucleotides) glycosidic_bonds = struc.base_pairs_glycosidic_bond(nucleotides, base_pairs) edges = struc.base_pairs_edge(nucleotides, base_pairs) base_pairs = struc.get_residue_positions( nucleotides, base_pairs.flatten()).reshape(base_pairs.shape) # Get the one-letter-codes of the bases base_labels = [] for base in struc.residue_iter(nucleotides): base_labels.append(base.res_name[0]) # Color canonical Watson-Crick base pairs with a darker orange and # non-canonical base pairs with a lighter orange colors = np.full(base_pairs.shape[0], biotite.colors['brightorange']) for i, (base1, base2) in enumerate(base_pairs): name1 = base_labels[base1] name2 = base_labels[base2] if sorted([name1, name2]) in [["A", "U"], ["C", "G"]]: colors[i] = biotite.colors["dimorange"] # Use the base labels to indicate the Leontis-Westhof nomenclature for bases, edge_types, orientation in zip(base_pairs, edges, glycosidic_bonds): for base, edge in zip(bases, edge_types): if orientation == 1: