def test_array_conversion(path, model, hybrid36):
    """
    Round-trip a structure through a fresh PDB file and check that
    nothing is lost.

    The structure is read from `path`, written into an empty PDB file
    and parsed again.
    Box, bonds, all annotation categories and the coordinates of both
    structures must agree.
    If `hybrid36` is set and the structure contains residue IDs below 1,
    writing is expected to fail with a :class:`ValueError` instead.
    """
    source_file = pdb.PDBFile.read(path)
    # Test also the thin wrapper around the methods
    # 'get_structure()' and 'set_structure()'
    try:
        array1 = pdb.get_structure(source_file, model=model)
    except biotite.InvalidFileError:
        if model is not None:
            raise
        # The file cannot be parsed into an AtomArrayStack,
        # as the models contain different numbers of atoms
        # -> skip this test case
        return

    target_file = pdb.PDBFile()
    if hybrid36 and (array1.res_id < 1).any():
        expected_message = (
            "Only positive integers can be converted "
            "into hybrid-36 notation"
        )
        with pytest.raises(ValueError, match=expected_message):
            pdb.set_structure(target_file, array1, hybrid36=hybrid36)
        return

    pdb.set_structure(target_file, array1, hybrid36=hybrid36)
    array2 = pdb.get_structure(target_file, model=model)

    if array1.box is not None:
        assert np.allclose(array1.box, array2.box)
    assert array1.bonds == array2.bonds
    for category in array1.get_annotation_categories():
        original_values = array1.get_annotation(category).tolist()
        restored_values = array2.get_annotation(category).tolist()
        assert original_values == restored_values
    assert array1.coord.tolist() == array2.coord.tolist()
def test_array_conversion(path, single_model, hybrid36):
    """
    Write a parsed structure into a new PDB file, parse it again and
    compare the result with the original structure.

    With `single_model` only model 1 is used, otherwise all models are
    requested (``model=None``).
    If `hybrid36` is set and any residue ID is below 1, the write is
    expected to raise a :class:`ValueError` and the test ends there.
    """
    if single_model:
        model = 1
    else:
        model = None

    input_file = pdb.PDBFile.read(path)
    # Test also the thin wrapper around the methods
    # 'get_structure()' and 'set_structure()'
    array1 = pdb.get_structure(input_file, model=model)

    output_file = pdb.PDBFile()
    if hybrid36 and (array1.res_id < 1).any():
        with pytest.raises(
            ValueError,
            match="Only positive integers can be converted "
                  "into hybrid-36 notation"
        ):
            pdb.set_structure(output_file, array1, hybrid36=hybrid36)
        return

    pdb.set_structure(output_file, array1, hybrid36=hybrid36)
    array2 = pdb.get_structure(output_file, model=model)

    if array1.box is not None:
        assert np.allclose(array1.box, array2.box)
    assert array1.bonds == array2.bonds
    for category in array1.get_annotation_categories():
        before = array1.get_annotation(category).tolist()
        after = array2.get_annotation(category).tolist()
        assert before == after
    assert array1.coord.tolist() == array2.coord.tolist()
def test_fetch(format, as_file_like):
    """
    Fetch the entry ``1l2y`` from the RCSB in the given `format` and
    check that the result can be parsed into a structure.

    If `as_file_like` is set, no target directory is given to
    :func:`rcsb.fetch()`; otherwise the file is stored in the biotite
    temporary directory.
    """
    path = None if as_file_like else biotite.temp_dir()
    file_path_or_obj = rcsb.fetch("1l2y", format, path, overwrite=True)
    # 'read()' is used as a constructor-style call that returns the
    # parsed file, consistent with the other tests in this file.
    # Calling it on a throwaway instance would discard the parsed
    # content and leave an empty file object behind.
    if format == "pdb":
        file = pdb.PDBFile.read(file_path_or_obj)
        pdb.get_structure(file)
    elif format == "pdbx":
        file = pdbx.PDBxFile.read(file_path_or_obj)
        pdbx.get_structure(file)
    elif format == "mmtf":
        file = mmtf.MMTFFile.read(file_path_or_obj)
        mmtf.get_structure(file)
def test_fetch(format, as_file_like):
    """
    Fetch the entry ``1l2y`` from the RCSB in the given `format` and
    check that the downloaded content can be parsed.

    Structure formats are parsed into a structure, the FASTA format
    must yield at least one sequence.
    Formats not listed here are fetched but not parsed.
    """
    target_dir = None if as_file_like else biotite.temp_dir()
    fetched = rcsb.fetch("1l2y", format, target_dir, overwrite=True)

    if format == "fasta":
        sequences = fasta.get_sequences(fasta.FastaFile.read(fetched))
        # Test if the file contains any sequences
        assert len(sequences) > 0
        return

    # Map each structure format to a lazily evaluated parsing routine
    structure_parsers = {
        "pdb": lambda: pdb.get_structure(pdb.PDBFile.read(fetched)),
        "pdbx": lambda: pdbx.get_structure(pdbx.PDBxFile.read(fetched)),
        "mmtf": lambda: mmtf.get_structure(mmtf.MMTFFile.read(fetched)),
    }
    parse = structure_parsers.get(format)
    if parse is not None:
        parse()
def load_structure(fpath, chain=None):
    """
    Load a single chain of the first model from a structure file.

    Solvent atoms are removed before the chain is selected.

    Args:
        fpath: filepath to either pdb or cif file
        chain: the chain id; if None, the first chain found is used
    Returns:
        biotite.structure.AtomArray
    Raises:
        ValueError: if `fpath` ends with neither 'cif' nor 'pdb',
            if the file contains no chains after solvent removal,
            or if the requested chain is not present
    """
    if fpath.endswith('cif'):
        with open(fpath) as fin:
            pdbxf = pdbx.PDBxFile.read(fin)
        structure = pdbx.get_structure(pdbxf, model=1)
    elif fpath.endswith('pdb'):
        with open(fpath) as fin:
            pdbf = pdb.PDBFile.read(fin)
        structure = pdb.get_structure(pdbf, model=1)
    else:
        # Fail with a clear message instead of an unbound local
        # variable further below
        raise ValueError(
            f'Unsupported file type for {fpath}: '
            'expected a path ending with cif or pdb'
        )
    issolvent = filter_solvent(structure)
    structure = structure[~issolvent]
    chains = get_chains(structure)
    print(f'Found {len(chains)} chains:', chains, '\n')
    if len(chains) == 0:
        raise ValueError('No chains found in the input file.')
    if chain is None:
        chain = chains[0]
    if chain not in chains:
        raise ValueError(f'Chain {chain} not found in input file')
    structure = structure[structure.chain_id == chain]
    print(f'Loaded chain {chain}\n')
    return structure
def test_get_model_count():
    """
    Check that the model count reported for ``1l2y.pdb`` equals the
    stack depth of the structure parsed from the same file.
    """
    structure_path = join(data_dir("structure"), "1l2y.pdb")
    pdb_file = pdb.PDBFile.read(structure_path)
    # Test also the thin wrapper around the method
    # 'get_model_count()'
    counted_models = pdb.get_model_count(pdb_file)
    parsed_models = pdb.get_structure(pdb_file).stack_depth()
    assert counted_models == parsed_models
def test_array_conversion(path, single_model, hybrid36):
    """
    Write a parsed structure into a new PDB file, parse it again and
    check that box, bonds, annotations and coordinates are unchanged.

    With `single_model` only model 1 is used, otherwise all models are
    requested (``model=None``).
    `hybrid36` is forwarded to :func:`pdb.set_structure()`.
    """
    model = 1 if single_model else None
    # 'read()' returns the parsed file, consistent with the other
    # tests in this file. Calling it on a separately created instance
    # would discard the parsed content.
    pdb_file = pdb.PDBFile.read(path)
    # Test also the thin wrapper around the methods
    # 'get_structure()' and 'set_structure()'
    array1 = pdb.get_structure(pdb_file, model=model)

    pdb_file = pdb.PDBFile()
    pdb.set_structure(pdb_file, array1, hybrid36=hybrid36)

    array2 = pdb.get_structure(pdb_file, model=model)

    if array1.box is not None:
        assert np.allclose(array1.box, array2.box)
    assert array1.bonds == array2.bonds
    for category in array1.get_annotation_categories():
        assert array1.get_annotation(category).tolist() == \
               array2.get_annotation(category).tolist()
    assert array1.coord.tolist() == array2.coord.tolist()
def test_id_overflow():
    """
    Check writing of a structure with 100000 atoms and residues to a
    PDB file.

    Without hybrid-36 a :class:`UserWarning` is expected, the file must
    still be readable and the last written atom ID is expected to be 1.
    With hybrid-36 no warning may be raised and the last atom and
    residue ID are checked against their expected hybrid-36 strings.
    """
    # Imported locally, as it is only needed for the 'no warning' check
    import warnings

    # Create an atom array >= 100k atoms
    length = 100000
    a = struc.AtomArray(length)
    a.coord = np.zeros(a.coord.shape)
    a.chain_id = np.full(length, "A")
    # Create residue IDs over 10000
    a.res_id = np.arange(1, length + 1)
    a.res_name = np.full(length, "GLY")
    a.hetero = np.full(length, False)
    a.atom_name = np.full(length, "CA")
    a.element = np.full(length, "C")

    # The context manager closes the temporary file
    # even if one of the assertions fails
    with TemporaryFile("w+") as temp:
        # Write array to PDB file and make sure a warning is thrown
        with pytest.warns(UserWarning):
            pdb_file = pdb.PDBFile()
            pdb_file.set_structure(a)
            pdb_file.write(temp)

        # Assert file can be read properly
        temp.seek(0)
        a2 = pdb.get_structure(pdb.PDBFile.read(temp))
        assert a2.array_length() == a.array_length()

        # Manually check if the written atom ID is correct
        temp.seek(0)
        last_line = temp.readlines()[-1]
        atom_id = int(last_line.split()[1])
        assert atom_id == 1

    with TemporaryFile("w+") as temp:
        # Write array as hybrid-36 PDB file: no warning should be thrown
        # 'pytest.warns(None)' is deprecated, so any warning is turned
        # into an error instead, which lets the test fail
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            tmp_pdb_file = pdb.PDBFile()
            tmp_pdb_file.set_structure(a, hybrid36=True)
            tmp_pdb_file.write(temp)

        # Manually check if the output is written as correct hybrid-36
        temp.seek(0)
        last_line = temp.readlines()[-1]
        atom_id = last_line.split()[1]
        assert atom_id == "A0000"
        # The first character of this column is skipped, the remainder
        # is compared with the expected residue ID
        res_id = last_line.split()[4][1:]
        assert res_id == "BXG0"
def plot_rna(pdb_id, axes):
    """
    Fetch a PDB entry and draw the secondary structure of its
    nucleotides onto the given axes.

    Parameters
    ----------
    pdb_id : str
        The PDB ID of the entry to fetch; also used as plot title.
    axes
        The axes object the secondary structure is plotted on.
    """
    # Download the PDB file and read the first model of the structure
    structure_path = rcsb.fetch(pdb_id, "pdb", gettempdir())
    first_model = pdb.get_structure(pdb.PDBFile.read(structure_path))[0]
    nucleotides = first_model[struc.filter_nucleotides(first_model)]

    # Compute the base pairs, expressed as residue positions,
    # and their pseudoknot order
    pair_indices = struc.base_pairs(nucleotides)
    base_pairs = struc.get_residue_positions(
        nucleotides, pair_indices.flatten()
    ).reshape(pair_indices.shape)
    pseudoknot_order = struc.pseudoknots(base_pairs)[0]
    pair_count = base_pairs.shape[0]

    # Set the linestyle according to the pseudoknot order
    linestyles = np.full(pair_count, '-', dtype=object)
    linestyles[pseudoknot_order == 1] = '--'
    linestyles[pseudoknot_order == 2] = ':'

    # Indicate canonical nucleotides with an upper case one-letter-code
    # and non-canonical nucleotides with a lower case one-letter-code
    base_labels = []
    for residue in struc.residue_iter(nucleotides):
        code, is_exact = struc.map_nucleotide(residue)
        base_labels.append(code if is_exact else code.lower())

    # Color canonical Watson-Crick base pairs with a darker orange and
    # non-canonical base pairs with a lighter orange
    canonical_pairs = (["A", "U"], ["C", "G"])
    colors = np.full(pair_count, biotite.colors['brightorange'])
    for pair_index, (pos1, pos2) in enumerate(base_pairs):
        label_pair = sorted((base_labels[pos1], base_labels[pos2]))
        if label_pair in canonical_pairs:
            colors[pair_index] = biotite.colors["dimorange"]

    # Plot the secondary structure
    graphics.plot_nucleotide_secondary_structure(
        axes,
        base_labels,
        base_pairs,
        struc.get_residue_count(nucleotides),
        pseudoknot_order=pseudoknot_order,
        bond_linestyle=linestyles,
        bond_color=colors,
        # Margin to compensate for reduced axis limits in shared axis
        border=0.13,
    )

    # Use the PDB ID to label each plot
    axes.set_title(pdb_id, loc="left")
def test_bond_parsing():
    """
    Compare parsing of bonds from PDB with output from
    :func:`connect_via_residue_names()`.
    """
    # Choose a structure with CONECT records to test these as well
    structure_path = join(data_dir("structure"), "3o5r.pdb")
    atoms = pdb.get_structure(
        pdb.PDBFile.read(structure_path), model=1, include_bonds=True
    )
    parsed_bonds = atoms.bonds

    expected_bonds = struc.connect_via_residue_names(atoms)
    # The bond order is removed from the reference before comparison
    expected_bonds.remove_bond_order()

    assert parsed_bonds.as_set() == expected_bonds.as_set()
# Code source: Tom David Müller # License: BSD 3 clause from tempfile import gettempdir import biotite import biotite.structure.io.pdb as pdb import biotite.database.rcsb as rcsb import biotite.structure as struc import biotite.structure.graphics as graphics import matplotlib.pyplot as plt import numpy as np # Download the PDB file and read the structure pdb_file_path = rcsb.fetch("6ZYB", "pdb", gettempdir()) pdb_file = pdb.PDBFile.read(pdb_file_path) atom_array = pdb.get_structure(pdb_file)[0] nucleotides = atom_array[struc.filter_nucleotides(atom_array)] # Compute the base pairs and the Leontis-Westhof nomenclature base_pairs = struc.base_pairs(nucleotides) glycosidic_bonds = struc.base_pairs_glycosidic_bond(nucleotides, base_pairs) edges = struc.base_pairs_edge(nucleotides, base_pairs) base_pairs = struc.get_residue_positions( nucleotides, base_pairs.flatten()).reshape(base_pairs.shape) # Get the one-letter-codes of the bases base_labels = [] for base in struc.residue_iter(nucleotides): base_labels.append(base.res_name[0]) # Color canonical Watson-Crick base pairs with a darker orange and