def test_get_atoms(cell_size):
    """
    Verify the neighbor queries of a :class:`CellList` against
    hand-computed results.

    The test structure consists of five atoms placed along the z-axis
    at the integer positions 0 to 4.
    """
    atoms = struc.AtomArray(length=5)
    atoms.coord = np.array([[0, 0, z] for z in range(5)])
    cells = struc.CellList(atoms, cell_size=cell_size)

    # Single query position combined with a scalar radius:
    # (position, radius, expected atom indices)
    single_queries = [
        ([0, 0, 0.1], 1, [0, 1]),
        ([0, 0, 1.1], 1, [1, 2]),
        ([0, 0, 1.1], 2, [0, 1, 2, 3]),
    ]
    for position, radius, expected in single_queries:
        found = cells.get_atoms(np.array(position), radius)
        assert found.tolist() == expected

    positions = [[0, 0, 0.1], [0, 0, 1.1], [0, 0, 4.1]]

    # Multiple positions sharing one radius;
    # entries equal to -1 are filtered out before the comparison
    found = cells.get_atoms(np.array(positions), 2)
    assert found[found != -1].tolist() == [0, 1, 2, 0, 1, 2, 3, 3, 4]

    # Multiple positions, each with its own radius
    radii = np.array([1.0, 2.0, 3.0])
    found = cells.get_atoms(np.array(positions), radii)
    assert found[found != -1].tolist() == [0, 1, 0, 1, 2, 3, 2, 3, 4]
def array():
    """
    Build an :class:`AtomArray` composed of many different molecules.

    The atoms of a single molecule are not adjacent in the returned
    :class:`AtomArray`; instead all atoms are shuffled into random
    positions.
    The random seed is fixed, so the result is reproducible.
    """
    MOL_NAMES = [
        "ARG",  # Molecule with multiple branches
        "TRP",  # Molecule with a cycle
        "GLC",  # Molecule with a cycle
        "NA",   # A single atom
        "ATP",  # Larger molecule
    ]
    N_MOLECULES = 20

    np.random.seed(0)

    # Concatenate randomly chosen molecules,
    # each one gets its own residue ID (starting at 1)
    atom_array = struc.AtomArray(0)
    chosen_names = np.random.choice(MOL_NAMES, N_MOLECULES)
    for res_id, mol_name in enumerate(chosen_names, start=1):
        molecule = info.residue(mol_name)
        molecule.res_id[:] = res_id
        atom_array += molecule

    # Shuffle the atoms over the entire array
    n_atoms = atom_array.array_length()
    shuffled_indices = np.random.choice(
        np.arange(n_atoms), n_atoms, replace=False
    )
    return atom_array[shuffled_indices]
def test_standardize_order(multi_model, seed):
    """
    Shuffle the atoms within each residue of a structure and check
    that the order returned by :func:`standardize_order()` restores
    the original structure (hydrogen atoms are excluded from the
    comparison).
    """
    structure_path = join(data_dir("structure"), "1l2y.mmtf")
    original = load_structure(structure_path)
    if not multi_model:
        original = original[0]
    # The box is not preserved when concatenating atom arrays later
    # This would complicate the atom array equality later
    original.box = None

    # Randomly reorder the atoms in each residue
    np.random.seed(seed)
    reordered = (
        struc.AtomArrayStack(original.stack_depth(), 0)
        if multi_model
        else struc.AtomArray(0)
    )
    for residue in struc.residue_iter(original):
        n_atoms = residue.array_length()
        shuffled = np.random.choice(
            np.arange(n_atoms), n_atoms, replace=False
        )
        reordered += residue[..., shuffled]

    # Restore the original PDB standard order
    standard_order = strucinfo.standardize_order(reordered)
    restored = reordered[..., standard_order]

    assert restored.shape == original.shape
    heavy_restored = restored[..., restored.element != "H"]
    heavy_original = original[..., original.element != "H"]
    assert heavy_restored == heavy_original
def test_id_overflow():
    """
    Check how :class:`PDBFile` handles atom and residue IDs of a
    structure with 100000 atoms, each atom in its own residue.

    - Writing the structure in the regular format must emit a
      :class:`UserWarning`; the file must still be readable and the
      atom ID in the last line is expected to be written as ``1``.
    - Writing with ``hybrid36=True`` must not emit any warning and the
      last line is expected to contain the atom ID ``A0000`` and the
      residue ID ``BXG0``.
    """
    # Local import: ``pytest.warns(None)`` is no longer supported by
    # recent pytest versions, so the standard library is used to
    # record warnings instead
    import warnings

    # Create an atom array >= 100k atoms
    length = 100000
    a = struc.AtomArray(length)
    a.coord = np.zeros(a.coord.shape)
    a.chain_id = np.full(length, "A")
    # Create residue IDs over 10000
    a.res_id = np.arange(1, length + 1)
    a.res_name = np.full(length, "GLY")
    a.hetero = np.full(length, False)
    a.atom_name = np.full(length, "CA")
    a.element = np.full(length, "C")

    # The context manager closes the temporary file
    # even if one of the assertions fails
    with TemporaryFile("w+") as temp:
        # Write stack to pdb file and make sure a warning is thrown
        with pytest.warns(UserWarning):
            pdb_file = pdb.PDBFile()
            pdb_file.set_structure(a)
            pdb_file.write(temp)

        # Assert file can be read properly
        temp.seek(0)
        a2 = pdb.get_structure(pdb.PDBFile.read(temp))
        assert a2.array_length() == a.array_length()

        # Manually check if the written atom id is correct
        temp.seek(0)
        last_line = temp.readlines()[-1]
        atom_id = int(last_line.split()[1])
        assert atom_id == 1

    with TemporaryFile("w+") as temp:
        # Write stack as hybrid-36 pdb file: no warning should be thrown
        with warnings.catch_warnings(record=True) as record:
            # Make sure no warning is suppressed by an active filter
            warnings.simplefilter("always")
            tmp_pdb_file = pdb.PDBFile()
            tmp_pdb_file.set_structure(a, hybrid36=True)
            tmp_pdb_file.write(temp)
        assert len(record) == 0

        # Manually check if the output is written as correct hybrid-36
        temp.seek(0)
        last_line = temp.readlines()[-1]
        atom_id = last_line.split()[1]
        assert atom_id == "A0000"
        # The first character of this field is skipped
        # (presumably the chain ID - TODO confirm)
        res_id = last_line.split()[4][1:]
        assert res_id == "BXG0"
def test_base_pairs_reverse_no_hydrogen(nuc_sample_array, basepairs):
    """
    Strip all hydrogen atoms from the sample structure, reverse the
    order of its residues and test :func:`base_pairs()` on the result.
    """
    heavy_atoms = nuc_sample_array[nuc_sample_array.element != "H"]

    # Rebuild the structure residue by residue, last residue first
    flipped = struc.AtomArray(0)
    for res in reversed_iterator(struc.residue_iter(heavy_atoms)):
        flipped = flipped + res

    pair_indices = base_pairs(flipped)
    paired_res_ids = flipped[pair_indices].res_id
    check_output(paired_res_ids, basepairs)
def test_base_pairs_reverse(nuc_sample_array, basepairs, unique_bool):
    """
    Reverse the order of the residues in the sample structure and test
    :func:`base_pairs()` on the result.
    """
    # Rebuild the structure residue by residue, last residue first
    flipped = struc.AtomArray(0)
    for res in reversed_iterator(struc.residue_iter(nuc_sample_array)):
        flipped = flipped + res

    pair_indices = base_pairs(flipped, unique=unique_bool)
    paired_res_ids = flipped[pair_indices].res_id
    check_output(paired_res_ids, basepairs)
def test_base_pairs_reordered(nuc_sample_array, seed):
    """
    Test :func:`base_pairs()` on a structure whose atoms are not in the
    RCSB order: the atoms within each residue are shuffled and the
    result must equal the one obtained for the unshuffled structure.
    """
    np.random.seed(seed)

    # Randomly reorder the atoms in each residue
    shuffled_array = struc.AtomArray(0)
    for res in struc.residue_iter(nuc_sample_array):
        n_atoms = res.array_length()
        permutation = np.random.choice(
            np.arange(n_atoms), n_atoms, replace=False
        )
        shuffled_array += res[..., permutation]

    reference_pairs = struc.base_pairs(nuc_sample_array)
    shuffled_pairs = struc.base_pairs(shuffled_array)
    assert np.all(reference_pairs == shuffled_pairs)
def convert_to_atom_array(chempy_model, include_bonds=False):
    """
    Create an :class:`AtomArray` from a
    :class:`chempy.models.Indexed` object.

    Besides the mandatory annotations, the returned
    :class:`AtomArray` carries the optional annotation categories
    ``b_factor``, ``occupancy``, ``charge`` and ``altloc_id``.
    Atoms of the model that lack the corresponding attribute get a
    default value (``0``, ``1.0``, ``0`` and ``""``, respectively).
    No *altloc* ID filtering is performed.

    Parameters
    ----------
    chempy_model : Indexed
        The ``chempy`` model.
    include_bonds : bool, optional
        If set to true, an associated :class:`BondList` will be created
        for the returned atom array.

    Returns
    -------
    atom_array : AtomArray
        The converted structure.
    """
    model_atoms = chempy_model.atom
    model_bonds = chempy_model.bond
    n_atoms = len(model_atoms)

    converted = struc.AtomArray(n_atoms)

    # Mandatory annotations:
    # (annotation name, chempy atom attribute, dtype)
    mandatory = (
        ("chain_id", "chain", "U3"),
        ("res_id", "resi_number", int),
        ("ins_code", "ins_code", "U1"),
        ("res_name", "resn", "U3"),
        ("hetero", "hetatm", bool),
        ("atom_name", "name", "U6"),
        ("element", "symbol", "U2"),
    )
    for annotation, attribute, dtype in mandatory:
        values = [getattr(atom, attribute) for atom in model_atoms]
        setattr(converted, annotation, np.array(values, dtype=dtype))

    # Optional annotations, the default is used
    # if an atom does not have the attribute:
    # (annotation name, chempy atom attribute, default, dtype)
    optional = (
        ("b_factor", "b", 0, float),
        ("occupancy", "q", 1.0, float),
        ("charge", "formal_charge", 0, int),
        ("altloc_id", "alt", "", "U1"),
    )
    for annotation, attribute, default, dtype in optional:
        values = [getattr(atom, attribute, default) for atom in model_atoms]
        converted.set_annotation(annotation, np.array(values, dtype=dtype))

    # Set coordinates
    converted.coord = np.array(
        [atom.coord for atom in model_atoms], dtype=np.float32
    )

    # Add bonds
    if include_bonds:
        bond_rows = [
            [bond.index[0], bond.index[1], bond.order]
            for bond in model_bonds
        ]
        converted.bonds = struc.BondList(
            n_atoms, np.array(bond_rows, dtype=np.uint32)
        )

    return converted
def create_residue_dict(components_pdbx_file_path, msgpack_file_path):
    """
    Convert the components of a PDBx file into a dictionary of
    residues and write it into a *MessagePack* file.

    Each block of the PDBx file is converted into an
    :class:`AtomArray` including bonds.
    Components without a ``chem_comp_atom`` category or without usable
    coordinates are skipped.
    The progress is printed to the console.

    Parameters
    ----------
    components_pdbx_file_path : str
        Path to the PDBx file containing the components.
    msgpack_file_path : str
        Path of the *MessagePack* file to be written.
    """
    # For some entries only 'model_Cartn',
    # for some entries only 'pdbx_model_Cartn_ideal' and
    # for some entries none of them is defined
    # -> the field sets are tried in this order
    coord_field_sets = (
        (
            "pdbx_model_Cartn_x_ideal",
            "pdbx_model_Cartn_y_ideal",
            "pdbx_model_Cartn_z_ideal",
        ),
        (
            "model_Cartn_x",
            "model_Cartn_y",
            "model_Cartn_z",
        ),
    )

    pdbx_file = pdbx.PDBxFile()
    pdbx_file.read(components_pdbx_file_path)
    components = pdbx_file.get_block_names()
    residue_dict = {}

    for i, component in enumerate(components):
        print(f"{component:3} {int(i/len(components)*100):>3d}%", end="\r")

        try:
            # Some entries use invalid quotation for the component name
            cif_general = pdbx_file.get_category(
                "chem_comp", block=component
            )
        except ValueError:
            cif_general = None
        cif_atoms = pdbx_file.get_category(
            "chem_comp_atom", block=component, expect_looped=True
        )
        cif_bonds = pdbx_file.get_category(
            "chem_comp_bond", block=component, expect_looped=True
        )
        if cif_atoms is None:
            continue

        # The number of atoms is taken from the length of the first column
        n_atoms = len(next(iter(cif_atoms.values())))
        array = struc.AtomArray(n_atoms)

        array.res_name = cif_atoms["comp_id"]
        array.atom_name = cif_atoms["atom_id"]
        array.element = cif_atoms["type_symbol"]
        array.add_annotation("charge", int)
        # An unknown charge ('?') is treated as neutral
        array.charge = np.array(
            [0 if c == "?" else int(c) for c in cif_atoms["charge"]]
        )
        # Without general information the component is marked as hetero
        array.hetero[:] = (
            cif_general is None
            or bool(cif_general["type"] == "NON-POLYMER")
        )

        for field_set in coord_field_sets:
            try:
                for dim, field in enumerate(field_set):
                    array.coord[:, dim] = cif_atoms[field]
            except (KeyError, ValueError):
                # This field set is not usable -> try the next one
                continue
            break
        else:
            # If none of them is defined, skip this component
            continue

        bonds = struc.BondList(array.array_length())
        if cif_bonds is not None:
            bond_columns = zip(
                cif_bonds["atom_id_1"],
                cif_bonds["atom_id_2"],
                cif_bonds["value_order"],
                cif_bonds["pdbx_aromatic_flag"],
            )
            for name_1, name_2, order, aromatic_flag in bond_columns:
                # Translate the atom names into atom indices
                index_1 = np.where(array.atom_name == name_1)[0][0]
                index_2 = np.where(array.atom_name == name_2)[0][0]
                bonds.add_bond(
                    index_1, index_2, BOND_ORDERS[order, aromatic_flag]
                )
        array.bonds = bonds

        residue_dict[component] = array_to_dict(array)

    with open(msgpack_file_path, "wb") as msgpack_file:
        msgpack.dump(residue_dict, msgpack_file)
def assemble_peptide(sequence):
    """
    Assemble a peptide structure including bonds from an amino acid
    sequence.

    For each residue a backbone (``N``, ``CA``, ``C``, ``O``, ``H``)
    is constructed from fixed bond lengths and angles.
    The side chain is taken from the reference residue obtained from
    ``info.residue()``, after superimposing it onto this backbone.
    The residues are connected via a bond between ``C`` and ``N`` of
    adjacent residues and finally the terminal atoms ``H2``, ``OXT``
    and ``HXT`` are added.

    Parameters
    ----------
    sequence : sequence of str
        The one-letter amino acid symbols of the peptide.

    Returns
    -------
    peptide : AtomArray
        The assembled peptide.
        All atoms are part of chain ``A`` and the residue IDs start
        at 1.

    Raises
    ------
    ValueError
        If `sequence` is empty.
    """
    res_names = [seq.ProteinSequence.convert_letter_1to3(r) for r in sequence]
    if not res_names:
        # The terminal atoms cannot be attached to an empty peptide
        raise ValueError("The sequence must contain at least one residue")
    peptide = struc.AtomArray(length=0)

    for res_id, res_name, connect_angle in zip(
        np.arange(1, len(res_names) + 1),
        res_names,
        itertools.cycle([120, -120])
    ):
        # Create backbone
        atom_n = struc.Atom(
            [0.0, 0.0, 0.0], atom_name="N", element="N"
        )
        atom_ca = struc.Atom(
            [0.0, N_CA_LENGTH, 0.0], atom_name="CA", element="C"
        )
        coord_c = calculate_atom_coord_by_z_rotation(
            atom_ca.coord, atom_n.coord, 120, CA_C_LENGTH
        )
        atom_c = struc.Atom(coord_c, atom_name="C", element="C")
        coord_o = calculate_atom_coord_by_z_rotation(
            atom_c.coord, atom_ca.coord, 120, C_O_DOUBLE_LENGTH
        )
        atom_o = struc.Atom(coord_o, atom_name="O", element="O")
        coord_h = calculate_atom_coord_by_z_rotation(
            atom_n.coord, atom_ca.coord, -120, N_H_LENGTH
        )
        atom_h = struc.Atom(coord_h, atom_name="H", element="H")

        backbone = struc.array([atom_n, atom_ca, atom_c, atom_o, atom_h])
        backbone.res_id[:] = res_id
        backbone.res_name[:] = res_name

        # Add bonds between backbone atoms
        bonds = struc.BondList(backbone.array_length())
        bonds.add_bond(0, 1, struc.BondType.SINGLE)  # N-CA
        bonds.add_bond(1, 2, struc.BondType.SINGLE)  # CA-C
        bonds.add_bond(2, 3, struc.BondType.DOUBLE)  # C-O
        bonds.add_bond(0, 4, struc.BondType.SINGLE)  # N-H
        backbone.bonds = bonds

        # Get residue from dataset
        residue = info.residue(res_name)
        # Superimpose backbone of residue
        # with backbone created previously
        _, transformation = struc.superimpose(
            backbone[struc.filter_backbone(backbone)],
            residue[struc.filter_backbone(residue)]
        )
        residue = struc.superimpose_apply(residue, transformation)
        # Remove backbone atoms from residue because they already
        # exist in the backbone created previously
        side_chain = residue[~np.isin(
            residue.atom_name,
            ["N", "CA", "C", "O", "OXT", "H", "H2", "H3", "HXT"]
        )]

        # Assemble backbone with side chain (including HA)
        # and set annotation arrays
        residue = backbone + side_chain
        # Connect CA with its side chain substituents
        # (these bonds are presumably lost when the reference backbone
        # atoms are filtered out above).
        # Only atoms that actually exist are bonded: glycine has no
        # 'CB' and carries 'HA2'/'HA3' instead of 'HA'
        index_ca = np.where(residue.atom_name == "CA")[0][0]
        for partner_name in ("CB", "HA", "HA2", "HA3"):
            partner_indices = np.where(residue.atom_name == partner_name)[0]
            if len(partner_indices) > 0:
                residue.bonds.add_bond(
                    index_ca, partner_indices[0], struc.BondType.SINGLE
                )
        residue.chain_id[:] = "A"
        residue.res_id[:] = res_id
        residue.res_name[:] = res_name
        peptide += residue

        # Connect current residue to existing residues in the chain
        if res_id > 1:
            index_prev_ca = np.where(
                (peptide.res_id == res_id - 1) & (peptide.atom_name == "CA")
            )[0][0]
            index_prev_c = np.where(
                (peptide.res_id == res_id - 1) & (peptide.atom_name == "C")
            )[0][0]
            index_curr_n = np.where(
                (peptide.res_id == res_id) & (peptide.atom_name == "N")
            )[0][0]
            index_curr_c = np.where(
                (peptide.res_id == res_id) & (peptide.atom_name == "C")
            )[0][0]
            curr_residue_mask = peptide.res_id == res_id

            # Adjust geometry:
            # move the residue so that its N is at the computed position
            curr_coord_n = calculate_atom_coord_by_z_rotation(
                peptide.coord[index_prev_c], peptide.coord[index_prev_ca],
                connect_angle, C_N_LENGTH
            )
            peptide.coord[curr_residue_mask] -= peptide.coord[index_curr_n]
            peptide.coord[curr_residue_mask] += curr_coord_n
            # Adjacent residues should show in opposing directions
            # -> rotate residues with even residue ID by 180 degrees
            if res_id % 2 == 0:
                coord_n = peptide.coord[index_curr_n]
                coord_c = peptide.coord[index_curr_c]
                peptide.coord[curr_residue_mask] = struc.rotate_about_axis(
                    atoms=peptide.coord[curr_residue_mask],
                    axis=coord_c - coord_n,
                    angle=np.deg2rad(180),
                    support=coord_n
                )

            # Add bond between previous C and current N
            peptide.bonds.add_bond(
                index_prev_c, index_curr_n, struc.BondType.SINGLE
            )

    # Add N-terminal hydrogen
    atom_n = peptide[(peptide.res_id == 1) & (peptide.atom_name == "N")][0]
    atom_h = peptide[(peptide.res_id == 1) & (peptide.atom_name == "H")][0]
    coord_h2 = calculate_atom_coord_by_z_rotation(
        atom_n.coord, atom_h.coord, -120, N_H_LENGTH
    )
    atom_h2 = struc.Atom(
        coord_h2,
        chain_id="A", res_id=1, res_name=atom_h.res_name,
        atom_name="H2", element="H"
    )
    # H2 is prepended, hence it gets index 0 and N gets index 1
    peptide = struc.array([atom_h2]) + peptide
    peptide.bonds.add_bond(0, 1, struc.BondType.SINGLE)  # H2-N

    # Add C-terminal hydroxyl group
    # 'connect_angle' still holds the value of the last loop iteration
    last_id = len(sequence)
    index_c = np.where(
        (peptide.res_id == last_id) & (peptide.atom_name == "C")
    )[0][0]
    index_o = np.where(
        (peptide.res_id == last_id) & (peptide.atom_name == "O")
    )[0][0]
    coord_oxt = calculate_atom_coord_by_z_rotation(
        peptide.coord[index_c], peptide.coord[index_o],
        connect_angle, C_O_LENGTH
    )
    coord_hxt = calculate_atom_coord_by_z_rotation(
        coord_oxt, peptide.coord[index_c], connect_angle, O_H_LENGTH
    )
    atom_oxt = struc.Atom(
        coord_oxt,
        chain_id="A", res_id=last_id, res_name=peptide[index_c].res_name,
        atom_name="OXT", element="O"
    )
    atom_hxt = struc.Atom(
        coord_hxt,
        chain_id="A", res_id=last_id, res_name=peptide[index_c].res_name,
        atom_name="HXT", element="H"
    )
    # OXT and HXT are appended, hence they are the last two atoms
    peptide = peptide + struc.array([atom_oxt, atom_hxt])
    peptide.bonds.add_bond(index_c, -2, struc.BondType.SINGLE)  # C-OXT
    peptide.bonds.add_bond(-2, -1, struc.BondType.SINGLE)       # OXT-HXT

    return peptide
print("Cell:")
print(cell)

########################################################################
# An atom array can have an associated box, which is used in functions
# that consider periodic boundary conditions.
# Atom array stacks require a *(m,3,3)*-shaped :class:`ndarray`
# that contains the box vectors for each model.
# The box is accessed via the `box` attribute, which is ``None`` by
# default.
# When loaded from a structure file, the box described in the file is
# automatically used.

import biotite.database.rcsb as rcsb
import biotite.structure.io as strucio

array = struc.AtomArray(length=100)
print(array.box)
array.box = box
print(array.box)

file_path = rcsb.fetch("3o5r", "mmtf", biotite.temp_dir())
array = strucio.load_structure(file_path)
print(array.box)

########################################################################
# When loading a trajectory from an MD simulation, the molecules are
# often fragmented over the periodic boundary.
# While a lot of analysis functions can handle such periodic boundary
# conditions automatically, some require completed molecules.
# In this case you should use :func:`remove_pbc()`.

array = struc.remove_pbc(array)