def test_gro_no_box():
    """
    .gro file format requires valid box parameters at the end of each
    model. However, if we read such a file in, the resulting object
    should not need to have an assigned box.
    """
    # Create an AtomArray
    atom = Atom([1, 2, 3], atom_name="CA", element="C", res_name="X", res_id=1)
    atoms = array([atom])

    # The context manager closes the temporary file even when writing
    # or reading raises, so a failing test does not leak the handle
    with TemporaryFile("w+") as temp:
        # Write .gro file
        gro_file = gro.GROFile()
        gro_file.set_structure(atoms)
        gro_file.write(temp)

        # Read in file
        temp.seek(0)
        gro_file = gro.GROFile.read(temp)
    s = gro_file.get_structure()

    # Assert no box with 0 dimension
    assert s.box is None
def test_valence_state_not_parametrized():
    """
    Test case in which parameters for a certain valence state of a
    generally parametrized atom are not available.
    In our case, it is sulfur having a double bond, i. e. only one
    binding partner.
    For this purpose, a fictitious molecule consisting of a central
    carbon bound to two hydrogen atoms via single bonds and to one
    sulfur atom via a double bond is created and tested.
    The expectations are the following: the sulfur's partial charge to
    be NaN and the carbon's partial charge to be smaller than that of
    the two hydrogens.
    """
    # Atom order: 0 = carbon, 1 = sulfur, 2 and 3 = hydrogen
    fictitious_molecule = array([carbon, sulfur, hydrogen, hydrogen])
    mol_length = fictitious_molecule.array_length()
    fictitious_molecule.bonds = BondList(
        mol_length, np.array([[0, 1], [0, 2], [0, 3]])
    )
    fictitious_molecule.charge = np.array([0] * mol_length)

    # Only the call under test is wrapped, so a warning emitted while
    # building the molecule cannot satisfy the expectation by accident
    with pytest.warns(UserWarning):
        charges = partial_charges(fictitious_molecule)

    carb_part_charge = charges[0]
    sulfur_part_charge = charges[1]
    hyd_part_charge = charges[2]
    assert np.isnan(sulfur_part_charge)
    assert carb_part_charge < hyd_part_charge
def write_atom_to_pdb(pdb_outname, atom_location, atom_ID, atomgroup,
                      element="O"):
    """
    Write a new atom to a reference structure to visualise conserved
    non-protein atom sites.

    The structure file is loaded, a single hetero atom is appended to
    it and the file is overwritten in place. Calling the function
    repeatedly therefore adds the sites one by one as they are
    processed.

    Parameters
    ----------
    pdb_outname : str
        Filename of reference structure.
    atom_location : array
        (x,y,z) coordinates of the atom location with respect to the
        reference structure.
    atom_ID : str
        A unique ID for the atom. It is stored as the residue name of
        the new atom.
    atomgroup : str
        MDAnalysis atomgroup to describe the atom. It is stored as the
        atom name of the new atom.
    element : str, optional
        Chemical element stored for the new atom. The default ``"O"``
        is the value that used to be hard-coded, so existing callers
        are unaffected.
    """
    # Read the file into Biotite's structure object (atom array)
    atom_array = strucio.load_structure(pdb_outname)

    # The residue ID is the last ID in the file +1
    res_id = atom_array.res_id[-1] + 1

    # Add an HETATM
    atom = struc.Atom(
        coord=atom_location,
        chain_id="X",
        res_id=res_id,
        res_name=atom_ID,
        hetero=True,
        atom_name=atomgroup,
        element=element,
    )
    atom_array += struc.array([atom])

    # Save edited structure
    strucio.save_structure(pdb_outname, atom_array)
def test_gro_id_overflow():
    """
    Write and re-read a structure whose atom and residue IDs exceed
    the range mentioned below and check that no atom is lost.
    """
    # Create an oversized AtomArray where atom_id > 100000 and res_id > 10000
    num_atoms = 100005
    atoms = array([
        Atom([1, 2, 3], atom_name="CA", element="C", res_name="X",
             res_id=i + 1)
        for i in range(num_atoms)
    ])
    atoms.box = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])

    # The context manager closes the temporary file even when writing
    # or reading raises, so a failing test does not leak the handle
    with TemporaryFile("w+") as temp:
        # Write .gro file
        gro_file = gro.GROFile()
        gro_file.set_structure(atoms)
        gro_file.write(temp)

        # Read .gro file
        temp.seek(0)
        gro_file = gro.GROFile.read(temp)
    s = gro_file.get_structure()

    assert s.array_length() == num_atoms
def array():
    """
    Build an atom array from six atoms that are defined only by their
    coordinates.
    """
    coordinates = (
        [0.5, 2.5, 1.0],
        [0.5, 2.7, 1.0],
        [1.5, 1.5, 1.0],
        [2.5, 0.5, 1.0],
        [2.5, 0.7, 1.0],
        [2.5, 0.5, 1.1],
    )
    return struc.array([Atom(position) for position in coordinates])
def test_array_from_atoms(atom_list):
    """
    Check whether custom annotations in :class:`Atom` objects are
    properly carried over to the :class:`AtomArray` when using
    :func:`array()`.
    """
    marker = 42
    for atom in atom_list:
        atom.some_annotation = marker

    atom_array = struc.array(atom_list)
    expected = np.full(atom_array.array_length(), marker)

    # The annotation must keep both its values and its integer type
    assert np.all(atom_array.some_annotation == expected)
    assert np.issubdtype(atom_array.some_annotation.dtype, np.integer)
def test_correct_output_ions():
    """
    Ions such as sodium or potassium are not parametrized. However,
    their formal charge is taken as partial charge since they are not
    involved in covalent bonding. Hence, it is expected that no warning
    is raised.
    The test is performed with a sodium ion.
    """
    # Imported locally so the block does not depend on a file-level import
    import warnings

    sodium = Atom([0, 0, 0], element="NA")
    sodium_array = array([sodium])
    # Sodium possesses a formal charge of +1
    sodium_array.charge = np.array([1])
    # Sodium is not involved in covalent bonding
    sodium_array.bonds = BondList(sodium_array.array_length())

    # ``pytest.warns(None)`` is deprecated and rejected by current
    # pytest versions; record the warnings with the standard library
    # instead. "always" makes sure no warning is hidden by a filter.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        partial_charges(sodium_array, iteration_step_num=1)
    assert len(record) == 0
def rotate_residue(mol, bond_number, angle):
    """
    Create a new atom array in which the atoms on one side of a
    rotatable, non-backbone bond are rotated about that bond.

    Parameters
    ----------
    mol : AtomArray
        The structure to rotate. It must have an associated bond list.
    bond_number : int
        Index into the rotatable bonds that remain after all bonds to
        the atoms named in ``BACKBONE`` have been removed.
    angle : float
        Rotation angle, passed unchanged to
        ``struc.rotate_about_axis()`` (presumably radians - TODO
        confirm).

    Returns
    -------
    new_mol : AtomArray
        A newly built array carrying the rotated coordinates as well as
        the atom names, elements, residue IDs, residue names and bonds
        of `mol`. No other annotation is copied.
    """
    # Rotatable bonds, excluding every bond that touches the backbone
    candidate_bonds = struc.find_rotatable_bonds(mol.bonds)
    for backbone_name in BACKBONE:
        backbone_index = np.where(mol.atom_name == backbone_name)[0][0]
        candidate_bonds.remove_bonds_to(backbone_index)

    # Work on a copy so the input structure stays untouched
    new_coord = mol.coord.copy()

    # The selected bond defines the rotation axis,
    # its first atom is the support point of the rotation
    axis_start, axis_end, _ = candidate_bonds.as_array()[bond_number]
    pivot = new_coord[axis_start]
    direction = new_coord[axis_end] - pivot

    # Cutting the axis bond leaves only the atoms on the side of the
    # second atom connected to it: these are the atoms to rotate
    bonds_without_axis = mol.bonds.copy()
    bonds_without_axis.remove_bond(axis_start, axis_end)
    moving = struc.find_connected(bonds_without_axis, root=axis_end)

    new_coord[moving] = struc.rotate_about_axis(
        new_coord[moving], direction, angle, pivot
    )

    # Rebuild the structure atom by atom from the new coordinates
    new_mol = struc.array([
        struc.Atom(
            new_coord[position],
            atom_name=old_atom.atom_name,
            element=old_atom.element,
        )
        for position, old_atom in enumerate(mol)
    ])
    new_mol.res_id[:] = mol.res_id
    new_mol.res_name[:] = mol.res_name
    new_mol.bonds = mol.bonds.copy()
    return new_mol
def test_gro_id_overflow():
    """
    Save and reload a structure with more atoms and residues than the
    limits given below and check that the atom count survives.
    """
    # Create an oversized AtomArray where atom_id > 100000 and res_id > 10000
    num_atoms = 100005
    atom_list = []
    for res_id in range(1, num_atoms + 1):
        atom_list.append(
            Atom([1, 2, 3], atom_name="CA", element="C", res_name="X",
                 res_id=res_id)
        )
    atoms = array(atom_list)
    atoms.box = np.eye(3, dtype=int)

    # Write .gro file
    tmp_file_name = biotite.temp_file(".gro")
    io.save_structure(tmp_file_name, atoms)

    # Read .gro file
    reloaded = gro.GROFile.read(tmp_file_name).get_structure()

    assert reloaded.array_length() == num_atoms
def test_gro_no_box():
    """
    A .gro file always ends each model with box parameters.
    Nevertheless, a structure that was saved without a box must come
    back without an assigned box when the file is read again.
    """
    # Create an AtomArray
    atoms = array([
        Atom([1, 2, 3], atom_name="CA", element="C", res_name="X", res_id=1)
    ])

    # Write .gro file
    tmp_file_name = biotite.temp_file(".gro")
    io.save_structure(tmp_file_name, atoms)

    # Read in file
    reloaded = gro.GROFile.read(tmp_file_name).get_structure()

    # Assert no box with 0 dimension
    assert reloaded.box is None
# Creating atoms to build molecules with
# (all of them are placed at the origin, only the element differs)
carbon = Atom([0, 0, 0], element="C")
hydrogen = Atom([0, 0, 0], element="H")
oxygen = Atom([0, 0, 0], element="O")
nitrogen = Atom([0, 0, 0], element="N")
fluorine = Atom([0, 0, 0], element="F")
sulfur = Atom([0, 0, 0], element="S")

# Building molecules

# Methane: carbon (index 0) bonded to four hydrogen atoms (1-4)
methane = array([carbon] + [hydrogen] * 4)
mol_length = methane.array_length()
methane.bonds = BondList(
    mol_length,
    np.array([[0, hydrogen_index] for hydrogen_index in range(1, 5)])
)
methane.charge = np.zeros(mol_length, dtype=int)

# Ethane: two bonded carbon atoms (0, 1),
# each carrying three hydrogen atoms (2-4 and 5-7)
ethane = array([carbon] * 2 + [hydrogen] * 6)
mol_length = ethane.array_length()
ethane.bonds = BondList(
    mol_length,
    np.array(
        [[0, 1]]
        + [[0, hydrogen_index] for hydrogen_index in range(2, 5)]
        + [[1, hydrogen_index] for hydrogen_index in range(5, 8)]
    )
)
ethane.charge = np.zeros(mol_length, dtype=int)

# Ethylene: two carbon atoms followed by four hydrogen atoms
ethylene = array([carbon] * 2 + [hydrogen] * 4)
def array(atom_list):
    """
    Create an atom array from the given atoms.

    This is a thin wrapper: `atom_list` is forwarded unchanged to
    ``struc.array()`` and its result is returned as is.
    """
    return struc.array(atom_list)
def assemble_peptide(sequence):
    """
    Assemble a peptide chain, including hydrogen atoms and bonds, from
    an amino acid sequence.

    For each residue a backbone (N, CA, C, O, H) is constructed from
    fixed bond lengths. The residue obtained from ``info.residue()`` is
    superimposed onto it and contributes the side chain. The residues
    are then linked one after another, and finally the N-terminal
    hydrogen (H2) and the C-terminal hydroxyl group (OXT, HXT) are
    added.

    Parameters
    ----------
    sequence : sized iterable of str
        The one-letter amino acid symbols of the peptide.
        Must not be empty.

    Returns
    -------
    peptide : AtomArray
        The assembled peptide. All atoms are in chain ``"A"`` and the
        residue IDs start at 1.

    Raises
    ------
    ValueError
        If `sequence` is empty.

    Notes
    -----
    NOTE(review): Every residue gets an amide hydrogen and loses the
    bonds of the removed reference backbone atoms, so proline
    presumably ends up with an extra N-H and without its N-CD bond -
    confirm before using sequences containing proline.
    """
    res_names = [seq.ProteinSequence.convert_letter_1to3(r) for r in sequence]
    if len(res_names) == 0:
        # Fail early with a clear message instead of an obscure
        # IndexError when the termini are looked up further below
        raise ValueError("Cannot assemble a peptide from an empty sequence")

    peptide = struc.AtomArray(length=0)

    for res_id, res_name, connect_angle in zip(
            np.arange(1, len(res_names) + 1),
            res_names,
            itertools.cycle([120, -120])):
        # Create backbone
        atom_n = struc.Atom([0.0, 0.0, 0.0], atom_name="N", element="N")
        atom_ca = struc.Atom(
            [0.0, N_CA_LENGTH, 0.0], atom_name="CA", element="C"
        )
        coord_c = calculate_atom_coord_by_z_rotation(
            atom_ca.coord, atom_n.coord, 120, CA_C_LENGTH
        )
        atom_c = struc.Atom(coord_c, atom_name="C", element="C")
        coord_o = calculate_atom_coord_by_z_rotation(
            atom_c.coord, atom_ca.coord, 120, C_O_DOUBLE_LENGTH
        )
        atom_o = struc.Atom(coord_o, atom_name="O", element="O")
        coord_h = calculate_atom_coord_by_z_rotation(
            atom_n.coord, atom_ca.coord, -120, N_H_LENGTH
        )
        atom_h = struc.Atom(coord_h, atom_name="H", element="H")

        backbone = struc.array([atom_n, atom_ca, atom_c, atom_o, atom_h])
        backbone.res_id[:] = res_id
        backbone.res_name[:] = res_name

        # Add bonds between backbone atoms
        bonds = struc.BondList(backbone.array_length())
        bonds.add_bond(0, 1, struc.BondType.SINGLE)  # N-CA
        bonds.add_bond(1, 2, struc.BondType.SINGLE)  # CA-C
        bonds.add_bond(2, 3, struc.BondType.DOUBLE)  # C-O
        bonds.add_bond(0, 4, struc.BondType.SINGLE)  # N-H
        backbone.bonds = bonds

        # Get residue from dataset
        residue = info.residue(res_name)
        # Superimpose backbone of residue
        # with backbone created previously
        _, transformation = struc.superimpose(
            backbone[struc.filter_backbone(backbone)],
            residue[struc.filter_backbone(residue)]
        )
        residue = struc.superimpose_apply(residue, transformation)
        # Remove backbone atoms from residue because they are already
        # existing in the backbone created previously
        side_chain = residue[~np.isin(
            residue.atom_name,
            ["N", "CA", "C", "O", "OXT", "H", "H2", "H3", "HXT"]
        )]

        # Assemble backbone with side chain (including HA)
        # and set annotation arrays
        residue = backbone + side_chain
        # The bonds from CA to its substituents were lost together with
        # the CA atom of the reference residue, so they are restored.
        # Glycine has no CB and its hydrogen atoms are named HA2/HA3,
        # hence only the substituents that actually exist are bonded
        # instead of requiring both CB and HA.
        index_ca = np.where(residue.atom_name == "CA")[0][0]
        for substituent in ("CB", "HA", "HA2", "HA3"):
            matches = np.where(residue.atom_name == substituent)[0]
            if len(matches) > 0:
                residue.bonds.add_bond(
                    index_ca, matches[0], struc.BondType.SINGLE
                )
        residue.chain_id[:] = "A"
        residue.res_id[:] = res_id
        residue.res_name[:] = res_name
        peptide += residue

        # Connect current residue to existing residues in the chain
        if res_id > 1:
            index_prev_ca = np.where(
                (peptide.res_id == res_id - 1) & (peptide.atom_name == "CA")
            )[0][0]
            index_prev_c = np.where(
                (peptide.res_id == res_id - 1) & (peptide.atom_name == "C")
            )[0][0]
            index_curr_n = np.where(
                (peptide.res_id == res_id) & (peptide.atom_name == "N")
            )[0][0]
            index_curr_c = np.where(
                (peptide.res_id == res_id) & (peptide.atom_name == "C")
            )[0][0]
            curr_residue_mask = peptide.res_id == res_id

            # Adjust geometry
            curr_coord_n = calculate_atom_coord_by_z_rotation(
                peptide.coord[index_prev_c], peptide.coord[index_prev_ca],
                connect_angle, C_N_LENGTH
            )
            peptide.coord[curr_residue_mask] -= peptide.coord[index_curr_n]
            peptide.coord[curr_residue_mask] += curr_coord_n
            # Adjacent residues should show in opposing directions
            # -> rotate residues with even residue ID by 180 degrees
            if res_id % 2 == 0:
                coord_n = peptide.coord[index_curr_n]
                coord_c = peptide.coord[index_curr_c]
                peptide.coord[curr_residue_mask] = struc.rotate_about_axis(
                    atoms=peptide.coord[curr_residue_mask],
                    axis=coord_c - coord_n,
                    angle=np.deg2rad(180),
                    support=coord_n
                )

            # Add bond between previous C and current N
            peptide.bonds.add_bond(
                index_prev_c, index_curr_n, struc.BondType.SINGLE
            )

    # Add N-terminal hydrogen
    atom_n = peptide[(peptide.res_id == 1) & (peptide.atom_name == "N")][0]
    atom_h = peptide[(peptide.res_id == 1) & (peptide.atom_name == "H")][0]
    coord_h2 = calculate_atom_coord_by_z_rotation(
        atom_n.coord, atom_h.coord, -120, N_H_LENGTH
    )
    atom_h2 = struc.Atom(
        coord_h2,
        chain_id="A", res_id=1, res_name=atom_h.res_name,
        atom_name="H2", element="H"
    )
    # H2 is prepended: it becomes index 0 and the N of residue 1 index 1
    peptide = struc.array([atom_h2]) + peptide
    peptide.bonds.add_bond(0, 1, struc.BondType.SINGLE)  # H2-N

    # Add C-terminal hydroxyl group
    # ``connect_angle`` still holds the value of the last residue here
    last_id = len(sequence)
    index_c = np.where(
        (peptide.res_id == last_id) & (peptide.atom_name == "C")
    )[0][0]
    index_o = np.where(
        (peptide.res_id == last_id) & (peptide.atom_name == "O")
    )[0][0]
    coord_oxt = calculate_atom_coord_by_z_rotation(
        peptide.coord[index_c], peptide.coord[index_o],
        connect_angle, C_O_LENGTH
    )
    coord_hxt = calculate_atom_coord_by_z_rotation(
        coord_oxt, peptide.coord[index_c], connect_angle, O_H_LENGTH
    )
    atom_oxt = struc.Atom(
        coord_oxt,
        chain_id="A", res_id=last_id, res_name=peptide[index_c].res_name,
        atom_name="OXT", element="O"
    )
    atom_hxt = struc.Atom(
        coord_hxt,
        chain_id="A", res_id=last_id, res_name=peptide[index_c].res_name,
        atom_name="HXT", element="H"
    )
    # OXT and HXT are appended, i.e. they are the last two atoms
    peptide = peptide + struc.array([atom_oxt, atom_hxt])
    peptide.bonds.add_bond(index_c, -2, struc.BondType.SINGLE)  # C-OXT
    peptide.bonds.add_bond(-2, -1, struc.BondType.SINGLE)       # OXT-HXT

    return peptide
# fewer cases :class:`Atom` instances are created as it is done in the # above example. # # If you want to work with an entire molecular structure, containing an # arbitrary amount of atoms, you have to use so called atom arrays. # An atom array can be seen as an array of atom instances # (hence the name). # But instead of storing :class:`Atom` instances in a list, an # :class:`AtomArray` instance contains one :class:`ndarray` for each # annotation and the coordinates. # In order to see this in action, we first have to create an array from # the atoms we constructed before. # Then we can access the annotations and coordinates of the atom array # simply by specifying the attribute. array = struc.array([atom1, atom2, atom3]) print("Chain ID:", array.chain_id) print("Residue ID:", array.res_id) print("Atom name:", array.atom_name) print("Coordinates:", array.coord) print() print(array) ######################################################################## # The :func:`array()` builder function takes any iterable object # containing :class:`Atom` instances. # If you wanted to, you could even use another :class:`AtomArray`, which # functions also as an iterable object of :class:`Atom` objects. # An alternative way of constructing an array would be creating an # :class:`AtomArray` by using its constructor, which fills the # annotation arrays and coordinates with the type respective *zero*
def get_atom_features(structure_input, xtc_input, atomgroup, element,
                      grid_input=None, top_atoms=None, write=None,
                      pdb_vis=True, grid_write=None):
    """
    Locate the density maxima of an atom type in a trajectory and
    determine how often each of these sites is occupied.

    Parameters
    ----------
    structure_input : str
        Structure file, passed to ``mda.Universe``. Its last four
        characters are stripped to build the names of the output files.
    xtc_input : str
        Trajectory file, passed to ``mda.Universe``.
    atomgroup : str
        Atom name used in the selections (``"name " + atomgroup``).
    element : str
        Element of the atoms. It is used as prefix of the site IDs and
        as element of the atoms written to the PDB file.
    grid_input : str, optional
        File with a precomputed density grid. If None, the density is
        computed from the trajectory.
    top_atoms : int, optional
        Maximum number of sites to featurize, starting at the highest
        density maximum. If None, all maxima are used.
    write : bool, optional
        If True, the result is written to
        ``atom_features/PocketFrequencies.txt``.
    pdb_vis : bool, optional
        If True, the protein and the sites are written to
        ``<structure>_IonSites.pdb`` with the occupation frequency as
        temperature factor.
    grid_write : optional
        If not None, the computed density is converted to
        ``Angstrom^{-3}`` and exported as ``.dx`` file.

    Returns
    -------
    atom_frequencies : list
        One entry ``[atom_ID, atom_location, occupation_frequency]``
        per site.
    """
    u = mda.Universe(structure_input, xtc_input)

    if pdb_vis is True:
        protein = u.select_atoms("protein")
        pdb_outname = structure_input[0:-4] + "_IonSites.pdb"
        u.trajectory[0]
        protein.write(pdb_outname)

    ## The density will be obtained from the universe which depends on
    ## the .xtc and .gro
    if grid_input is None:
        density_atomgroup = u.select_atoms("name " + atomgroup)
        D = DensityAnalysis(density_atomgroup, delta=1.0)
        D.run()
        if grid_write is not None:
            D.density.convert_density("Angstrom^{-3}")
            D.density.export(
                structure_input[:-4] + atomgroup + "_density.dx",
                type="double"
            )
        g = D.density
    else:
        g = Grid(grid_input)

    ## converting the density to a probability
    atom_number = len(u.select_atoms('name ' + atomgroup))
    grid_values = np.array(g.grid)
    grid_data = grid_values * atom_number / np.sum(grid_values)

    ## mask all probabilities below the average probability,
    ## where the average is taken over the non-zero grid cells only
    average_probability_density = atom_number / np.count_nonzero(grid_data)
    grid_data[grid_data <= average_probability_density] = 0.0

    xyz, val = local_maxima_3D(grid_data)
    ## negate the array to get descending order from most prob to least prob
    val_sort = np.argsort(-1 * val.copy())
    coords = [xyz[i] for i in val_sort]
    maxdens_coord_str = [str(item)[1:-1] for item in coords]

    atom_frequencies = []

    if top_atoms is None or top_atoms > len(coords):
        top_atoms = len(coords)

    print('\n')
    print('Featurizing ', top_atoms, ' Atoms')

    n_frames = len(u.trajectory)
    for atom_no in range(top_atoms):
        print('\n')
        print('Atom no: ', atom_no)
        print('\n')

        # Count the frames in which the site is empty.
        # The former ``counting.count(-1)`` searched a list of lists
        # for the integer -1, which never matches, so the reported
        # frequency was always 1.
        empty_frames = 0
        for i in tqdm(range(n_frames)):
            u.trajectory[i]
            ## list all atoms within sphere of radius 2 centered on
            ## the probability density maximum
            atomgroup_IDS = u.select_atoms(
                'name ' + atomgroup + ' and point '
                + maxdens_coord_str[atom_no] + ' 2'
            ).indices
            if len(atomgroup_IDS) == 0:
                empty_frames += 1

        atom_ID = element + str(atom_no + 1)
        pocket_occupation_frequency = 1 - empty_frames / n_frames
        # Shifting the coordinates by the grid origin
        atom_location = coords[atom_no] + g.origin

        atom_frequencies.append(
            [atom_ID, atom_location, pocket_occupation_frequency]
        )

        ## PDB_VISUALISATION
        ## add the sites to the pdb file one by one as they are processed
        if pdb_vis is True:
            # Read the file into Biotite's structure object (atom array)
            atom_array = strucio.load_structure(pdb_outname)
            # Add an HETATM
            atom = struc.Atom(
                coord=atom_location,
                chain_id="W",
                # The residue ID is the last ID in the file +1
                res_id=atom_array.res_id[-1] + 1,
                res_name=atom_ID,
                hetero=True,
                atom_name=atomgroup,
                element=element
            )
            atom_array += struc.array([atom])
            # Save edited structure
            strucio.save_structure(pdb_outname, atom_array)

    if pdb_vis is True:
        u_pdb = mda.Universe(pdb_outname)
        u_pdb.add_TopologyAttr('tempfactors')
        # Write values as beta-factors ("tempfactors") to a PDB file
        # The sites were appended last, so they are the final
        # ``top_atoms`` residues of the file
        first_site_resid = len(u_pdb.residues) - top_atoms
        for res, site in enumerate(atom_frequencies):
            u_pdb.residues[first_site_resid + res].atoms.tempfactors = site[2]
        u_pdb.atoms.write(pdb_outname)

    if write is True:
        os.makedirs('atom_features/', exist_ok=True)
        filename = 'atom_features/PocketFrequencies.txt'
        with open(filename, 'w') as output:
            for row in atom_frequencies:
                output.write(str(row)[1:-1] + '\n')

    return atom_frequencies
def get_water_features(structure_input, xtc_input, atomgroup,
                       grid_wat_model=None, grid_input=None, top_waters=30,
                       write=None, pdb_vis=True):
    """
    Locate the density maxima of water in a trajectory and extract,
    for each of these sites, the per-frame orientation of the bound
    water and the occupation frequency of the site.

    Parameters
    ----------
    structure_input : str
        Structure file, passed to ``mda.Universe``. Its last four
        characters are stripped to build the names of the output files.
    xtc_input : str
        Trajectory file, passed to ``mda.Universe``.
    atomgroup : str
        Atom name used in the selections (``"name " + atomgroup``).
    grid_wat_model : str, optional
        If not None, the computed density is converted with
        ``convert_density(grid_wat_model)`` and exported as ``.dx``
        file.
    grid_input : str, optional
        File with a precomputed density grid. If None, the density is
        computed from the trajectory.
    top_waters : int, optional
        Maximum number of sites to featurize, starting at the highest
        density maximum.
    write : bool, optional
        If True, the per-site values and the occupation frequencies
        are written to text files below ``water_features/``.
    pdb_vis : bool, optional
        If True, the protein and the sites are written to
        ``<structure>_WaterSites.pdb`` with the occupation frequency as
        temperature factor.

    Returns
    -------
    water_frequencies : list
        One entry ``[water_ID, atom_location, occupation_frequency]``
        per site.
    """
    # Imported locally so the block does not depend on a file-level import
    from collections import Counter

    u = mda.Universe(structure_input, xtc_input)

    if pdb_vis is True:
        protein = u.select_atoms("protein")
        pdb_outname = structure_input[0:-4] + "_WaterSites.pdb"
        u.trajectory[0]
        protein.write(pdb_outname)

    if grid_input is None:
        density_atomgroup = u.select_atoms("name " + atomgroup)
        # a resolution of delta=1.0 ensures the coordinates of the maxima
        # match the coordinates of the simulation box
        D = DensityAnalysis(density_atomgroup, delta=1.0)
        D.run()
        if grid_wat_model is not None:
            D.density.convert_density(grid_wat_model)
            D.density.export(
                structure_input[:-4] + atomgroup + "_density.dx",
                type="double"
            )
        g = D.density
    else:
        g = Grid(grid_input)

    xyz, val = local_maxima_3D(g.grid)
    ## negate the array to get descending order from most prob to least prob
    val_sort = np.argsort(-1 * val.copy())
    coords = [xyz[i] for i in val_sort]
    maxdens_coord_str = [str(item)[1:-1] for item in coords]

    water_frequencies = []

    if top_waters > len(coords):
        top_waters = len(coords)

    print('\n')
    print('Featurizing ', top_waters, ' Waters')

    n_frames = len(u.trajectory)
    ## radius X of the sphere centered on the water prob density maxima
    radius = ' 3.5'
    ## value stored for frames in which no water is bound
    unbound = 10000.0

    for wat_no in range(top_waters):
        print('\n')
        print('Water no: ', wat_no)
        print('\n')
        philist = []
        psilist = []

        ## first pass: list all water oxygens within the sphere
        ## for every frame
        counting = []
        for frame_no in tqdm(range(n_frames)):
            u.trajectory[frame_no]
            atomgroup_IDS = u.select_atoms(
                'name ' + atomgroup + ' and point '
                + maxdens_coord_str[wat_no] + radius
            ).indices
            counting.append(atomgroup_IDS)

        ## how often each water ID appears in the pocket over the
        ## simulation; counted once here instead of rescanning the
        ## whole list for every candidate in every frame
        occupancy = Counter(
            item for sublist in counting for item in sublist
        )

        ## second pass: extracting (psi,phi) coordinates for each water
        ## dipole specific to the frame they are bound
        for frame_no in tqdm(range(n_frames)):
            u.trajectory[frame_no]
            waters_resid = counting[frame_no]

            if len(waters_resid) < 1:
                ## 10000.0 = no waters bound
                psilist.append(unbound)
                philist.append(unbound)
                continue

            if len(waters_resid) == 1:
                chosen_ID = waters_resid[0]
            else:
                ## if multiple waters in pocket then use water with
                ## largest frequency of pocket occupation; iterating in
                ## reverse keeps the former tie-break (the last of
                ## several equally frequent waters wins)
                chosen_ID = max(
                    waters_resid[::-1], key=lambda ID: occupancy[ID]
                )

            ## (x,y,z) positions for the water atoms (residue)
            ## at this frame
            water_atom_positions = [
                list(pos) for pos in
                u.select_atoms('byres index ' + str(chosen_ID)).positions
            ]
            psi, phi = get_dipole(water_atom_positions)
            psilist.append(psi)
            philist.append(phi)

        water_out = [psilist, philist]
        water_ID = "O" + str(wat_no + 1)
        water_pocket_occupation_frequency = (
            1 - psilist.count(unbound) / len(psilist)
        )
        # Shifting the coordinates by the grid origin
        atom_location = coords[wat_no] + g.origin

        water_frequencies.append(
            [water_ID, atom_location, water_pocket_occupation_frequency]
        )

        ## WRITE OUT WATER FEATURES INTO SUBDIRECTORY
        if write is True:
            os.makedirs('water_features/', exist_ok=True)
            filename = (
                'water_features/' + structure_input[0:-4] + water_ID + '.txt'
            )
            with open(filename, 'w') as output:
                for row in water_out:
                    output.write(str(row)[1:-1] + '\n')

        ## PDB_VISUALISATION
        ## add waters to the pdb file one by one as they are processed
        if pdb_vis is True:
            # Read the file into Biotite's structure object (atom array)
            atom_array = strucio.load_structure(pdb_outname)
            # Add an HETATM
            atom = struc.Atom(
                coord=atom_location,
                chain_id="W",
                # The residue ID is the last ID in the file +1
                res_id=atom_array.res_id[-1] + 1,
                res_name=water_ID,
                hetero=True,
                atom_name=atomgroup,
                element="O"
            )
            atom_array += struc.array([atom])
            # Save edited structure
            strucio.save_structure(pdb_outname, atom_array)

    if pdb_vis is True:
        u_pdb = mda.Universe(pdb_outname)
        u_pdb.add_TopologyAttr('tempfactors')
        # Write values as beta-factors ("tempfactors") to a PDB file
        # The sites were appended last, so they are the final
        # ``top_waters`` residues of the file
        first_water_resid = len(u_pdb.residues) - top_waters
        for res, site in enumerate(water_frequencies):
            u_pdb.residues[first_water_resid + res].atoms.tempfactors = site[2]
        u_pdb.atoms.write(pdb_outname)

    if write is True:
        # The directory is created in the loop above only if at least
        # one site was processed, so make sure it exists here as well
        os.makedirs('water_features/', exist_ok=True)
        filename = (
            'water_features/' + structure_input[0:-4]
            + 'WaterPocketFrequencies.txt'
        )
        with open(filename, 'w') as output:
            for row in water_frequencies:
                output.write(str(row)[1:-1] + '\n')

    return water_frequencies