def write_atom_to_pdb(pdb_outname, atom_location, atom_ID, atomgroup):
    """
    Append a single hetero atom to a reference structure file so that a
    conserved non-protein atom site can be visualised.

    The file is loaded, extended by one atom and written back to the
    same path, so repeated calls add the sites one by one.

    Parameters
    ----------
    pdb_outname : str
        Filename of the reference structure. It is read and then
        overwritten with the extended structure.
    atom_location : array
        (x,y,z) coordinates of the atom location with respect to the
        reference structure.
    atom_ID : str
        A unique ID for the atom. It is stored as the residue name of
        the new atom.
    atomgroup : str
        Label describing the atom. It is stored as the atom name of the
        new atom.
    """
    # Load the current content of the file as a Biotite structure
    structure = strucio.load_structure(pdb_outname)

    # The new atom gets the residue ID following the last one in the file
    next_res_id = structure.res_id[-1] + 1

    # Describe the site as a HETATM record on its own chain
    site_atom = struc.Atom(
        coord=atom_location,
        chain_id="X",
        res_id=next_res_id,
        res_name=atom_ID,
        hetero=True,
        atom_name=atomgroup,
        element="O",
    )
    structure = structure + struc.array([site_atom])

    # Overwrite the file with the extended structure
    strucio.save_structure(pdb_outname, structure)
def atom_list():
    """
    Build a small, fixed list of five ``Atom`` objects.

    The atoms cover two chains, three residues and one hetero atom; the
    i-th atom is placed at the coordinates ``[i, i, i]``.

    Returns
    -------
    list of Atom
        The five atoms in a fixed order.
    """
    # One row per atom:
    # (chain_id, res_id, res_name, hetero, atom_name, element)
    annotations = [
        ("A", 1, "ALA", False, "N", "N"),
        ("A", 1, "ALA", False, "CA", "C"),
        ("B", 1, "PRO", False, "O", "O"),
        ("B", 1, "PRO", False, "CA", "C"),
        ("B", 2, "MSE", True, "SE", "SE"),
    ]
    return [
        struc.Atom(
            [position, position, position],
            chain_id=chain,
            res_id=residue_number,
            res_name=residue_name,
            hetero=is_hetero,
            atom_name=name,
            element=symbol,
        )
        for position, (chain, residue_number, residue_name, is_hetero,
                       name, symbol) in enumerate(annotations)
    ]
def rotate_residue(mol, bond_number, angle):
    """
    Return a copy of a molecule with one side of a rotatable bond
    rotated about that bond.

    Parameters
    ----------
    mol : AtomArray
        The molecule to rotate. It must carry a bond list and contain
        every atom name listed in the module-level ``BACKBONE``.
    bond_number : int
        Index into the rotatable bonds that remain after all bonds to
        ``BACKBONE`` atoms have been removed.
    angle : float
        Rotation angle, passed unchanged to ``struc.rotate_about_axis``.

    Returns
    -------
    AtomArray
        A new atom array with the rotated coordinates. Only atom name,
        element, residue ID, residue name and bonds are carried over
        from ``mol``.
    """
    # Candidate bonds: every rotatable bond that does not touch the backbone
    candidate_bonds = struc.find_rotatable_bonds(mol.bonds)
    for backbone_name in BACKBONE:
        backbone_index = np.where(mol.atom_name == backbone_name)[0][0]
        candidate_bonds.remove_bonds_to(backbone_index)

    # Work on a copy so the input molecule is left untouched
    new_coord = mol.coord.copy()

    # The selected bond defines the rotation axis; its first atom is the
    # point the axis passes through
    start, end, _ = candidate_bonds.as_array()[bond_number]
    pivot = new_coord[start]
    direction = new_coord[end] - pivot

    # Cutting the bond and walking from its second atom yields exactly
    # the atoms on one side of the bond
    severed_bonds = mol.bonds.copy()
    severed_bonds.remove_bond(start, end)
    moving = struc.find_connected(severed_bonds, root=end)

    new_coord[moving] = struc.rotate_about_axis(
        new_coord[moving], direction, angle, pivot
    )

    # Rebuild the molecule atom by atom from the new coordinates
    rebuilt = struc.array([
        struc.Atom(
            position,
            atom_name=source_atom.atom_name,
            element=source_atom.element,
        )
        for position, source_atom in zip(new_coord, mol)
    ])
    rebuilt.res_id[:] = mol.res_id
    rebuilt.res_name[:] = mol.res_name
    rebuilt.bonds = mol.bonds.copy()
    return rebuilt
def assemble_peptide(sequence):
    """
    Assemble an extended peptide chain from a one-letter sequence.

    For every residue a planar backbone (N, CA, C, O, H) is built from
    the module-level bond length constants, the side chain is taken from
    ``info.residue()`` after superimposing it onto that backbone, and the
    residue is attached to the growing chain. Finally the N-terminal
    ``H2`` and the C-terminal ``OXT``/``HXT`` atoms are added.

    Parameters
    ----------
    sequence : iterable of str
        One-letter amino acid codes. Each is converted with
        ``seq.ProteinSequence.convert_letter_1to3``.

    Returns
    -------
    AtomArray
        The assembled peptide on chain ``"A"`` with residue IDs starting
        at 1 and a bond list covering backbone, side chains, peptide
        bonds and terminal atoms.

    Raises
    ------
    IndexError
        If ``sequence`` is empty, because the terminal atoms of residue 1
        cannot be found.

    Notes
    -----
    The alpha carbon is bonded to whichever of ``CB``, ``HA``, ``HA2``
    and ``HA3`` the residue template provides, so glycine (no ``CB``,
    ``HA2``/``HA3`` instead of ``HA``) is handled as well.

    NOTE(review): every residue receives a backbone amide ``H`` and the
    bonds between removed template backbone atoms and the side chain are
    not re-created -- presumably this leaves proline without its CD-N
    bond; confirm before using sequences that contain proline.
    """
    res_names = [seq.ProteinSequence.convert_letter_1to3(r) for r in sequence]
    peptide = struc.AtomArray(length=0)

    # The connection angle alternates between residues to obtain the
    # zigzag shape of an extended chain
    for res_id, res_name, connect_angle in zip(
            np.arange(1, len(res_names) + 1),
            res_names,
            itertools.cycle([120, -120])):
        # Create backbone
        atom_n = struc.Atom([0.0, 0.0, 0.0], atom_name="N", element="N")
        atom_ca = struc.Atom(
            [0.0, N_CA_LENGTH, 0.0], atom_name="CA", element="C"
        )
        coord_c = calculate_atom_coord_by_z_rotation(
            atom_ca.coord, atom_n.coord, 120, CA_C_LENGTH
        )
        atom_c = struc.Atom(coord_c, atom_name="C", element="C")
        coord_o = calculate_atom_coord_by_z_rotation(
            atom_c.coord, atom_ca.coord, 120, C_O_DOUBLE_LENGTH
        )
        atom_o = struc.Atom(coord_o, atom_name="O", element="O")
        coord_h = calculate_atom_coord_by_z_rotation(
            atom_n.coord, atom_ca.coord, -120, N_H_LENGTH
        )
        atom_h = struc.Atom(coord_h, atom_name="H", element="H")
        backbone = struc.array([atom_n, atom_ca, atom_c, atom_o, atom_h])
        backbone.res_id[:] = res_id
        backbone.res_name[:] = res_name

        # Add bonds between backbone atoms
        bonds = struc.BondList(backbone.array_length())
        bonds.add_bond(0, 1, struc.BondType.SINGLE)  # N-CA
        bonds.add_bond(1, 2, struc.BondType.SINGLE)  # CA-C
        bonds.add_bond(2, 3, struc.BondType.DOUBLE)  # C-O
        bonds.add_bond(0, 4, struc.BondType.SINGLE)  # N-H
        backbone.bonds = bonds

        # Get residue from dataset
        residue = info.residue(res_name)
        # Superimpose backbone of residue
        # with backbone created previously
        _, transformation = struc.superimpose(
            backbone[struc.filter_backbone(backbone)],
            residue[struc.filter_backbone(residue)]
        )
        residue = struc.superimpose_apply(residue, transformation)
        # Remove backbone atoms from residue because they are already
        # existing in the backbone created previously
        side_chain = residue[~np.isin(
            residue.atom_name,
            ["N", "CA", "C", "O", "OXT", "H", "H2", "H3", "HXT"]
        )]

        # Assemble backbone with side chain (including HA)
        # and set annotation arrays
        residue = backbone + side_chain
        # The bonds from CA to its side chain partners were lost when the
        # template CA was removed, so they are re-created here.
        # Only partners that exist are bonded: glycine has HA2/HA3
        # instead of CB/HA. The order keeps CB before HA.
        index_ca = np.where(residue.atom_name == "CA")[0][0]
        for partner_name in ("CB", "HA", "HA2", "HA3"):
            partner_indices = np.where(residue.atom_name == partner_name)[0]
            if len(partner_indices) > 0:
                residue.bonds.add_bond(
                    index_ca, partner_indices[0], struc.BondType.SINGLE
                )
        residue.chain_id[:] = "A"
        residue.res_id[:] = res_id
        residue.res_name[:] = res_name
        peptide += residue

        # Connect current residue to existing residues in the chain
        if res_id > 1:
            index_prev_ca = np.where(
                (peptide.res_id == res_id - 1) & (peptide.atom_name == "CA")
            )[0][0]
            index_prev_c = np.where(
                (peptide.res_id == res_id - 1) & (peptide.atom_name == "C")
            )[0][0]
            index_curr_n = np.where(
                (peptide.res_id == res_id) & (peptide.atom_name == "N")
            )[0][0]
            index_curr_c = np.where(
                (peptide.res_id == res_id) & (peptide.atom_name == "C")
            )[0][0]
            curr_residue_mask = peptide.res_id == res_id

            # Adjust geometry: move the residue so that its N sits at the
            # position computed from the previous residue's C and CA
            curr_coord_n = calculate_atom_coord_by_z_rotation(
                peptide.coord[index_prev_c], peptide.coord[index_prev_ca],
                connect_angle, C_N_LENGTH
            )
            peptide.coord[curr_residue_mask] -= peptide.coord[index_curr_n]
            peptide.coord[curr_residue_mask] += curr_coord_n
            # Adjacent residues should show in opposing directions
            # -> rotate residues with even residue ID by 180 degrees
            if res_id % 2 == 0:
                coord_n = peptide.coord[index_curr_n]
                coord_c = peptide.coord[index_curr_c]
                peptide.coord[curr_residue_mask] = struc.rotate_about_axis(
                    atoms=peptide.coord[curr_residue_mask],
                    axis=coord_c - coord_n,
                    angle=np.deg2rad(180),
                    support=coord_n
                )

            # Add bond between previous C and current N
            peptide.bonds.add_bond(
                index_prev_c, index_curr_n, struc.BondType.SINGLE
            )

    # The C-terminal atoms are placed with the connection angle of the
    # last residue that was processed
    last_connect_angle = connect_angle

    # Add N-terminal hydrogen
    atom_n = peptide[(peptide.res_id == 1) & (peptide.atom_name == "N")][0]
    atom_h = peptide[(peptide.res_id == 1) & (peptide.atom_name == "H")][0]
    coord_h2 = calculate_atom_coord_by_z_rotation(
        atom_n.coord, atom_h.coord, -120, N_H_LENGTH
    )
    atom_h2 = struc.Atom(
        coord_h2,
        chain_id="A", res_id=1, res_name=atom_h.res_name,
        atom_name="H2", element="H"
    )
    # H2 is prepended, so it has index 0 and the N of residue 1 index 1
    peptide = struc.array([atom_h2]) + peptide
    peptide.bonds.add_bond(0, 1, struc.BondType.SINGLE)  # H2-N

    # Add C-terminal hydroxyl group
    last_id = len(sequence)
    index_c = np.where(
        (peptide.res_id == last_id) & (peptide.atom_name == "C")
    )[0][0]
    index_o = np.where(
        (peptide.res_id == last_id) & (peptide.atom_name == "O")
    )[0][0]
    coord_oxt = calculate_atom_coord_by_z_rotation(
        peptide.coord[index_c], peptide.coord[index_o],
        last_connect_angle, C_O_LENGTH
    )
    coord_hxt = calculate_atom_coord_by_z_rotation(
        coord_oxt, peptide.coord[index_c],
        last_connect_angle, O_H_LENGTH
    )
    atom_oxt = struc.Atom(
        coord_oxt,
        chain_id="A", res_id=last_id, res_name=peptide[index_c].res_name,
        atom_name="OXT", element="O"
    )
    atom_hxt = struc.Atom(
        coord_hxt,
        chain_id="A", res_id=last_id, res_name=peptide[index_c].res_name,
        atom_name="HXT", element="H"
    )
    # OXT and HXT are appended, so they are the last two atoms
    peptide = peptide + struc.array([atom_oxt, atom_hxt])
    peptide.bonds.add_bond(index_c, -2, struc.BondType.SINGLE)  # C-OXT
    peptide.bonds.add_bond(-2, -1, struc.BondType.SINGLE)  # OXT-HXT

    return peptide
.. Note:: The universal length unit in *Biotite* is Å. This includes coordinates, distances, surface areas, etc. Creating structures ------------------- Let's begin by constructing some atoms: """ import biotite.structure as struc atom1 = struc.Atom([0, 0, 0], chain_id="A", res_id=1, res_name="GLY", hetero=False, atom_name="N", element="N") atom2 = struc.Atom([0, 1, 1], chain_id="A", res_id=1, res_name="GLY", hetero=False, atom_name="CA", element="C") atom3 = struc.Atom([0, 0, 2], chain_id="A", res_id=1, res_name="GLY", hetero=False,
def get_atom_features(structure_input, xtc_input, atomgroup, element,
                      grid_input=None, top_atoms=None, write=None,
                      pdb_vis=True, grid_write=None):
    """
    Find high-density sites of an atom type in a trajectory and measure
    how often each site is occupied.

    Parameters
    ----------
    structure_input : str
        Filename of the structure; passed to ``mda.Universe``. The last
        four characters are stripped to build output filenames.
    xtc_input : str
        Filename of the trajectory; passed to ``mda.Universe``.
    atomgroup : str
        Atom name used in the ``"name ..."`` selections and as the atom
        name of the sites written to the PDB file.
    element : str
        Prefix of the site IDs (``element + number``) and element of the
        sites written to the PDB file.
    grid_input : str, optional
        Density grid file to load with ``Grid``. If None, the density is
        computed from the trajectory with ``DensityAnalysis``.
    top_atoms : int, optional
        Maximum number of sites to featurize. If None or larger than the
        number of density maxima, all maxima are used.
    write : bool, optional
        If True, the result rows are written to
        ``atom_features/PocketFrequencies.txt``.
    pdb_vis : bool, optional
        If True, the protein of the first frame plus one HETATM per site
        is written to ``<structure>_IonSites.pdb`` with the occupation
        frequency as temperature factor.
    grid_write : optional
        If not None and the density is computed here, it is converted to
        ``Angstrom^{-3}`` and exported as a ``.dx`` file.

    Returns
    -------
    list
        One ``[atom_ID, atom_location, pocket_occupation_frequency]``
        entry per featurized site, ordered from highest to lowest
        density maximum.
    """
    u = mda.Universe(structure_input, xtc_input)

    if pdb_vis is True:
        protein = u.select_atoms("protein")
        pdb_outname = structure_input[0:-4] + "_IonSites.pdb"
        # Write the protein of the first frame as the reference structure
        u.trajectory[0]
        protein.write(pdb_outname)

    # Without a precomputed grid the density is obtained from the universe
    if grid_input is None:
        density_atomgroup = u.select_atoms("name " + atomgroup)
        D = DensityAnalysis(density_atomgroup, delta=1.0)
        D.run()
        if grid_write is not None:
            D.density.convert_density("Angstrom^{-3}")
            D.density.export(
                structure_input[:-4] + atomgroup + "_density.dx",
                type="double")
        g = D.density
    else:
        g = Grid(grid_input)

    # Convert the density to a probability that sums to the atom count
    atom_number = len(u.select_atoms('name ' + atomgroup))
    grid_values = np.array(g.grid)
    grid_data = grid_values * atom_number / np.sum(grid_values)

    # Mask all grid points at or below the average over occupied points
    average_probability_density = atom_number / np.count_nonzero(grid_data)
    grid_data[grid_data <= average_probability_density] = 0.0

    xyz, val = local_maxima_3D(grid_data)
    # Negate the values to order the maxima from most to least probable
    val_sort = np.argsort(-1 * val.copy())
    coords = [xyz[i] for i in val_sort]
    # str() of a coordinate array without its surrounding brackets
    # NOTE(review): the selection point below uses these coordinates
    # without adding g.origin, whereas the reported atom_location adds
    # it -- confirm that this is intended for grids whose origin is not
    # at zero.
    maxdens_coord_str = [str(item)[1:-1] for item in coords]

    atom_frequencies = []

    if top_atoms is None:
        top_atoms = len(coords)
    else:
        top_atoms = min(top_atoms, len(coords))

    print('\n')
    print('Featurizing ', top_atoms, ' Atoms')

    n_frames = len(u.trajectory)
    for atom_no in range(top_atoms):
        print('\n')
        print('Atom no: ', atom_no)
        print('\n')

        # All atoms of the group within a sphere of radius 2 centered on
        # the density maximum
        selection = ('name ' + atomgroup + ' and point '
                     + maxdens_coord_str[atom_no] + ' 2')

        # Count the frames in which the site is unoccupied. Counting is
        # done directly: looking for a -1 placeholder in a list of
        # per-frame index lists never matches and would always report an
        # occupation frequency of 1.
        empty_frames = 0
        for frame_no in tqdm(range(n_frames)):
            u.trajectory[frame_no]
            if len(u.select_atoms(selection).indices) == 0:
                empty_frames += 1

        atom_ID = element + str(atom_no + 1)
        pocket_occupation_frequency = 1 - empty_frames / n_frames
        # Shift the coordinates by the grid origin
        atom_location = coords[atom_no] + g.origin

        atom_frequencies.append(
            [atom_ID, atom_location, pocket_occupation_frequency])

        # PDB visualisation: the sites are appended to the PDB file one
        # by one as they are processed
        if pdb_vis is True:
            # Read the file into Biotite's structure object (atom array)
            atom_array = strucio.load_structure(pdb_outname)
            # Add an HETATM
            atom = struc.Atom(
                coord=atom_location,
                chain_id="W",
                # The residue ID is the last ID in the file +1
                res_id=atom_array.res_id[-1] + 1,
                res_name=atom_ID,
                hetero=True,
                atom_name=atomgroup,
                element=element)
            atom_array += struc.array([atom])
            # Save edited structure
            strucio.save_structure(pdb_outname, atom_array)

    if pdb_vis is True:
        u_pdb = mda.Universe(pdb_outname)
        u_pdb.add_TopologyAttr('tempfactors')
        # Write the frequencies as beta-factors ("tempfactors"); the
        # sites are the last top_atoms residues of the file
        first_site_resid = len(u_pdb.residues) - top_atoms
        for offset, site in enumerate(atom_frequencies):
            u_pdb.residues[first_site_resid + offset].atoms.tempfactors = \
                site[2]
        u_pdb.atoms.write(pdb_outname)

    if write is True:
        os.makedirs('atom_features/', exist_ok=True)
        filename = 'atom_features/PocketFrequencies.txt'
        with open(filename, 'w') as output:
            for row in atom_frequencies:
                # str() of the row without its surrounding brackets
                output.write(str(row)[1:-1] + '\n')

    return atom_frequencies
def get_water_features(structure_input, xtc_input, atomgroup,
                       grid_wat_model=None, grid_input=None, top_waters=30,
                       write=None, pdb_vis=True):
    """
    Find high-density water sites in a trajectory and extract, per
    frame, the dipole orientation of the water occupying each site.

    Parameters
    ----------
    structure_input : str
        Filename of the structure; passed to ``mda.Universe``. The last
        four characters are stripped to build output filenames.
    xtc_input : str
        Filename of the trajectory; passed to ``mda.Universe``.
    atomgroup : str
        Atom name used in the ``"name ..."`` selections and as the atom
        name of the sites written to the PDB file.
    grid_wat_model : optional
        If not None and the density is computed here, it is passed to
        ``convert_density`` and the density is exported as a ``.dx``
        file.
    grid_input : str, optional
        Density grid file to load with ``Grid``. If None, the density is
        computed from the trajectory with ``DensityAnalysis``.
    top_waters : int, optional
        Maximum number of sites to featurize; reduced to the number of
        density maxima if larger.
    write : bool, optional
        If True, the per-site angles and the summary of all sites are
        written to text files below ``water_features/``.
    pdb_vis : bool, optional
        If True, the protein of the first frame plus one HETATM per site
        is written to ``<structure>_WaterSites.pdb`` with the occupation
        frequency as temperature factor.

    Returns
    -------
    list
        One ``[water_ID, atom_location,
        water_pocket_occupation_frequency]`` entry per featurized site,
        ordered from highest to lowest density maximum.
    """
    from collections import Counter

    u = mda.Universe(structure_input, xtc_input)

    if pdb_vis is True:
        protein = u.select_atoms("protein")
        pdb_outname = structure_input[0:-4] + "_WaterSites.pdb"
        # Write the protein of the first frame as the reference structure
        u.trajectory[0]
        protein.write(pdb_outname)

    if grid_input is None:
        density_atomgroup = u.select_atoms("name " + atomgroup)
        # a resolution of delta=1.0 ensures the coordinates of the maxima
        # match the coordinates of the simulation box
        D = DensityAnalysis(density_atomgroup, delta=1.0)
        D.run()
        if grid_wat_model is not None:
            D.density.convert_density(grid_wat_model)
            D.density.export(
                structure_input[:-4] + atomgroup + "_density.dx",
                type="double")
        g = D.density
    else:
        g = Grid(grid_input)

    xyz, val = local_maxima_3D(g.grid)
    # Negate the values to order the maxima from most to least probable
    val_sort = np.argsort(-1 * val.copy())
    coords = [xyz[i] for i in val_sort]
    # str() of a coordinate array without its surrounding brackets
    # NOTE(review): the selection point below uses these coordinates
    # without adding g.origin, whereas the reported atom_location adds
    # it -- confirm that this is intended for grids whose origin is not
    # at zero.
    maxdens_coord_str = [str(item)[1:-1] for item in coords]

    water_frequencies = []

    if top_waters > len(coords):
        top_waters = len(coords)

    print('\n')
    print('Featurizing ', top_waters, ' Waters')

    # Create the output directory once up front, so that the summary
    # file can be written even if no site is featurized
    if write is True:
        os.makedirs('water_features/', exist_ok=True)

    n_frames = len(u.trajectory)
    # Radius of the sphere around a density maximum that defines a pocket
    radius = ' 3.5'
    # Value stored for the angles of frames without a bound water
    unbound = 10000.0

    for wat_no in range(top_waters):
        print('\n')
        print('Water no: ', wat_no)
        print('\n')

        selection = ('name ' + atomgroup + ' and point '
                     + maxdens_coord_str[wat_no] + radius)

        # First pass: the atom indices found in the pocket in each frame
        counting = []
        for frame_no in tqdm(range(n_frames)):
            u.trajectory[frame_no]
            counting.append(u.select_atoms(selection).indices)

        # Number of frames each atom index spends in the pocket, counted
        # once instead of once per candidate per frame
        occupancy = Counter(
            index for frame_indices in counting for index in frame_indices)

        # Second pass: (psi, phi) of the water dipole for each frame
        psilist = []
        philist = []
        for frame_no in tqdm(range(n_frames)):
            u.trajectory[frame_no]
            waters_resid = counting[frame_no]

            if len(waters_resid) == 0:
                psilist.append(unbound)
                philist.append(unbound)
                continue

            if len(waters_resid) == 1:
                chosen = waters_resid[0]
            else:
                # Several waters in the pocket: use the one that occupies
                # the pocket most often. Searching in reverse order picks
                # the last of equally frequent candidates.
                chosen = max(waters_resid[::-1],
                             key=occupancy.__getitem__)

            # (x,y,z) positions of all atoms of the chosen water residue
            water_atom_positions = [
                list(pos) for pos in
                u.select_atoms('byres index ' + str(chosen)).positions
            ]
            psi, phi = get_dipole(water_atom_positions)
            psilist.append(psi)
            philist.append(phi)

        water_out = [psilist, philist]
        water_ID = "O" + str(wat_no + 1)
        water_pocket_occupation_frequency = \
            1 - psilist.count(unbound) / len(psilist)
        # Shift the coordinates by the grid origin
        atom_location = coords[wat_no] + g.origin

        water_frequencies.append(
            [water_ID, atom_location, water_pocket_occupation_frequency])

        # Write out the water features into the subdirectory
        if write is True:
            filename = ('water_features/' + structure_input[0:-4]
                        + water_ID + '.txt')
            with open(filename, 'w') as output:
                for row in water_out:
                    # str() of the row without its surrounding brackets
                    output.write(str(row)[1:-1] + '\n')

        # PDB visualisation: the waters are appended to the PDB file one
        # by one as they are processed
        if pdb_vis is True:
            # Read the file into Biotite's structure object (atom array)
            atom_array = strucio.load_structure(pdb_outname)
            # Add an HETATM
            atom = struc.Atom(
                coord=atom_location,
                chain_id="W",
                # The residue ID is the last ID in the file +1
                res_id=atom_array.res_id[-1] + 1,
                res_name=water_ID,
                hetero=True,
                atom_name=atomgroup,
                element="O")
            atom_array += struc.array([atom])
            # Save edited structure
            strucio.save_structure(pdb_outname, atom_array)

    if pdb_vis is True:
        u_pdb = mda.Universe(pdb_outname)
        u_pdb.add_TopologyAttr('tempfactors')
        # Write the frequencies as beta-factors ("tempfactors"); the
        # sites are the last top_waters residues of the file
        first_site_resid = len(u_pdb.residues) - top_waters
        for offset, site in enumerate(water_frequencies):
            u_pdb.residues[first_site_resid + offset].atoms.tempfactors = \
                site[2]
        u_pdb.atoms.write(pdb_outname)

    if write is True:
        filename = ('water_features/' + structure_input[0:-4]
                    + 'WaterPocketFrequencies.txt')
        with open(filename, 'w') as output:
            for row in water_frequencies:
                # str() of the row without its surrounding brackets
                output.write(str(row)[1:-1] + '\n')

    return water_frequencies