Пример #1
0
def test_gro_no_box():
    """
    .gro file format requires valid box parameters at the end of each
    model. However, if we read such a file in, the resulting object should not
    need to have an assigned box.

    A single-atom structure without a box is written to a temporary
    file and read back; the structure that was read must have its
    ``box`` attribute set to ``None``.
    """

    # Create an AtomArray
    atom = Atom([1, 2, 3], atom_name="CA", element="C", res_name="X", res_id=1)
    atoms = array([atom])

    # The context manager closes the temporary file even if writing
    # or reading raises, so a failing test does not leak the handle
    with TemporaryFile("w+") as temp:
        # Write .gro file
        gro_file = gro.GROFile()
        gro_file.set_structure(atoms)
        gro_file.write(temp)

        # Rewind and read the file in again
        temp.seek(0)
        gro_file = gro.GROFile.read(temp)
    s = gro_file.get_structure()

    # Assert no box with 0 dimension
    assert s.box is None
Пример #2
0
def test_valence_state_not_parametrized():
    """
    Cover an atom whose element is parametrized in general, but not
    for the valence state it occurs in.

    The probe is a made-up molecule: a central carbon bonded to one
    sulfur and two hydrogens, so that the sulfur has a single binding
    partner only (standing in for a double-bonded sulfur).
    A ``UserWarning`` is expected while the molecule is set up and its
    charges are computed.
    The sulfur's partial charge must be NaN and the carbon's partial
    charge must be smaller than the partial charge of the first
    hydrogen.
    """
    # Atom order: 0 = carbon, 1 = sulfur, 2 and 3 = hydrogen
    bond_pairs = np.array([[0, 1], [0, 2], [0, 3]])

    with pytest.warns(UserWarning):
        molecule = array([carbon, sulfur, hydrogen, hydrogen])
        atom_count = molecule.array_length()
        molecule.bonds = BondList(atom_count, bond_pairs)
        # All formal charges are zero
        molecule.charge = np.array([0] * atom_count)
        carbon_charge, sulfur_charge, hydrogen_charge = \
            partial_charges(molecule)[:3]

    assert np.isnan(sulfur_charge)
    assert carbon_charge < hydrogen_charge
Пример #3
0
def write_atom_to_pdb(pdb_outname, atom_location, atom_ID, atomgroup,
                      element="O"):
    """
    Append a new atom to a reference structure file to visualise
    conserved non-protein atom sites.

    The structure is loaded from `pdb_outname`, one hetero atom is
    appended and the result is saved back to the same file.

    Parameters
    ----------
    pdb_outname : str
        Filename of the reference structure; read and overwritten.
    atom_location : array
        (x,y,z) coordinates of the atom location with respect to the
        reference structure.
    atom_ID : str
        A unique ID for the atom; stored as its residue name.
    atomgroup : str
        Name describing the atom; stored as its atom name.
    element : str, optional
        Element of the new atom.
        Defaults to ``"O"``, the value that was previously hard-coded.

    """
    # Read the file into Biotite's structure object (atom array)
    atom_array = strucio.load_structure(pdb_outname)
    # The new residue ID is the last residue ID in the file + 1
    res_id = atom_array.res_id[-1] + 1
    # Create the new site as a hetero atom (HETATM) in chain "X"
    atom = struc.Atom(
        coord=atom_location,
        chain_id="X",
        res_id=res_id,
        res_name=atom_ID,
        hetero=True,
        atom_name=atomgroup,
        element=element)
    atom_array += struc.array([atom])
    # Save edited structure
    strucio.save_structure(pdb_outname, atom_array)
Пример #4
0
def test_gro_id_overflow():
    """
    Write and re-read a structure whose atom count and residue IDs
    exceed 100000, and check that no atom is lost in the round trip.
    """
    # Create an oversized AtomArray where atom_id > 100000 and res_id > 10000
    num_atoms = 100005
    atoms = array([
        Atom([1, 2, 3],
             atom_name="CA",
             element="C",
             res_name="X",
             res_id=i + 1) for i in range(num_atoms)
    ])
    atoms.box = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])

    # The context manager closes the temporary file even if writing
    # or reading raises, so a failing test does not leak the handle
    with TemporaryFile("w+") as temp:
        # Write .gro file
        gro_file = gro.GROFile()
        gro_file.set_structure(atoms)
        gro_file.write(temp)

        # Rewind and read .gro file
        temp.seek(0)
        gro_file = gro.GROFile.read(temp)
    s = gro_file.get_structure()

    assert s.array_length() == num_atoms
Пример #5
0
def array():
    """
    Build an atom array of six atoms that carry coordinates only.
    """
    positions = (
        [0.5, 2.5, 1.0],
        [0.5, 2.7, 1.0],
        [1.5, 1.5, 1.0],
        [2.5, 0.5, 1.0],
        [2.5, 0.7, 1.0],
        [2.5, 0.5, 1.1],
    )
    return struc.array([Atom(position) for position in positions])
Пример #6
0
def test_array_from_atoms(atom_list):
    """
    Verify that a custom annotation set on each :class:`Atom` ends up
    in the :class:`AtomArray` built by :func:`array()`, with the same
    value for every atom and an integer dtype.
    """
    marker = 42
    for atom in atom_list:
        atom.some_annotation = marker

    atom_array = struc.array(atom_list)
    annotation = atom_array.some_annotation
    expected = np.full(atom_array.array_length(), marker)

    assert np.all(annotation == expected)
    assert np.issubdtype(annotation.dtype, np.integer)
Пример #7
0
def test_correct_output_ions():
    """
    Ions such as sodium or potassium are not parametrized. However,
    their formal charge is taken as partial charge since they are not
    involved in covalent bonding.
    Hence, it is expected that no warning is raised.
    The test is performed with a sodium ion.
    """
    # Local import keeps this test self-contained
    import warnings

    sodium = Atom([0, 0, 0], element="NA")
    sodium_array = array([sodium])
    # Sodium possesses a formal charge of +1
    sodium_array.charge = np.array([1])
    # Sodium is not involved in covalent bonding
    sodium_array.bonds = BondList(sodium_array.array_length())
    # 'pytest.warns(None)' is deprecated in pytest 7 and an error in
    # pytest 8, so the warnings are recorded with the standard library
    with warnings.catch_warnings(record=True) as record:
        # Record every warning, irrespective of the active filters
        warnings.simplefilter("always")
        partial_charges(sodium_array, iteration_step_num=1)
    assert len(record) == 0
Пример #8
0
def rotate_residue(mol, bond_number, angle):
    """
    Create a new atom array in which the atoms on one side of a
    rotatable bond of `mol` are rotated about that bond.

    Parameters
    ----------
    mol
        Atom array with a ``bonds`` attribute; it is not modified.
    bond_number : int
        Index into the rotatable bonds that remain after all bonds to
        the atoms named in ``BACKBONE`` have been removed.
    angle
        Rotation angle, passed unchanged to
        ``struc.rotate_about_axis()``.

    Returns
    -------
    new_mol
        Newly built atom array with the rotated coordinates.
        Only atom name, element, residue ID, residue name and a copy
        of the bonds are carried over from `mol`.
    """
    # Rotatable bonds, excluding every bond that involves a backbone atom
    candidate_bonds = struc.find_rotatable_bonds(mol.bonds)
    for backbone_name in BACKBONE:
        backbone_index = np.where(mol.atom_name == backbone_name)[0][0]
        candidate_bonds.remove_bonds_to(backbone_index)

    # The two atoms of the selected bond define the rotation axis;
    # the first of them serves as support point of the axis
    first, second, _ = candidate_bonds.as_array()[bond_number]
    new_coord = mol.coord.copy()
    pivot = new_coord[first]
    direction = new_coord[second] - pivot

    # Cutting the bond leaves only the atoms on the side of the
    # second atom connected to it - these are the atoms to rotate
    cut_bonds = mol.bonds.copy()
    cut_bonds.remove_bond(first, second)
    moving = struc.find_connected(cut_bonds, root=second)
    new_coord[moving] = struc.rotate_about_axis(
        new_coord[moving], direction, angle, pivot)

    # Rebuild the molecule atom by atom from the new coordinates
    new_mol = struc.array([
        struc.Atom(position,
                   atom_name=atom.atom_name,
                   element=atom.element)
        for position, atom in zip(new_coord, mol)
    ])
    new_mol.res_id[:] = mol.res_id
    new_mol.res_name[:] = mol.res_name
    new_mol.bonds = mol.bonds.copy()

    return new_mol
Пример #9
0
def test_gro_id_overflow():
    """
    Round-trip a structure with more than 100000 atoms (and as many
    residue IDs) through a .gro file and check that the atom count is
    retained.
    """
    num_atoms = 100005

    # One atom per residue, residue IDs running from 1 to num_atoms
    atom_list = []
    for res_id in range(1, num_atoms + 1):
        atom_list.append(
            Atom([1, 2, 3],
                 atom_name="CA",
                 element="C",
                 res_name="X",
                 res_id=res_id))
    atoms = array(atom_list)
    # Unit box
    atoms.box = np.identity(3, dtype=int)

    # Save the structure to a temporary .gro file ...
    gro_path = biotite.temp_file(".gro")
    io.save_structure(gro_path, atoms)

    # ... and load it again
    restored = gro.GROFile.read(gro_path).get_structure()

    assert restored.array_length() == num_atoms
Пример #10
0
def test_gro_no_box():
    """
    A .gro file must end each model with valid box parameters.
    Nevertheless, a structure that was saved without a box should
    come back without an assigned box when the file is read.
    """
    # Single-atom structure that has no box
    atoms = array([
        Atom([1, 2, 3], atom_name="CA", element="C", res_name="X", res_id=1)
    ])

    # Save it to a temporary .gro file and load it again
    gro_path = biotite.temp_file(".gro")
    io.save_structure(gro_path, atoms)
    restored = gro.GROFile.read(gro_path).get_structure()

    # The box must be absent instead of being a box of zero size
    assert restored.box is None
Пример #11
0
# Atoms used as building blocks for the molecules below;
# all of them are placed at the origin
carbon = Atom([0, 0, 0], element="C")
hydrogen = Atom([0, 0, 0], element="H")
oxygen = Atom([0, 0, 0], element="O")
nitrogen = Atom([0, 0, 0], element="N")
fluorine = Atom([0, 0, 0], element="F")
sulfur = Atom([0, 0, 0], element="S")

# Methane: atom 0 is the carbon, bonded to the hydrogens 1-4;
# all formal charges are zero
methane = array([carbon] + [hydrogen] * 4)
methane.bonds = BondList(
    methane.array_length(),
    np.array([[0, 1], [0, 2], [0, 3], [0, 4]]),
)
mol_length = methane.array_length()
methane.charge = np.zeros(mol_length, dtype=int)

# Ethane: atoms 0 and 1 are the carbons, each bonded to the other
# carbon and to three hydrogens; all formal charges are zero
ethane = array([carbon] * 2 + [hydrogen] * 6)
ethane.bonds = BondList(
    ethane.array_length(),
    np.array([
        [0, 1],
        [0, 2], [0, 3], [0, 4],
        [1, 5], [1, 6], [1, 7],
    ]),
)
mol_length = ethane.array_length()
ethane.charge = np.zeros(mol_length, dtype=int)

# Ethylene: two carbons followed by four hydrogens
ethylene = array([carbon] * 2 + [hydrogen] * 4)
Пример #12
0
def array(atom_list):
    """
    Build an atom array from `atom_list` by delegating to
    ``struc.array()`` and return the result.
    """
    return struc.array(atom_list)
Пример #13
0
def assemble_peptide(sequence):
    """
    Assemble a peptide, including its bonds, from an amino acid
    sequence.

    For each residue a five-atom backbone (N, CA, C, O, H) is created,
    the residue template obtained from ``info.residue()`` is
    superimposed onto it and the non-backbone atoms of the template
    are attached.
    Each residue after the first one is translated so that its N atom
    sits at a position computed from the C and CA atoms of the
    previous residue; residues with an even residue ID are
    additionally rotated by 180 degrees about their N-C axis.
    Finally, an ``H2`` atom is prepended at the N-terminus and an
    ``OXT``/``HXT`` pair is appended at the C-terminus.

    Parameters
    ----------
    sequence
        The amino acid symbols of the peptide.
        Each element is converted with
        ``seq.ProteinSequence.convert_letter_1to3()`` and the object
        must support ``len()``.

    Returns
    -------
    peptide
        The assembled atom array.
        All atoms are in chain ``"A"`` and residue IDs start at 1.
    """
    res_names = [seq.ProteinSequence.convert_letter_1to3(r) for r in sequence]
    peptide = struc.AtomArray(length=0)

    # 'connect_angle' alternates between 120 and -120 from residue to
    # residue
    for res_id, res_name, connect_angle in zip(
            np.arange(1,
                      len(res_names) + 1), res_names,
            itertools.cycle([120, -120])):
        # Create backbone
        # NOTE(review): the helper calculate_atom_coord_by_z_rotation()
        # is not visible here; presumably it places a new atom at the
        # given distance from its first argument - confirm
        atom_n = struc.Atom([0.0, 0.0, 0.0], atom_name="N", element="N")

        atom_ca = struc.Atom([0.0, N_CA_LENGTH, 0.0],
                             atom_name="CA",
                             element="C")

        coord_c = calculate_atom_coord_by_z_rotation(atom_ca.coord,
                                                     atom_n.coord, 120,
                                                     CA_C_LENGTH)
        atom_c = struc.Atom(coord_c, atom_name="C", element="C")

        coord_o = calculate_atom_coord_by_z_rotation(atom_c.coord,
                                                     atom_ca.coord, 120,
                                                     C_O_DOUBLE_LENGTH)
        atom_o = struc.Atom(coord_o, atom_name="O", element="O")

        coord_h = calculate_atom_coord_by_z_rotation(atom_n.coord,
                                                     atom_ca.coord, -120,
                                                     N_H_LENGTH)
        atom_h = struc.Atom(coord_h, atom_name="H", element="H")

        backbone = struc.array([atom_n, atom_ca, atom_c, atom_o, atom_h])
        backbone.res_id[:] = res_id
        backbone.res_name[:] = res_name

        # Add bonds between backbone atoms
        bonds = struc.BondList(backbone.array_length())
        bonds.add_bond(0, 1, struc.BondType.SINGLE)  # N-CA
        bonds.add_bond(1, 2, struc.BondType.SINGLE)  # CA-C
        bonds.add_bond(2, 3, struc.BondType.DOUBLE)  # C-O
        bonds.add_bond(0, 4, struc.BondType.SINGLE)  # N-H
        backbone.bonds = bonds

        # Get residue from dataset
        residue = info.residue(res_name)
        # Superimpose backbone of residue
        # with backbone created previously
        _, transformation = struc.superimpose(
            backbone[struc.filter_backbone(backbone)],
            residue[struc.filter_backbone(residue)])
        residue = struc.superimpose_apply(residue, transformation)
        # Remove backbone atoms from residue because they already
        # exist in the backbone created previously
        side_chain = residue[~np.isin(
            residue.
            atom_name, ["N", "CA", "C", "O", "OXT", "H", "H2", "H3", "HXT"])]

        # Assemble backbone with side chain (including HA)
        # and set annotation arrays
        residue = backbone + side_chain
        # NOTE(review): the '[0][0]' lookups below raise an IndexError
        # if the residue has no atom named "CB" or "HA" - presumably
        # the case for glycine; confirm against the residue dataset
        residue.bonds.add_bond(
            np.where(residue.atom_name == "CA")[0][0],
            np.where(residue.atom_name == "CB")[0][0], struc.BondType.SINGLE)
        residue.bonds.add_bond(
            np.where(residue.atom_name == "CA")[0][0],
            np.where(residue.atom_name == "HA")[0][0], struc.BondType.SINGLE)
        residue.chain_id[:] = "A"
        residue.res_id[:] = res_id
        residue.res_name[:] = res_name
        peptide += residue

        # Connect current residue to existing residues in the chain
        if res_id > 1:
            index_prev_ca = np.where((peptide.res_id == res_id - 1)
                                     & (peptide.atom_name == "CA"))[0][0]
            index_prev_c = np.where((peptide.res_id == res_id - 1)
                                    & (peptide.atom_name == "C"))[0][0]
            index_curr_n = np.where((peptide.res_id == res_id)
                                    & (peptide.atom_name == "N"))[0][0]
            index_curr_c = np.where((peptide.res_id == res_id)
                                    & (peptide.atom_name == "C"))[0][0]
            curr_residue_mask = peptide.res_id == res_id

            # Adjust geometry:
            # translate the current residue so that its N atom lies at
            # the position computed from the previous C and CA atoms
            curr_coord_n = calculate_atom_coord_by_z_rotation(
                peptide.coord[index_prev_c], peptide.coord[index_prev_ca],
                connect_angle, C_N_LENGTH)
            peptide.coord[curr_residue_mask] -= peptide.coord[index_curr_n]
            peptide.coord[curr_residue_mask] += curr_coord_n
            # Adjacent residues should point in opposing directions
            # -> rotate residues with even residue ID by 180 degrees
            if res_id % 2 == 0:
                coord_n = peptide.coord[index_curr_n]
                coord_c = peptide.coord[index_curr_c]
                peptide.coord[curr_residue_mask] = struc.rotate_about_axis(
                    atoms=peptide.coord[curr_residue_mask],
                    axis=coord_c - coord_n,
                    angle=np.deg2rad(180),
                    support=coord_n)

            # Add bond between previous C and current N
            peptide.bonds.add_bond(index_prev_c, index_curr_n,
                                   struc.BondType.SINGLE)

    # Add N-terminal hydrogen
    atom_n = peptide[(peptide.res_id == 1) & (peptide.atom_name == "N")][0]
    atom_h = peptide[(peptide.res_id == 1) & (peptide.atom_name == "H")][0]
    coord_h2 = calculate_atom_coord_by_z_rotation(atom_n.coord, atom_h.coord,
                                                  -120, N_H_LENGTH)
    atom_h2 = struc.Atom(coord_h2,
                         chain_id="A",
                         res_id=1,
                         res_name=atom_h.res_name,
                         atom_name="H2",
                         element="H")
    # H2 is prepended: it becomes index 0 and the N atom of the first
    # residue (the first atom of its backbone) becomes index 1
    peptide = struc.array([atom_h2]) + peptide
    peptide.bonds.add_bond(0, 1, struc.BondType.SINGLE)  # H2-N

    # Add C-terminal hydroxyl group
    # NOTE(review): 'connect_angle' still holds the value of the last
    # loop iteration here, so it is undefined for an empty sequence
    last_id = len(sequence)
    index_c = np.where((peptide.res_id == last_id)
                       & (peptide.atom_name == "C"))[0][0]
    index_o = np.where((peptide.res_id == last_id)
                       & (peptide.atom_name == "O"))[0][0]
    coord_oxt = calculate_atom_coord_by_z_rotation(peptide.coord[index_c],
                                                   peptide.coord[index_o],
                                                   connect_angle, C_O_LENGTH)
    coord_hxt = calculate_atom_coord_by_z_rotation(coord_oxt,
                                                   peptide.coord[index_c],
                                                   connect_angle, O_H_LENGTH)
    atom_oxt = struc.Atom(coord_oxt,
                          chain_id="A",
                          res_id=last_id,
                          res_name=peptide[index_c].res_name,
                          atom_name="OXT",
                          element="O")
    atom_hxt = struc.Atom(coord_hxt,
                          chain_id="A",
                          res_id=last_id,
                          res_name=peptide[index_c].res_name,
                          atom_name="HXT",
                          element="H")
    # OXT and HXT are appended at the end, so they are addressed with
    # the negative indices -2 and -1, while 'index_c' stays valid
    peptide = peptide + struc.array([atom_oxt, atom_hxt])
    peptide.bonds.add_bond(index_c, -2, struc.BondType.SINGLE)  # C-OXT
    peptide.bonds.add_bond(-2, -1, struc.BondType.SINGLE)  # OXT-HXT

    return peptide
Пример #14
0
# fewer cases :class:`Atom` instances are created as it is done in the
# above example.
#
# If you want to work with an entire molecular structure, containing an
# arbitrary number of atoms, you have to use so-called atom arrays.
# An atom array can be seen as an array of atom instances
# (hence the name).
# But instead of storing :class:`Atom` instances in a list, an
# :class:`AtomArray` instance contains one :class:`ndarray` for each
# annotation and the coordinates.
# In order to see this in action, we first have to create an array from
# the atoms we constructed before.
# Then we can access the annotations and coordinates of the atom array
# simply by specifying the attribute.

# Build an atom array from the three atoms constructed before
array = struc.array([atom1, atom2, atom3])
# Each annotation and the coordinates are available as attributes
print("Chain ID:", array.chain_id)
print("Residue ID:", array.res_id)
print("Atom name:", array.atom_name)
print("Coordinates:", array.coord)
print()
# Printing the atom array itself shows its string representation
print(array)

########################################################################
# The :func:`array()` builder function takes any iterable object
# containing :class:`Atom` instances.
# If you wanted to, you could even use another :class:`AtomArray`, which
# also functions as an iterable of :class:`Atom` objects.
# An alternative way of constructing an array would be creating an
# :class:`AtomArray` by using its constructor, which fills the
# annotation arrays and coordinates with the type respective *zero*
def get_atom_features(structure_input,
                      xtc_input,
                      atomgroup,
                      element,
                      grid_input=None,
                      top_atoms=None,
                      write=None,
                      pdb_vis=True,
                      grid_write=None):
    """
    Find the density maxima of the selected atoms and the fraction of
    trajectory frames in which each maximum is occupied.

    Parameters
    ----------
    structure_input : str
        Structure file for the MDAnalysis universe.
        Its last four characters are stripped to derive output file
        names.
    xtc_input : str
        Trajectory file for the MDAnalysis universe.
    atomgroup : str
        Atom name used in the ``name ...`` selections.
        It is also the atom name of the sites written to the PDB file.
    element : str
        Element of the written site atoms and prefix of the site IDs.
    grid_input : str, optional
        Density file that is loaded with ``Grid`` instead of running a
        new density analysis.
    top_atoms : int, optional
        Maximum number of sites to featurize, taken in order of
        decreasing density.
        By default, and if the value exceeds the number of maxima, all
        maxima are featurized.
    write : bool, optional
        If ``True``, the result is written to
        ``atom_features/PocketFrequencies.txt``.
    pdb_vis : bool, optional
        If ``True``, the protein and the sites are written to
        ``<structure>_IonSites.pdb`` with the occupation frequencies
        as temperature factors.
    grid_write : optional
        If not ``None``, a newly computed density is exported to
        ``<structure><atomgroup>_density.dx``.

    Returns
    -------
    atom_frequencies : list
        One ``[atom_ID, atom_location, pocket_occupation_frequency]``
        entry per featurized site.
    """
    u = mda.Universe(structure_input, xtc_input)

    if pdb_vis is True:
        protein = u.select_atoms("protein")
        pdb_outname = structure_input[0:-4] + "_IonSites.pdb"
        # Select the first frame before the protein is written
        u.trajectory[0]
        protein.write(pdb_outname)

    # The density is either computed from the universe or read from file
    if grid_input is None:
        density_atomgroup = u.select_atoms("name " + atomgroup)
        D = DensityAnalysis(density_atomgroup, delta=1.0)
        D.run()
        if grid_write is not None:
            D.density.convert_density("Angstrom^{-3}")
            D.density.export(structure_input[:-4] + atomgroup + "_density.dx",
                             type="double")
        g = D.density
    else:
        g = Grid(grid_input)

    # Convert the density to a probability
    atom_number = len(u.select_atoms('name ' + atomgroup))
    grid_values = np.array(g.grid)
    grid_data = grid_values * atom_number / np.sum(grid_values)

    # Mask all grid points whose probability does not exceed the
    # average probability of the non-empty grid points
    average_probability_density = atom_number / np.count_nonzero(grid_data)
    grid_data[grid_data <= average_probability_density] = 0.0

    xyz, val = local_maxima_3D(grid_data)
    # Negate the values to sort from most to least probable
    val_sort = np.argsort(-1 * val.copy())
    coords = [xyz[i] for i in val_sort]
    # Strip the enclosing brackets of the coordinate representation
    maxdens_coord_str = [str(item)[1:-1] for item in coords]

    atom_frequencies = []

    if top_atoms is None or top_atoms > len(coords):
        top_atoms = len(coords)

    n_frames = len(u.trajectory)

    print('\n')
    print('Featurizing ', top_atoms, ' Atoms')
    for atom_no in range(top_atoms):
        print('\n')
        print('Atom no: ', atom_no)
        print('\n')

        # Atoms within a sphere of radius 2 around the density maximum
        selection = ('name ' + atomgroup + ' and point ' +
                     maxdens_coord_str[atom_no] + ' 2')

        # Count the frames in which the site is empty.
        # The former 'counting.count(-1)' on a list of lists never
        # matched, so every frequency was reported as 1.0.
        empty_frames = 0
        for frame_no in tqdm(range(n_frames)):
            u.trajectory[frame_no]
            if len(u.select_atoms(selection).indices) == 0:
                empty_frames += 1

        atom_ID = element + str(atom_no + 1)
        pocket_occupation_frequency = 1 - empty_frames / n_frames
        # Shift the grid coordinates by the grid origin
        atom_location = coords[atom_no] + g.origin

        atom_frequencies.append(
            [atom_ID, atom_location, pocket_occupation_frequency])

        # PDB visualisation:
        # add the sites to the PDB file one by one as they are processed
        if pdb_vis is True:
            # Read the file into Biotite's structure object (atom array)
            atom_array = strucio.load_structure(pdb_outname)
            # Add an HETATM
            atom = struc.Atom(
                coord=atom_location,
                chain_id="W",
                # The residue ID is the last ID in the file +1
                res_id=atom_array.res_id[-1] + 1,
                res_name=atom_ID,
                hetero=True,
                atom_name=atomgroup,
                element=element)
            atom_array += struc.array([atom])
            # Save edited structure
            strucio.save_structure(pdb_outname, atom_array)

    if pdb_vis is True:
        u_pdb = mda.Universe(pdb_outname)

        u_pdb.add_TopologyAttr('tempfactors')
        # Write values as beta-factors ("tempfactors") to a PDB file;
        # the sites are the last 'top_atoms' residues of the file
        first_site = len(u_pdb.residues) - top_atoms
        for res, (_, _, frequency) in enumerate(atom_frequencies):
            u_pdb.residues[first_site + res].atoms.tempfactors = frequency
        u_pdb.atoms.write(pdb_outname)

    if write is True:
        os.makedirs('atom_features/', exist_ok=True)
        filename = 'atom_features/PocketFrequencies.txt'
        with open(filename, 'w') as output:
            for row in atom_frequencies:
                output.write(str(row)[1:-1] + '\n')

    return atom_frequencies
def get_water_features(structure_input,
                       xtc_input,
                       atomgroup,
                       grid_wat_model=None,
                       grid_input=None,
                       top_waters=30,
                       write=None,
                       pdb_vis=True):
    """
    Find the density maxima of the selected water atoms and, for each
    maximum, the per-frame orientation of the water occupying it and
    the fraction of frames in which it is occupied.

    Parameters
    ----------
    structure_input : str
        Structure file for the MDAnalysis universe.
        Its last four characters are stripped to derive output file
        names.
    xtc_input : str
        Trajectory file for the MDAnalysis universe.
    atomgroup : str
        Atom name used in the ``name ...`` selections.
        It is also the atom name of the sites written to the PDB file.
    grid_wat_model : optional
        If not ``None``, a newly computed density is converted with
        this value and exported to
        ``<structure><atomgroup>_density.dx``.
    grid_input : str, optional
        Density file that is loaded with ``Grid`` instead of running a
        new density analysis.
    top_waters : int, optional
        Maximum number of sites to featurize, taken in order of
        decreasing density.
    write : bool, optional
        If ``True``, the per-site (psi, phi) lists and the occupation
        frequencies are written to files below ``water_features/``.
    pdb_vis : bool, optional
        If ``True``, the protein and the sites are written to
        ``<structure>_WaterSites.pdb`` with the occupation frequencies
        as temperature factors.

    Returns
    -------
    water_frequencies : list
        One ``[water_ID, atom_location, occupation_frequency]`` entry
        per featurized site.
    """
    # Local import: only the occupancy bookkeeping below needs it
    from collections import Counter

    u = mda.Universe(structure_input, xtc_input)

    if pdb_vis is True:
        protein = u.select_atoms("protein")
        pdb_outname = structure_input[0:-4] + "_WaterSites.pdb"
        # Select the first frame before the protein is written
        u.trajectory[0]
        protein.write(pdb_outname)

    if grid_input is None:
        density_atomgroup = u.select_atoms("name " + atomgroup)
        # a resolution of delta=1.0 ensures the coordinates of the maxima
        # match the coordinates of the simulation box
        D = DensityAnalysis(density_atomgroup, delta=1.0)
        D.run()
        if grid_wat_model is not None:
            D.density.convert_density(grid_wat_model)
            D.density.export(structure_input[:-4] + atomgroup + "_density.dx",
                             type="double")
        g = D.density
    else:
        g = Grid(grid_input)

    xyz, val = local_maxima_3D(g.grid)
    # Negate the values to sort from most to least probable
    val_sort = np.argsort(-1 * val.copy())
    coords = [xyz[i] for i in val_sort]
    # Strip the enclosing brackets of the coordinate representation
    maxdens_coord_str = [str(item)[1:-1] for item in coords]
    water_frequencies = []

    if top_waters > len(coords):
        top_waters = len(coords)

    n_frames = len(u.trajectory)

    print('\n')
    print('Featurizing ', top_waters, ' Waters')
    for wat_no in range(top_waters):
        print('\n')
        print('Water no: ', wat_no)
        print('\n')
        philist = []
        psilist = []

        # Atoms within a sphere of radius 3.5 around the density maximum
        selection = ('name ' + atomgroup + ' and point ' +
                     maxdens_coord_str[wat_no] + ' 3.5')

        # First pass: indices of the selected atoms in the pocket,
        # collected for each frame
        counting = []
        for frame_no in tqdm(range(n_frames)):
            u.trajectory[frame_no]
            counting.append(u.select_atoms(selection).indices)

        # Number of frames each atom index spends in the pocket;
        # counted once instead of rescanning a flat list in every frame
        occupancy = Counter(index for frame_indices in counting
                            for index in frame_indices)

        # Second pass: (psi, phi) of the water occupying the pocket
        # in each frame
        for frame_no in tqdm(range(n_frames)):
            u.trajectory[frame_no]
            waters_resid = counting[frame_no]

            if len(waters_resid) == 0:
                # 10000.0 = no waters bound
                psilist.append(10000.0)
                philist.append(10000.0)
                continue

            if len(waters_resid) == 1:
                chosen = waters_resid[0]
            else:
                # Several waters in the pocket: use the one that
                # occupies it most frequently. Searching in reverse
                # order keeps the former tie-breaking, i.e. the last
                # listed index wins among equally frequent ones.
                chosen = max(waters_resid[::-1], key=occupancy.__getitem__)

            # (x,y,z) positions of all atoms of the chosen water residue
            water_atom_positions = [
                list(pos)
                for pos in u.select_atoms('byres index ' +
                                          str(chosen)).positions
            ]
            psi, phi = get_dipole(water_atom_positions)
            psilist.append(psi)
            philist.append(phi)

        water_out = [psilist, philist]
        water_ID = "O" + str(wat_no + 1)
        water_pocket_occupation_frequency = 1 - psilist.count(10000.0) / len(
            psilist)
        # Shift the grid coordinates by the grid origin
        atom_location = coords[wat_no] + g.origin

        water_frequencies.append(
            [water_ID, atom_location, water_pocket_occupation_frequency])

        # Write out the water features into a subdirectory
        if write is True:
            filename = 'water_features/' + structure_input[
                0:-4] + water_ID + '.txt'
            # Also covers a 'structure_input' with directory components
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            with open(filename, 'w') as output:
                for row in water_out:
                    output.write(str(row)[1:-1] + '\n')

        # PDB visualisation:
        # add the waters to the PDB file one by one as they are processed
        if pdb_vis is True:
            # Read the file into Biotite's structure object (atom array)
            atom_array = strucio.load_structure(pdb_outname)
            # Add an HETATM
            atom = struc.Atom(
                coord=atom_location,
                chain_id="W",
                # The residue ID is the last ID in the file +1
                res_id=atom_array.res_id[-1] + 1,
                res_name=water_ID,
                hetero=True,
                atom_name=atomgroup,
                element="O")
            atom_array += struc.array([atom])
            # Save edited structure
            strucio.save_structure(pdb_outname, atom_array)

    if pdb_vis is True:
        u_pdb = mda.Universe(pdb_outname)
        u_pdb.add_TopologyAttr('tempfactors')
        # Write values as beta-factors ("tempfactors") to a PDB file;
        # the sites are the last 'top_waters' residues of the file
        first_site = len(u_pdb.residues) - top_waters
        for res, (_, _, frequency) in enumerate(water_frequencies):
            u_pdb.residues[first_site + res].atoms.tempfactors = frequency
        u_pdb.atoms.write(pdb_outname)

    if write is True:
        filename = 'water_features/' + structure_input[
            0:-4] + 'WaterPocketFrequencies.txt'
        # The directory does not exist yet if no site was featurized
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with open(filename, 'w') as output:
            for row in water_frequencies:
                output.write(str(row)[1:-1] + '\n')

    return water_frequencies