Пример #1
0
def test_hbond_structure(pdb_id):
    """
    Check that :func:`hbond()` finds the same donor-hydrogen-acceptor
    triplets as the Baker-Hubbard implementation of MDTraj for the
    structure with the given PDB ID.
    """
    import mdtraj

    file_name = join(data_dir("structure"), pdb_id + ".mmtf")

    array = load_structure(file_name)
    # Only consider amino acids for consistency
    # with bonded hydrogen detection in MDTraj
    array = array[..., struc.filter_amino_acids(array)]
    # For consistency with MDTraj 'S' cannot be acceptor element
    # https://github.com/mdtraj/mdtraj/blob/master/mdtraj/geometry/hbond.py#L365
    if isinstance(array, struc.AtomArrayStack):
        # For a stack a second value (a mask) is returned as well,
        # which is not needed for this comparison
        triplets, _ = struc.hbond(array, acceptor_elements=("O", "N"))
    else:
        triplets = struc.hbond(array, acceptor_elements=("O", "N"))

    # Save to new pdb file for consistent treatment of inscode/altloc
    # in MDTraj
    # The context manager closes the temporary file,
    # even if saving or loading raises an exception
    with NamedTemporaryFile("w+", suffix=".pdb") as temp:
        save_structure(temp.name, array)
        # Compare with MDTraj
        traj = mdtraj.load(temp.name)
    triplets_ref = mdtraj.baker_hubbard(traj, freq=0, periodic=False)

    # Both packages may use different order
    # -> use set for comparison
    triplets_set = {tuple(tri) for tri in triplets}
    triplets_ref_set = {tuple(tri) for tri in triplets_ref}
    assert triplets_set == triplets_ref_set
Пример #2
0
def write_atom_to_pdb(pdb_outname, atom_location, atom_ID, atomgroup,
                      element="O"):
    """
    Append a new hetero atom to a reference structure file to visualise
    conserved non-protein atom sites.

    The structure is loaded from `pdb_outname`, the atom is appended as
    a new residue in chain "X" and the result is written back to the
    same file.

    Parameters
    ----------
    pdb_outname : str
        Filename of reference structure. The file is overwritten.
    atom_location : array
        (x,y,z) coordinates of the atom location with respect to the reference structure.
    atom_ID : str
        A unique ID for the atom. It is used as residue name.
    atomgroup : str
        Name describing the atom. It is used as atom name.
    element : str, optional
        Chemical element of the new atom.
        Defaults to "O", the previously hard-coded value.

    """
    # Read the file into Biotite's structure object (atom array)
    atom_array = strucio.load_structure(pdb_outname)
    # The residue ID is the last ID in the file +1
    res_id = atom_array.res_id[-1] + 1
    # Add an HETATM
    atom = struc.Atom(
        coord=atom_location,
        chain_id="X",
        res_id=res_id,
        res_name=atom_ID,
        hetero=True,
        atom_name=atomgroup,
        element=element)
    atom_array += struc.array([atom])
    # Save edited structure
    strucio.save_structure(pdb_outname, atom_array)
Пример #3
0
def test_dihedral_backbone_result(file_name):
    """
    Compare the backbone dihedral angles computed by
    :func:`dihedral_backbone()` with the angles computed by MDTraj
    for each chain of the first model of the given MMTF file.
    """
    import mdtraj

    mmtf_file = mmtf.MMTFFile.read(file_name)
    array = mmtf.get_structure(mmtf_file, model=1)
    array = array[struc.filter_amino_acids(array)]
    if array.array_length() == 0:
        # Structure contains no protein
        # -> determination of backbone angles makes no sense
        return

    for chain in struc.chain_iter(array):
        print("Chain: ", chain.chain_id[0])
        if len(struc.check_res_id_continuity(chain)) != 0:
            # Do not test discontinuous chains
            # (this ends the test, later chains are not checked either)
            return
        test_phi, test_psi, test_ome = struc.dihedral_backbone(chain)

        # The context manager closes the temporary file,
        # even if saving or loading raises an exception
        with NamedTemporaryFile("w+", suffix=".pdb") as temp:
            strucio.save_structure(temp.name, chain)
            traj = mdtraj.load(temp.name)
        _, ref_phi = mdtraj.compute_phi(traj)
        _, ref_psi = mdtraj.compute_psi(traj)
        _, ref_ome = mdtraj.compute_omega(traj)
        # Only the angles of the first (and only) frame are compared
        ref_phi, ref_psi, ref_ome = ref_phi[0], ref_psi[0], ref_ome[0]

        # The first phi and the last psi/omega value are excluded
        # to match the number of reference angles
        assert test_phi[1:] == pytest.approx(ref_phi, abs=1e-5, rel=5e-3)
        assert test_psi[:-1] == pytest.approx(ref_psi, abs=1e-5, rel=5e-3)
        assert test_ome[:-1] == pytest.approx(ref_ome, abs=1e-5, rel=5e-3)
Пример #4
0
def test_saving(suffix):
    """
    Check if loading a structure from a file written via
    :func:`save_structure()` gives the same result as the input to
    :func:`save_structure()`.
    """
    path = join(data_dir("structure"), "1l2y.mmtf")
    ref_array = strucio.load_structure(path)
    if suffix in ("trr", "xtc", "tng", "dcd", "netcdf"):
        # Reading a trajectory file requires a template
        template = path
    else:
        template = None

    # The file is created with 'delete=False', so it must be removed
    # manually; 'finally' ensures this also happens if saving or
    # loading fails
    temp = NamedTemporaryFile("w", suffix=f".{suffix}", delete=False)
    try:
        strucio.save_structure(temp.name, ref_array)
        temp.close()
        test_array = strucio.load_structure(temp.name, template)
    finally:
        # Closing an already closed file is a no-op
        temp.close()
        os.remove(temp.name)

    for category in ref_array.get_annotation_categories():
        if category == "chain_id" and suffix == "gro":
            # The chain ID is not written to GRO files
            continue
        assert test_array.get_annotation(category).tolist() \
            ==  ref_array.get_annotation(category).tolist()
    assert test_array.coord.flatten().tolist() == pytest.approx(
        ref_array.coord.flatten().tolist(), abs=1e-2)
def create(pdb_id, directory, include_gro):
    """
    Download the structure `pdb_id` in multiple file formats into
    `directory` and derive an NPZ file (and optionally a GRO file)
    from the downloaded PDB file.
    """
    # Download *.pdb, *.cif and *.mmtf
    for file_format in ("pdb", "cif", "mmtf"):
        rcsb.fetch(pdb_id, file_format, directory, overwrite=True)

    try:
        array = strucio.load_structure(join(directory, pdb_id + ".pdb"))
    except biotite.InvalidFileError:
        # Structure probably contains multiple models with different
        # number of atoms
        # -> Cannot load AtomArrayStack
        # -> Skip writing GRO and NPZ file
        return

    # Create *.npz file
    npz_file_name = join(directory, pdb_id + ".npz")
    strucio.save_structure(npz_file_name, array)

    if not include_gro:
        return

    # Create *.gro file using GROMACS
    # Clean PDB file -> remove inscodes and altlocs
    cleaned_file_name = biotite.temp_file("pdb")
    strucio.save_structure(cleaned_file_name, array)
    # Run GROMACS for file conversion, its output is discarded
    gro_file_name = join(directory, pdb_id + ".gro")
    command = ["editconf", "-f", cleaned_file_name, "-o", gro_file_name]
    subprocess.run(
        command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
    )
Пример #6
0
def test_dihedral_backbone_result(file_name):
    """
    Compare the backbone dihedral angles from
    :func:`dihedral_backbone()` with the angles computed by MDTraj
    for each chain of the first model of the given MMTF file.
    """
    import mdtraj

    mmtf_file = mmtf.MMTFFile()
    mmtf_file.read(file_name)
    protein = mmtf.get_structure(mmtf_file, model=1)
    protein = protein[struc.filter_amino_acids(protein)]

    for chain in struc.chain_iter(protein):
        print("Chain: ", chain.chain_id[0])
        discontinuities = struc.check_id_continuity(chain)
        if len(discontinuities) != 0:
            # Do not test discontinuous chains
            return
        test_phi, test_psi, test_ome = struc.dihedral_backbone(chain)

        # MDTraj reads the chain from a temporary PDB file
        pdb_file_name = biotite.temp_file("pdb")
        strucio.save_structure(pdb_file_name, chain)
        traj = mdtraj.load(pdb_file_name)
        _, phi_frames = mdtraj.compute_phi(traj)
        _, psi_frames = mdtraj.compute_psi(traj)
        _, ome_frames = mdtraj.compute_omega(traj)
        # Only the first frame is compared
        ref_phi = phi_frames[0]
        ref_psi = psi_frames[0]
        ref_ome = ome_frames[0]

        # The first phi and the last psi/omega value are excluded
        # to match the number of reference angles
        assert test_phi[1:] == pytest.approx(ref_phi, abs=1e-5, rel=5e-3)
        assert test_psi[:-1] == pytest.approx(ref_psi, abs=1e-5, rel=5e-3)
        assert test_ome[:-1] == pytest.approx(ref_ome, abs=1e-5, rel=5e-3)
Пример #7
0
def test_saving_with_extra_args(suffix):
    """
    Test if giving a wrong optional parameter to
    :func:`save_structure()` raises a :class:`TypeError`
    """
    array = strucio.load_structure(join(data_dir("structure"), "1l2y.mmtf"))
    # The context manager closes the temporary file,
    # even if the expected exception is not raised
    with NamedTemporaryFile("w+", suffix=f".{suffix}") as temp:
        with pytest.raises(TypeError):
            strucio.save_structure(temp.name, array, answer=42)
Пример #8
0
def test_small_molecule():
    """
    Check if loading a small molecule file written via
    :func:`save_structure()` gives the same result as the input to
    :func:`save_structure()`.
    """
    path = join(data_dir("structure"), "molecules", "TYR.sdf")
    ref_array = strucio.load_structure(path)

    # The file is created with 'delete=False', so it must be removed
    # manually; 'finally' ensures this also happens if saving or
    # loading fails
    temp = NamedTemporaryFile("w", suffix=".sdf", delete=False)
    try:
        strucio.save_structure(temp.name, ref_array)
        temp.close()
        test_array = strucio.load_structure(temp.name)
    finally:
        # Closing an already closed file is a no-op
        temp.close()
        os.remove(temp.name)

    assert test_array == ref_array
Пример #9
0
def create(pdb_id, directory, include_gro):
    """
    Download the structure `pdb_id` in multiple file formats into
    `directory` and optionally convert the PDB file into a GRO file.
    """
    # Download *.pdb, *.cif and *.mmtf
    for file_format in ("pdb", "cif", "mmtf"):
        rcsb.fetch(pdb_id, file_format, directory)

    if not include_gro:
        return

    # Create *.gro file using GROMACS
    # Clean PDB file -> remove inscodes and altlocs
    array = strucio.load_structure(join(directory, pdb_id + ".pdb"))
    cleaned_file_name = biotite.temp_file("pdb")
    strucio.save_structure(cleaned_file_name, array)
    # Run GROMACS for file conversion, its output is discarded
    gro_file_name = join(directory, pdb_id + ".gro")
    command = [
        "gmx", "editconf", "-f", cleaned_file_name, "-o", gro_file_name
    ]
    subprocess.run(
        command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
    )
Пример #10
0
def test_gro_id_overflow():
    """
    Check that a structure with very large atom and residue IDs
    keeps its number of atoms after a GRO file write/read roundtrip.
    """
    # Create an oversized AtomArray where atom_id > 100000 and res_id > 10000
    num_atoms = 100005
    atom_list = []
    for res_id in range(1, num_atoms + 1):
        atom_list.append(
            Atom([1, 2, 3],
                 atom_name="CA",
                 element="C",
                 res_name="X",
                 res_id=res_id)
        )
    atoms = array(atom_list)
    atoms.box = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])

    # Write .gro file
    gro_file_name = biotite.temp_file(".gro")
    io.save_structure(gro_file_name, atoms)

    # Read .gro file
    structure = gro.GROFile.read(gro_file_name).get_structure()

    assert structure.array_length() == num_atoms
Пример #11
0
def test_gro_no_box():
    """
    The .gro file format requires valid box parameters at the end of
    each model.
    However, a structure without a box that is written to and read
    from such a file should still have no box assigned.
    """
    # Create an AtomArray consisting of a single atom without a box
    atoms = array([
        Atom([1, 2, 3], atom_name="CA", element="C", res_name="X", res_id=1)
    ])

    # Write .gro file
    gro_file_name = biotite.temp_file(".gro")
    io.save_structure(gro_file_name, atoms)

    # Read in file
    structure = gro.GROFile.read(gro_file_name).get_structure()

    # Assert no box with 0 dimension
    assert structure.box is None
def get_water_features(structure_input,
                       xtc_input,
                       atomgroup,
                       grid_wat_model=None,
                       grid_input=None,
                       top_waters=30,
                       write=None,
                       pdb_vis=True):
    """
    Featurize the most probable water sites of a trajectory.

    The local maxima of the density grid of the `atomgroup` atoms are
    used as water sites, ordered from the highest to the lowest density
    value.
    For each of the first `top_waters` sites and for each frame, the
    (psi, phi) values returned by ``get_dipole()`` are collected for
    the water that occupies the site.

    Parameters
    ----------
    structure_input : str
        File name of the structure used to create the universe.
        Its last four characters (the file extension) are stripped to
        build the names of the output files.
    xtc_input : str
        File name of the trajectory.
    atomgroup : str
        Atom name of the water atom used for the density and for the
        selection of waters around a site.
    grid_wat_model : str, optional
        If given and `grid_input` is None, the computed density is
        converted via ``convert_density(grid_wat_model)`` and exported
        as ``.dx`` file.
    grid_input : str, optional
        File name of a precomputed density grid.
        If None, the density is computed from the trajectory.
    top_waters : int, optional
        Maximum number of water sites to featurize.
        It is capped at the number of detected density maxima.
    write : bool, optional
        If True, the features and the site frequencies are written
        into the directory ``water_features/``.
    pdb_vis : bool, optional
        If True, a ``*_WaterSites.pdb`` file is written, containing the
        protein and one atom per water site with the occupation
        frequency as temperature factor.

    Returns
    -------
    water_frequencies : list
        One ``[water_ID, atom_location, occupation_frequency]`` entry
        for each featurized site.
    """
    # Function-scope import, so the block does not depend on
    # module-level imports outside of it
    from collections import Counter

    u = mda.Universe(structure_input, xtc_input)

    if pdb_vis is True:
        protein = u.select_atoms("protein")
        pdb_outname = structure_input[0:-4] + "_WaterSites.pdb"
        # Write the protein coordinates of the first frame
        u.trajectory[0]
        protein.write(pdb_outname)

    if grid_input is None:
        density_atomgroup = u.select_atoms("name " + atomgroup)
        # a resolution of delta=1.0 ensures the coordinates of the maxima match the coordinates of the simulation box
        D = DensityAnalysis(density_atomgroup, delta=1.0)
        D.run()
        if grid_wat_model is not None:
            D.density.convert_density(grid_wat_model)
            D.density.export(structure_input[:-4] + atomgroup + "_density.dx",
                             type="double")
        g = D.density
    else:
        g = Grid(grid_input)

    xyz, val = local_maxima_3D(g.grid)
    ##negate the array to get descending order from most prob to least prob
    val_sort = np.argsort(-1 * val.copy())
    coords = [xyz[i] for i in val_sort]
    maxdens_coord_str = [str(item)[1:-1] for item in coords]
    water_frequencies = []

    if top_waters > len(coords):
        top_waters = len(coords)

    # Create the output directory up front: the summary file at the end
    # is also written, if not a single water site is featurized
    if write is True:
        os.makedirs('water_features/', exist_ok=True)

    n_frames = len(u.trajectory)
    ##radius of the sphere centered on the water prob density maxima
    radius = ' 3.5'

    print('\n')
    print('Featurizing ', top_waters, ' Waters')
    for wat_no in range(top_waters):
        print('\n')
        print('Water no: ', wat_no)
        print('\n')
        philist = []
        psilist = []

        ##list all water oxygens within the sphere for each frame
        counting = []
        for frame_no in tqdm(range(n_frames)):
            u.trajectory[frame_no]
            atomgroup_IDS = u.select_atoms('name ' + atomgroup +
                                           ' and point ' +
                                           maxdens_coord_str[wat_no] +
                                           radius).indices
            counting.append(atomgroup_IDS)

        ##number of frames each water ID appears in that pocket
        ##(counted once instead of rescanning a flat list for each frame)
        occupancy = Counter(item for sublist in counting for item in sublist)

        ###extracting (psi,phi) coordinates for each water dipole specific to the frame they are bound
        for frame_no in tqdm(range(n_frames)):
            u.trajectory[frame_no]
            waters_resid = counting[frame_no]
            ##10000.0 = no waters bound
            if len(waters_resid) < 1:
                psilist.append(10000.0)
                philist.append(10000.0)
                continue

            if len(waters_resid) == 1:
                water_index = waters_resid[0]
            else:
                ##if multiple waters in pocket then use water with largest frequency of pocket occupation
                ##iterating in reverse order selects the last of equally
                ##frequent waters, as the former stable sort did
                water_index = max(waters_resid[::-1],
                                  key=lambda index: occupancy[index])
            ##(x,y,z) positions for the water atoms (residue) at this frame
            water_atom_positions = [
                list(pos)
                for pos in u.select_atoms('byres index ' +
                                          str(water_index)).positions
            ]
            psi, phi = get_dipole(water_atom_positions)
            psilist.append(psi)
            philist.append(phi)

        water_out = [psilist, philist]
        water_ID = "O" + str(wat_no + 1)
        water_pocket_occupation_frequency = 1 - psilist.count(10000.0) / len(
            psilist)
        # Shifting the coordinates by the grid origin
        atom_location = coords[wat_no] + g.origin

        water_frequencies.append(
            [water_ID, atom_location, water_pocket_occupation_frequency])

        ##WRITE OUT WATER FEATURES INTO SUBDIRECTORY
        if write is True:
            filename = 'water_features/' + structure_input[
                0:-4] + water_ID + '.txt'
            with open(filename, 'w') as output:
                for row in water_out:
                    output.write(str(row)[1:-1] + '\n')

        ##PDB_VISUALISATION
        ##add waters to the pdb file one by one as they are processed
        if pdb_vis is True:
            # Read the file into Biotite's structure object (atom array)
            atom_array = strucio.load_structure(pdb_outname)
            # Add an HETATM
            atom = struc.Atom(
                coord=atom_location,
                chain_id="W",
                # The residue ID is the last ID in the file +1
                res_id=atom_array.res_id[-1] + 1,
                res_name=water_ID,
                hetero=True,
                atom_name=atomgroup,
                element="O")
            atom_array += struc.array([atom])
            # Save edited structure
            strucio.save_structure(pdb_outname, atom_array)

    if pdb_vis is True:
        u_pdb = mda.Universe(pdb_outname)
        u_pdb.add_TopologyAttr('tempfactors')
        # Write values as beta-factors ("tempfactors") to a PDB file
        for res, water_frequency in enumerate(water_frequencies):
            # The water sites are the last 'top_waters' residues
            water_resid = len(u_pdb.residues) - top_waters + res
            u_pdb.residues[water_resid].atoms.tempfactors = water_frequency[2]
        u_pdb.atoms.write(pdb_outname)

    if write is True:
        filename = 'water_features/' + structure_input[
            0:-4] + 'WaterPocketFrequencies.txt'
        with open(filename, 'w') as output:
            for row in water_frequencies:
                output.write(str(row)[1:-1] + '\n')

    return water_frequencies
Пример #13
0
def test_saving_with_extra_args(suffix):
    """
    Check that :func:`save_structure()` raises a :class:`TypeError`,
    if an unknown keyword argument is given.
    """
    structure = strucio.load_structure(join(data_dir, "1l2y.mmtf"))
    with pytest.raises(TypeError):
        strucio.save_structure(
            biotite.temp_file("1l2y." + suffix), structure, answer=42
        )
Пример #14
0
def test_saving(suffix):
    """
    Check that a structure can be saved via :func:`save_structure()`
    to a file with the given suffix without raising an exception.
    """
    structure = strucio.load_structure(join(data_dir, "1l2y.mmtf"))
    out_file_name = biotite.temp_file("1l2y." + suffix)
    strucio.save_structure(out_file_name, structure)
Пример #15
0
mode_vectors = vectors[MODE].reshape((-1, 3))
# Rescale in-place, so that the largest vector has the length
# 'MAX_AMPLITUDE'
vector_lengths = np.sqrt(np.sum(mode_vectors**2, axis=-1))
scale = MAX_AMPLITUDE / np.max(vector_lengths)
mode_vectors *= scale

# Stepwise application of eigenvectors as smooth sine oscillation:
# one full period sampled at 'FRAMES' points
time = np.linspace(0, 2 * np.pi, FRAMES, endpoint=False)
deviation = np.sin(time)[:, newaxis, newaxis] * mode_vectors

# Apply oscillation of CA atom to all atoms in the corresponding residue
oscillation = np.zeros((FRAMES, len(protein_chain), 3))
# The last array element will be the length of the atom array,
# i.e. no valid index, but the exclusive stop of the last residue
residue_starts = struc.get_residue_starts(
    protein_chain, add_exclusive_stop=True
)
residue_ranges = zip(residue_starts[:-1], residue_starts[1:])
for i, (res_start, res_stop) in enumerate(residue_ranges):
    residue_coord = protein_chain.coord[res_start:res_stop, :]
    oscillation[:, res_start:res_stop, :] \
        = residue_coord + deviation[:, i:i+1, :]

# An atom array stack containing all frames
oscillating_structure = struc.from_template(protein_chain, oscillation)
# Save as PDB for rendering a video with PyMOL
temp = NamedTemporaryFile(suffix=".pdb")
strucio.save_structure(temp.name, oscillating_structure)
# biotite_static_image = normal_modes.gif

temp.close()
Пример #16
0
        leaflet_masks[i] = struc.get_residue_masks(structure, head_leaflet) \
                                .any(axis=0)
    return leaflet_masks


# This PDB file omits the 'chemical element' column,
# hence the warning that elements were guessed is suppressed
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    structure = strucio.load_structure(PDB_FILE_PATH)
# We cannot go over periodic boundaries in this case,
# because the input PDB does not define a box -> periodic=False
# However, as we have a planar lipid bilayer,
# periodicity should not matter
phosphate_mask = (structure.res_name == "DPP") & (structure.atom_name == "P")
leaflets = find_leaflets(structure, head_atom_mask=phosphate_mask)
# Bilayer -> Expect two leaflets
assert len(leaflets) == 2
# Mark leaflets using different chain IDs
for leaflet_mask, chain_id in zip(leaflets, ("A", "B")):
    structure.chain_id[leaflet_mask] = chain_id

# Save marked lipids to structure file
temp = NamedTemporaryFile(suffix=".pdb")
strucio.save_structure(temp.name, structure)
# Visualization with PyMOL...

temp.close()
Пример #17
0
# Since programmers are usually lazy and do not want to write more code
# than necessary, there are two convenient functions for loading and
# saving atom arrays or stacks, unifying the aforementioned file formats:
# :func:`load_structure()` takes a file path and outputs an array
# (or stack, if the file contains multiple models).
# Internally, this function uses the appropriate :class:`File` class,
# depending on the file format.
# The analogous :func:`save_structure()` function provides a shortcut for
# writing to structure files.
# The desired file format is inferred from the extension of the
# provided file name.

import biotite.structure.io as strucio
# The file format is chosen based on the file name in both directions
stack_from_pdb = strucio.load_structure(pdb_file_path)
stack_from_cif = strucio.load_structure(cif_file_path)
cif_out_path = biotite.temp_file("cif")
strucio.save_structure(cif_out_path, stack_from_pdb)

########################################################################
# Reading trajectory files
# ^^^^^^^^^^^^^^^^^^^^^^^^
#
# If the package *MDTraj* is installed, *Biotite* provides a read/write
# interface for different trajectory file formats.
# More information can be found in the API reference.
#
# Array indexing and filtering
# ----------------------------
#
# .. currentmodule:: biotite.structure
#
# Atom arrays and stacks can be indexed in a similar way a
Пример #18
0
                       & (peptide.atom_name == "O"))[0][0]
    coord_oxt = calculate_atom_coord_by_z_rotation(peptide.coord[index_c],
                                                   peptide.coord[index_o],
                                                   connect_angle, C_O_LENGTH)
    coord_hxt = calculate_atom_coord_by_z_rotation(coord_oxt,
                                                   peptide.coord[index_c],
                                                   connect_angle, O_H_LENGTH)
    atom_oxt = struc.Atom(coord_oxt,
                          chain_id="A",
                          res_id=last_id,
                          res_name=peptide[index_c].res_name,
                          atom_name="OXT",
                          element="O")
    atom_hxt = struc.Atom(coord_hxt,
                          chain_id="A",
                          res_id=last_id,
                          res_name=peptide[index_c].res_name,
                          atom_name="HXT",
                          element="H")
    peptide = peptide + struc.array([atom_oxt, atom_hxt])
    peptide.bonds.add_bond(index_c, -2, struc.BondType.SINGLE)  # C-OXT
    peptide.bonds.add_bond(-2, -1, struc.BondType.SINGLE)  # OXT-HXT

    return peptide


# Build the peptide for an example sequence and save it as MMTF file
sequence = seq.ProteinSequence("TITANITE")
atom_array = assemble_peptide(sequence)
mmtf_file_name = biotite.temp_file("mmtf")
strucio.save_structure(mmtf_file_name, atom_array)
# Visualization with PyMOL...
# biotite_static_image = peptide_assembly.png
Пример #19
0
# Print a table of all assemblies listed in the file
assemblies = pdbx.list_assemblies(pdbx_file)
print("ID    name", end="\n\n")
for assembly_id, name in assemblies.items():
    print(f"{assembly_id:2}    {name}")

########################################################################
# ``'complete icosahedral assembly'`` sounds good.
# In fact, often the first assembly is the complete one.
# Hence, the :func:`get_assembly()` function builds the first assembly
# by default.
# Since we know the ID we want (``'1'``), we will provide it to this
# function anyway.
# It returns the chosen assembly as :class:`AtomArray`.
# Note that the assembly ID is a string, not an integer.

biological_unit = pdbx.get_assembly(
    pdbx_file, assembly_id="1", model=1
)
chain_count = struc.get_chain_count(biological_unit)
print("Number of protein chains:", chain_count)

########################################################################
# Now we could do some analysis on the biological unit.
# But for this example we will simply save the entire assembly as *CIF*
# file for later visualization.

# For brevity, save only CA atoms to file for visualization
ca_mask = biological_unit.atom_name == "CA"
biological_unit = biological_unit[ca_mask]
temp = NamedTemporaryFile(suffix=".cif")
strucio.save_structure(temp.name, biological_unit)
# Visualization with PyMOL...

temp.close()
Пример #20
0
def test_saving(suffix):
    """
    Check that a structure can be saved via :func:`save_structure()`
    to a file with the given suffix without raising an exception.
    """
    array = strucio.load_structure(join(data_dir("structure"), "1l2y.mmtf"))
    # The context manager closes the temporary file,
    # even if saving raises an exception
    with NamedTemporaryFile("w+", suffix=f".{suffix}") as temp:
        strucio.save_structure(temp.name, array)
Пример #21
0
# saving atom arrays or stacks, unifying the aforementioned file formats:
# :func:`load_structure()` takes a file path and outputs an array
# (or stack, if the file contains multiple models).
# Internally, this function uses the appropriate :class:`File` class,
# depending on the file format.
# The analogous :func:`save_structure()` function provides a shortcut
# for writing to structure files.
# The desired file format is inferred from the extension of the
# provided file name.

import biotite.structure.io as strucio

stack_from_pdb = strucio.load_structure(pdb_file_path)
stack_from_cif = strucio.load_structure(cif_file_path)
# The context manager closes the temporary file,
# even if saving the structure fails
with NamedTemporaryFile(suffix=".cif") as temp_file:
    strucio.save_structure(temp_file.name, stack_from_pdb)

########################################################################
# Reading trajectory files
# ^^^^^^^^^^^^^^^^^^^^^^^^
#
# If the package *MDtraj* is installed, *Biotite* provides a read/write
# interface for different trajectory file formats.
# All supported trajectory formats have in common, that they store
# only coordinates.
# These can be extracted as :class:`ndarray` with the
# :func:`get_coord()` method.

from tempfile import NamedTemporaryFile
import requests
Пример #22
0
def test_saving_with_extra_args(suffix):
    """
    Test if giving a wrong optional parameter to
    :func:`save_structure()` raises a :class:`TypeError`
    """
    array = strucio.load_structure(join(data_dir("structure"), "1l2y.mmtf"))
    # The context manager closes the temporary file,
    # even if the expected exception is not raised
    with NamedTemporaryFile("w+", suffix=f".{suffix}") as temp:
        with pytest.raises(TypeError):
            strucio.save_structure(temp.name, array, answer=42)
def get_atom_features(structure_input,
                      xtc_input,
                      atomgroup,
                      element,
                      grid_input=None,
                      top_atoms=None,
                      write=None,
                      pdb_vis=True,
                      grid_write=None):
    """
    Find the most probable sites of an atom type in a trajectory and
    determine how frequently each site is occupied.

    The local maxima of the thresholded density grid of the `atomgroup`
    atoms are used as sites, ordered from the highest to the lowest
    value.
    A site is considered occupied in a frame, if at least one
    `atomgroup` atom is selected by the ``point`` selection with a
    distance of 2 around the site.

    Parameters
    ----------
    structure_input : str
        File name of the structure used to create the universe.
        Its last four characters (the file extension) are stripped to
        build the names of the output files.
    xtc_input : str
        File name of the trajectory.
    atomgroup : str
        Atom name of the atoms of interest.
    element : str
        Chemical element of the atoms of interest.
        It is used as prefix of the site IDs and as element in the
        written PDB file.
    grid_input : str, optional
        File name of a precomputed density grid.
        If None, the density is computed from the trajectory.
    top_atoms : int, optional
        Maximum number of sites to featurize.
        By default, all detected density maxima are used.
    write : bool, optional
        If True, the site frequencies are written to
        ``atom_features/PocketFrequencies.txt``.
    pdb_vis : bool, optional
        If True, a ``*_IonSites.pdb`` file is written, containing the
        protein and one atom per site with the occupation frequency
        as temperature factor.
    grid_write : optional
        If not None and `grid_input` is None, the computed density is
        converted to ``Angstrom^{-3}`` and exported as ``.dx`` file.

    Returns
    -------
    atom_frequencies : list
        One ``[atom_ID, atom_location, occupation_frequency]`` entry
        for each featurized site.
    """
    u = mda.Universe(structure_input, xtc_input)

    if pdb_vis is True:
        protein = u.select_atoms("protein")
        pdb_outname = structure_input[0:-4] + "_IonSites.pdb"
        # Write the protein coordinates of the first frame
        u.trajectory[0]
        protein.write(pdb_outname)

    ## The density will be obtained from the universe which depends on the .xtc and .gro
    if grid_input is None:
        density_atomgroup = u.select_atoms("name " + atomgroup)
        D = DensityAnalysis(density_atomgroup, delta=1.0)
        D.run()
        if grid_write is not None:
            D.density.convert_density("Angstrom^{-3}")
            D.density.export(structure_input[:-4] + atomgroup + "_density.dx",
                             type="double")
        g = D.density
    else:
        g = Grid(grid_input)

    ##converting the density to a probability
    atom_number = len(u.select_atoms('name ' + atomgroup))
    grid_data = np.array(g.grid) * atom_number / np.sum(np.array(g.grid))

    ##average probability over all grid cells with non-zero probability
    average_probability_density = atom_number / sum(
        1 for i in grid_data.flat if i)
    ##mask all grid centers with density less than threshold density
    grid_data[grid_data <= average_probability_density] = 0.0

    xyz, val = local_maxima_3D(grid_data)
    ##negate the array to get descending order from most prob to least prob
    val_sort = np.argsort(-1 * val.copy())
    coords = [xyz[i] for i in val_sort]
    maxdens_coord_str = [str(item)[1:-1] for item in coords]

    atom_frequencies = []

    if top_atoms is None or top_atoms > len(coords):
        top_atoms = len(coords)

    n_frames = len(u.trajectory)

    print('\n')
    print('Featurizing ', top_atoms, ' Atoms')
    for atom_no in range(top_atoms):
        print('\n')
        print('Atom no: ', atom_no)
        print('\n')

        ##count the frames without any atom within the sphere of radius 2
        ##centered on the prob density maximum
        ##(formerly '[-1]' placeholder lists were appended and '-1' was
        ##counted, which never matched -> frequency was always 1)
        empty_frames = 0
        for i in tqdm(range(n_frames)):
            u.trajectory[i]
            atomgroup_IDS = u.select_atoms('name ' + atomgroup +
                                           ' and point ' +
                                           maxdens_coord_str[atom_no] +
                                           ' 2').indices
            if len(atomgroup_IDS) == 0:
                empty_frames += 1

        atom_ID = element + str(atom_no + 1)
        pocket_occupation_frequency = 1 - empty_frames / n_frames
        # Shifting the coordinates by the grid origin
        atom_location = coords[atom_no] + g.origin

        atom_frequencies.append(
            [atom_ID, atom_location, pocket_occupation_frequency])

        ##PDB_VISUALISATION
        ##add sites to the pdb file one by one as they are processed
        if pdb_vis is True:
            # Read the file into Biotite's structure object (atom array)
            atom_array = strucio.load_structure(pdb_outname)
            # Add an HETATM
            atom = struc.Atom(
                coord=atom_location,
                chain_id="W",
                # The residue ID is the last ID in the file +1
                res_id=atom_array.res_id[-1] + 1,
                res_name=atom_ID,
                hetero=True,
                atom_name=atomgroup,
                element=element)
            atom_array += struc.array([atom])
            # Save edited structure
            strucio.save_structure(pdb_outname, atom_array)

    if pdb_vis is True:
        u_pdb = mda.Universe(pdb_outname)

        u_pdb.add_TopologyAttr('tempfactors')
        # Write values as beta-factors ("tempfactors") to a PDB file
        for res, atom_frequency in enumerate(atom_frequencies):
            # The sites are the last 'top_atoms' residues
            atom_resid = len(u_pdb.residues) - top_atoms + res
            u_pdb.residues[atom_resid].atoms.tempfactors = atom_frequency[2]
        u_pdb.atoms.write(pdb_outname)

    if write is True:
        os.makedirs('atom_features/', exist_ok=True)
        filename = 'atom_features/PocketFrequencies.txt'
        with open(filename, 'w') as output:
            for row in atom_frequencies:
                output.write(str(row)[1:-1] + '\n')

    return atom_frequencies
Пример #24
0
assert trajectory_kinase_left.coord.shape[0] > 0

# Get simulation time for plotting purposes,
# scaled by a factor of 10^-3
time = 10**(-3) * xtc_file.get_time()

print("start time is ::", time[0], sep="")
print("end time is   :: ", time[-1], sep="")

# Each frame is written twice: once as copy of the template model
# carrying the frame's coordinates and once as the frame itself

print(" ... writing start frame ...")
first_frame = trajectory[0]
frame_start = template_model.copy()
frame_start.coord = first_frame.coord
save_structure("frame_start_coord.pdb", frame_start)
save_structure("frame_start.pdb", first_frame)
print(" ... done ... ")

print(" ... writing frame[1] ... ")
second_frame = trajectory[1]
frame_1 = template_model.copy()
frame_1.coord = second_frame.coord
save_structure("frame_1_coord.pdb", frame_1)
save_structure("frame_1.pdb", second_frame)
print(" ... done ... ")

print(" ... writing end frame ...")
last_frame = trajectory[-1]
frame_end = template_model.copy()
frame_end.coord = last_frame.coord
save_structure("frame_end_coord.pdb", frame_end)
save_structure("frame_end.pdb", last_frame)
Пример #25
0
                                                   peptide.coord[index_o],
                                                   connect_angle, C_O_LENGTH)
    coord_hxt = calculate_atom_coord_by_z_rotation(coord_oxt,
                                                   peptide.coord[index_c],
                                                   connect_angle, O_H_LENGTH)
    atom_oxt = struc.Atom(coord_oxt,
                          chain_id="A",
                          res_id=last_id,
                          res_name=peptide[index_c].res_name,
                          atom_name="OXT",
                          element="O")
    atom_hxt = struc.Atom(coord_hxt,
                          chain_id="A",
                          res_id=last_id,
                          res_name=peptide[index_c].res_name,
                          atom_name="HXT",
                          element="H")
    peptide = peptide + struc.array([atom_oxt, atom_hxt])
    peptide.bonds.add_bond(index_c, -2, struc.BondType.SINGLE)  # C-OXT
    peptide.bonds.add_bond(-2, -1, struc.BondType.SINGLE)  # OXT-HXT

    return peptide


# Build the peptide for an example sequence
sequence = seq.ProteinSequence("TITANITE")
atom_array = assemble_peptide(sequence)
# 'delete=False' keeps the file on disk after it is closed
out_file = NamedTemporaryFile(suffix=".mmtf", delete=False)
mmtf_file_name = out_file.name
strucio.save_structure(mmtf_file_name, atom_array)
# Visualization with PyMOL...

out_file.close()