def create(pdb_id, directory, include_gro):
    """
    Download a PDB entry in several formats and derive further files.

    The entry is always re-downloaded as ``.pdb``, ``.cif`` and
    ``.mmtf`` into `directory`.
    The ``.pdb`` file is then loaded and written back as ``.npz`` and,
    optionally, converted to ``.gro``.

    Parameters
    ----------
    pdb_id : str
        The PDB ID; it is also used as the stem of all file names.
    directory : str
        The directory all files are written to.
    include_gro : bool
        If true, a ``.gro`` file is created by calling the external
        ``editconf`` program.

    Notes
    -----
    If the ``.pdb`` file cannot be loaded
    (:class:`biotite.InvalidFileError`), neither the ``.npz`` nor the
    ``.gro`` file is written.
    The output and the exit status of ``editconf`` are ignored.
    """
    def output_path(suffix):
        # All generated files share the stem '<directory>/<pdb_id>'
        return join(directory, pdb_id + suffix)

    # Create *.pdb, *.cif and *.mmtf, replacing existing files
    for extension in ("pdb", "cif", "mmtf"):
        rcsb.fetch(pdb_id, extension, directory, overwrite=True)

    try:
        structure = strucio.load_structure(output_path(".pdb"))
    except biotite.InvalidFileError:
        # Structure probably contains multiple models with different
        # number of atoms
        # -> Cannot load AtomArrayStack
        # -> Skip writing GRO and NPZ file
        return

    # Create *.npz file
    strucio.save_structure(output_path(".npz"), structure)

    if not include_gro:
        return

    # Create *.gro file using GROMACS
    # The structure is first written to a temporary PDB file;
    # this round trip is presumably what removes inscodes and altlocs
    cleaned_pdb = biotite.temp_file("pdb")
    strucio.save_structure(cleaned_pdb, structure)
    conversion_command = [
        "editconf", "-f", cleaned_pdb, "-o", output_path(".gro")
    ]
    subprocess.run(
        conversion_command,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
Пример #2
0
def api_route():
    """
    Return secondary structure annotations of a PDB entry as JSON.

    The request arguments ``pdb_id`` (default ``"1Q2W"``) and
    ``format`` (default ``"mmtf"``) select the file to fetch.
    The fetched file is always parsed as MMTF file.

    Returns
    -------
    response
        The result of ``jsonify()`` with the fields ``sequence``
        (from the first entry of ``entityList``), ``mmtf``, ``dssp``,
        ``psea`` (one list per annotation method) and ``diffs``
        (the result of ``diff_all()`` on the three annotations).

    Notes
    -----
    Each annotation method is best-effort: if it fails, an empty list
    is reported for that method instead of failing the whole request.
    """
    pdb_id = request.args.get("pdb_id", "1Q2W")
    file_format = request.args.get("format", "mmtf")
    file_name = rcsb.fetch(pdb_id, file_format, biotite.temp_dir())
    mmtf_file = mmtf.MMTFFile()
    mmtf_file.read(file_name)

    # Run all annotation methods in one loop, so that each fallback
    # is guaranteed to be stored under the name of its own method
    # (the former copy-pasted PSEA fallback overwrote the DSSP result
    # and left the PSEA result undefined)
    annotators = (
        ("mmtf", mmtf_sec),
        ("dssp", dssp_sec),
        ("psea", psea_sec),
    )
    structs = {}
    for method, annotate in annotators:
        try:
            structs[method] = annotate(mmtf_file).tolist()
        except Exception:
            # Do not swallow KeyboardInterrupt/SystemExit
            structs[method] = []

    return jsonify(
        sequence=list(mmtf_file["entityList"][0]["sequence"]),
        **structs,
        diffs=diff_all(**structs),
    )
Пример #3
0
def create(pdb_id, directory, include_gro):
    """
    Download a PDB entry as ``.pdb``, ``.cif`` and ``.mmtf`` file.

    Parameters
    ----------
    pdb_id : str
        The PDB ID; it is also used as the stem of all file names.
    directory : str
        The directory all files are written to.
    include_gro : bool
        If true, the ``.pdb`` file is additionally converted into a
        ``.gro`` file by calling ``gmx editconf``.

    Notes
    -----
    The output and the exit status of ``gmx editconf`` are ignored.
    """
    # Create *.pdb, *.cif and *.mmtf
    for extension in ("pdb", "cif", "mmtf"):
        rcsb.fetch(pdb_id, extension, directory)

    if not include_gro:
        return

    # Create *.gro file using GROMACS
    # The structure is first rewritten into a temporary PDB file;
    # this round trip is presumably what removes inscodes and altlocs
    structure = strucio.load_structure(join(directory, pdb_id + ".pdb"))
    cleaned_pdb = biotite.temp_file("pdb")
    strucio.save_structure(cleaned_pdb, structure)
    conversion_command = [
        "gmx", "editconf", "-f", cleaned_pdb, "-o",
        join(directory, pdb_id + ".gro"),
    ]
    subprocess.run(
        conversion_command,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
Пример #4
0
def get_diameter(pdb_id):
    """
    Compute the largest distance between two amino acid atoms.

    The structure is fetched as MMTF file into the system's temporary
    directory.

    Parameters
    ----------
    pdb_id : str
        The PDB ID of the structure.

    Returns
    -------
    diameter : float
        The maximum pairwise distance between all atoms that belong
        to amino acids.

    Notes
    -----
    All pairwise difference vectors are held in memory at once,
    so the memory consumption grows quadratically with the number
    of atoms.
    """
    structure = strucio.load_structure(
        rcsb.fetch(pdb_id, "mmtf", gettempdir())
    )
    # Only atoms of amino acids contribute to the diameter
    positions = structure[struc.filter_amino_acids(structure)].coord
    # Broadcasting yields one difference vector for each atom pair
    displacements = positions[:, np.newaxis, :] - positions[np.newaxis, :, :]
    squared_distances = np.sum(displacements * displacements, axis=-1)
    # Take the square root only once, for the largest squared distance
    return np.sqrt(np.max(squared_distances))
Пример #5
0
def plot_rna(pdb_id, axes):
    """
    Plot the nucleotide secondary structure of a PDB entry.

    The first model of the PDB file is used.
    Bonds are drawn solid, dashed or dotted depending on the
    pseudoknot order of the base pair, and base pairs labelled
    ``A``/``U`` or ``C``/``G`` get a different color than all others.

    Parameters
    ----------
    pdb_id : str
        The PDB ID of the structure; it is also used as plot title.
    axes : Axes
        The *Matplotlib* axes to draw on.
    """
    # Download the PDB file and keep the nucleotides of the first model
    structure_path = rcsb.fetch(pdb_id, "pdb", gettempdir())
    model = pdb.get_structure(pdb.PDBFile.read(structure_path))[0]
    nucleotides = model[struc.filter_nucleotides(model)]

    # Base pairs are converted into residue positions,
    # keeping the shape of the base pair array
    raw_pairs = struc.base_pairs(nucleotides)
    base_pairs = struc.get_residue_positions(
        nucleotides, raw_pairs.flatten()
    ).reshape(raw_pairs.shape)
    pseudoknot_order = struc.pseudoknots(base_pairs)[0]
    pair_count = base_pairs.shape[0]

    # Solid line by default, other styles for pseudoknot order 1 and 2
    linestyles = np.full(pair_count, '-', dtype=object)
    for order, style in ((1, '--'), (2, ':')):
        linestyles[pseudoknot_order == order] = style

    # Upper case one-letter-code for exact matches,
    # lower case one-letter-code otherwise
    base_labels = []
    for base in struc.residue_iter(nucleotides):
        one_letter_code, exact = struc.map_nucleotide(base)
        base_labels.append(
            one_letter_code if exact else one_letter_code.lower()
        )

    # Pairs labelled A-U or C-G get the 'dimorange' color,
    # all other pairs keep 'brightorange'
    watson_crick_pairs = (["A", "U"], ["C", "G"])
    colors = np.full(pair_count, biotite.colors['brightorange'])
    for i, (base1, base2) in enumerate(base_pairs):
        pair_labels = sorted([base_labels[base1], base_labels[base2]])
        if pair_labels in watson_crick_pairs:
            colors[i] = biotite.colors["dimorange"]

    # Plot the secondary structure
    graphics.plot_nucleotide_secondary_structure(
        axes,
        base_labels,
        base_pairs,
        struc.get_residue_count(nucleotides),
        pseudoknot_order=pseudoknot_order,
        bond_linestyle=linestyles,
        bond_color=colors,
        # Margin to compensate for reduced axis limits in shared axis
        border=0.13,
    )

    # Use the PDB ID to label each plot
    axes.set_title(pdb_id, loc="left")
Пример #6
0
def test_fetch(format, as_file_like):
    """
    Fetch the entry ``1l2y`` in the given format and check that the
    result can be parsed into a structure.

    If `as_file_like` is true, no target path is given to
    :func:`fetch()`, otherwise the file is stored in the temporary
    directory of *Biotite*.
    """
    if as_file_like:
        target = None
    else:
        target = biotite.temp_dir()
    fetched = rcsb.fetch("1l2y", format, target, overwrite=True)

    # File class and structure getter for each supported format
    parsers = {
        "pdb": (pdb.PDBFile, pdb.get_structure),
        "pdbx": (pdbx.PDBxFile, pdbx.get_structure),
        "mmtf": (mmtf.MMTFFile, mmtf.get_structure),
    }
    if format not in parsers:
        # Other formats are only fetched, but not parsed
        return
    file_class, get_structure = parsers[format]
    parsed_file = file_class()
    parsed_file.read(fetched)
    get_structure(parsed_file)
Пример #7
0
def test_fetch(format, as_file_like):
    """
    Fetch the entry ``1l2y`` in the given format and check that the
    result can be parsed.

    Structure formats must yield a structure, the FASTA format must
    yield at least one sequence.
    If `as_file_like` is true, no target path is given to
    :func:`fetch()`, otherwise the file is stored in the temporary
    directory of *Biotite*.
    """
    if as_file_like:
        target = None
    else:
        target = biotite.temp_dir()
    fetched = rcsb.fetch("1l2y", format, target, overwrite=True)

    if format == "fasta":
        sequences = fasta.get_sequences(fasta.FastaFile.read(fetched))
        # Test if the file contains any sequences
        assert len(sequences) > 0
        return

    # File class and structure getter for each structure format
    parsers = {
        "pdb": (pdb.PDBFile, pdb.get_structure),
        "pdbx": (pdbx.PDBxFile, pdbx.get_structure),
        "mmtf": (mmtf.MMTFFile, mmtf.get_structure),
    }
    parser = parsers.get(format)
    if parser is not None:
        file_class, get_structure = parser
        get_structure(file_class.read(fetched))
Пример #8
0
def test_search_sequence():
    """
    Check that all entries found by a sequence search fulfill the
    requested minimum sequence identity.

    The first sequence of the local ``1l2y.cif`` file is used as
    query.
    For each search hit the FASTA file is fetched and its sequence is
    aligned to the query sequence to determine the identity.
    """
    IDENTITY_CUTOFF = 0.9
    pdbx_file = pdbx.PDBxFile.read(join(data_dir("structure"), "1l2y.cif"))
    ref_sequence = pdbx.get_sequence(pdbx_file)[0]
    query = rcsb.SequenceQuery(ref_sequence,
                               "protein",
                               min_identity=IDENTITY_CUTOFF)
    test_ids = rcsb.search(query)

    # The substitution matrix is the same for every hit
    # -> create it only once
    matrix = align.SubstitutionMatrix.std_protein_matrix()

    for pdb_id in test_ids:
        fasta_file = fasta.FastaFile.read(rcsb.fetch(pdb_id, "fasta"))
        test_sequence = fasta.get_sequence(fasta_file)
        # Only the first optimal alignment is evaluated
        alignment = align.align_optimal(ref_sequence,
                                        test_sequence,
                                        matrix,
                                        terminal_penalty=False)[0]
        identity = align.get_sequence_identity(alignment, mode="shortest")
        assert identity >= IDENTITY_CUTOFF
Пример #9
0
# License: BSD 3 clause

import biotite
import biotite.structure as struc
import biotite.structure.io as strucio
import biotite.structure.io.pdbx as pdbx
import biotite.database.rcsb as rcsb
import numpy as np

# The output file names
# Modify these values for actual file output
ku_dna_file = biotite.temp_file("ku_dna.cif")
ku_file = biotite.temp_file("ku.cif")

# Download and parse structure files
# 'ku_dna' is loaded from entry 1JEY, 'ku' from entry 1JEQ
file = rcsb.fetch("1JEY", "mmtf", biotite.temp_dir())
ku_dna = strucio.load_structure(file)
file = rcsb.fetch("1JEQ", "mmtf", biotite.temp_dir())
ku = strucio.load_structure(file)
# Remove DNA and water
# Only chains A and B of 'ku_dna' are kept
# (presumably the protein chains -- verify)
ku_dna = ku_dna[(ku_dna.chain_id == "A") | (ku_dna.chain_id == "B")]
ku_dna = ku_dna[~struc.filter_solvent(ku_dna)]
ku = ku[~struc.filter_solvent(ku)]
# The structures have a differing amount of atoms missing
# at the start and end of the structure
# -> Find common structure
ku_dna_common = ku_dna[struc.filter_intersection(ku_dna, ku)]
ku_common = ku[struc.filter_intersection(ku, ku_dna)]
# Superimpose
# The CA mask selects the atoms given to superimpose() as third argument
ku_superimposed, transformation = struc.superimpose(
    ku_dna_common, ku_common, (ku_common.atom_name == "CA"))
Пример #10
0
Downloading structure files from the *RCSB PDB* is quite easy:
Simply specify the PDB ID, the file format and the target directory
for the :func:`fetch()` function and you are done.
The function even returns the path to the downloaded file, so you
can just load it via the other *Biotite* subpackages
(more on this later).
We will download a protein structure of the miniprotein *TC5b*
(PDB: 1L2Y) into a temporary directory.
"""

from os.path import relpath
import biotite
import biotite.database.rcsb as rcsb

# The returned path is printed relative to the current working directory
file_path = rcsb.fetch("1l2y", "pdb", biotite.temp_dir())
print(relpath(file_path))

########################################################################
# In case you want to download multiple files, you are able to specify a
# list of PDB IDs, which in return gives you a list of file paths.

# Download files in the more modern mmCIF format
file_paths = rcsb.fetch(["1l2y", "1aki"], "cif", biotite.temp_dir())
print([relpath(file_path) for file_path in file_paths])

########################################################################
# By default :func:`fetch()` checks whether the file to be fetched
# already exists in the directory, and downloads it, if it does not
# exist yet.
# If you want to download files irrespectively, set :obj:`overwrite` to
Пример #11
0
capsid from *Paramecium bursaria Chlorella virus type 1*
- a homo-5040-mer!

At first we will check, which assemblies are available to us.
"""

# Code source: Patrick Kunzmann
# License: BSD 3 clause

import numpy as np
import biotite.structure as struc
import biotite.structure.io.pdbx as pdbx
import biotite.structure.io as strucio
import biotite.database.rcsb as rcsb

# Fetch the entry in mmCIF format and parse it
# (no target path is given to fetch() here)
pdbx_file = pdbx.PDBxFile.read(rcsb.fetch("1M4X", "mmcif"))

# Print a table of the available assemblies: ID column, then name
assemblies = pdbx.list_assemblies(pdbx_file)
print("ID    name")
print()
for assembly_id, name in assemblies.items():
    # The ID is padded to a width of 2 characters
    print(f"{assembly_id:2}    {name}")

########################################################################
# ``'complete icosahedral assembly'`` sounds good.
# In fact, often the first assembly is the complete one.
# Hence, the :func:`get_assembly()` function builds the first assembly
# by default.
# Since we know the ID we want (``'1'``), we will provide it to this
# function anyway.
# It returns the chosen assembly as :class:`AtomArray`.
Пример #12
0
    [116.40, -105.53, 129.32, -96.68, 140.72, -74.19, -26.65, -94.51],
    [0.40, -81.83, 4.91, -100.59, 85.50, -71.65, 130.78, 84.98],
    [119.14, -102.58, 130.83, -67.91, 121.55, 76.25, -2.95, -90.88],
    [130.68, -56.92, 119.26, 77.85, 10.42, -99.43, 141.40, -98.01],
    [114.32, -121.47, 118.14, 82.88, -150.05, -83.81, 23.35, -85.82],
    [117.16, -95.41, 140.40, -59.35, -29.23, -72.39, -25.08, -76.16],
    [139.20, -55.96, -32.70, -68.51, -26.09, -74.44, -22.60, -71.74],
    [-39.62, -64.73, -39.52, -65.54, -38.88, -66.89, -37.76, -70.19],
    [-35.34, -65.03, -38.12, -66.34, -29.51, -89.10, -2.91, 77.90],
    [-45.29, -67.44, -27.72, -87.27, 5.13, 77.49, 30.71, -93.23],
    [-27.09, -86.14, 0.30, 59.85, 21.51, -96.30, 132.67, -92.91],
])

# Fetch animal lysozyme structures
lyso_files = rcsb.fetch(["1REX", "1AKI", "1DKJ", "1GD6"],
                        format="mmtf",
                        target_path=biotite.temp_dir())
# Organism labels, presumably in the same order as the PDB IDs above
# NOTE(review): "C. viginianus" looks like a misspelling of
# "C. virginianus"; the string is left untouched here
organisms = ["H. sapiens", "G. gallus", "C. viginianus", "B. mori"]

# Create a PB sequence from each structure
pb_seqs = []
for file_name in lyso_files:
    file = mmtf.MMTFFile()
    file.read(file_name)
    # Take only the first model into account
    array = mmtf.get_structure(file, model=1)
    # Remove everything but the first protein chain
    array = array[struc.filter_amino_acids(array)]
    array = array[array.chain_id == array.chain_id[0]]

    # Calculate backbone dihedral angles,
Пример #13
0
# Rendering settings of the PyMOL session
ammolite.cmd.set("cartoon_side_chain_helper", 1)
ammolite.cmd.set("cartoon_oval_length", 0.8)
ammolite.cmd.set("depth_cue", 0)
ammolite.cmd.set("valence", 0)

#----------------------------------------------------------------------#

# Define colors used later
# Each Biotite color is registered in PyMOL under the same name
ammolite.cmd.set_color("lightorange", to_rgb(biotite.colors["lightorange"]))
ammolite.cmd.set_color("lightgreen", to_rgb(biotite.colors["lightgreen"]))
ammolite.cmd.set_color("darkgreen", to_rgb(biotite.colors["darkgreen"]))

#----------------------------------------------------------------------#

# Fetch and load cytochrome C structure and remove water
# Only the first model is used and bonds are included
mmtf_file = mmtf.MMTFFile.read(rcsb.fetch("1C75", "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1, include_bonds=True)
# Water is identified by the residue name 'HOH'
cyt_c = structure[structure.res_name != "HOH"]

# Transfer the structure into a PyMOL object
pymol_cyt_c = ammolite.PyMOLObject.from_structure(cyt_c)

#----------------------------------------------------------------------#

# Style protein
protein_mask = struc.filter_amino_acids(cyt_c)

pymol_cyt_c.show_as("cartoon", protein_mask)
# Only the carbon atoms of the protein are colored
pymol_cyt_c.color("lightgreen", protein_mask & (cyt_c.element == "C"))

#----------------------------------------------------------------------#
Пример #14
0
def build_patterns(structfam, folder):
    """
    Determine the most frequent secondary structure pattern of a
    structure family.

    For each family member the secondary structure annotation stored
    in the MMTF file is reduced to the order of its secondary
    structure elements, once in the 8-class and once in the 3-class
    alphabet.
    The most frequent pattern of each alphabet is stored via
    ``push()`` in ``{folder}/data.pt`` under the key ``"pattern"``.

    Parameters
    ----------
    structfam : iterable of tuple(str, str, int, int)
        For each family member the PDB ID, the chain ID and the first
        and last (inclusive) index into the per-residue annotation of
        that chain.
    folder : str
        The directory containing the ``data.pt`` file.

    Returns
    -------
    c_pattern3 : str
        The most frequent 3-class pattern as characters.
    n_pattern3 : list of int
        The same pattern as class indices.
    c_pattern8 : str
        The most frequent 8-class pattern as characters.
    n_pattern8 : list of int
        The same pattern as class indices.
    occ_sum3, occ_sum8 : dict
        The number of occurrences of each 3-class/8-class pattern.

    Notes
    -----
    If no pattern could be derived, a message is printed and all six
    return values are ``None``.
    Nothing is stored in that case.
    """
    no_result = (None, None, None, None, None, None)

    patterns = []
    for pdb_id, c, start, end in tqdm(structfam):
        file_name = rcsb.fetch(pdb_id, "mmtf", biotite.temp_dir())
        mmtf_file = mmtf.MMTFFile()
        mmtf_file.read(file_name)

        array = mmtf.get_structure(mmtf_file, model=1)
        tk_dimer = array[struc.filter_amino_acids(array)]

        # The chain ID corresponding to each residue
        # NOTE(review): the residue starts refer to the filtered
        # 'tk_dimer', but are used as index for the unfiltered 'array';
        # this is only equivalent if no filtered-out atom precedes an
        # amino acid atom -- confirm
        chain_id_per_res = array.chain_id[struc.get_residue_starts(tk_dimer)]

        # Restrict the annotation to the requested range of the
        # requested chain and convert it into DSSP characters
        sse = mmtf_file["secStructList"]
        sse = sse[:chain_id_per_res.shape[0]][chain_id_per_res == c]
        sse = np.array(sse[start:end + 1])
        sse = np.array([sec_struct_codes[code % 8] for code in sse],
                       dtype="U1")

        # 8-class pattern:
        # The difference of consecutive one-hot rows is 1 in the column
        # of a class where an element of this class begins and -1 where
        # it ends
        sse8 = to_onehot([dssp_codes[x] for x in sse], (None, 8))
        dss8 = (sse8[1:] - sse8[:-1])
        cls = to_onehot(np.where(dss8 == -1)[1], (None, 8)).T
        bbox = np.array(
            [np.where(dss8 == 1)[0],
             np.where(dss8 == -1)[0], *cls]).T
        # The columns after the first two hold the one-hot encoded class
        pat8 = np.argmax(bbox[:, 2:], 1)

        # 3-class pattern: same procedure on the reduced alphabet
        sse3 = to_onehot([abc_codes[dssp_to_abc[x]] for x in sse], (None, 3))
        dss3 = (sse3[1:] - sse3[:-1])
        cls = to_onehot(np.where(dss3 == -1)[1], (None, 3)).T
        bbox = np.array(
            [np.where(dss3 == 1)[0],
             np.where(dss3 == -1)[0], *cls]).T
        pat3 = np.argmax(bbox[:, 2:], 1)
        patterns.append((pat3, pat8))
    if len(patterns) == 0:
        print("No pattern find")
        # Same number of values as the regular return,
        # so callers can always unpack the result in the same way
        return no_result

    # Number of occurrences of each pattern
    occ_sum8 = dict()
    occ_sum3 = dict()
    # Maps each pattern to its character and index representation
    correspondings8 = dict()
    correspondings3 = dict()
    for pat3, pat8 in patterns:
        c8 = "".join([sec_struct_codes[x] for x in pat8])
        n8 = list(pat8)
        c3 = "".join(["abc"[x] for x in pat3])
        n3 = list(pat3)
        if len(c3) == 0:
            continue
        # Each pattern must start and end with a coil
        if c3[0] != "c":
            c3 = "c" + c3
            n3 = [2] + n3
        if c3[-1] != "c":
            c3 = c3 + "c"
            n3 = n3 + [2]
        if c8[0] != "C":
            c8 = "C" + c8
            n8 = [7] + n8
        if c8[-1] != "C":
            c8 = c8 + "C"
            n8 = n8 + [7]
        if c8 not in occ_sum8:
            occ_sum8[c8] = 0
            correspondings8[c8] = c8, n8
        occ_sum8[c8] += 1
        if c3 not in occ_sum3:
            occ_sum3[c3] = 0
            correspondings3[c3] = c3, n3
        occ_sum3[c3] += 1

    if not occ_sum3:
        # Every pattern was empty -> max() below would fail
        print("No pattern find")
        return no_result

    # In case of a tie the pattern that appeared first is chosen
    c_pattern8, n_pattern8 = correspondings8[max(occ_sum8, key=occ_sum8.get)]
    c_pattern3, n_pattern3 = correspondings3[max(occ_sum3, key=occ_sum3.get)]

    push(f"{folder}/data.pt", "pattern",
         (c_pattern3, n_pattern3, c_pattern8, n_pattern8))

    return c_pattern3, n_pattern3, c_pattern8, n_pattern8, occ_sum3, occ_sum8
Пример #15
0
.. currentmodule:: biotite.database.rcsb

Downloading structure files from the *RCSB PDB* is quite easy:
Simply specify the PDB ID, the file format and the target directory
for the :func:`fetch()` function and you are done.
The function returns the path to the downloaded file, so you
can simply load the file via the other *Biotite* subpackages
(more on this later).
We will download a protein structure of the miniprotein *TC5b*
(PDB: 1L2Y) into a temporary directory.
"""

from tempfile import gettempdir
import biotite.database.rcsb as rcsb

# Download into the system's temporary directory and show the file path
file_path = rcsb.fetch("1l2y", "pdb", gettempdir())
print(file_path)

########################################################################
# In case you want to download multiple files, you are able to specify a
# list of PDB IDs, which in return gives you a list of file paths.

# Download files in the more modern mmCIF format
file_paths = rcsb.fetch(["1l2y", "1aki"], "cif", gettempdir())
print([file_path for file_path in file_paths])

########################################################################
# By default :func:`fetch()` checks whether the file to be fetched
# already exists in the directory and downloads it, if it does not
# exist yet.
# If you want to download files irrespectively, set :obj:`overwrite` to
Пример #16
0
def plot_gaps(pdb_id, chain_id, ax):
    """
    Plot which residues of a chain are present in a structure.

    Each residue ID from 1 up to the residue ID of the last atom of
    the chain is drawn as part of a colored bar:
    red for missing residues, yellow for residues named ``UNK`` and
    green for all other residues.

    Parameters
    ----------
    pdb_id : str
        The PDB ID of the structure.
        It is fetched as MMTF file into the system's temporary
        directory.
    chain_id : str
        The chain to be plotted.
    ax : Axes
        The *Matplotlib* axes to draw on.
    """
    # Download and parse structure file
    path = rcsb.fetch(pdb_id, "mmtf", gettempdir())
    atom_array = strucio.load_structure(path)
    # Consider only one chain
    atom_array = atom_array[atom_array.chain_id == chain_id]
    # Array for saving the 'green', 'yellow' and 'red' state
    # Index i describes the residue with the ID i+1
    states = np.zeros(atom_array.res_id[-1], dtype=int)
    for i in range(len(states)):
        # Get array for only one residue ID
        residue = atom_array[atom_array.res_id == i + 1]
        if len(residue) == 0:
            # not existing
            states[i] = 0
        elif residue.res_name[0] == "UNK":
            # existing but polyalanine
            states[i] = 1
        else:
            # existing
            states[i] = 2

    # Find the intervals for each state
    # Each interval is a tuple of start (inclusive), stop (exclusive)
    # and the state
    state_intervals = []
    curr_start = 0
    for i in range(1, len(states)):
        if states[i] != states[i - 1]:
            state_intervals.append((curr_start, i, states[curr_start]))
            curr_start = i
    if len(states) > 0:
        # The last interval reaches to the end of the array:
        # the exclusive stop is 'len(states)', not the last index,
        # otherwise the last residue would not be drawn
        state_intervals.append(
            (curr_start, len(states), states[curr_start])
        )

    # Draw the state intervals as colored rectangles
    state_colors = {0: "firebrick", 1: "gold", 2: "forestgreen"}
    for start, stop, state in state_intervals:
        ax.add_patch(
            # Shift by 0.5, so that each residue ID is in the center
            # of its own unit interval
            Rectangle((start + 1 - 0.5, 0),
                      stop - start,
                      1,
                      edgecolor="None",
                      facecolor=state_colors[int(state)]))
    # Some other visual stuff
    for side in ("left", "bottom", "right", "top"):
        ax.spines[side].set_visible(False)
    ax.yaxis.set_visible(False)
    ax.set_xlim(0.5, len(states) + 0.5)
    ax.set_ylim(0, 2)
Пример #17
0
# Code source: Patrick Kunzmann
# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import spearmanr
import biotite.structure as struc
import biotite.structure.info as info
import biotite.structure.io.mmtf as mmtf
import biotite.structure.graphics as graphics
import biotite.database.rcsb as rcsb
import biotite.application.autodock as autodock

# Get the receptor structure
# and the original 'correct' conformation of the ligand
mmtf_file = mmtf.MMTFFile.read(rcsb.fetch("2RTG", "mmtf"))
structure = mmtf.get_structure(
    # Include formal charge for accurate partial charge calculation
    mmtf_file,
    model=1,
    include_bonds=True,
    extra_fields=["charge"])
# The asymmetric unit describes a streptavidin homodimer
# However, we are only interested in a single monomer
structure = structure[structure.chain_id == "B"]
# The receptor consists of all amino acids of this chain
receptor = structure[struc.filter_amino_acids(structure)]

# The reference ligand is selected via its residue name 'BTN'
ref_ligand = structure[structure.res_name == "BTN"]
ref_ligand_center = struc.centroid(ref_ligand)

# Independently, get the ligand without optimized conformation
Пример #18
0
}
# Converter for the DSSP secondary structure elements
# to the classical ones
# ('a', 'b' and 'c' are the three classes used as values)
dssp_to_abc = {
    "I": "c",
    "S": "c",
    "H": "a",
    "E": "b",
    "G": "c",
    "B": "b",
    "T": "c",
    "C": "c"
}

# Fetch and load structure
file_name = rcsb.fetch("1QGD", "mmtf", biotite.temp_dir())
mmtf_file = mmtf.MMTFFile()
mmtf_file.read(file_name)
array = mmtf.get_structure(mmtf_file, model=1)
# Transketolase homodimer
tk_dimer = array[struc.filter_amino_acids(array)]
# Transketolase monomer
tk_mono = tk_dimer[tk_dimer.chain_id == "A"]

# The chain ID corresponding to each residue
# NOTE(review): the residue starts refer to the filtered 'tk_dimer',
# but are used as index for the unfiltered 'array'; this is only
# equivalent if no filtered-out atom precedes an amino acid atom
# -- confirm
chain_id_per_res = array.chain_id[struc.get_residue_starts(tk_dimer)]
sse = mmtf_file["secStructList"]
# Entries with the code -1 are removed before the chain is selected
sse = sse[sse != -1]
sse = sse[chain_id_per_res == "A"]
# NOTE(review): the '-1' entries were already removed above,
# so the condition in the comprehension appears to be redundant
sse = np.array([sec_struct_codes[code] for code in sse if code != -1],
               dtype="U1")
Пример #19
0
- a homo-5040-mer!

At first we will check, which assemblies are available to us.
"""

# Code source: Patrick Kunzmann
# License: BSD 3 clause

import numpy as np
import biotite.structure as struc
import biotite.structure.io.pdbx as pdbx
import biotite.structure.io as strucio
import biotite.database.rcsb as rcsb

# Fetch the entry in mmCIF format and parse it
# (no target path is given to fetch() here)
pdbx_file = pdbx.PDBxFile()
pdbx_file.read(rcsb.fetch("1M4X", "mmcif"))

# Print a table of the available assemblies: ID column, then name
assemblies = pdbx.list_assemblies(pdbx_file)
print("ID    name")
print()
for assembly_id, name in assemblies.items():
    # The ID is padded to a width of 2 characters
    print(f"{assembly_id:2}    {name}")

########################################################################
# ``'complete icosahedral assembly'`` sounds good.
# In fact, often the first assembly is the complete one.
# Hence, the :func:`get_assembly()` function builds the first assembly
# by default.
# Since we know the ID we want (``'1'``), we will provide it to this
# function anyway.
# It returns the chosen assembly as :class:`AtomArray`.
Пример #20
0
"""

# Code source: Patrick Kunzmann
# License: BSD 3 clause

from tempfile import gettempdir
import biotite.structure as struc
import biotite.structure.io as strucio
import biotite.database.rcsb as rcsb
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colors
import scipy.stats as sts

# Download and parse file
file = rcsb.fetch("3vkh", "cif", gettempdir())
atom_array = strucio.load_structure(file)
# Calculate backbone dihedral angles
# from one of the two identical chains in the asymmetric unit
# ('omega' is not used in the lines below)
phi, psi, omega = struc.dihedral_backbone(
    atom_array[atom_array.chain_id == "A"])
# Conversion from radians into degree
phi *= 180 / np.pi
psi *= 180 / np.pi
# Remove invalid values (NaN) at first and last position
phi = phi[1:-1]
psi = psi[1:-1]

# Plot density
# A single subplot that fills the entire figure
figure = plt.figure()
ax = figure.add_subplot(111)
Пример #21
0
def analyze_chirality(array):
    """
    Determine the enantiomer of each amino acid residue.

    Parameters
    ----------
    array : AtomArray
        The structure to be analyzed.

    Returns
    -------
    enantiomers : ndarray, dtype=int
        For each amino acid residue the result of
        ``get_enantiomer()``.
        The value is 0 for residues that do not have exactly four
        backbone/CB atoms.
    """
    # Filter backbone + CB
    amino_acids = array[struc.filter_amino_acids(array)]
    selection = (
        (amino_acids.atom_name == "CB") | struc.filter_backbone(amino_acids)
    )
    core_atoms = amino_acids[selection]
    # One entry for each residue, 0 by default
    res_ids, _ = struc.get_residues(core_atoms)
    enantiomers = np.zeros(len(res_ids), dtype=int)
    for index, res_id in enumerate(res_ids):
        coord = core_atoms.coord[core_atoms.res_id == res_id]
        # Any other atom count (e.g. glycine, which has no CB)
        # -> no chirality, entry stays 0
        if len(coord) == 4:
            enantiomers[index] = get_enantiomer(*coord)
    return enantiomers


# Fetch and parse structure file
file = rcsb.fetch("1l2y", "mmtf", gettempdir())
stack = strucio.load_structure(file)
# Get first model
array = stack[0]
# Get enantiomers
print("1l2y            ", analyze_chirality(array))
# Reflected structures have opposite enantiomers
# Test via reflection at x-y-plane, z -> -z
# The reflection is applied to a copy, the original stays unchanged
array_reflect = array.copy()
array_reflect.coord[:, 2] *= -1
print("1l2y (reflected)", analyze_chirality(array_reflect))
Пример #22
0
def test_fetch_invalid(format):
    """
    Check that fetching the nonexistent PDB ID ``xxxx`` raises a
    :class:`RequestError` for the given format.
    """
    with pytest.raises(RequestError):
        # The return value is irrelevant, only the exception matters
        rcsb.fetch("xxxx", format, biotite.temp_dir(), overwrite=True)
Пример #23
0
def analyze_chirality(array):
    """
    Determine the enantiomer of each amino acid residue.

    Parameters
    ----------
    array : AtomArray
        The structure to be analyzed.

    Returns
    -------
    enantiomers : ndarray, dtype=int
        For each amino acid residue the result of
        ``get_enantiomer()``.
        The value is 0 for residues that do not have exactly four
        backbone/CB atoms.
    """
    # Filter backbone + CB
    amino_acids = array[struc.filter_amino_acids(array)]
    selection = (
        (amino_acids.atom_name == "CB") | struc.filter_backbone(amino_acids)
    )
    core_atoms = amino_acids[selection]
    # One entry for each residue, 0 by default
    res_ids, _ = struc.get_residues(core_atoms)
    enantiomers = np.zeros(len(res_ids), dtype=int)
    for index, res_id in enumerate(res_ids):
        coord = core_atoms.coord[core_atoms.res_id == res_id]
        # Any other atom count (e.g. glycine, which has no CB)
        # -> no chirality, entry stays 0
        if len(coord) == 4:
            enantiomers[index] = get_enantiomer(*coord)
    return enantiomers


# Fetch and parse structure file
file = rcsb.fetch("1l2y", "mmtf", biotite.temp_dir())
stack = strucio.load_structure(file)
# Get first model
array = stack[0]
# Get enantiomers
print("1l2y            ", analyze_chirality(array))
# Reflected structures have opposite enantiomers
# Test via reflection at x-y-plane, z -> -z
# The reflection is applied to a copy, the original stays unchanged
array_reflect = array.copy()
array_reflect.coord[:, 2] *= -1
print("1l2y (reflected)", analyze_chirality(array_reflect))
Пример #24
0
# Code source: Patrick Kunzmann
# License: BSD 3 clause

import numpy as np
import biotite.structure as struc
import biotite.structure.io.mmtf as mmtf
import biotite.database.rcsb as rcsb

# The maximum distance between an atom in the repressor and an atom in
# the DNA for them to be considered 'in contact'
THRESHOLD_DISTANCE = 4.0

# Fetch and load structure
# Only the first model is used
mmtf_file = mmtf.MMTFFile()
mmtf_file.read(rcsb.fetch("2or1", "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1)

# Separate structure into the DNA and the two identical protein chains
# Atoms flagged as 'hetero' are excluded from each selection
dna = structure[np.isin(structure.chain_id, ["A", "B"])
                & (structure.hetero == False)]
protein_l = structure[(structure.chain_id == "L")
                      & (structure.hetero == False)]
protein_r = structure[(structure.chain_id == "R")
                      & (structure.hetero == False)]
# Quick check if the two protein chains are really identical
# NOTE(review): get_residues() is unpacked into two values (IDs and
# names) elsewhere in this file, so len() of its result is presumably
# always 2 and this assertion cannot fail; comparing the number of
# residue IDs was probably intended -- confirm
assert len(struc.get_residues(protein_l)) == len(struc.get_residues(protein_r))

# Fast identification of contacts via a cell list:
# The cell list is initialized with the coordinates of the DNA
# and later provided with the atom coordinates of the two protein chains
Пример #25
0
# Code source: Tom David Müller
# License: BSD 3 clause

from tempfile import gettempdir
import biotite
import biotite.structure.io.pdb as pdb
import biotite.database.rcsb as rcsb
import biotite.structure as struc
import biotite.sequence.graphics as graphics
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.patches import Arc
import numpy as np

# Download the PDB file and read the structure
pdb_file_path = rcsb.fetch("4p5j", "pdb", gettempdir())
pdb_file = pdb.PDBFile.read(pdb_file_path)
# Use only the first model
atom_array = pdb.get_structure(pdb_file)[0]
nucleotides = atom_array[struc.filter_nucleotides(atom_array)]

# Get the residue names and residue ids of the nucleotides
# Both lists are filled in the loop below
residue_ids = []
residue_names = []
for residue in struc.residue_iter(nucleotides):
    mapped_nucleotide, exact_match = struc.map_nucleotide(residue)
    if mapped_nucleotide is None:
        continue
    residue_ids.append(residue[0].res_id)
    if exact_match:
        residue_names.append(mapped_nucleotide)
    else:
#########################################################################
# Now that the raw data is prepared, we can load a protein structure for
# which we will display the glycosylation.
# Here we choose the glycosylated peroxidase *4CUO*, as it contains a
# lot of glycans.
#
# The resulting plot makes only sense for a single protein chain.
# In this case the peroxidase structure has only one chain, but since
# this script should also work for any other structure, we filter out
# a single one.

# The structure and the chain to be displayed
PDB_ID = "4CUO"
CHAIN_ID = "A"

# Only the first model is used and bonds are included
mmtf_file = mmtf.MMTFFile.read(rcsb.fetch(PDB_ID, "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1, include_bonds=True)
structure = structure[structure.chain_id == CHAIN_ID]

# We will need these later:
# An array containing all residue IDs belonging to amino acids
# (all atoms that are not flagged as 'hetero' are considered here)
amino_acid_res_ids = np.unique(structure.res_id[~structure.hetero])
# A dictionary mapping residue IDs to their residue names
# (if a residue ID occurs multiple times, the last atom wins)
ids_to_names = {
    res_id: res_name
    for res_id, res_name in zip(structure.res_id, structure.res_name)
}

########################################################################
# To determine which residues (including the saccharides) are connected
# with each other, we will use a graph representation:
Пример #27
0
########################################################################
# As test case a structure of a *cysteine knot* protein is used,
# specifically the squash trypsin inhibitor *EETI-II*
# (PDB: `2IT7 <http://www.rcsb.org/structure/2IT7>`_).
# This motif is famous for its three characteristic disulfide bridges
# forming a 'knot'.
# However, the loaded MMTF file already has information about the
# covalent bonds - including the disulfide bridges.
# To have a proper test case, all disulfide bonds are removed from the
# structure and we pretend that the structure never had information
# about the disulfide bonds.
# For later verification that the implemented function works correctly,
# the disulfide bonds that are removed are printed out.

# Load the first model including its bonds
mmtf_file = mmtf.MMTFFile.read(
    rcsb.fetch("2IT7", "mmtf", biotite.temp_dir())
)
knottin = mmtf.get_structure(mmtf_file, include_bonds=True, model=1)
# Indices of the SG atoms of all cysteine residues
sulfide_indices = np.where(
    (knottin.res_name == "CYS") & (knottin.atom_name == "SG")
)[0]
# A bond between two of these atoms is a disulfide bond
# NOTE(review): bonds are removed while iterating over as_array();
# this presumably works because as_array() does not return a view
# on the bond list -- confirm
for i, j, _ in knottin.bonds.as_array():
    if i in sulfide_indices and j in sulfide_indices:
        print(knottin[i])
        print(knottin[j])
        print()
        knottin.bonds.remove_bond(i,j)

########################################################################
# Now the sanitized structure is put into the disulfide detection
# function.
Пример #28
0
and the selectivity filter of the channel protein KcsA (PDB: 2KB1).
The structure was resolved using NMR, so multiple models are present
in the structure.
Hence, we can also calculate the frequency of each bond.
"""

# Code source: Daniel Bauer
# License: BSD 3 clause

import biotite
import matplotlib.pyplot as plt
import biotite.structure as struc
import biotite.structure.io as strucio
import biotite.database.rcsb as rcsb

# Download and parse the structure
file_name = rcsb.fetch("2KB1", "mmtf", biotite.temp_dir())
stack = strucio.load_structure(file_name)
# Four identical chains, consider only chain A
# (the first index dimension keeps all models)
chain_a = stack[:, stack.chain_id == "A"]
# Selection for p-helix
p_helix = (chain_a.res_id >= 40) & (chain_a.res_id <= 52)
# Selection for selectivity filter
sf = (chain_a.res_id >= 53) & (chain_a.res_id <= 58)

# Calculate the hydrogen bonds and the frequency of each bond
# Only bonds between the two selections are searched
triplets, mask = struc.hbond(chain_a, selection1=p_helix, selection2=sf)
freq = struc.hbond_frequency(mask)

# Create names of bonds
# Template with placeholders for the residue ID, residue name and
# atom of two bond partners ('d_' and 'a_' prefix, presumably donor
# and acceptor -- verify)
label = "{d_resid}{d_resnm}-{d_a} -- {a_resid}{a_resnm}-{a_a}"
names = [label.format(
Пример #29
0
"""

# Code source: Tom David Müller
# License: BSD 3 clause

from tempfile import gettempdir
import biotite
import biotite.structure.io.pdb as pdb
import biotite.database.rcsb as rcsb
import biotite.structure as struc
import biotite.structure.graphics as graphics
import matplotlib.pyplot as plt
import numpy as np

# Download the PDB file and read the structure
pdb_file_path = rcsb.fetch("6ZYB", "pdb", gettempdir())
pdb_file = pdb.PDBFile.read(pdb_file_path)
# Use only the first model
atom_array = pdb.get_structure(pdb_file)[0]
nucleotides = atom_array[struc.filter_nucleotides(atom_array)]

# Compute the base pairs and the Leontis-Westhof nomenclature
# The glycosidic bonds and edges are computed before 'base_pairs' is
# converted into residue positions below
base_pairs = struc.base_pairs(nucleotides)
glycosidic_bonds = struc.base_pairs_glycosidic_bond(nucleotides, base_pairs)
edges = struc.base_pairs_edge(nucleotides, base_pairs)
base_pairs = struc.get_residue_positions(
    nucleotides, base_pairs.flatten()).reshape(base_pairs.shape)

# Get the one-letter-codes of the bases
# The residue name of the first atom of each residue is used
# (presumably already a one-letter-code for this structure -- verify)
base_labels = []
for base in struc.residue_iter(nucleotides):
    base_labels.append(base.res_name[0])
Пример #30
0
"""

# Code source: Patrick Kunzmann
# License: BSD 3 clause

import biotite
import biotite.structure as struc
import biotite.structure.io as strucio
import biotite.database.rcsb as rcsb
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colors
import scipy.stats as sts

# Download and parse file
file = rcsb.fetch("3vkh", "cif", biotite.temp_dir())
atom_array = strucio.load_structure(file)
# Calculate backbone dihedral angles
# from one of the two identical chains in the asymmetric unit
# ('omega' is not used in the lines below)
phi, psi, omega = struc.dihedral_backbone(
    atom_array[atom_array.chain_id == "A"])
# Conversion from radians into degree
phi *= 180 / np.pi
psi *= 180 / np.pi
# Remove invalid values (NaN) at first and last position
phi = phi[1:-1]
psi = psi[1:-1]

# Plot density
# A single subplot that fills the entire figure
figure = plt.figure()
ax = figure.add_subplot(111)