Exemplo n.º 1
0
def plot_pb_scheme_alignment():
    """
    Plot an example alignment over the 16-letter alphabet ``a``-``p``,
    colored with a scheme generated from a custom substitution matrix.

    The substitution matrix is written to a temporary file, handed to
    the color scheme generator CLI and the resulting scheme is used
    to draw the alignment read from ``PB_EXAMPLE_FILE_NAME``.

    Returns
    -------
    fig : Figure
        The Matplotlib figure containing the alignment plot.
    """
    random.seed(1)
    scheme_file = biotite.temp_file("json")
    mat_file = biotite.temp_file("mat")
    with open(mat_file, "w") as matrix_out:
        # PB substitution matrix, adapted from PBxplore
        matrix_out.write("""
                a     b     c     d     e     f     g     h     i     j     k     l     m     n     o     p
            a  516   -59   113  -105  -411  -177   -27  -361    47  -103  -644  -259  -599  -372  -124   -83
            b  -59   541  -146  -210  -155  -310   -97    90   182  -128   -30    29  -745  -242  -165    22
            c  113  -146   360   -14  -333  -240    49  -438  -269  -282  -688  -682  -608  -455  -147     6
            d -105  -210   -14   221     5  -131  -349  -278  -253  -173  -585  -670 -1573 -1048  -691  -497
            e -411  -155  -333     5   520   185   186   138  -378   -70  -112  -514 -1136  -469  -617  -632
            f -177  -310  -240  -131   185   459   -99   -45  -445    83  -214   -88  -547  -629  -406  -552
            g  -27   -97    49  -349   186   -99   665   -99   -89  -118  -409  -138  -124   172   128   254
            h -361    90  -438  -278   138   -45   -99   632  -205   316   192  -108  -712  -359    95  -399
            i   47   182  -269  -253  -378  -445   -89  -205   696   186     8    15  -709  -269  -169   226
            j -103  -128  -282  -173   -70    83  -118   316   186   768   196     5  -398  -340  -117  -104
            k -644   -30  -688  -585  -112  -214  -409   192     8   196   568   -65  -270  -231  -471  -382
            l -259    29  -682  -670  -514   -88  -138  -108    15     5   -65   533  -131     8   -11  -316
            m -599  -745  -608 -1573 -1136  -547  -124  -712  -709  -398  -270  -131   241    -4  -190  -155
            n -372  -242  -455 -1048  -469  -629   172  -359  -269  -340  -231     8    -4   703    88   146
            o -124  -165  -147  -691  -617  -406   128    95  -169  -117  -471   -11  -190    88   716    58
            p  -83    22     6  -497  -632  -552   254  -399   226  -104  -382  -316  -155   146    58   609
            """)

    # Generate the color scheme for the custom alphabet and matrix
    cli_args = [
        "--alphabet", "abcdefghijklmnop",
        "--matrix", mat_file,
        "--contrast", "300",
        "--lmin", "65",
        "--lmax", "70",
        "-f", scheme_file,
    ]
    gecli.main(args=cli_args)

    scheme = graphics.load_color_scheme(scheme_file)
    colors = scheme["colors"]
    fig = plt.figure(figsize=(8.0, 5.0))
    ax = fig.gca()

    pb_alphabet = seq.LetterAlphabet("abcdefghijklmnop")
    fasta_file = fasta.FastaFile()
    fasta_file.read(PB_EXAMPLE_FILE_NAME)
    # The FASTA entries are gapped strings: the gaps define the trace,
    # while the sequences themselves are stored without gaps
    gapped_strings = list(fasta_file.values())
    sequences = []
    for gapped in gapped_strings:
        ungapped = gapped.replace("-", "")
        sequences.append(seq.GeneralSequence(pb_alphabet, ungapped))
    trace = align.Alignment.trace_from_strings(gapped_strings)
    alignment = align.Alignment(sequences, trace, score=None)

    graphics.plot_alignment_type_based(
        ax, alignment, symbols_per_line=60, spacing=2, color_scheme=colors
    )

    fig.tight_layout()
    return fig
Exemplo n.º 2
0
def plot_show_score():
    """
    Run the color scheme generator CLI with the ``--show-score``
    option and return the figure that is current in Matplotlib
    afterwards.
    """
    scheme_file = biotite.temp_file("json")
    cli_args = ["--seed", "0", "--show-score"]
    cli_args += ["--smin", "30", "--lmin", "60", "--lmax", "70"]
    cli_args += ["-f", scheme_file]
    gecli.main(args=cli_args)
    return plt.gcf()
Exemplo n.º 3
0
def test_fetch_single_file(as_file_like):
    """
    Fetch two protein entries as one FASTA download and check that
    exactly two sequences can be read from the result.

    If `as_file_like` is true, ``None`` is passed instead of a file
    name to ``fetch_single_file()``.
    """
    if as_file_like:
        file_name = None
    else:
        file_name = biotite.temp_file("fa")
    uids = ["1L2Y_A", "3O5R_A"]
    fetched = entrez.fetch_single_file(uids, file_name, "protein", "fasta")
    prot_seqs = fasta.get_sequences(fasta.FastaFile.read(fetched))
    assert len(prot_seqs) == 2
Exemplo n.º 4
0
def plot_high_contrast_scheme_alignment():
    """
    Generate a color scheme with ``--contrast 2000`` and pass the
    resulting scheme file to ``show_alignment()``.
    """
    scheme_file = biotite.temp_file("json")
    options = [
        "--seed", "0",
        "--contrast", "2000",
        "--lmin", "60",
        "--lmax", "75",
        "-f", scheme_file,
    ]
    gecli.main(args=options)
    return show_alignment(scheme_file)
Exemplo n.º 5
0
def plot_main_example_alignment():
    """
    Generate a color scheme based on the ``BLOSUM62`` matrix and pass
    the resulting scheme file to ``show_alignment()``.
    """
    scheme_file = biotite.temp_file("json")
    options = [
        "--seed", "0",
        "--matrix", "BLOSUM62",
        "--lmin", "60",
        "--lmax", "75",
        "-f", scheme_file,
    ]
    gecli.main(args=options)
    return show_alignment(scheme_file)
Exemplo n.º 6
0
def test_hbond_structure(pdb_id):
    """
    Check that ``struc.hbond()`` finds the same donor-hydrogen-acceptor
    triplets as ``mdtraj.baker_hubbard()`` for the given structure.
    """
    structure_path = join(data_dir("structure"), pdb_id + ".mmtf")
    array = load_structure(structure_path)

    # Restrict to amino acids, for consistency with the bonded
    # hydrogen detection in MDTraj
    amino_acid_filter = struc.filter_amino_acids(array)
    array = array[..., amino_acid_filter]

    # For consistency with MDTraj 'S' cannot be acceptor element
    # https://github.com/mdtraj/mdtraj/blob/master/mdtraj/geometry/hbond.py#L365
    acceptors = ("O", "N")
    if isinstance(array, struc.AtomArrayStack):
        # For a stack a second value is returned, which is not needed
        triplets, _ = struc.hbond(array, acceptor_elements=acceptors)
    else:
        triplets = struc.hbond(array, acceptor_elements=acceptors)

    # Write the filtered structure to a new PDB file, so that
    # inscode/altloc are treated consistently in MDTraj
    pdb_path = biotite.temp_file("pdb")
    save_structure(pdb_path, array)

    # Reference triplets from MDTraj
    import mdtraj
    traj = mdtraj.load(pdb_path)
    triplets_ref = mdtraj.baker_hubbard(traj, freq=0, periodic=False)

    # The order of the triplets may differ between both packages
    # -> compare them as sets
    found = {tuple(triplet) for triplet in triplets}
    expected = {tuple(triplet) for triplet in triplets_ref}
    assert found == expected
Exemplo n.º 7
0
def test_pdb_to_gro(path, single_model):
    """
    Converting a structure from PDB to GRO should not change its data.

    The structure is read from the PDB file at `path`, written as GRO
    file and read again. If `single_model` is true, only model 1 is
    used, otherwise ``model=None`` is passed.
    """
    model = None
    if single_model:
        model = 1

    # Reference structure from the PDB file
    pdb_file = pdb.PDBFile()
    pdb_file.read(path)
    ref_atoms = pdb_file.get_structure(model=model)

    # Write the structure to a temporary GRO file
    gro_path = biotite.temp_file("gro")
    gro_out = gro.GROFile()
    gro_out.set_structure(ref_atoms)
    gro_out.write(gro_path)

    # Read the structure back from the GRO file
    gro_in = gro.GROFile()
    gro_in.read(gro_path)
    test_atoms = gro_in.get_structure(model=model)

    assert ref_atoms.array_length() == test_atoms.array_length()

    for category in ("res_id", "res_name", "atom_name"):
        ref_values = ref_atoms.get_annotation(category).tolist()
        test_values = test_atoms.get_annotation(category).tolist()
        assert ref_values == test_values

    # Mind rounding errors when converting pdb to gro (A -> nm)
    ref_coord = ref_atoms.coord.flatten().tolist()
    test_coord = test_atoms.coord.flatten().tolist()
    assert ref_coord == approx(test_coord, abs=1e-2)
Exemplo n.º 8
0
def test_dihedral_backbone_result(file_name):
    """
    Compare the backbone dihedral angles from
    ``struc.dihedral_backbone()`` with the phi, psi and omega angles
    computed by MDTraj, chain by chain.
    """
    import mdtraj

    mmtf_file = mmtf.MMTFFile()
    mmtf_file.read(file_name)
    atoms = mmtf.get_structure(mmtf_file, model=1)
    atoms = atoms[struc.filter_amino_acids(atoms)]

    tolerance = {"abs": 1e-5, "rel": 5e-3}
    for chain in struc.chain_iter(atoms):
        print("Chain: ", chain.chain_id[0])
        if len(struc.check_id_continuity(chain)) > 0:
            # Do not test discontinuous chains
            # NOTE(review): this ends the entire test, so chains
            # following the first discontinuous one are not checked
            return
        phi, psi, omega = struc.dihedral_backbone(chain)

        # MDTraj reads the chain from a temporary PDB file
        pdb_path = biotite.temp_file("pdb")
        strucio.save_structure(pdb_path, chain)
        traj = mdtraj.load(pdb_path)
        # Each call returns a pair, whose second element holds the
        # angles; only the first entry of the angles is compared
        _, phi_per_frame = mdtraj.compute_phi(traj)
        _, psi_per_frame = mdtraj.compute_psi(traj)
        _, omega_per_frame = mdtraj.compute_omega(traj)
        ref_phi = phi_per_frame[0]
        ref_psi = psi_per_frame[0]
        ref_omega = omega_per_frame[0]

        # The first phi and the last psi/omega value are excluded
        # from the comparison with the MDTraj reference
        assert phi[1:] == pytest.approx(ref_phi, **tolerance)
        assert psi[:-1] == pytest.approx(ref_psi, **tolerance)
        assert omega[:-1] == pytest.approx(ref_omega, **tolerance)
Exemplo n.º 9
0
def plot_high_saturation_scheme_alignment():
    """
    Generate a color scheme with ``--smin 30`` and pass the resulting
    scheme file to ``show_alignment()``.
    """
    scheme_file = biotite.temp_file("json")
    options = [
        "--seed", "0",
        "--smin", "30",
        "--lmin", "55",
        "--lmax", "75",
        "-f", scheme_file,
    ]
    gecli.main(args=options)
    return show_alignment(scheme_file)
Exemplo n.º 10
0
def test_conversion_highlevel(path):
    """
    Test whether the high-level GenBank interface can properly read
    the locus, annotation and sequence from GenBank file and write
    these properties to a file, without data changing.
    """
    # The last two characters of the path are used as format name
    suffix = path[-2:]

    # Reference data from the original file
    original = gb.GenBankFile()
    original.read(path)
    ref_locus = gb.get_locus(original)
    ref_annot_seq = gb.get_annotated_sequence(original, format=suffix)

    # Write the reference data into a new temporary file
    written = gb.GenBankFile()
    gb.set_locus(written, *ref_locus)
    gb.set_annotated_sequence(written, ref_annot_seq)
    temp_file_name = biotite.temp_file("gb")
    written.write(temp_file_name)

    # Read the data back from the temporary file
    reloaded = gb.GenBankFile()
    reloaded.read(temp_file_name)
    test_locus = gb.get_locus(reloaded)
    test_annot_seq = gb.get_annotated_sequence(reloaded, format=suffix)

    assert test_locus == ref_locus
    assert test_annot_seq.sequence == ref_annot_seq.sequence
    assert test_annot_seq.annotation == ref_annot_seq.annotation
    assert test_annot_seq.sequence_start == ref_annot_seq.sequence_start
Exemplo n.º 11
0
def plot_no_green_scheme_alignment():
    """
    Generate a color scheme with ``--amin 0`` and pass the resulting
    scheme file to ``show_alignment()``.
    """
    scheme_file = biotite.temp_file("json")
    options = [
        "--seed", "0",
        "--amin", "0",
        "--lmin", "50",
        "--lmax", "80",
        "-f", scheme_file,
    ]
    gecli.main(args=options)
    return show_alignment(scheme_file)
Exemplo n.º 12
0
def test_conversion_highlevel(path):
    """
    Test whether the high-level GFF3 interface can properly read
    the features from a GFF3 file and write these properties to a file
    without data changing.
    The 'phase' is tested additionally, since it is not part of a
    `Feature` object.
    """
    def cds_phases(gff_file):
        # Collect the phase (8th field) of every entry whose
        # type (3rd field) is 'CDS'
        return [
            phase
            for _, _, feature_type, _, _, _, _, phase, _ in gff_file
            if feature_type == "CDS"
        ]

    # Reference data from the original file
    original = gff.GFFFile.read(join(data_dir("sequence"), path))
    ref_annot = gff.get_annotation(original)
    ref_phases = cds_phases(original)

    # Write the annotation into a new temporary file
    written = gff.GFFFile()
    gff.set_annotation(written, ref_annot)
    temp_file_name = biotite.temp_file("gff3")
    written.write(temp_file_name)

    # Read the data back from the temporary file
    reloaded = gff.GFFFile.read(temp_file_name)
    test_annot = gff.get_annotation(reloaded)
    test_phases = cds_phases(reloaded)

    assert ref_annot == test_annot
    assert test_phases == ref_phases
Exemplo n.º 13
0
def test_array_conversion(format):
    """
    Test whether a trajectory of the given `format` keeps its data
    after being written to a temporary file and read again.

    `format` is expected to be one of ``"trr"``, ``"xtc"``, ``"tng"``,
    ``"dcd"`` or ``"netcdf"``.
    """
    template = strucio.load_structure(join(data_dir, "1l2y.mmtf"))[0]
    # Add fake box
    template.box = np.diag([1, 2, 3])

    # Select the trajectory file class matching the format
    if format == "trr":
        traj_file_cls = trr.TRRFile
    elif format == "xtc":
        traj_file_cls = xtc.XTCFile
    elif format == "tng":
        traj_file_cls = tng.TNGFile
    elif format == "dcd":
        traj_file_cls = dcd.DCDFile
    elif format == "netcdf":
        traj_file_cls = netcdf.NetCDFFile

    # Reference structure from the original trajectory file
    source_file = traj_file_cls()
    source_file.read(join(data_dir, f"1l2y.{format}"))
    ref_array = source_file.get_structure(template)

    # Write the reference structure into a temporary file
    written_file = traj_file_cls()
    written_file.set_structure(ref_array)
    file_name = biotite.temp_file(format)
    written_file.write(file_name)

    # Read the structure back from the temporary file
    reloaded_file = traj_file_cls()
    reloaded_file.read(file_name)
    array = reloaded_file.get_structure(template)

    assert ref_array.bonds == array.bonds
    assert ref_array.equal_annotation_categories(array)
    assert ref_array.box == pytest.approx(array.box)
    assert ref_array.coord == pytest.approx(array.coord, abs=1e-2)
def create(pdb_id, directory, include_gro):
    """
    Download the structure files for `pdb_id` into `directory` and
    derive further file formats from them.

    The ``pdb``, ``cif`` and ``mmtf`` files are fetched (overwriting
    existing files). If the PDB file can be loaded, an ``.npz`` file is
    saved in addition and, if `include_gro` is true, a ``.gro`` file is
    created by calling the external ``editconf`` program.
    """
    # Create *.pdb, *.cif and *.mmtf
    for file_format in ("pdb", "cif", "mmtf"):
        rcsb.fetch(pdb_id, file_format, directory, overwrite=True)

    pdb_path = join(directory, pdb_id + ".pdb")
    try:
        array = strucio.load_structure(pdb_path)
    except biotite.InvalidFileError:
        # Structure probably contains multiple models with different
        # number of atoms
        # -> Cannot load AtomArrayStack
        # -> Skip writing GRO and NPZ file
        return

    # Create *.npz file
    strucio.save_structure(join(directory, pdb_id + ".npz"), array)

    if not include_gro:
        return

    # Create *.gro files using GROMACS
    # Clean PDB file -> remove inscodes and altlocs
    cleaned_file_name = biotite.temp_file("pdb")
    strucio.save_structure(cleaned_file_name, array)
    # Run GROMACS for file conversion, discarding its output
    gro_path = join(directory, pdb_id + ".gro")
    subprocess.run(
        ["editconf", "-f", cleaned_file_name, "-o", gro_path],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL
    )
Exemplo n.º 15
0
def plot_constrained_scheme_alignment():
    """
    Generate a color scheme with two ``-c`` constraints (for the
    symbols ``A`` and ``W``) and pass the resulting scheme file to
    ``show_alignment()``.
    """
    scheme_file = biotite.temp_file("json")
    options = [
        "--seed", "0",
        "-c", "A", "70", "0", "0",
        "-c", "W", "70", "-10", "-45",
        "--lmin", "60",
        "--lmax", "75",
        "-f", scheme_file,
    ]
    gecli.main(args=options)
    return show_alignment(scheme_file)
Exemplo n.º 16
0
def plot_show_example():
    """
    Run the color scheme generator CLI with the ``--show-example``
    option and return the figure that is current in Matplotlib
    afterwards.
    """
    random.seed(0)
    scheme_file = biotite.temp_file("json")
    cli_args = ["--show-example"]
    cli_args += ["--smin", "30", "--lmin", "60", "--lmax", "70"]
    cli_args += ["-f", scheme_file]
    gecli.main(args=cli_args)
    return plt.gcf()
Exemplo n.º 17
0
def test_fetch_single_file():
    """
    Fetch two protein entries into one temporary FASTA file and check
    that exactly two sequences can be read from it.
    """
    uids = ["1L2Y_A", "3O5R_A"]
    target = biotite.temp_file("fa")
    fetched = entrez.fetch_single_file(uids, target, "protein", "fasta")

    fasta_file = fasta.FastaFile()
    fasta_file.read(fetched)
    prot_seqs = fasta.get_sequences(fasta_file)
    assert len(prot_seqs) == 2
Exemplo n.º 18
0
def test_id_overflow():
    """
    Test writing an atom array with 100000 atoms and residues to a
    PDB file.

    Writing a plain PDB file is expected to emit a `UserWarning`,
    while the file must still be readable afterwards.
    Writing with ``hybrid36=True`` is expected to emit no warning at
    all and to encode the IDs of the last atom as hybrid-36 strings.
    """
    # Imported locally: only needed for the 'no warning' check below
    import warnings

    # Create an atom array >= 100k atoms
    length = 100000
    a = struc.AtomArray(length)
    a.coord = np.zeros(a.coord.shape)
    a.chain_id = np.full(length, "A")
    # Create residue IDs over 10000
    a.res_id = np.arange(1, length + 1)
    a.res_name = np.full(length, "GLY")
    a.hetero = np.full(length, False)
    a.atom_name = np.full(length, "CA")
    a.element = np.full(length, "C")

    # Write stack to pdb file and make sure a warning is thrown
    with pytest.warns(UserWarning):
        tmp_file_name = biotite.temp_file(".pdb")
        tmp_pdb_file = pdb.PDBFile()
        tmp_pdb_file.set_structure(a)
        tmp_pdb_file.write(tmp_file_name)

    # Assert file can be read properly
    a2 = io.load_structure(tmp_file_name)
    assert a2.array_length() == a.array_length()

    # Manually check if the written atom id is correct:
    # the last atom is expected to carry the written ID 1
    with open(tmp_file_name) as output:
        last_line = output.readlines()[-1]
    atom_id = int(last_line.split()[1])
    assert atom_id == 1

    # Write stack as hybrid-36 pdb file: no warning should be thrown
    # 'pytest.warns(None)' is deprecated since pytest 7 and an error in
    # pytest 8, hence the warnings are recorded via the stdlib instead
    with warnings.catch_warnings(record=True) as record:
        # Record every warning, regardless of the active filters
        warnings.simplefilter("always")
        tmp_file_name = biotite.temp_file(".pdb")
        tmp_pdb_file = pdb.PDBFile()
        tmp_pdb_file.set_structure(a, hybrid36=True)
        tmp_pdb_file.write(tmp_file_name)
    assert len(record) == 0, [str(w.message) for w in record]

    # Manually check if the output is written as correct hybrid-36
    with open(tmp_file_name) as output:
        last_line = output.readlines()[-1]
    fields = last_line.split()
    atom_id = fields[1]
    assert atom_id == "A0000"
    # The first character of this field is stripped before comparing
    # NOTE(review): presumably the chain ID fused with the residue ID
    # - confirm against the PDB column layout
    res_id = fields[4][1:]
    assert res_id == "BXG0"
Exemplo n.º 19
0
def test_numpy_objects():
    """
    Test whether the Msgpack encoder is able to handle NumPy values
    (e.g. np.float32) properly.
    """
    mmtf_file = mmtf.MMTFFile()
    mmtf_file["A float"] = np.float32(42.0)
    mmtf_file["A list"] = [np.int64(1), np.int64(2), np.int64(3)]
    # Use the NumPy scalar type 'np.bool_':
    # 'np.bool' was merely an alias for the builtin 'bool', which was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24, so it
    # neither tested a NumPy value nor works on those versions
    mmtf_file["A dictionary"] = {"a": np.bool_(True), "b": np.bool_(False)}
    # The test passes, if writing the file raises no exception
    mmtf_file.write(biotite.temp_file("mmtf"))
Exemplo n.º 20
0
def test_conversion_lowlevel(path):
    """
    Test whether the low-level GFF3 interface can properly read
    a GFF3 file and write a file, without data changing.
    """
    # Reference entries from the original file
    original = gff.GFFFile.read(join(data_dir("sequence"), path))
    ref_entries = list(original)

    # Append each entry to a new file and write it to a temporary file
    written = gff.GFFFile()
    for fields in ref_entries:
        written.append(*fields)
    temp_file_name = biotite.temp_file("gff3")
    written.write(temp_file_name)

    # Read the entries back from the temporary file
    reloaded = gff.GFFFile.read(temp_file_name)
    test_entries = list(reloaded)
    assert test_entries == ref_entries
Exemplo n.º 21
0
def create(pdb_id, directory, include_gro):
    """
    Download the ``pdb``, ``cif`` and ``mmtf`` files for `pdb_id` into
    `directory`.

    If `include_gro` is true, a ``.gro`` file is created in addition
    by calling the external ``gmx editconf`` program.
    """
    # Create *.pdb, *.cif and *.mmtf
    for file_format in ("pdb", "cif", "mmtf"):
        rcsb.fetch(pdb_id, file_format, directory)

    if not include_gro:
        return

    # Create *.gro files using GROMACS
    # Clean PDB file -> remove inscodes and altlocs
    array = strucio.load_structure(join(directory, pdb_id + ".pdb"))
    cleaned_file_name = biotite.temp_file("pdb")
    strucio.save_structure(cleaned_file_name, array)
    # Run GROMACS for file conversion, discarding its output
    gro_path = join(directory, pdb_id + ".gro")
    subprocess.run(
        ["gmx", "editconf", "-f", cleaned_file_name, "-o", gro_path],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL
    )
Exemplo n.º 22
0
def test_conversion_lowlevel(path):
    """
    Test whether the low-level GenBank interface can properly read
    a GenBank file and write a file, without data changing.
    """
    # Reference fields from the original file
    original = gb.GenBankFile()
    original.read(path)
    ref_parsed_fields = list(original)

    # Append each field to a new file and write it to a temporary file
    written = gb.GenBankFile()
    for field_name, field_content, field_subfields in ref_parsed_fields:
        written.append(field_name, field_content, field_subfields)
    temp_file_name = biotite.temp_file("gb")
    written.write(temp_file_name)

    # Read the fields back from the temporary file
    reloaded = gb.GenBankFile()
    reloaded.read(temp_file_name)
    test_parsed_fields = list(reloaded)
    assert test_parsed_fields == ref_parsed_fields
Exemplo n.º 23
0
def test_conversion(chars_per_line):
    """
    Test whether the content of a FASTQ file is unchanged after it has
    been written to a temporary file and read again.

    `chars_per_line` is passed on to every `FastqFile` that is created.
    """
    path = os.path.join(data_dir, "random.fastq")
    file1 = fastq.FastqFile(offset=33, chars_per_line=chars_per_line)
    file1.read(path)
    ref_content = dict(file1.items())

    file2 = fastq.FastqFile(offset=33, chars_per_line=chars_per_line)
    for identifier, (sequence, scores) in ref_content.items():
        file2[identifier] = sequence, scores
    temp_file_name = biotite.temp_file("fastq")
    file2.write(temp_file_name)

    file3 = fastq.FastqFile(offset=33, chars_per_line=chars_per_line)
    # Read the file that has just been written:
    # reading the original file again would merely compare the
    # reference with itself and leave the written file untested
    file3.read(temp_file_name)
    content = dict(file3.items())

    # No entry may be lost or added by the round trip
    assert set(content) == set(ref_content)
    for identifier, (ref_sequence, ref_scores) in ref_content.items():
        sequence, scores = content[identifier]
        assert ref_sequence == sequence
        assert np.array_equal(ref_scores, scores)
Exemplo n.º 24
0
def test_guess_elements():
    """
    Test whether the elements of a structure are restored when a PDB
    file that was written with empty element annotations is read.
    """
    # Reference structure from a valid PDB file
    ref_path = join(data_dir("structure"), "1l2y.pdb")
    stack = pdb.PDBFile.read(ref_path).get_structure()

    # Copy of the structure with all elements blanked out
    removed_stack = stack.copy()
    removed_stack.element[:] = ''

    # Save the structure without elements to a temporary file
    tmp_file_name = biotite.temp_file(".pdb")
    stripped_pdb_file = pdb.PDBFile()
    stripped_pdb_file.set_structure(removed_stack)
    stripped_pdb_file.write(tmp_file_name)

    # The structure read from this file must have the original elements
    guessed_stack = pdb.PDBFile.read(tmp_file_name).get_structure()
    assert guessed_stack.element.tolist() == stack.element.tolist()
Exemplo n.º 25
0
def test_gro_no_box():
    """
    .gro file format requires valid box parameters at the end of each
    model. However, if we read such a file in, the resulting object should not
    have an assigned box.
    """
    # Build a structure consisting of a single atom, without a box
    single_atom = Atom(
        [1, 2, 3], atom_name="CA", element="C", res_name="X", res_id=1
    )
    atoms = array([single_atom])

    # Write the structure to a temporary .gro file
    tmp_file_name = biotite.temp_file(".gro")
    io.save_structure(tmp_file_name, atoms)

    # Read the structure back from the file
    reloaded = gro.GROFile.read(tmp_file_name).get_structure()

    # The box must be absent, not a box with 0 dimension
    assert reloaded.box is None
Exemplo n.º 26
0
def test_gro_id_overflow():
    """
    Test whether a structure with 100005 atoms, each in its own
    residue, has the same number of atoms after being written to a
    .gro file and read again.
    """
    # Create an oversized AtomArray where atom_id > 100000 and res_id > 10000
    num_atoms = 100005
    atom_list = []
    for res_id in range(1, num_atoms + 1):
        atom_list.append(Atom(
            [1, 2, 3],
            atom_name="CA",
            element="C",
            res_name="X",
            res_id=res_id
        ))
    atoms = array(atom_list)
    atoms.box = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])

    # Write the structure to a temporary .gro file
    tmp_file_name = biotite.temp_file(".gro")
    io.save_structure(tmp_file_name, atoms)

    # Read the structure back from the file
    reloaded = gro.GROFile.read(tmp_file_name).get_structure()

    assert reloaded.array_length() == num_atoms
Exemplo n.º 27
0
def test_array_conversion(path, single_model):
    """
    Test whether a structure read from the MMTF file at `path` keeps
    its annotations, coordinates, bonds and box after being written to
    a temporary MMTF file and read again.

    If `single_model` is true, only model 1 is used, otherwise
    ``model=None`` is passed.
    """
    model = None
    if single_model:
        model = 1

    # Reference structure from the original file
    source_file = mmtf.MMTFFile()
    source_file.read(path)
    ref_atoms = mmtf.get_structure(
        source_file, model=model, include_bonds=True
    )

    # Write the reference structure into a temporary file
    written_file = mmtf.MMTFFile()
    mmtf.set_structure(written_file, ref_atoms)
    temp_file_name = biotite.temp_file("mmtf")
    written_file.write(temp_file_name)

    # Read the structure back from the temporary file
    reloaded_file = mmtf.MMTFFile()
    reloaded_file.read(temp_file_name)
    test_atoms = mmtf.get_structure(
        reloaded_file, model=model, include_bonds=True
    )

    for category in ref_atoms.get_annotation_categories():
        ref_values = ref_atoms.get_annotation(category).tolist()
        test_values = test_atoms.get_annotation(category).tolist()
        assert ref_values == test_values

    ref_coord = ref_atoms.coord.flatten().tolist()
    test_coord = test_atoms.coord.flatten().tolist()
    assert ref_coord == approx(test_coord, abs=1e-3)

    assert ref_atoms.bonds == test_atoms.bonds
    if ref_atoms.box is not None:
        assert np.allclose(ref_atoms.box, test_atoms.box)
Exemplo n.º 28
0
    r"$\sigma^{38}$": "rpoS",
})

# Find SwissProt entries for these genes in NCBI Entrez protein database
uids = []
for name, gene in genes.items():
    # Combine three criteria with '&': the gene name, SwissProt as
    # source database and E. coli K-12 as organism
    query =   entrez.SimpleQuery(gene, "Gene Name") \
            & entrez.SimpleQuery("srcdb_swiss-prot", "Properties") \
            & entrez.SimpleQuery("Escherichia coli K-12", "Organism")
    ids = entrez.search(query, "protein")
    # Only one entry per gene in E. coli K-12 is expected
    assert len(ids) == 1
    # Collect the UIDs of all genes for a combined download
    uids += ids
# Download corresponding GenBank files as single, merged file
file_name = entrez.fetch_single_file(uids,
                                     biotite.temp_file("gb"),
                                     "protein",
                                     ret_type="gb")

# Array that will hold for each of the genes and each of the 4 domains
# the first and last position
# The array is initially filled with -1, as the value -1 will indicate
# that the domain does not exist in the sigma factor
domain_pos = np.full((len(genes), 4, 2), -1, dtype=int)
# Array that will hold the total sequence length of each sigma factor
seq_lengths = np.zeros(len(genes), dtype=int)
# Read the merged file containing multiple GenBank entries
multi_file = gb.MultiFile()
multi_file.read(file_name)
# Iterate over each GenBank entry
for i, gb_file in enumerate(multi_file):
Exemplo n.º 29
0
It is basically very similar to using normal functions.

In the following sections you will get an overview over the mentioned
subpackages, so go and grab some tea and cookies and let us begin.

Preliminary note
----------------

The files used in this tutorial will be stored in a temporary directory.
The top level package :mod:`biotite` provides functionality to create
a temporary directory,
called ``.biotitetemp`` in your current working directory.
You can either obtain the path to this directory via
:func:`temp_dir` or directly create an unambiguous file name in this
directory using :func:`temp_file`.

At the end of the session, the temporary directory and all its
contents will be automatically deleted, so make sure to put the files
you want to keep somewhere else.
"""

from os.path import relpath
import biotite
# Create temporary directory
dir_path = biotite.temp_dir()
# Print the path relative to the current working directory
print(relpath(dir_path))
# Get a path to a temporary FASTA file
# This would also create the temporary directory,
# if it had not been created yet
file_path = biotite.temp_file("fasta")
print(relpath(file_path))
Exemplo n.º 30
0
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import biotite
import biotite.sequence as seq
import biotite.sequence.io.fasta as fasta
import biotite.sequence.align as align
import biotite.sequence.graphics as graphics
import biotite.database.entrez as entrez

# Generate example alignment
# (the same as in the bacterial luciferase example)
# Search the protein database for SwissProt entries of the gene 'luxA'
query =   entrez.SimpleQuery("luxA", "Gene Name") \
        & entrez.SimpleQuery("srcdb_swiss-prot", "Properties")
uids = entrez.search(query, db_name="protein")
# Download the sequences of all hits into one temporary FASTA file
file_name = entrez.fetch_single_file(uids,
                                     biotite.temp_file("fasta"),
                                     db_name="protein",
                                     ret_type="fasta")
fasta_file = fasta.FastaFile.read(file_name)
# Convert each sequence string of the FASTA file into a protein sequence
sequences = [seq.ProteinSequence(seq_str) for seq_str in fasta_file.values()]
# Align all sequences using the standard protein substitution matrix;
# the last two return values are not needed here
matrix = align.SubstitutionMatrix.std_protein_matrix()
alignment, order, _, _ = align.align_multiple(sequences, matrix)
# Order alignment according to the guide tree
alignment = alignment[:, order]
# Keep only a part of the alignment
# NOTE(review): presumably this selects the alignment columns
# 220 to 299 - confirm against the Alignment indexing rules
alignment = alignment[220:300]

# Get color scheme names
alphabet = seq.ProteinSequence.alphabet
schemes = [
    "rainbow", "clustalx", "flower", "blossom", "spring", "wither", "autumn",
    "sunset", "ocean", "zappo", "taylor", "buried", "hydrophobicity",