def test_plot_with_gc_content(tmpdir):
    """Plot a record's annotations above its local GC content and save a PNG.

    The figure has two stacked axes sharing the x axis: the top one receives
    the features of ``example_genbank``, the bottom one the windowed GC
    content. The result is written to ``with_plot.png`` inside ``tmpdir``.
    """
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 4), sharex=True)

    # Parse the genbank file, plot annotations
    record = SeqIO.read(example_genbank, "genbank")
    graphic_record = BiopythonTranslator().translate_record(record)
    # Draw straight onto the top axis. An extra ax-less plot() call beforehand
    # would only produce a second figure that is never saved nor closed.
    graphic_record.plot(ax=ax1, with_ruler=False)

    # Plot the local GC content
    def plot_local_gc_content(record, window_size, ax):
        def gc_content(seq):
            # Percentage of G/C characters in the window
            return 100.0 * sum(1 for c in seq if c in "GC") / len(seq)

        n_windows = len(record.seq) - window_size
        yy = [
            gc_content(record.seq[i:i + window_size])
            for i in range(n_windows)
        ]
        # Centre each value on its window instead of assuming 50-nt windows
        xx = np.arange(n_windows) + window_size // 2
        ax.fill_between(xx, yy, alpha=0.3)
        ax.set_ylabel("GC(%)")

    plot_local_gc_content(record, window_size=50, ax=ax2)

    # Resize the figure to the right height
    target_file = os.path.join(str(tmpdir), "with_plot.png")
    fig.tight_layout()
    fig.savefig(target_file)
    # Release the figure so repeated test runs do not accumulate open figures
    plt.close(fig)
def test_from_genbank(tmpdir):
    """Translate the example GenBank file directly and render it to a PNG.

    Checks that the translated record holds 11 features, then saves the plot
    as ``from_genbank.png`` inside ``tmpdir``.
    """
    translator = BiopythonTranslator()
    graphic_record = translator.translate_record(example_genbank)
    assert len(graphic_record.features) == 11

    # plot() returns a pair; only the ax is needed to reach the figure
    axis, _ = graphic_record.plot(figure_width=10)
    figure = axis.figure
    figure.tight_layout()
    figure.savefig(os.path.join(str(tmpdir), "from_genbank.png"))
Exemplo n.º 3
0
    def plot(self, ax=None):
        """Draw this fragment and its features on a Matplotlib ax.

        ``ax`` is forwarded to the graphic record's ``plot`` method (a new ax
        is created when none is provided), and the ax that was drawn on is
        returned.
        """
        translator = BiopythonTranslator()
        fragment_graphic = translator.translate_record(self)
        # The second element of the returned pair is not needed here
        drawn_ax, _ = fragment_graphic.plot(
            ax=ax,
            strand_in_label_threshold=7,
        )
        return drawn_ax
Exemplo n.º 4
0
def plot_seq(record, annot_residuei=8, title='', xlabel='', plotp=None):
    """Plot a record's features, sequence and translation, marking one codon.

    Parameters
    ----------

    record
      Record handed to ``BiopythonTranslator().translate_record``.

    annot_residuei
      Index of the residue to underline in red. The bar is drawn at y=-2 from
      x = 3*i - 3.5 to x = 3*i - 0.5, i.e. over the i-th codon when residues
      are counted from 1 in a frame starting at position 0.

    title, xlabel
      Title and x-axis label set on the ax.

    plotp
      Optional target passed to ``savefig``. When not None, the layout is
      tightened and the figure is saved in PNG format.
    """
    from dna_features_viewer import BiopythonTranslator
    graphic_record = BiopythonTranslator().translate_record(record)
    ax, _ = graphic_record.plot(
        figure_width=12.5,
        annotate_inline=True,
        level_offset=0.5,
    )
    graphic_record.plot_sequence(ax=ax)
    # The translation is only drawn for the region 0-45
    graphic_record.plot_translation(ax=ax, location=[0, 45])
    ax.plot([annot_residuei * 3 - 3.5, annot_residuei * 3 - 0.5], [-2, -2],
            lw=5,
            color='r')
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    if plotp is not None:
        # Act on this ax's own figure rather than on pyplot's "current"
        # figure, so the function does not depend on global pyplot state.
        ax.figure.tight_layout()
        ax.figure.savefig(plotp, format='png')
Exemplo n.º 5
0
def draw_features(rec):
    """Plot the features of ``rec`` on a wide figure titled with ``rec.id``.

    The figure is displayed through ``plt.show()``; nothing is returned.
    """
    from dna_features_viewer import BiopythonTranslator

    translator = BiopythonTranslator()
    record_graphic = translator.translate_record(rec)
    # The returned pair is not used; the title goes through pyplot below
    _ax, _ = record_graphic.plot(figure_width=20)
    plt.title(rec.id)
    plt.show()
Exemplo n.º 6
0
def generate_map():
    """Render ``Genome.gb`` as a circular map and save it as ``solution.jpg``."""
    genome = SeqIO.read("Genome.gb", "genbank")
    translator = BiopythonTranslator()
    circular_record = translator.translate_record(
        genome,
        record_class=CircularGraphicRecord,
    )
    circular_record.labels_spacing = 20

    # Square 15x15 figure with draw_line enabled
    plot_options = dict(figure_width=15, figure_height=15, draw_line=True)
    axis, _ = circular_record.plot(**plot_options)
    axis.figure.savefig("solution.jpg")
Exemplo n.º 7
0
import matplotlib.pyplot as plt
from dna_features_viewer import BiopythonTranslator
from Bio import SeqIO
import numpy as np

# Two stacked panels sharing the x axis: annotations on top, GC content below
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 4), sharex=True)

# Parse the genbank file, plot annotations
record = SeqIO.read("example_sequence.gb", "genbank")
graphic_record = BiopythonTranslator().translate_record(record)
# Draw straight onto the top panel. An extra ax-less plot() call beforehand
# would only open a second figure that is never saved nor closed.
graphic_record.plot(ax=ax1, with_ruler=False)


# Plot the local GC content
def plot_local_gc_content(record, window_size, ax):
    """Plot the sliding-window GC content of ``record.seq`` on ``ax``.

    Parameters
    ----------

    record
      Object whose ``seq`` attribute supports ``len()`` and slicing.

    window_size
      Number of nucleotides in each window.

    ax
      Ax-like object; its ``fill_between`` and ``set_ylabel`` methods are
      called.
    """
    def gc_content(seq):
        # Percentage of G/C characters in the window
        return 100.0 * sum(1 for c in seq if c in "GC") / len(seq)

    n_windows = len(record.seq) - window_size
    yy = [
        gc_content(record.seq[i:i + window_size])
        for i in range(n_windows)
    ]
    # Centre each value on its window; a fixed offset of 25 is only correct
    # for 50-nucleotide windows.
    xx = np.arange(n_windows) + window_size // 2
    ax.fill_between(xx, yy, alpha=0.3)
    ax.set_ylabel("GC(%)")


# Add the GC-content track (50-nucleotide windows) on the ax2 panel
plot_local_gc_content(record=record, window_size=50, ax=ax2)

# Let matplotlib fit the panels, then write the figure to disk
fig.tight_layout()
fig.savefig("with_plot.png")
Exemplo n.º 8
0
def full_assembly_report(
    parts,
    target,
    enzyme="BsmBI",
    max_assemblies=40,
    connector_records=(),
    include_fragments_plots="on_failure",
    include_parts_plots="on_failure",
    include_fragments_connection_graph="on_failure",
    include_assembly_plots=True,
    n_expected_assemblies=None,
    no_skipped_parts=False,
    fragments_filters="auto",
    assemblies_prefix="assembly",
    show_overhangs_in_graph=True,
    show_overhangs_in_genbank=True,
    mix_class="restriction",
):
    """Write a full assembly report in a folder or a zip.

    The report contains the final sequence(s) of the assembly in Genbank format
    as well as a .csv report on all assemblies produced and PDF figures
    to allow a quick overview or diagnostic.

    Folder ``assemblies`` contains the final assemblies (Genbank, plus an
    optional PDF plot each), file ``parts_graph.pdf`` is a schematic view of
    how the parts assemble together, optional folder ``fragments`` contains
    a plot of each fragment of the mix, optional folder ``provided_parts``
    contains the original input (genbank and cut plot of all parts provided
    for the assembly mix), optional file ``connections_graph.pdf`` is the
    fragments connections graph, and ``report.csv`` lists all assemblies.
    A file ``non_linear_nodes.csv`` is added when some node of the slots graph
    does not have exactly two neighbours.

    Parameters
    ----------

    parts
      List of Biopython records representing the parts, potentially on entry
      vectors. All the parts provided should have different attributes ``name``
      as it is used to name the files.

    target
      Either a path to a folder, or to a zip file, or ``@memory`` to return
      a string representing zip data (the latter is particularly useful for
      website backends).

    enzyme
      Name of the enzyme to be used in the assembly

    max_assemblies
      Maximal number of assemblies to consider. If there are more than this
      the additional ones won't be returned.

    connector_records
      List of connector records (a connector is a part that can bridge a gap
      between two other parts), from which only the essential elements to form
      an assembly will be automatically selected and added to the other parts.

    include_fragments_plots, include_parts_plots,
    include_fragments_connection_graph, include_assembly_plots
      These parameters control the rendering of extra figures which are
      great for troubleshooting, but not strictly necessary, and they slow
      down the report generation considerably. They can be True, False, or
      "on_failure" to be True only if no assembly was found or if the number
      of assemblies differs from ``n_expected_assemblies``.

    n_expected_assemblies
      Expected number of assemblies. No exception is raised if this number is
      not met, however, the parameters above which are set to "on_failure"
      will then trigger the extra plots.

    no_skipped_parts
      If True, a ``FragmentSetContainsPartsFilter`` built from the part names
      is passed as ``fragments_sets_filters`` when computing the assemblies.

    fragments_filters
      Fragments filters to be used to filter out fragments before looking for
      assemblies. If left to auto, fragments containing the enzyme site will
      be filtered out.

    assemblies_prefix
      Prefix for the file names of all assemblies. Spaces are replaced by
      underscores and the prefix is truncated to 18 characters. Files are
      named ``PRE_001.gb``, ``PRE_002.gb``, ... where ``PRE`` is the prefix,
      or simply ``PRE.gb`` when there is a single assembly.

    show_overhangs_in_graph
      Passed as ``with_overhangs`` when plotting the slots graph.

    show_overhangs_in_genbank
      Passed as ``annotate_homologies`` when computing the assemblies.

    mix_class
      Either "restriction" (then ``RestrictionLigationMix`` is used) or a
      class called as ``mix_class(parts, enzyme, fragments_filters=...)``.

    Returns
    -------

    n_constructs
      Number of assemblies listed in ``report.csv``. If ``target`` is
      ``@memory``, a tuple ``(n_constructs, zip_data)`` is returned instead.

    Raises
    ------

    ValueError
      If several parts share the same name.

    AssemblyError
      Re-raised from the connectors autoselection, after diagnostic graphs
      have been added to the report.
    """
    # Make prefix Genbank friendly
    assemblies_prefix = assemblies_prefix.replace(" ", "_")[:18]

    if mix_class == "restriction":
        mix_class = RestrictionLigationMix
    part_names = [p.name for p in parts]
    # Counter keys are already unique and keep first-seen order, so the error
    # message lists duplicated names deterministically.
    non_unique = [e for (e, count) in Counter(part_names).items() if count > 1]
    if non_unique:
        raise ValueError("All parts provided should have different names. "
                         "Assembly (%s) contains several times the parts %s " %
                         (" ".join(part_names), ", ".join(non_unique)))
    if fragments_filters == "auto":
        fragments_filters = [NoRestrictionSiteFilter(enzyme)]

    report = file_tree(target, replace=True)

    assemblies_dir = report._dir("assemblies")

    mix = mix_class(parts, enzyme, fragments_filters=fragments_filters)
    if len(connector_records):
        try:
            mix.autoselect_connectors(connector_records)
        except AssemblyError:
            # Leave diagnostic graphs in the report before propagating
            ax = mix.plot_slots_graph(
                with_overhangs=show_overhangs_in_graph,
                show_missing=True,
                highlighted_parts=part_names,
            )
            _save_pdf_and_close(ax, report._file("parts_graph.pdf"))

            # PLOT CONNEXIONS GRAPH (BIGGER, MORE INFOS)
            ax = mix.plot_connections_graph()
            _save_pdf_and_close(ax, report._file("connections_graph.pdf"))

            # Bare raise keeps the original traceback intact
            raise

    # ASSEMBLIES
    filters = (FragmentSetContainsPartsFilter(part_names), )
    assemblies = mix.compute_circular_assemblies(
        annotate_homologies=show_overhangs_in_genbank,
        fragments_sets_filters=filters if no_skipped_parts else (),
    )
    # Take at most max_assemblies items (zip stops as soon as the range is
    # exhausted), then sort by sequence so that numbering is reproducible.
    assemblies = sorted(
        [asm for (i, asm) in zip(range(max_assemblies), assemblies)],
        key=lambda asm: str(asm.seq),
    )

    # Resolve every "on_failure" flag before any optional plot is produced,
    # so that include_assembly_plots="on_failure" is honoured as well.
    is_failure = (len(assemblies) == 0) or (
        (n_expected_assemblies is not None)
        and (len(assemblies) != n_expected_assemblies))
    if include_assembly_plots == "on_failure":
        include_assembly_plots = is_failure
    if include_fragments_plots == "on_failure":
        include_fragments_plots = is_failure
    if include_parts_plots == "on_failure":
        include_parts_plots = is_failure
    if include_fragments_connection_graph == "on_failure":
        include_fragments_connection_graph = is_failure

    assemblies_data = []
    single_assembly = len(assemblies) == 1
    for i, asm in enumerate(assemblies):
        if single_assembly:
            name = assemblies_prefix
        else:
            name = "%s_%03d" % (assemblies_prefix, (i + 1))
        asm.name = asm.id = name
        assemblies_data.append(
            dict(
                assembly_name=name,
                parts=" & ".join([name_fragment(f_) for f_ in asm.fragments]),
                number_of_parts=len(asm.fragments),
                assembly_size=len(asm),
            ))
        write_record(asm, assemblies_dir._file(name + ".gb"), "genbank")
        if include_assembly_plots:
            gr_record = AssemblyTranslator().translate_record(asm)
            ax, _ = gr_record.plot(figure_width=16)
            ax.set_title(name)
            # Add one unit of headroom above the plotted features
            ax.set_ylim(top=ax.get_ylim()[1] + 1)
            _save_pdf_and_close(ax, assemblies_dir._file(name + ".pdf"))

    # PROVIDED PARTS
    if include_parts_plots:
        provided_parts_dir = report._dir("provided_parts")
        for part in parts:
            linear = record_is_linear(part, default=False)
            ax, _ = plot_cuts(part, enzyme, linear=linear)
            _save_pdf_and_close(
                ax, provided_parts_dir._file(part.name + ".pdf"))
            gb_file = provided_parts_dir._file(part.name + ".gb")
            write_record(part, gb_file, "genbank")

    # FRAGMENTS
    if include_fragments_plots:
        fragments_dir = report._dir("fragments")
        # Fragments can share a name: number them to keep file names unique
        seenfragments = defaultdict(int)
        for fragment in mix.fragments:
            gr = BiopythonTranslator().translate_record(fragment)
            ax, _ = gr.plot()
            name = name_fragment(fragment)
            seenfragments[name] += 1
            file_name = "%s_%02d.pdf" % (name, seenfragments[name])
            _save_pdf_and_close(ax, fragments_dir._file(file_name))

    # PLOT CONNEXIONS GRAPH (BIGGER, MORE INFOS)
    if include_fragments_connection_graph:
        ax = mix.plot_connections_graph()
        _save_pdf_and_close(ax, report._file("connections_graph.pdf"))

    # Slots whose node does not have exactly two neighbours in the slots graph
    graph = mix.slots_graph(with_overhangs=False)
    slots_dict = {
        s: "|".join(list(pts))
        for s, pts in mix.compute_slots().items()
    }
    non_linear_slots = [(slots_dict[n],
                         "|".join([slots_dict[b] for b in graph.neighbors(n)]))
                        for n in graph.nodes() if graph.degree(n) != 2]

    # PLOT SLOTS GRAPH
    if len(connector_records):
        highlighted_parts = part_names
    else:
        highlighted_parts = []
    ax = mix.plot_slots_graph(
        with_overhangs=show_overhangs_in_graph,
        show_missing=True,
        highlighted_parts=highlighted_parts,
    )
    _save_pdf_and_close(ax, report._file("parts_graph.pdf"))

    if len(non_linear_slots):
        report._file("non_linear_nodes.csv").write(
            "\n".join(["part,neighbours"] + [
                "%s,%s" % (part, neighbours)
                for part, neighbours in non_linear_slots
            ]))

    df = pandas.DataFrame.from_records(
        assemblies_data,
        columns=["assembly_name", "number_of_parts", "assembly_size", "parts"],
    )
    df.to_csv(report._file("report.csv").open("w"), index=False)
    n_constructs = len(df)
    if target == "@memory":
        return n_constructs, report._close()
    else:
        if isinstance(target, str):
            report._close()
        return n_constructs


def _save_pdf_and_close(ax, report_file):
    """Save the figure of ``ax`` as a tight PDF in ``report_file``, close it.

    ``report_file`` is a report file object exposing ``open("wb")``. The
    figure is closed right after saving so that figures do not pile up in
    memory while the report is generated.
    """
    ax.figure.savefig(report_file.open("wb"), format="pdf",
                      bbox_inches="tight")
    plt.close(ax.figure)
Exemplo n.º 9
0
                y="neg_log10_chi_square_pvalue",
                hue="mutation_type",
                data=pval_ann,
                style="mutation_type",
                markers=markers,
                palette=palette,
                ax=ax0)
# Draw the p-value scatter (styled and coloured by mutation type) on ax1
sns.scatterplot(
    data=pval_ann,
    x="POS",
    y="neg_log10_chi_square_pvalue",
    hue="mutation_type",
    style="mutation_type",
    markers=markers,
    palette=palette,
    ax=ax1,
)

# Feature track on ax2, then axis labels
record.plot(ax=ax2)
plt.xlabel('Position')
ax1.set_ylabel('Negative log10 p-value')
ax0.set_ylabel('')

# Move the ax2 panel up by 0.08 without changing its height
box = ax2.get_position()
box.y0 += .08
box.y1 += .08
ax2.set_position(box)

# ax1 and ax0 each get their own y range
ax1.set_ylim(10**-4.9, 17.49)
ax0.set_ylim(34.9, 45.01)
# ax1.set_yscale('log', basey=10)
ax1.legend(handles=legend_elements,
           loc="upper left",
           ncol=3,
           framealpha=0.5,
           fancybox=True,
Exemplo n.º 10
0
from dna_features_viewer import BiopythonTranslator

# The translator is given the GenBank file path directly
translator = BiopythonTranslator()
graphic_record = translator.translate_record("example_sequence.gb")
ax, _ = graphic_record.plot(figure_width=10)

# Tidy the layout, then export the figure as PNG
figure = ax.figure
figure.tight_layout()
figure.savefig("from_genbank.png")
Exemplo n.º 11
0
def full_assembly_report(parts,
                         target,
                         enzyme="BsmBI",
                         max_assemblies=40,
                         connector_records=(),
                         include_fragments=True,
                         include_parts=True,
                         fragments_filters='auto',
                         assemblies_prefix='assembly',
                         show_overhangs_in_graph=True,
                         show_overhangs_in_genbank=False,
                         mix_class="restriction"):
    """Write a full assembly report in a folder or a zip.

    The report contains the final sequence(s) of the assembly in Genbank format
    as well as a .csv report on all assemblies produced and PDF figures
    to allow a quick overview or diagnostic.

    Folder ``assemblies`` contains the final assemblies (Genbank and PDF plot),
    file ``parts_graph.pdf`` is a schematic view of how the parts assemble
    together, folder ``fragments`` contains a plot of each fragment of the
    mix, folder ``provided_parts`` contains the original input (genbank and
    cut plot of all parts provided for the assembly mix), and ``report.csv``
    lists all assemblies.

    Parameters
    ----------

    parts
      List of Biopython records representing the parts, potentially on entry
      vectors. All the parts provided should have different attributes ``name``
      as it is used to name the files.

    target
      Either a path to a folder, or to a zip file, or ``@memory`` to return
      a string representing zip data (the latter is particularly useful for
      website backends).

    enzyme
      Name of the enzyme to be used in the assembly

    max_assemblies
      Maximal number of assemblies to consider. If there are more than this
      the additional ones won't be returned.

    connector_records
      List of connector records (a connector is a part that can bridge a gap
      between two other parts), from which only the essential elements to form
      an assembly will be automatically selected and added to the other parts.

    include_fragments
      If True, a ``fragments`` folder with one plot per fragment is written.

    include_parts
      If True, a ``provided_parts`` folder with a plot and a genbank file per
      part is written.

    fragments_filters
      Fragments filters to be used to filter out fragments before looking for
      assemblies. If left to auto, fragments containing the enzyme site will
      be filtered out.

    assemblies_prefix
      Prefix for the file names of all assemblies. They will be named
      ``PRE_001.gb``, ``PRE_002.gb``, ... where ``PRE`` is the prefix, or
      simply ``PRE.gb`` when there is a single assembly.

    show_overhangs_in_graph
      Passed as ``with_overhangs`` when plotting the slots graph.

    show_overhangs_in_genbank
      Passed as ``annotate_homologies`` when computing the assemblies.

    mix_class
      Either "restriction" (then ``RestrictionLigationMix`` is used) or a
      class called as ``mix_class(parts, enzyme, fragments_filters=...)``.

    Returns
    -------

    n_constructs
      Number of assemblies listed in ``report.csv``. If ``target`` is
      ``@memory``, a tuple ``(n_constructs, zip_data)`` is returned instead.

    Raises
    ------

    ValueError
      If several parts share the same name.
    """

    if mix_class == "restriction":
        mix_class = RestrictionLigationMix
    part_names = [p.name for p in parts]
    non_unique = [e for (e, count) in Counter(part_names).items() if count > 1]
    if non_unique:
        raise ValueError("All parts provided should have different names. "
                         "Assembly (%s) contains several times the parts %s " %
                         (" ".join(part_names), ", ".join(non_unique)))
    if fragments_filters == 'auto':
        fragments_filters = [NoRestrictionSiteFilter(enzyme)]

    report = file_tree(target, replace=True)

    assemblies_dir = report._dir("assemblies")

    mix = mix_class(parts, enzyme, fragments_filters=fragments_filters)
    if len(connector_records):
        mix.autoselect_connectors(connector_records)

    # PROVIDED PARTS
    if include_parts:
        provided_parts_dir = report._dir("provided_parts")
        for part in parts:
            # Records without a ``linear`` attribute are treated as circular
            linear = getattr(part, 'linear', False)
            ax, _ = plot_cuts(part, enzyme, linear=linear)
            f = provided_parts_dir._file(part.name + ".pdf").open('wb')
            ax.figure.savefig(f, format='pdf', bbox_inches="tight")
            plt.close(ax.figure)
            gb_file = provided_parts_dir._file(part.name + ".gb")
            SeqIO.write(part, gb_file.open('w'), 'genbank')

    # FRAGMENTS
    if include_fragments:
        fragments_dir = report._dir("fragments")
        # Fragments can share a name: number them to keep file names unique
        seenfragments = defaultdict(int)
        for fragment in mix.fragments:
            gr = BiopythonTranslator().translate_record(fragment)
            ax, _ = gr.plot()
            name = name_fragment(fragment)
            seenfragments[name] += 1
            file_name = "%s_%02d.pdf" % (name, seenfragments[name])
            ax.figure.savefig(fragments_dir._file(file_name).open('wb'),
                              format='pdf',
                              bbox_inches="tight")
            plt.close(ax.figure)

    # GRAPH
    ax = plot_slots_graph(mix,
                          with_overhangs=show_overhangs_in_graph,
                          show_missing=True)
    f = report._file('parts_graph.pdf')
    ax.figure.savefig(f.open('wb'), format='pdf', bbox_inches='tight')
    plt.close(ax.figure)

    # ASSEMBLIES
    assemblies = mix.compute_circular_assemblies(
        annotate_homologies=show_overhangs_in_genbank)
    # Take at most max_assemblies items (zip stops as soon as the range is
    # exhausted), then sort by sequence so that numbering is reproducible.
    assemblies = sorted(
        [asm for (i, asm) in zip(range(max_assemblies), assemblies)],
        key=lambda asm: str(asm.seq))
    assemblies_data = []
    single_assembly = len(assemblies) == 1
    for i, asm in enumerate(assemblies):
        if single_assembly:
            name = assemblies_prefix
        else:
            name = '%s_%03d' % (assemblies_prefix, (i + 1))
        assemblies_data.append(
            dict(name=name,
                 parts=" & ".join([name_fragment(f_) for f_ in asm.fragments]),
                 number_of_parts=len(asm.fragments),
                 assembly_size=len(asm)))
        SeqIO.write(asm,
                    assemblies_dir._file(name + '.gb').open('w'), 'genbank')
        gr_record = AssemblyTranslator().translate_record(asm)
        ax, _ = gr_record.plot(figure_width=16)
        ax.set_title(name)
        ax.figure.savefig(assemblies_dir._file(name + '.pdf').open('wb'),
                          format='pdf',
                          bbox_inches='tight')
        plt.close(ax.figure)
    df = pandas.DataFrame.from_records(
        assemblies_data,
        columns=['name', 'number_of_parts', 'assembly_size', 'parts'])
    # Hand pandas an open text handle, like every other write in this report,
    # rather than the report's file object itself.
    df.to_csv(report._file('report.csv').open('w'), index=False)
    n_constructs = len(df)
    if target == '@memory':
        return n_constructs, report._close()
    else:
        # Finalize reports written to a path (needed for zip archives)
        if isinstance(target, str):
            report._close()
        return n_constructs