def test_plot_with_gc_content(tmpdir):
    """Plot a record's annotations above its local GC content and save a PNG.

    The example GenBank record is drawn on the top ax of a two-row figure
    and a sliding-window GC profile on the bottom ax; the figure is then
    written to ``with_plot.png`` inside ``tmpdir``.
    """

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 4), sharex=True)

    # Parse the genbank file, plot annotations
    record = SeqIO.read(example_genbank, "genbank")
    graphic_record = BiopythonTranslator().translate_record(record)
    # Also exercise the default path where no ax is supplied.
    ax, levels = graphic_record.plot()
    graphic_record.plot(ax=ax1, with_ruler=False)

    # Plot the local GC content
    def plot_local_gc_content(record, window_size, ax):
        """Draw the GC percentage of every full window of ``record.seq``."""

        def gc_content(seq):
            return 100.0 * sum(1 for c in seq if c in "GCgc") / len(seq)

        # One value per full window, including the last one.
        n_windows = max(len(record.seq) - window_size + 1, 0)
        yy = [
            gc_content(record.seq[i:i + window_size])
            for i in range(n_windows)
        ]
        # Each value is placed at the centre of its window rather than at a
        # hard-coded offset, so the curve stays aligned for any window size.
        xx = np.arange(n_windows) + window_size // 2
        ax.fill_between(xx, yy, alpha=0.3)
        ax.set_ylabel("GC(%)")

    plot_local_gc_content(record, window_size=50, ax=ax2)

    # Resize the figure to the right height
    target_file = os.path.join(str(tmpdir), "with_plot.png")
    fig.tight_layout()
    fig.savefig(target_file)
def test_from_genbank(tmpdir):
    """Translate ``example_genbank`` directly and render the result to a PNG.

    ``example_genbank`` is handed to the translator as-is; the translated
    record must hold 11 features.
    """
    rendered = BiopythonTranslator().translate_record(example_genbank)
    n_features = len(rendered.features)
    assert n_features == 11
    axes, _ = rendered.plot(figure_width=10)
    axes.figure.tight_layout()
    png_path = os.path.join(str(tmpdir), "from_genbank.png")
    axes.figure.savefig(png_path)
def test_from_genbank_to_circular(tmpdir):
    """Translate ``example_genbank`` into a circular record and save a PNG."""
    circular = BiopythonTranslator().translate_record(
        example_genbank,
        record_class=CircularGraphicRecord,
    )
    axes, _ = circular.plot(figure_width=7)
    axes.figure.tight_layout()
    png_path = os.path.join(str(tmpdir), "from_genbank.png")
    axes.figure.savefig(png_path)
def test_plot_with_bokeh(tmpdir):
    """Render the example record with Bokeh and check the HTML file size.

    The HTML written to ``tmpdir`` must be longer than 5000 characters.
    """
    parsed = SeqIO.read(example_genbank, "genbank")
    graphic = BiopythonTranslator().translate_record(record=parsed)
    bokeh_plot = graphic.plot_with_bokeh(figure_width=8)
    html_path = os.path.join(str(tmpdir), "plot_with_bokeh.html")
    with open(html_path, "w+") as out:
        out.write(file_html(bokeh_plot, CDN, "Example Sequence"))
    with open(html_path, "r") as written:
        html = written.read()
        assert len(html) > 5000
def test_multiline_plot():
    """Plot a cropped record over several lines and check the figure height.

    The region (1700, 2200) is drawn with 100 nucleotides per line; the
    resulting figure height must lie strictly between 9.5 and 10.
    """
    whole_record = BiopythonTranslator().translate_record(example_genbank)
    region = whole_record.crop((1700, 2200))
    fig, axes = region.plot_on_multiple_lines(
        nucl_per_line=100,
        figure_width=12,
        plot_sequence=True,
    )
    height = fig.get_figheight()
    assert 9.5 < height < 10
Exemplo n.º 6
0
    def plot(self, ax=None):
        """Draw this fragment and its features on a Matplotlib ax.

        ``self`` is translated with a default ``BiopythonTranslator`` and
        plotted on ``ax``. When ``ax`` is None, a new ax is created. The ax
        that was drawn on is returned.
        """
        translated = BiopythonTranslator().translate_record(self)
        drawn_ax, _ = translated.plot(ax=ax, strand_in_label_threshold=7)
        return drawn_ax
def test_multipage_plot(tmpdir):
    """Write the region (1800, 2750) of the example record to a paged PDF."""
    whole_record = BiopythonTranslator().translate_record(example_genbank)
    region = whole_record.crop((1800, 2750))
    page_options = dict(
        pdf_target=os.path.join(str(tmpdir), "test.pdf"),
        nucl_per_line=70,
        lines_per_page=7,
        plot_sequence=True,
    )
    region.plot_on_multiple_pages(**page_options)
def test_plot_with_bokeh_no_labels(tmpdir):
    """Bokeh has a problem with empty lists of labels.

    Every feature label is cleared before plotting; the HTML written to
    ``tmpdir`` must still be longer than 5000 characters.
    """
    parsed = SeqIO.read(example_genbank, "genbank")
    graphic = BiopythonTranslator().translate_record(record=parsed)
    for unlabelled in graphic.features:
        unlabelled.label = None
    bokeh_plot = graphic.plot_with_bokeh(figure_width=8)
    html_path = os.path.join(str(tmpdir), "plot_with_bokeh.html")
    with open(html_path, "w+") as out:
        out.write(file_html(bokeh_plot, CDN, "Example Sequence"))
    with open(html_path, "r") as written:
        html = written.read()
        assert len(html) > 5000
Exemplo n.º 9
0
 def compute_feature_label(self, feature):
     """Return the label for ``feature``.

     Restriction sites get no label, CDS features get the fixed text
     "CDS here", and every other type uses the ``BiopythonTranslator``
     default.
     """
     kind = feature.type
     if kind == 'restriction_site':
         return None
     if kind == "CDS":
         return "CDS here"
     return BiopythonTranslator.compute_feature_label(self, feature)
Exemplo n.º 10
0
 def compute_feature_label(self, f):
     """Return the label for ``f``, depending on two outer-scope flags.

     ``show_locations`` and ``show_feature_labels`` are not defined in this
     method; they are read from an enclosing scope.

     - Features whose type is not "original" are labelled with their integer
       start position when ``show_locations`` is true.
     - "original" features use the ``BiopythonTranslator`` default label
       when ``show_feature_labels`` is true.
     - Everything else gets no label (None).
     """
     if f.type != "original" and show_locations:
         return str(int(f.location.start))
     elif show_feature_labels and f.type == "original":
         # The parent implementation is an instance method, so ``self`` must
         # be passed explicitly when calling it through the class.
         return BiopythonTranslator.compute_feature_label(self, f)
     else:
         return None
Exemplo n.º 11
0
 def compute_feature_label(self, feature):
     """Return the label for ``feature``.

     Source features are labelled with their concatenated "source"
     qualifier. Other features spanning more than 100 positions get the
     ``BiopythonTranslator`` default label, joined and passed through
     ``abreviate_string`` with 30. Shorter features get no label.
     """
     if self.is_source(feature):
         return "".join(feature.qualifiers["source"])
     span = abs(feature.location.end - feature.location.start)
     if span > 100:
         default = BiopythonTranslator.compute_feature_label(self, feature)
         return abreviate_string("".join(default), 30)
     return None
Exemplo n.º 12
0
 def compute_feature_label(self, feature):
     """Return the label for ``feature``.

     Features spanning more than 100 positions get the
     ``BiopythonTranslator`` default label passed through
     ``abreviate_string`` with 10. Shorter features are labelled with their
     "enzyme" qualifier, or None when they have none.
     """
     span = abs(feature.location.end - feature.location.start)
     if span > 100:
         default = BiopythonTranslator.compute_feature_label(self, feature)
         return abreviate_string(default, 10)
     return feature.qualifiers.get("enzyme", None)
Exemplo n.º 13
0
 def compute_feature_label(feature):
     """Return the label for ``feature``.

     Source features (per ``AssemblyTranslator.is_source``) return their
     "source" qualifier unchanged. Other features spanning more than 100
     positions return the ``BiopythonTranslator`` default label passed
     through ``abreviate_string`` with 30. Shorter features return None.
     """
     # NOTE(review): there is no ``self`` parameter and the parent is called
     # with the feature only -- presumably this is a static method written
     # against a static parent API; confirm against the installed
     # dna_features_viewer version.
     if AssemblyTranslator.is_source(feature):
         return feature.qualifiers['source']
     span = abs(feature.location.end - feature.location.start)
     if span > 100:
         default = BiopythonTranslator.compute_feature_label(feature)
         return abreviate_string(default, 30)
     return None
Exemplo n.º 14
0
 def compute_feature_label(self, f):
     """Return the label for ``f``; only some misc_feature items get one.

     Features flagged as edits (an "is_edit" qualifier equal to "true", or
     whose first item is "true") get no label. Otherwise the
     ``BiopythonTranslator`` default label is returned, but only for
     "misc_feature" features and only when that label is not the bare text
     "misc_feature".
     """
     edit_flag = f.qualifiers.get("is_edit", "false")
     # The qualifier is checked both as a whole and through its first item.
     if "true" in (edit_flag, edit_flag[0]):
         return None
     default_label = BiopythonTranslator.compute_feature_label(self, f)
     if f.type != "misc_feature" or default_label == "misc_feature":
         return None
     return default_label
def test_multipage_plot_with_translation(tmpdir):
    """Write a paged PDF of a cropped record, including a translation."""
    # Github issue 61
    whole_record = BiopythonTranslator().translate_record(example_genbank)
    region = whole_record.crop((1800, 2750))
    translation_options = dict(
        location=(1830, 1890),
        fontdict=dict(weight="bold"),
        long_form_translation=False,
    )
    pdf_path = os.path.join(str(tmpdir), "test_translation.pdf")
    region.plot_on_multiple_pages(
        pdf_target=pdf_path,
        nucl_per_line=66,
        lines_per_page=7,
        plot_sequence=True,
        translation_params=translation_options,
    )
Exemplo n.º 16
0
    def redraw(self, start=1, end=2000):
        """Redraw the features of ``self.rec`` between ``start`` and ``end``.

        The requested range is sanitised first: a negative ``start`` becomes
        1, a non-positive ``end`` becomes ``start + 2000``, the span is
        capped at 100000 positions and ``end`` is capped at the sequence
        length. Features of type "gene" and "source" are left out; the
        others are coloured from ``self.color_map`` (default "white").

        When fewer than 150 positions are shown, the sequence and its
        translation are drawn as well. The canvas is then redrawn, and
        ``self.view_range`` and the ``self.loclbl`` text are updated.

        Parameters
        ----------
        start
          First position of the range to display.
        end
          Last position of the range to display.
        """
        from dna_features_viewer import BiopythonTranslator

        ax = self.ax
        ax.clear()
        rec = self.rec
        length = len(rec.seq)
        if start < 0:
            start = 1
        if end <= 0:
            end = start + 2000
        if end - start > 100000:
            end = start + 100000
        if end > length:
            end = length
        translator = BiopythonTranslator(
            features_filters=(lambda f: f.type not in ["gene", "source"], ),
            features_properties=lambda f:
            {"color": self.color_map.get(f.type, "white")},
        )
        graphic_record = translator.translate_record(rec)
        cropped_record = graphic_record.crop((start, end))
        cropped_record.plot(strand_in_label_threshold=7, ax=ax)
        if end - start < 150:
            cropped_record.plot_sequence(ax=ax, location=(start, end))
            cropped_record.plot_translation(ax=ax,
                                            location=(start, end),
                                            fontdict={'weight': 'bold'})
        # Lay out the figure that owns this ax. The global pyplot "current
        # figure" is not guaranteed to be the one being redrawn.
        ax.figure.tight_layout()
        self.canvas.draw()
        self.view_range = end - start
        self.loclbl.setText(str(start) + '-' + str(end))
Exemplo n.º 17
0
def gene_plot(gbk_file, **kwargs):
    """Create a gene feature plot and return it as an HTML image snippet.

    Features of type "gene" and "source" are left out; the remaining ones
    are coloured by type (default "white"). Extra keyword arguments are
    forwarded to the record's ``plot`` call. The figure is encoded with
    ``fig_to_base64`` and embedded in a ``<pre><img ...></pre>`` string.
    """
    type_colors = {
        "rep_origin": "yellow",
        "CDS": Colors.cerulean,
        "regulatory": "red",
        "rRNA": Colors.light_cornflower_blue,
        "misc_feature": "lightblue",
    }

    def is_displayed(f):
        return f.type not in ["gene", "source"]

    def style_of(f):
        return {"color": type_colors.get(f.type, "white")}

    translator = BiopythonTranslator(
        features_filters=(is_displayed, ),
        features_properties=style_of,
    )
    graphic = translator.translate_record(gbk_file)
    ax, _ = graphic.plot(
        figure_width=300,
        strand_in_label_threshold=30,
        **kwargs
    )
    png_b64 = fig_to_base64(ax.figure).decode('utf-8')
    template = '<pre><img src="data:image/png;base64, {}"></pre>'
    return template.format(png_b64)
Exemplo n.º 18
0
def plot_seq(record, annot_residuei=8, title='', xlabel='', plotp=None):
    """Plot a record with its sequence, a translation and a red marker.

    Parameters
    ----------
    record
      Record handed to ``BiopythonTranslator().translate_record``.
    annot_residuei
      Index used to place a red horizontal marker, drawn at y = -2 from
      ``annot_residuei * 3 - 3.5`` to ``annot_residuei * 3 - 0.5``
      (presumably the codon of that 1-based residue -- confirm).
    title
      Title set on the ax.
    xlabel
      Label set on the x axis.
    plotp
      Target for saving the figure as PNG. Nothing is saved when None.
    """
    from dna_features_viewer import BiopythonTranslator

    graphic_record = BiopythonTranslator().translate_record(record)
    ax, _ = graphic_record.plot(
        figure_width=12.5,
        annotate_inline=True,
        level_offset=0.5,
    )
    graphic_record.plot_sequence(ax=ax)
    graphic_record.plot_translation(ax=ax, location=[0, 45])
    ax.plot([annot_residuei * 3 - 3.5, annot_residuei * 3 - 0.5], [-2, -2],
            lw=5,
            color='r')
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    if plotp is not None:
        # Lay out the figure that is about to be saved, not whichever
        # figure pyplot currently treats as active.
        ax.figure.tight_layout()
        ax.figure.savefig(plotp, format='png')
Exemplo n.º 19
0
import matplotlib.pyplot as plt
from dna_features_viewer import BiopythonTranslator
from Bio import SeqIO
import numpy as np

# Two rows of axes sharing the x axis; ax1 receives the annotations below
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 4), sharex=True)

# Parse the genbank file, plot annotations
record = SeqIO.read("example_sequence.gb", "genbank")
graphic_record = BiopythonTranslator().translate_record(record)
# NOTE(review): this call is given no ax, and its results (ax, levels) are
# not used in the visible script; presumably it draws on a separate new
# figure -- confirm whether it is still needed.
ax, levels = graphic_record.plot()
# Draw the annotations on the first ax, without the ruler
graphic_record.plot(ax=ax1, with_ruler=False)


# Plot the local GC content
def plot_local_gc_content(record, window_size, ax):
    """Plot the windowed GC content of a record on a Matplotlib ax.

    One GC percentage is computed for every full window of ``window_size``
    nucleotides and drawn at the centre of that window.

    Parameters
    ----------
    record
      Object whose ``seq`` attribute supports ``len()`` and slicing.
    window_size
      Number of nucleotides per window (at least 1).
    ax
      Matplotlib ax on which the GC profile is drawn.

    Raises
    ------
    ValueError
      If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    def gc_content(s):
        # Count G and C regardless of case
        return 100.0 * sum(1 for c in s if c in "GCgc") / len(s)

    # Number of full windows; zero when the sequence is shorter than one
    n_windows = max(len(record.seq) - window_size + 1, 0)
    yy = [
        gc_content(record.seq[i:i + window_size])
        for i in range(n_windows)
    ]
    # Centre each value on its window instead of using a fixed offset
    xx = np.arange(n_windows) + window_size // 2
    ax.fill_between(xx, yy, alpha=0.3)
    ax.set_ylabel("GC(%)")


# Draw the GC profile on ax2, using windows of 50 nucleotides
plot_local_gc_content(record, window_size=50, ax=ax2)

# Resize the figure to the right height, then write it to disk
fig.tight_layout()
fig.savefig("with_plot.png")
Exemplo n.º 20
0
def draw_features(rec):
    """Plot the features of ``rec`` and show the figure titled ``rec.id``."""
    from dna_features_viewer import BiopythonTranslator
    translator = BiopythonTranslator()
    graphic = translator.translate_record(rec)
    ax, _ = graphic.plot(figure_width=20)
    plt.title(rec.id)
    plt.show()
    label = None
    if f.type == "Mutagenesis":
        label = f.qualifiers["Note"][0]
    color = {
        "Mutagenesis": "firebrick",
        "Active site": "yellow",
        "Beta strand": "lightyellow",
        "Chain": "lightcyan",
        "Helix": "honeydew",
        "Initiator methionine": "white",
        "Metal binding": "lightsteelblue",
        "Turn": "moccasin",
    }.get(f.type, "white")
    return dict(color=color, label=label)


# GET THE RECORD FROM UNIPROT

# The response is used as a context manager so that the HTTP connection is
# closed as soon as the GFF text has been read into memory.
with urllib.request.urlopen(
    "https://www.uniprot.org/uniprot/P0A7B8.gff"
) as response:
    record_file = StringIO(response.read().decode())

# TRANSLATE AND PLOT THE RECORD

translator = BiopythonTranslator(features_properties=features_properties)
graphic_record = translator.translate_record(record_file)
ax, _ = graphic_record.plot(
    figure_width=15, max_label_length=100, elevate_outline_annotations=True,
)
ax.set_title("Mutation effects in P0A7B8", fontweight="bold", fontsize=16)
ax.figure.savefig("gff_record_from_the_web.png", bbox_inches="tight")
Exemplo n.º 22
0
 def compute_feature_label(self, f):
     """Return the default label of ``f`` cut down to its first 20 items."""
     # The parent implementation is an instance method, so ``self`` must be
     # passed explicitly when calling it through the class.
     return BiopythonTranslator.compute_feature_label(self, f)[:20]
Exemplo n.º 23
0
    def work(self):
        """Plot the features of every record in ``self.data.files``.

        Features are filtered by type and by qualifier text, styled with
        the default style plus any matching custom style, and each record is
        drawn on its own figure (linear or circular, optionally cropped).

        Returns a dict with two keys. When ``data.pdf_report`` is set,
        ``'pdf_report'`` holds a single base64 PDF entry containing all
        figures and ``'figures_data'`` is None. Otherwise ``'pdf_report'``
        is None and ``'figures_data'`` lists one SVG entry per figure.
        """
        self.logger(message="Reading Data...")
        data = self.data

        def split_terms(raw):
            """Split a comma-separated string into stripped, non-empty terms."""
            stripped = (term.strip() for term in raw.split(','))
            return [term for term in stripped if term != '']

        must_contain = split_terms(data.must_contain)
        must_not_contain = split_terms(data.must_not_contain)
        filter_feature_types = [t.lower() for t in data.keep_or_discard_types]

        def feature_text(f):
            """Join all qualifier values of a feature into one string."""
            return ", ".join(str(value) for value in f.qualifiers.values())

        def feature_filter(f):
            """Tell whether a feature passes the type and text filters."""
            ftype = f.type.lower()
            keep = data.keep_or_discard == 'keep'
            if filter_feature_types:
                in_types = ftype in filter_feature_types
                # Rejected when "keep" misses the type, or "discard" hits it
                if keep != in_types:
                    return False
            text = feature_text(f)
            if must_contain and not any(c in text for c in must_contain):
                return False
            if must_not_contain and any(c in text for c in must_not_contain):
                return False
            return True

        def features_properties(f):
            """Compute the display properties of a feature."""
            properties = {
                'color': data.default_color,
                'linewidth': data.default_thickness,
            }
            if not data.default_display_label:
                properties['label'] = None

            def apply_style(style):
                properties['color'] = style.color
                properties['linewidth'] = style.thickness
                if style.display_label:
                    # Dropping the key undoes the 'label': None set above
                    properties.pop('label', '')

            ftype = f.type.lower()
            for style in data.custom_styles:
                wants_match = style.keep_or_discard == 'keep'
                if style.selector == 'text':
                    matched = style.feature_text in feature_text(f)
                    if matched == wants_match:
                        apply_style(style)
                if style.selector == 'type':
                    matched = ftype == style.feature_type.lower()
                    if matched == wants_match:
                        apply_style(style)
            return properties

        record_classes = {
            'linear': GraphicRecord,
            'circular': CircularGraphicRecord,
        }
        display_class = record_classes[data.display]

        translator = BiopythonTranslator(
            features_filters=(feature_filter, ),
            features_properties=features_properties,
        )
        records = records_from_data_files(data.files)
        figures = []
        for rec in self.logger.iter_bar(record=records):
            gr = translator.translate_record(rec, record_class=display_class)
            if not data.plot_full_sequence:
                gr = gr.crop((data.plot_from_position, data.plot_to_position))
            ax, _ = gr.plot(
                figure_width=data.plot_width,
                with_ruler=data.plot_ruler,
                annotate_inline=data.inline_labels,
            )
            if data.plot_nucleotides:
                gr.plot_sequence(ax)
            ax.figure.suptitle(rec.id)
            figures.append(ax.figure)

        if data.pdf_report:
            pdf_io = BytesIO()
            with PdfPages(pdf_io) as pdf:
                for fig in figures:
                    pdf.savefig(fig, bbox_inches="tight")
            encoded_pdf = b64encode(pdf_io.getvalue()).decode("utf-8")
            report = {
                'data': 'data:application/pdf;base64,' + encoded_pdf,
                'name': 'sequence_feature_plots.pdf',
                'mimetype': 'application/pdf',
            }
            return {'pdf_report': report, 'figures_data': None}

        # NOTE(review): files and figures are paired by position, which
        # presumably assumes one record per file -- confirm.
        figures_data = [
            {
                'img_data': matplotlib_figure_to_svg_base64_data(
                    fig, bbox_inches="tight"),
                'filename': _file.name,
            }
            for _file, fig in zip(data.files, figures)
        ]
        return {'pdf_report': None, 'figures_data': figures_data}
Exemplo n.º 24
0
"""
from Bio import Entrez, SeqIO
from dna_features_viewer import BiopythonTranslator

# DOWNLOAD THE PLASMID's RECORD FROM NCBI
# NOTE(review): the e-mail below looks like a redacted placeholder; replace
# it with a real contact address before running.
Entrez.email = "*****@*****.**"
# Fetch nucleotide entry 1473096477 as GenBank text
handle = Entrez.efetch(
    db="nucleotide", id=1473096477, rettype="gb", retmode="text"
)
# NOTE(review): the handle is not closed anywhere in the visible script.
record = SeqIO.read(handle, "genbank")

# CREATE THE GRAPHIC RECORD WITH DNA_FEATURES_VIEWER

# Colour for each feature type; types not listed here fall back to "white"
color_map = {
    "rep_origin": "yellow",
    "CDS": "orange",
    "regulatory": "red",
    "misc_recomb": "darkblue",
    "misc_feature": "lightblue",
}
# Leave out "gene" and "source" features and colour the rest by type
translator = BiopythonTranslator(
    features_filters=(lambda f: f.type not in ["gene", "source"],),
    features_properties=lambda f: {"color": color_map.get(f.type, "white")},
)
translator.max_line_length = 15
graphic_record = translator.translate_record(record)
ax, _ = graphic_record.plot(figure_width=8, strand_in_label_threshold=7)
ax.figure.savefig("translator_with_custom_colors.png", bbox_inches="tight")

Exemplo n.º 25
0
def plot_sequence_sites(
    sequence,
    enzymes_names,
    forbidden_enzymes=(),
    unique_sites=True,
    ax=None,
    figure_width=18,
    annotate_inline=True,
):
    """Plot where the enzymes' sites are located in the sequence.

    Non-unique and forbidden sites can be highlighted in red.

    Parameters
    ----------

    sequence
      The sequence of interest. ATGC string.

    enzymes_names
      List of names of the enzymes to plot.

    forbidden_enzymes
      The sites of these enzymes will also be plotted, but with a red
      background.

    unique_sites
      If true, for each enzyme in ``enzymes_names`` with more than one site
      in the sequence, these will be plotted on a red background.

    ax
      Matplotlib ax on which to draw the figure. If none is provided a new
      figure is created.

    figure_width
      Width of the figure if no ax is provided and a new figure is created.

    annotate_inline
      If True, the enzyme names will be written inside the annotations
      when possible, instead of above.

    Returns
    -------

    ax
      The Matplotlib ax returned by the record's ``plot`` call.
    """

    annotated_record = annotate_enzymes_sites(
        sequence,
        enzymes_names,
        forbidden_enzymes=forbidden_enzymes,
        unique_sites=unique_sites,
    )
    # A single properties dict is shared by every feature
    site_style = {
        "thickness": 10,
        "box_color": None,
        "fontdict": {
            "family": "Impact",
            "size": 7,
            "color": "black",
            "weight": "normal",
        },
    }

    def shared_style(f):
        return site_style

    translator = BiopythonTranslator(features_properties=shared_style)
    graphic = translator.translate_record(annotated_record)
    graphic.labels_spacing = 1
    drawn_ax, _ = graphic.plot(
        figure_width=figure_width,
        annotate_inline=annotate_inline,
        ax=ax,
    )
    return drawn_ax
Exemplo n.º 26
0
def generate_map():
    """Read ``Genome.gb``, plot it as a circular map, save ``solution.jpg``."""
    genome = SeqIO.read("Genome.gb", "genbank")
    translator = BiopythonTranslator()
    circular_map = translator.translate_record(
        genome,
        record_class=CircularGraphicRecord,
    )
    circular_map.labels_spacing = 20
    ax, _ = circular_map.plot(
        figure_width=15,
        figure_height=15,
        draw_line=True,
    )
    ax.figure.savefig("solution.jpg")
Exemplo n.º 27
0
def full_assembly_report(
    parts,
    target,
    enzyme="BsmBI",
    max_assemblies=40,
    connector_records=(),
    include_fragments_plots="on_failure",
    include_parts_plots="on_failure",
    include_fragments_connection_graph="on_failure",
    include_assembly_plots=True,
    n_expected_assemblies=None,
    no_skipped_parts=False,
    fragments_filters="auto",
    assemblies_prefix="assembly",
    show_overhangs_in_graph=True,
    show_overhangs_in_genbank=True,
    mix_class="restriction",
):
    """Write a full assembly report in a folder or a zip.

    The report contains the final sequence(s) of the assembly in Genbank format
    as well as a .csv report on all assemblies produced and PDF figures
    to allow a quick overview or diagnostic.

    Folder ``assemblies`` contains the final assemblies, ``assembly_graph``
    contains a schematic view of how the parts assemble together, folder
    ``fragments`` contains the details of all fragments produced by the enzyme
    digestion, and folder ``provided_parts`` contains the original input
    (genbanks of all parts provided for the assembly mix).

    Parameters
    ----------

    parts
      List of Biopython records representing the parts, potentially on entry
      vectors. All the parts provided should have different attributes ``name``
      as it is used to name the files.

    target
      Either a path to a folder, or to a zip file, or ``@memory`` to return
      a string representing zip data (the latter is particularly useful for
      website backends).

    enzyme
      Name of the enzyme to be used in the assembly

    max_assemblies
      Maximal number of assemblies to consider. If there are more than this
      the additional ones won't be returned.

    connector_records
      List of connector records (a connector is a part that can bridge a gap
      between two other parts), from which only the essential elements to form
      an assembly will be automatically selected and added to the other parts.

    include_fragments_plots, include_parts_plots, include_fragments_connection_graph
      These parameters control the rendering of extra figures which are
      great for troubleshooting, but not strictly necessary, and they slow
      down the report generation considerably. They can be True, False, or
      "on_failure" to be True only if no assembly was found or if the number
      of assemblies differs from ``n_expected_assemblies``.

    include_assembly_plots
      If true, a PDF plot is written next to each assembly's Genbank file.
      This value is only tested for truthiness: "on_failure" is not resolved
      for it, so that string behaves like True.

    n_expected_assemblies
      Expected number of assemblies. No exception is raised if this number is
      not met, however, parameters left to "on_failure" will then be
      switched on and the corresponding extra plots will be produced.

    no_skipped_parts
      If true, a ``FragmentSetContainsPartsFilter`` built from the part names
      is passed as fragments sets filter when computing the assemblies.

    fragments_filters
      Fragments filters to be used to filter out fragments before looking for
      assemblies. If left to auto, fragments containing the enzyme site will
      be filtered out.

    assemblies_prefix
      Prefix for the file names of all assemblies. Spaces are replaced by
      underscores and the prefix is cut to 18 characters. A single assembly
      is named after the prefix alone; several assemblies are named
      ``PRE_001.gb``, ``PRE_002.gb``, ... where ``PRE`` is the prefix.

    show_overhangs_in_graph
      Passed as ``with_overhangs`` when plotting the slots graph.

    show_overhangs_in_genbank
      Passed as ``annotate_homologies`` when computing the assemblies.

    mix_class
      Either "restriction" (uses ``RestrictionLigationMix``) or a class
      called as ``mix_class(parts, enzyme, fragments_filters=...)``.

    Returns
    -------

    n_constructs
      Number of rows of the assemblies report. When ``target`` is
      ``@memory``, a tuple ``(n_constructs, data)`` is returned instead,
      where ``data`` is the value returned by closing the report.

    Raises
    ------

    ValueError
      If several parts share the same name.

    AssemblyError
      Re-raised when connectors autoselection fails, after the parts graph
      and connections graph have been written to the report.
    """
    # Make prefix Genbank friendly
    assemblies_prefix = assemblies_prefix.replace(" ", "_")[:18]

    # Resolve the string shortcut; any other value is used as the class itself
    if mix_class == "restriction":
        mix_class = RestrictionLigationMix
    part_names = [p.name for p in parts]
    # Names used by more than one part
    non_unique = [e for (e, count) in Counter(part_names).items() if count > 1]
    non_unique = list(set(non_unique))
    if len(non_unique) > 0:
        raise ValueError("All parts provided should have different names. "
                         "Assembly (%s) contains several times the parts %s " %
                         (" ".join(part_names), ", ".join(non_unique)))
    if fragments_filters == "auto":
        fragments_filters = [NoRestrictionSiteFilter(enzyme)]

    # NOTE(review): throughout this function, files obtained with
    # ``_file(...).open(...)`` are handed straight to the writers and are not
    # explicitly closed here -- confirm that closing the report handles this.
    report = file_tree(target, replace=True)

    assemblies_dir = report._dir("assemblies")

    mix = mix_class(parts, enzyme, fragments_filters=fragments_filters)
    if len(connector_records):
        try:
            mix.autoselect_connectors(connector_records)
        except AssemblyError as err:
            # Write the diagnostic graphs before propagating the error
            ax = mix.plot_slots_graph(
                with_overhangs=show_overhangs_in_graph,
                show_missing=True,
                highlighted_parts=part_names,
            )
            f = report._file("parts_graph.pdf")
            ax.figure.savefig(f.open("wb"), format="pdf", bbox_inches="tight")
            plt.close(ax.figure)

            # PLOT CONNEXIONS GRAPH (BIGGER, MORE INFOS)
            ax = mix.plot_connections_graph()
            f = report._file("connections_graph.pdf")
            ax.figure.savefig(f.open("wb"), format="pdf", bbox_inches="tight")
            plt.close(ax.figure)

            raise err

    # ASSEMBLIES
    filters = (FragmentSetContainsPartsFilter(part_names), )
    assemblies = mix.compute_circular_assemblies(
        annotate_homologies=show_overhangs_in_genbank,
        fragments_sets_filters=filters if no_skipped_parts else (),
    )
    # Zipping with range(max_assemblies) takes at most max_assemblies items;
    # the kept assemblies are then ordered by their sequence string.
    assemblies = sorted(
        [asm for (i, asm) in zip(range(max_assemblies), assemblies)],
        key=lambda asm: str(asm.seq),
    )
    assemblies_data = []
    i_asm = list(zip(range(max_assemblies), assemblies))
    for i, asm in i_asm:
        # A single assembly carries the bare prefix, several get a number
        if len(i_asm) == 1:
            name = assemblies_prefix
        else:
            name = "%s_%03d" % (assemblies_prefix, (i + 1))
        asm.name = asm.id = name
        assemblies_data.append(
            dict(
                assembly_name=name,
                parts=" & ".join([name_fragment(f_) for f_ in asm.fragments]),
                number_of_parts=len(asm.fragments),
                assembly_size=len(asm),
            ))
        write_record(asm, assemblies_dir._file(name + ".gb"), "genbank")
        if include_assembly_plots:
            gr_record = AssemblyTranslator().translate_record(asm)
            ax, gr = gr_record.plot(figure_width=16)
            ax.set_title(name)
            # Raise the upper y limit by one unit
            ax.set_ylim(top=ax.get_ylim()[1] + 1)
            ax.figure.savefig(
                assemblies_dir._file(name + ".pdf").open("wb"),
                format="pdf",
                bbox_inches="tight",
            )
            plt.close(ax.figure)

    # Failure: no assembly at all, or a count different from the expected one
    is_failure = (len(assemblies)
                  == 0) or ((n_expected_assemblies is not None) and
                            (len(assemblies) != n_expected_assemblies))
    # Resolve the "on_failure" settings now that the outcome is known
    if include_fragments_plots == "on_failure":
        include_fragments_plots = is_failure
    if include_parts_plots == "on_failure":
        include_parts_plots = is_failure
    if include_fragments_connection_graph == "on_failure":
        include_fragments_connection_graph = is_failure

    # PROVIDED PARTS
    if include_parts_plots:
        provided_parts_dir = report._dir("provided_parts")
        for part in parts:
            linear = record_is_linear(part, default=False)
            ax, gr = plot_cuts(part, enzyme, linear=linear)
            f = provided_parts_dir._file(part.name + ".pdf").open("wb")
            ax.figure.savefig(f, format="pdf", bbox_inches="tight")
            plt.close(ax.figure)
            gb_file = provided_parts_dir._file(part.name + ".gb")
            write_record(part, gb_file, "genbank")

    # FRAGMENTS
    if include_fragments_plots:
        fragments_dir = report._dir("fragments")
        # Counts how many times each fragment name was seen, to number files
        seenfragments = defaultdict(lambda *a: 0)
        for fragment in mix.fragments:
            gr = BiopythonTranslator().translate_record(fragment)
            ax, _ = gr.plot()
            name = name_fragment(fragment)
            seenfragments[name] += 1
            file_name = "%s_%02d.pdf" % (name, seenfragments[name])
            ax.figure.savefig(
                fragments_dir._file(file_name).open("wb"),
                format="pdf",
                bbox_inches="tight",
            )
            plt.close(ax.figure)

    # PLOT CONNEXIONS GRAPH (BIGGER, MORE INFOS)
    if include_fragments_connection_graph:
        ax = mix.plot_connections_graph()
        f = report._file("connections_graph.pdf")
        ax.figure.savefig(f.open("wb"), format="pdf", bbox_inches="tight")
        plt.close(ax.figure)

    graph = mix.slots_graph(with_overhangs=False)
    # Text form of each slot: its items joined with "|"
    slots_dict = {
        s: "|".join(list(pts))
        for s, pts in mix.compute_slots().items()
    }
    # Slots whose degree in the graph is not 2, with their neighbours
    non_linear_slots = [(slots_dict[n],
                         "|".join([slots_dict[b] for b in graph.neighbors(n)]))
                        for n in graph.nodes() if graph.degree(n) != 2]

    # PLOT SLOTS GRAPH
    # The provided parts are only highlighted when connectors are in play
    if len(connector_records):
        highlighted_parts = part_names
    else:
        highlighted_parts = []
    ax = mix.plot_slots_graph(
        with_overhangs=show_overhangs_in_graph,
        show_missing=True,
        highlighted_parts=highlighted_parts,
    )
    f = report._file("parts_graph.pdf")
    ax.figure.savefig(f.open("wb"), format="pdf", bbox_inches="tight")
    plt.close(ax.figure)

    if len(non_linear_slots):
        report._file("non_linear_nodes.csv").write(
            "\n".join(["part,neighbours"] + [
                "%s,%s" % (part, neighbours)
                for part, neighbours in non_linear_slots
            ]))

    # Summary table with one row per assembly
    df = pandas.DataFrame.from_records(
        assemblies_data,
        columns=["assembly_name", "number_of_parts", "assembly_size", "parts"],
    )
    df.to_csv(report._file("report.csv").open("w"), index=False)
    n_constructs = len(df)
    if target == "@memory":
        return n_constructs, report._close()
    else:
        # The report is only closed here when the target is a string
        if isinstance(target, str):
            report._close()
        return n_constructs
Exemplo n.º 28
0
from Bio import SeqIO
import numpy as np


def plot_local_gc_content(record, window_size, ax):
    """Plot windowed GC content on a designated Matplotlib ax.

    One GC percentage is computed for every full window of ``window_size``
    nucleotides and drawn at the centre of that window. The y axis starts
    at 0.

    Parameters
    ----------
    record
      Object whose ``seq`` attribute supports ``len()`` and slicing.
    window_size
      Number of nucleotides per window (at least 1).
    ax
      Matplotlib ax on which the GC profile is drawn.

    Raises
    ------
    ValueError
      If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    def gc_content(s):
        # Count G and C regardless of case
        return 100.0 * sum(1 for c in s if c in "GCgc") / len(s)

    # Number of full windows; zero when the sequence is shorter than one
    n_windows = max(len(record.seq) - window_size + 1, 0)
    yy = [
        gc_content(record.seq[i : i + window_size])
        for i in range(n_windows)
    ]
    # Centre each value on its window instead of using a fixed offset
    xx = np.arange(n_windows) + window_size // 2
    ax.fill_between(xx, yy, alpha=0.3)
    ax.set_ylim(bottom=0)
    ax.set_ylabel("GC(%)")


# Parse the GenBank file and translate it into a graphic record
# NOTE(review): ``plt`` and ``BiopythonTranslator`` are not imported in the
# import lines visible just above -- confirm they are imported elsewhere.
record = SeqIO.read("example_sequence.gb", "genbank")
translator = BiopythonTranslator()
graphic_record = translator.translate_record(record)

# Two rows of axes sharing the x axis: annotations on ax1, GC content on ax2
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 5), sharex=True)
# NOTE(review): this call is given no ax, and its results (ax, levels) are
# not used in the visible script; presumably it draws on a separate new
# figure -- confirm whether it is still needed.
ax, levels = graphic_record.plot()
graphic_record.plot(ax=ax1, with_ruler=False)
plot_local_gc_content(record, window_size=50, ax=ax2)

fig.tight_layout()  # Resize the figure to the right height
fig.savefig("with_gc_plot.png")
Exemplo n.º 29
0
    def compute_feature_label(self, feature):
        """Return the label for ``feature``, hiding homology annotations.

        Features whose "label" qualifier, converted to a string, contains
        "homology" get no label. All others use the ``BiopythonTranslator``
        default.
        """
        if "homology" in str(feature.qualifiers.get("label", '')):
            return None
        # The parent implementation is an instance method, so ``self`` must
        # be passed explicitly when calling it through the class.
        return BiopythonTranslator.compute_feature_label(self, feature)
def test_from_record(tmpdir):
    """Translate a loaded record carrying one extra annotation.

    The record loaded from ``example_genbank`` receives a blue "bla"
    annotation; the translated record must then hold 12 features.
    """
    loaded = load_record(example_genbank)
    annotate_biopython_record(loaded, label="bla", color="blue")
    translated = BiopythonTranslator().translate_record(loaded)
    n_features = len(translated.features)
    assert n_features == 12