Example #1
def records_from_data_files(filepaths=None, folder=None):
    """Automatically convert files or a folder's content to Biopython records.
    """
    if folder is not None:
        filepaths = [f._path for f in flametree.file_tree(folder)._all_files]
    records = []
    for filepath in filepaths:
        filename = os.path.basename(filepath)
        if filename.lower().endswith("zip"):
            records += records_from_zip_file(filepath)
            continue
        recs, fmt = records_from_file(filepath)
        single_record = len(recs) == 1
        for i, record in enumerate(recs):
            name_no_extension = "".join(filename.split(".")[:-1])
            name = name_no_extension + ("" if single_record else ("%04d" % i))
            name = name.replace(" ", "_")
            UNKNOWN_IDS = [
                "None",
                "",
                "<unknown id>",
                ".",
                "EXPORTED",
                "<unknown name>",
                "Exported",
            ]
            if has_dna_alphabet:  # Biopython <1.78
                record.seq.alphabet = DNAAlphabet()
            record.annotations["molecule_type"] = "DNA"

            # Sorry for this part, it took a lot of "whatever works".
            # Keep your part names under 20 characters and simple, and
            # everything will be fine.
            if str(record.id).strip() in UNKNOWN_IDS:
                record.id = name
            if str(record.name).strip() in UNKNOWN_IDS:
                record.name = name
            record.file_name = name_no_extension
        records += recs
    return records
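A minimal usage sketch (file and folder names here are hypothetical): the function accepts either an explicit list of paths or a folder, and zip archives in the list are expanded transparently.

# Usage sketch; "backbone.gb", "inserts.zip" and "./sequences" are made up.
records = records_from_data_files(filepaths=["backbone.gb", "inserts.zip"])
records = records_from_data_files(folder="./sequences")
for record in records:
    print(record.id, record.annotations["molecule_type"])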
Example #2
    def one_backbone(self):
        self.logger(message="Reading Data...")
        data = self.data
        backbone = records_from_data_files([data.backbone])[0]
        inserts = [records_from_data_files([f])[0] for f in data.inserts]
        records = inserts + [backbone]
        for record in records:
            record.linear = False  # Trick
        if data.enzyme == "Autoselect":
            possible_enzymes = ["BsaI", "BsmBI", "BbsI"]
            data.enzyme = autoselect_enzyme(records, enzymes=possible_enzymes)
        zip_root = flametree.file_tree('@memory')
        for insert in inserts:
            record = swap_donor_vector_part(backbone, insert, data.enzyme)
            record.id = insert.id
            write_record(record, zip_root._file(autoname_genbank_file(record)),
                         'genbank')

        if len(inserts) == 1:
            f = zip_root._all_files[0]
            data = f.read('rb')
            return {
                'file': {
                    'data': data_to_html_data(data, 'genbank'),
                    'name': f._name,
                    'mimetype': 'application/genbank'
                },
                'success': 'true',
                'summary': 'Swapping successful!'
            }
        else:
            return {
                'file': {
                    'data': data_to_html_data(zip_root._close(), 'zip'),
                    'name': 'donor_swap_genbanks.zip',
                    'mimetype': 'application/zip'
                },
                'success': 'yeah!',
                'summary': 'none yet'
            }
Example #3
def records_from_zip_file(zip_file):
    zip_file = flametree.file_tree(file_to_filelike_object(zip_file))
    records = []
    for f in zip_file._all_files:
        ext = f._extension.lower()
        if ext in ['gb', 'fa', 'dna']:
            try:
                new_records, fmt = string_to_record(f.read())
            except Exception:
                content_stream = BytesIO(f.read('rb'))
                try:
                    record = snapgene_file_to_seqrecord(
                        fileobject=content_stream)
                    new_records, fmt = [record], 'snapgene'
                except Exception:
                    try:
                        parser = crazydoc.CrazydocParser(
                            ['highlight_color', 'bold', 'underline'])
                        new_records = parser.parse_doc_file(content_stream)
                        fmt = 'doc'
                    except Exception:
                        raise ValueError("Format not recognized for file " +
                                         f._path)

            single_record = len(new_records) == 1
            for i, record in enumerate(new_records):
                name = record.id
                if name in [
                        None, '', "<unknown id>", '.', ' ', "<unknown name>"
                ]:
                    number = ('' if single_record else ("%04d" % i))
                    name = f._name_no_extension.replace(" ", "_") + number
                name = name.split(".")[0]
                record.id = name
                record.name = name
                record.file_name = f._name_no_extension
            records += new_records
    print([(r.name, r.id) for r in records])
    return records
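For instance (archive name hypothetical), a zip mixing Genbank, FASTA and SnapGene files can be loaded in one call; files in unrecognized formats raise the ValueError above.

# Sketch: load every supported sequence file from a hypothetical archive.
records = records_from_zip_file("parts_archive.zip")
print([r.id for r in records])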
Example #4
    def validation_analysis(self, data, clones_observations):
        validations = clones_observations.validate_all_clones(
            min_band_cutoff=data.bandsRange[0],
            max_band_cutoff=data.bandsRange[1],
            relative_tolerance=data.tolerance)

        # CREATE A ZIP WITH VALIDATION REPORTS

        zip_root = flametree.file_tree('@memory')
        self.logger(message="Generating the validation report...")
        zip_root._file('validations.pdf').write(
            clones_observations.plot_all_validations_patterns(validations))
        if data.includeDigestionPlots:
            self.logger(message="Plotting cuts maps...")
            co = clones_observations
            pdf_data = plot_all_constructs_cuts_maps([
                (co.constructs_records[cst], digestion_)
                for cst, digestions in co.constructs_digestions.items()
                for digestion_ in digestions
            ])
            zip_root._file('digestions.pdf').write(pdf_data)

        self.logger(message="Generating the success plate map...")
        ax = clones_observations.plot_validations_plate_map(validations)
        ax.figure.savefig(zip_root._file('success_map.pdf').open('wb'),
                          format='pdf',
                          bbox_inches='tight')

        self.logger(message="All done !")

        return {
            'zip_file': {
                'data': data_to_html_data(zip_root._close(), 'zip'),
                'name': 'validation_report.zip',
                'mimetype': 'application/zip'
            },
            'success': 'yeah!'
        }
Example #5
def generate_batch_report(groups,
                          target="@memory",
                          group_naming="group",
                          plot_format="pdf"):
    """Generate a report with CSV and plot describing a groups batch.

    Parameters
    ----------
    groups
      An (ordered) dict {group_name: [elements in the group]}.

    target
      Either path to a folder, or a zip file, or "@memory" to return raw
      data of a zip file containing the report.

    group_naming
      Word that will replace "group" in the report, e.g. "assembly", "team",
      etc.

    plot_format
      Format of the plot (pdf, png, jpeg, etc.).
    """
    root = flametree.file_tree(target)
    csv = ("%s,elements\n" % group_naming) + "\n".join([
        ",".join([group] + list(elements))
        for group, elements in groups.items()
    ])
    root._file("%ss.csv" % group_naming).write(csv)
    ax = plot_batch(groups)
    ax.set_title("Elements per %s" % group_naming)
    ax.figure.savefig(
        root._file("%ss.%s" % (group_naming, plot_format)).open("wb"),
        bbox_inches="tight",
        format=plot_format,
    )
    return root._close()
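A sketch of generating the report in-memory (group names and elements are made up; plot_batch is assumed importable from the same module):

from collections import OrderedDict

groups = OrderedDict([
    ("batch_1", ["partA", "partB"]),
    ("batch_2", ["partC"]),
])
# With target="@memory" the function returns the zip archive's raw bytes.
zip_data = generate_batch_report(groups, target="@memory",
                                 group_naming="batch")
with open("batch_report.zip", "wb") as f:
    f.write(zip_data)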
Example #6
import os
from dnacauldron import load_record
import flametree

data_path = os.path.join("app", "data")
data_dir = flametree.file_tree(data_path)

connector_records = [
    load_record(f._path, linear=False, id=f._name_no_extension)
    for f in data_dir.genbank.connectors._all_files if f._extension == "gb"
]

backbone = load_record(data_dir.genbank.hc_amp_backbone_gb._path,
                       linear=False,
                       id='hc_amp_backbone')
backbone.is_backbone = True
Example #7
    def work(self):
        self.logger(message="Reading Data...")
        data = self.data

        source_filelike = file_to_filelike_object(data.source_plate)
        source = plate_from_content_spreadsheet(source_filelike)
        source.name = data.source_name

        if ((data.destination_plate is not None)
                and ((data.rearraying_type == 'map') or
                     (data.destination_type == 'existing'))):
            dest_filelike = file_to_filelike_object(data.destination_plate)
            destination = plate_from_content_spreadsheet(dest_filelike)
            destination.name = data.destination_name
        else:
            destination = get_plate_class(data.destination_size)()
            destination.name = data.destination_name

        if data.rearraying_type == 'map':
            picklist = PickList()
            for well in source.iter_wells():
                if well.is_empty:
                    continue
                part = (well.content.components_as_string())
                destination_well = destination.find_unique_well(
                    condition=lambda w: w.content.components_as_string(
                    ) == part)
                picklist.add_transfer(well, destination_well,
                                      destination_well.volume)
                destination_well.empty_completely()
            picklist.execute()
            picklist_to_tecan_evo_picklist_file(picklist,
                                                "rearray_2018-10-02.gwl")
            plate_to_content_spreadsheet(destination,
                                         "destination_after_picklist.xlsx")

        else:
            # Only the 'map' rearraying type is implemented in this excerpt;
            # other types would need to build their own picklist here.
            pass

        future_plates = picklist.execute(inplace=False)

        def text(w):
            txt = human_volume(w.content.volume)
            if 'construct' in w.data:
                txt = "\n".join([w.data.construct, txt])
            return txt

        plotter = PlateTextPlotter(text)
        ax, _ = plotter.plot_plate(future_plates[destination],
                                   figsize=(20, 8))

        ziproot = flametree.file_tree("@memory", replace=True)
        ax.figure.savefig(ziproot._file("final_mixplate.pdf").open('wb'),
                          format="pdf",
                          bbox_inches="tight")
        plt.close(ax.figure)
        # NOTE: picklist_data and assembly_plan are assumed to be defined
        # elsewhere in the full class; they are not built in this excerpt.
        picklist_to_assembly_mix_report(
            picklist,
            ziproot._file("assembly_mix_picklist_report.pdf").open('wb'),
            data=picklist_data)
        assembly_plan.write_report(
            ziproot._file("assembly_plan_summary.pdf").open('wb'))
        if data.dispenser_machine == 'labcyte_echo':
            picklist_to_labcyte_echo_picklist_file(
                picklist,
                ziproot._file("ECHO_picklist.csv").open('w'))
        else:
            picklist_to_tecan_evo_picklist_file(
                picklist,
                ziproot._file("EVO_picklist.gwl").open('w'))
        zip_data = ziproot._close()

        return {
            'file': {
                'data': data_to_html_data(zip_data, 'zip'),
                'name': 'assemblies.zip',
                'mimetype': 'application/zip'
            },
            'success': True
        }
Example #8
def write_no_solution_report(target,
                             problem,
                             error,
                             file_content=None,
                             file_path=None):
    """Write a report on incompatibility found in the problem's constraints.

    The report comprises a PDF of plots of the sequence (global constraints,
    local constraints around the problem) and an annotated genbank.

    Parameters
    ----------
    target
      Either a path to a folder, or a path to a zip archive, or "@memory" to
      return raw data of a zip archive containing the report.

    problem
      A DnaOptimizationProblem

    error
      A NoSolutionError (carries a message and a location)
    """
    if not MATPLOTLIB_AVAILABLE:
        raise ImportError(install_extras_message("Matplotlib"))
    if isinstance(target, str):
        root = flametree.file_tree(target, replace=True)
    else:
        root = target

    # TRANSFER THE ORIGINAL FILE
    file_hash = None
    if file_path is not None:
        if file_content is None:
            with open(file_path, "rb") as f:
                file_content = f.read()
        basename = os.path.basename(file_path)
        file_hash = hashlib.md5(file_content).hexdigest()[:8]
        root._file("_".join([file_hash, basename])).write(file_content)

    translator = SpecAnnotationsTranslator()
    with PdfPages(root._file("plots.pdf").open("wb")) as pdf_io:

        # PLOT GLOBAL LOCATION OF ERROR

        record = problem.to_record()
        translator = SpecAnnotationsTranslator()
        graphical_record = translator.translate_record(record)
        ax, _ = graphical_record.plot(figure_width=min(20, 0.3 * len(record)))
        if len(record) < 60:
            graphical_record.plot_sequence(ax)
        if error.location is None:
            raise error
        start, end, strand = error.location.to_tuple()
        ax.fill_between([start, end],
                        -10,
                        10,
                        zorder=-1000,
                        facecolor="#ffcccc")
        title = "\n".join(
            textwrap.wrap(
                "No solution found in zone [%d, %d]:%s" %
                (start, end, str(error)),
                width=120,
            ))
        ax.set_title(title, fontdict=TITLE_FONTDICT)
        pdf_io.savefig(ax.figure, bbox_inches="tight", alpha=0.5)
        plt.close(ax.figure)

        # CREATE AND SAVE THE LOCAL CONSTRAINTS BREACHES RECORD

        record = error.problem.to_record(
            with_original_spec_features=False,
            with_constraints=False,
            with_objectives=False,
        )

        start = max(0, error.location.start - 5)
        end = min(len(record), error.location.end + 4)
        focus_location = Location(start, end)

        def is_in_focus(location):
            return location.overlap_region(focus_location) is not None

        evals = error.problem.constraints_evaluations()
        passing = evals.filter("passing")
        record.features += passing.success_and_failures_as_features()
        failing = evals.filter("failing")
        record.features += failing.locations_as_features(
            label_prefix="BREACH", locations_filter=is_in_focus)
        SeqIO.write(
            record,
            root._file("local_constraints_breaches.gb").open("w"),
            "genbank",
        )

        # CREATE A FIGURE OF THE LOCAL CONSTRAINTS BREACHES AS A NEW PDF PAGE

        graphical_record = translator.translate_record(record)
        graphical_record = graphical_record.crop((start, end))
        figure_width = min(20, 0.3 * (end - start))
        ax, _ = graphical_record.plot(figure_width=figure_width)
        graphical_record.plot_sequence(ax)
        ax.set_title(
            "Local constraints breaches in [%d, %d]" % (start, end) +
            "     (green = passing constraints)",
            fontdict=TITLE_FONTDICT,
        )
        ax.set_ylim(top=ax.get_ylim()[1] + 1)
        pdf_io.savefig(ax.figure, bbox_inches="tight", alpha=0.5)
        plt.close(ax.figure)

    root._file("logs.txt").write(problem.logger.dump_logs())

    # returns zip data if target == '@memory'
    if isinstance(target, str):
        return root._close()
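In DnaChisel this report is typically written when constraint resolution fails; a hedged sketch of that flow, assuming a DnaOptimizationProblem named problem and DnaChisel's NoSolutionError:

# Sketch: catch the solver's error and dump an in-memory zip report.
try:
    problem.resolve_constraints()
except NoSolutionError as error:
    zip_data = write_no_solution_report("@memory", problem, error)
    with open("no_solution_report.zip", "wb") as f:
        f.write(zip_data)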
Example #9
def write_no_solution_report(target, problem, error):
    """Write a report on incompatibility found in the problem's constraints.

    The report comprises a PDF of plots of the sequence (global constraints,
    local constraints around the problem) and an annotated genbank.

    Parameters
    ----------
    target
      Either a path to a folder, or a path to a zip archive, or "@memory" to
      return raw data of a zip archive containing the report.

    problem
      A DnaOptimizationProblem

    error
      A NoSolutionError (carries a message and a location)
    """
    if not MATPLOTLIB_AVAILABLE:
        raise ImportError(install_extras_message("Matplotlib"))
    if isinstance(target, str):
        root = flametree.file_tree(target, replace=True)
    else:
        root = target
    translator = SpecAnnotationsTranslator()
    with PdfPages(root._file("plots.pdf").open("wb")) as pdf_io:

        # PLOT GLOBAL LOCATION OF ERROR

        record = problem.to_record()
        translator = SpecAnnotationsTranslator()
        graphical_record = translator.translate_record(record)
        ax, _ = graphical_record.plot(figure_width=min(20, 0.3*len(record)))
        if len(record) < 60:
            graphical_record.plot_sequence(ax)
        if error.location is None:
            raise error
        start, end, strand = error.location.to_tuple()
        ax.fill_between([start, end], -10, 10, zorder=-1000,
                        facecolor='#ffeeee')
        title = "\n".join(textwrap.wrap(
            "No solution found in zone [%d, %d]: %s" %
            (start, end, str(error)), width=120)
        )
        ax.set_title(title, fontdict=TITLE_FONTDICT)
        pdf_io.savefig(ax.figure, bbox_inches="tight", alpha=0.5)
        plt.close(ax.figure)

        # PLOT LOCAL CONSTRAINTS BREACHES

        evals = error.problem.constraints_evaluations()
        record = error.problem.to_record(
            with_original_spec_features=False,
            with_constraints=False, with_objectives=False)
        record.features += evals.filter('passing') \
                                .success_and_failures_as_features()
        record.features += evals.filter('failing') \
                                .locations_as_features(label_prefix="BREACH")
        start = max(0, error.location.start - 5)
        end = min(len(record), error.location.end + 4)
        graphical_record = translator.translate_record(record)
        graphical_record = graphical_record.crop((start, end))
        ax, _ = graphical_record.plot(figure_width=min(20, 0.3*(end - start)))
        graphical_record.plot_sequence(ax)
        ax.set_title("Local constraints breaches in [%d, %d]" % (start, end) +
                     "     (green = passing constraints)",
                     fontdict=TITLE_FONTDICT)
        pdf_io.savefig(ax.figure, bbox_inches="tight", alpha=0.5)
        plt.close(ax.figure)

        # WRITE GENBANK

        record = problem.to_record(with_original_spec_features=False,
                                   with_constraints=True,
                                   with_objectives=True)
        evals = problem.constraints_evaluations()
        record.features += evals.filter('passing') \
                                .success_and_failures_as_features()
        record.features += evals.filter('failing') \
                                .locations_as_features(label_prefix="BREACH")
        SeqIO.write(record, root._file("constraints breaches.gb").open("w"),
                    "genbank")
    root._file('logs.txt').write(problem.logger.dump_logs())

    # returns zip data if target == '@memory'
    if isinstance(target, str):
        return root._close()
Example #10
    def work(self):
        self.logger(message="Reading Data...")
        data = self.data

        # Reading picklist

        picklist_filelike = file_to_filelike_object(data.picklist)
        if data.picklist.name.endswith('.csv'):
            csv = picklist_filelike.read().decode()
            rows = [l.split(',') for l in csv.split("\n") if len(l)]
        else:
            dataframe = pandas.read_excel(picklist_filelike)
            rows = [row for i, row in dataframe.iterrows()]
        assembly_plan = AssemblyPlan(OrderedDict([
            (row[0], [str(e).strip()
                      for e in row[1:]
                      if str(e).strip() not in ['-', 'nan', '']])
            for row in rows
            if row[0] not in ['nan', 'Construct name', 'constructs',
                              'construct']
        ]))
        for assembly, parts in assembly_plan.assemblies.items():
            assembly_plan.assemblies[assembly] = [
                part.replace(" ", "_") for part in parts
            ]

        # Reading part infos

        if len(data.parts_infos):
            first_file = data.parts_infos[0]
            if first_file.name.endswith(('.csv', '.xls', '.xlsx')):
                first_file_filelike = file_to_filelike_object(first_file)
                if first_file.name.endswith('.csv'):
                    dataframe = pandas.read_csv(first_file_filelike)
                else:
                    dataframe = pandas.read_excel(first_file_filelike)
                parts_data = {
                    row.part: {'size': row['size']}
                    for i, row in dataframe.iterrows()
                }
            else:
                records = records_from_data_files(data.parts_infos)
                if data.use_file_names_as_ids:
                    for r in records:
                        r.id = r.name = r.file_name
                parts_data = {
                    rec.id.replace(" ", "_"): {'record': rec}
                    for rec in records
                }
            assembly_plan.parts_data = parts_data
            parts_without_data = assembly_plan.parts_without_data()
            if len(parts_without_data):
                return {
                    'success': False,
                    'message': 'Some parts have no provided record or data.',
                    'missing_parts': parts_without_data
                }

        # Reading protocol
        
        if data.quantity_unit == 'fmol':
            part_mol = data.part_quantity * 1e-15
            part_g = None
        elif data.quantity_unit == 'nM':
            part_mol = data.part_quantity * data.total_volume * 1e-15
            part_g = None
        elif data.quantity_unit == 'ng':
            part_mol = None
            part_g = data.part_quantity * 1e-9

        self.logger(message='Generating picklist')

        picklist_generator = AssemblyPicklistGenerator(
            part_mol=part_mol,
            part_g=part_g,
            complement_to=data.total_volume * 1e-6,
            buffer_volume=data.buffer_volume * 1e-6,
            volume_rounding=2.5e-9,
            minimal_dispense_volume=5e-9
        )
        source_filelike = file_to_filelike_object(data.source_plate)
        source_plate = plate_from_content_spreadsheet(source_filelike)
        for well in source_plate.iter_wells():
            if well.is_empty:
                continue
            quantities = well.content.quantities
            part, quantity = list(quantities.items())[0]
            quantities.pop(part)
            quantities[part.replace(" ", "_")] = quantity

        source_plate.name = "Source"

        self.logger(message="Generating Picklist...")
        destination_plate = Plate4ti0960("Mixplate")
        if data.destination_plate:
            dest_filelike = file_to_filelike_object(data.destination_plate)
            destination_plate = plate_from_content_spreadsheet(dest_filelike)
        destination_wells = (
            well
            for well in destination_plate.iter_wells(direction='column')
            if well.is_empty
        )
        picklist, picklist_data = picklist_generator.make_picklist(
            assembly_plan,
            source_wells=source_plate.iter_wells(),
            destination_wells=destination_wells
        )
        if picklist is None:
            return {
                'success': False,
                'message': 'Some parts in the assembly plan have no '
                           'corresponding well.',
                'picklist_data': picklist_data,
                'missing_parts': picklist_data.get('missing_parts', None)
            }
        future_plates = picklist.execute(inplace=False)

        def text(w):
            txt = human_volume(w.content.volume)
            if 'construct' in w.data:
                txt = "\n".join([w.data.construct, txt])
            return txt
        plotter = PlateTextPlotter(text)
        ax, _ = plotter.plot_plate(future_plates[destination_plate],
                                   figsize=(20, 8))

        ziproot = flametree.file_tree("@memory", replace=True)

        # MIXPLATE MAP PLOT

        ax.figure.savefig(
            ziproot._file("final_mixplate.pdf").open('wb'),
            format="pdf",
            bbox_inches="tight")
        plt.close(ax.figure)
        plate_to_platemap_spreadsheet(
            future_plates[destination_plate],
            lambda w: w.data.get('construct', ''),
            filepath=ziproot._file('final_mixplate.xls').open('wb'))

        self.logger(message="Writing report...")

        # ASSEMBLY REPORT

        picklist_to_assembly_mix_report(
            picklist,
            ziproot._file("assembly_mix_picklist_report.pdf").open('wb'),
            data=picklist_data)
        assembly_plan.write_report(
            ziproot._file("assembly_plan_summary.pdf").open('wb'))

        # MACHINE PICKLIST

        if data.dispenser_machine == 'labcyte_echo':
            picklist_to_labcyte_echo_picklist_file(
                picklist, ziproot._file("ECHO_picklist.csv").open('w'))
        else:
            picklist_to_tecan_evo_picklist_file(
                picklist, ziproot._file("EVO_picklist.gwl").open('w'))
        raw = file_to_filelike_object(data.source_plate).read()
        f = ziproot._file(data.source_plate.name)
        f.write(raw, mode='wb')
        zip_data = ziproot._close()

        return {
            'file': {
                'data': data_to_html_data(zip_data, 'zip'),
                'name': 'picklist.zip',
                'mimetype': 'application/zip'
            },
            'success': True
        }
Example #11
def load_records_from_files(files=None,
                            folder=None,
                            use_file_names_as_ids=False):
    """Automatically convert files or a folder's content to biopython records.

    Parameters
    ----------

    files
      A list of path to files. A ``folder`` can be provided instead.

    folder
      A path to a folder containing sequence files.

    use_file_names_as_ids
      If True, for every file containing a single record, the file name
      (without extension) will be set as the record's ID.
    """
    if files is not None:
        for file in files:
            if isinstance(file, str) and not os.path.exists(file):
                raise IOError("File %s not found" % file)

    if folder is not None:
        files = [f._path for f in flametree.file_tree(folder)._all_files]
    records = []
    for filepath in files:
        filename = os.path.basename(filepath)
        if filename.lower().endswith("zip"):
            records += _load_records_from_zip_file(
                filepath, use_file_names_as_ids=use_file_names_as_ids)
            continue
        recs, fmt = load_records_from_file(filepath)
        single_record = len(recs) == 1
        for i, record in enumerate(recs):
            name_no_extension = "".join(filename.split(".")[:-1])
            name = name_no_extension + ("" if single_record else ("%04d" % i))
            name = name.replace(" ", "_")
            UNKNOWN_IDS = [
                "None",
                "",
                "<unknown id>",
                ".",
                "EXPORTED",
                "<unknown name>",
                "Exported",
            ]

            if has_dna_alphabet:  # Biopython <1.78
                record.seq.alphabet = DNAAlphabet()
            record.annotations["molecule_type"] = "DNA"

            # Sorry for this part, it took a lot of "whatever works".
            # Keep your part names under 20 characters and simple, and
            # everything will be fine.
            if str(record.id).strip() in UNKNOWN_IDS:
                record.id = name
            if str(record.name).strip() in UNKNOWN_IDS:
                record.name = name
            record.file_name = name_no_extension
            if use_file_names_as_ids and single_record:
                basename = os.path.basename(record.source_file)
                basename_no_extension = os.path.splitext(basename)[0]
                record.id = basename_no_extension
        records += recs
    return records
Example #12
def full_assembly_plan_report(assembly_plan,
                              target,
                              part_records=None,
                              enzyme="autoselect",
                              assert_single_assemblies=True,
                              logger="bar",
                              connector_records=(),
                              fail_silently=True,
                              errors_with_traceback=False,
                              **report_kwargs):
    """Makes a full report for a plan (list of single assemblies)

    Parameters
    ----------

    assembly_plan
      A list ``[('name', [parts])...]`` or a dict ``{name: [parts]}`` where
      the parts are either records, or simply part names (in that case you
      must provide the records in ``part_records``).

    part_records
      A dict {part_name: part_record}.

    target
      Either a path to a folder, or to a zip file, or ``@memory`` to return
      a string representing zip data (the latter is particularly useful for
      website backends).

    enzyme
      Name of the enzyme to be used in the assembly

    max_assemblies
      Maximal number of assemblies to consider. If there are more than this
      the additional ones won't be returned.

    fragments_filters
      Fragments filters to be used to filter out fragments before looking for
      assemblies. If left to auto, fragments containing the enzyme site will
      be filtered out.

    connector_records
      List of connector records (a connector is a part that can bridge a gap
      between two other parts), from which only the essential elements to form
      an assembly will be automatically selected and added to the other parts.

    **report_kwargs
      Any other parameter of ``full_assembly_report``. For instance:
      include_fragments_plots, include_parts_plot, include_assembly_plots

    Returns
    -------

    errored_assemblies, zip_data
      A list of errored assemblies with their errors, and binary zip data
      (or None if the target is not "@memory").
    """
    logger = default_bar_logger(logger)
    if isinstance(assembly_plan, list):
        assembly_plan = OrderedDict(assembly_plan)
    if isinstance(list(assembly_plan.values())[0][0], str):
        if not hasattr(part_records, "items"):
            part_records = {r.name: r for r in part_records}
        for part in list(part_records):
            part_records[part] = deepcopy(part_records[part])
            part_records[part].name = part_records[part].id = part
        assembly_plan = OrderedDict([(name, [part_records[p] for p in parts])
                                     for name, parts in assembly_plan.items()])
    root = file_tree(target, replace=True)
    all_records_folder = root._dir("all_records")
    errored_assemblies = []
    assemblies = list(assembly_plan.items())
    selected_enzymes = []  # Used to keep track of autoselected enzymes
    for asm_name, parts in logger.iter_bar(assembly=assemblies):
        if enzyme == "autoselect":
            selected_enzyme = autoselect_enzyme(parts)
            selected_enzymes.append((asm_name, selected_enzyme))
        else:
            selected_enzyme = enzyme
        asm_folder = root._dir(asm_name)
        try:
            n = full_assembly_report(
                parts,
                target=asm_folder,
                assemblies_prefix=asm_name,
                enzyme=selected_enzyme,
                connector_records=connector_records,
                n_expected_assemblies=1 if assert_single_assemblies else None,
                **report_kwargs)
            if assert_single_assemblies and (n != 1):
                raise ValueError("%s assemblies found instead of 1 for %s." %
                                 (n, asm_name))
            for f in asm_folder.assemblies._all_files:
                if f._extension == "gb":
                    f.copy(all_records_folder)
        except Exception as err:
            if fail_silently:
                err_string = str(err)
                if errors_with_traceback:
                    err_string += str(err.__traceback__)
                errored_assemblies.append((asm_name, err_string))
            else:
                raise err

    if len(errored_assemblies):
        root._file("errored_assemblies.txt").write("\n\n".join(
            ["%s: %s" % (name, error) for name, error in errored_assemblies]))
    f = root._file("assembly_plan.csv")
    f.write("construct, parts")
    all_parts = []
    for f_ in root._all_files:
        if f_._name_no_extension == "report":
            first_row = f_.read("r").split("\n")[1].split(",")
            if len(first_row) == 4:
                name, _, _, parts = first_row
                parts = parts.split(" & ")
                all_parts += parts
                f.write("\n" + ",".join([name] + parts))
    all_parts = sorted(set(all_parts))
    root._file("all_parts.csv").write(",\n".join(all_parts))
    if enzyme == "autoselect":
        root._file("selected_enzymes_per_construct.csv").write(",\n".join(
            [",".join(selection) for selection in selected_enzymes]))
    return errored_assemblies, root._close()
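A usage sketch with a toy plan (part names and my_part_records are hypothetical; records must be supplied via part_records because the plan lists part names rather than records):

# Two hypothetical constructs sharing a backbone part.
plan = [("construct_1", ["partA", "partB", "backbone"]),
        ("construct_2", ["partC", "partD", "backbone"])]
errors, zip_data = full_assembly_plan_report(
    plan, target="@memory", part_records=my_part_records,
    enzyme="autoselect")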
Example #13
def full_assembly_report(parts,
                         target,
                         enzyme="BsmBI",
                         max_assemblies=40,
                         connector_records=(),
                         include_fragments=True,
                         include_parts=True,
                         fragments_filters='auto',
                         assemblies_prefix='assembly',
                         show_overhangs_in_graph=True,
                         show_overhangs_in_genbank=False,
                         mix_class="restriction"):
    """Write a full assembly report in a folder or a zip.

    The report contains the final sequence(s) of the assembly in Genbank format
    as well as a .csv report on all assemblies produced and PDF figures
    to allow a quick overview or diagnostic.

    Folder ``assemblies`` contains the final assemblies, ``assembly_graph``
    contains a schematic view of how the parts assemble together, folder
    ``fragments`` contains the details of all fragments produced by the enzyme
    digestion, and folder ``provided_parts`` contains the original input
    (genbanks of all parts provided for the assembly mix).

    Parameters
    ----------

    parts
      List of Biopython records representing the parts, potentially on entry
      vectors. All the parts provided should have different attributes ``name``
      as it is used to name the files.

    target
      Either a path to a folder, or to a zip file, or ``@memory`` to return
      a string representing zip data (the latter is particularly useful for
      website backends).

    enzyme
      Name of the enzyme to be used in the assembly

    max_assemblies
      Maximal number of assemblies to consider. If there are more than this
      the additional ones won't be returned.

    fragments_filters
      Fragments filters to be used to filter out fragments before looking for
      assemblies. If left to auto, fragments containing the enzyme site will
      be filtered out.

    connector_records
      List of connector records (a connector is a part that can bridge a gap
      between two other parts), from which only the essential elements to form
      an assembly will be automatically selected and added to the other parts.

    assemblies_prefix
      Prefix for the file names of all assemblies. They will be named
      ``PRE01.gb``, ``PRE02.gb``, ``PRE03.gb``... where ``PRE`` is the prefix.
    """

    if mix_class == "restriction":
        mix_class = RestrictionLigationMix
    part_names = [p.name for p in parts]
    non_unique = [e for (e, count) in Counter(part_names).items() if count > 1]
    if len(non_unique) > 0:
        raise ValueError("All parts provided should have different names. "
                         "Assembly (%s) contains several times the parts %s " %
                         (" ".join(part_names), ", ".join(non_unique)))
    if fragments_filters == 'auto':
        fragments_filters = [NoRestrictionSiteFilter(enzyme)]

    report = file_tree(target, replace=True)

    assemblies_dir = report._dir("assemblies")

    mix = mix_class(parts, enzyme, fragments_filters=fragments_filters)
    if len(connector_records):
        mix.autoselect_connectors(connector_records)

    # PROVIDED PARTS
    if include_parts:
        provided_parts_dir = report._dir("provided_parts")
        for part in parts:
            linear = part.linear if hasattr(part, 'linear') else False
            ax, gr = plot_cuts(part, enzyme, linear=linear)
            f = provided_parts_dir._file(part.name + ".pdf").open('wb')
            ax.figure.savefig(f, format='pdf', bbox_inches="tight")
            plt.close(ax.figure)
            gb_file = provided_parts_dir._file(part.name + ".gb")
            SeqIO.write(part, gb_file.open('w'), 'genbank')

    # FRAGMENTS
    if include_fragments:
        fragments_dir = report._dir("fragments")
        seenfragments = defaultdict(lambda *a: 0)
        for fragment in mix.fragments:
            gr = BiopythonTranslator().translate_record(fragment)
            ax, pos = gr.plot()
            name = name_fragment(fragment)
            seenfragments[name] += 1
            file_name = "%s_%02d.pdf" % (name, seenfragments[name])
            ax.figure.savefig(fragments_dir._file(file_name).open('wb'),
                              format='pdf',
                              bbox_inches="tight")
            plt.close(ax.figure)

    # GRAPH
    ax = plot_slots_graph(mix,
                          with_overhangs=show_overhangs_in_graph,
                          show_missing=True)
    f = report._file('parts_graph.pdf')
    ax.figure.savefig(f.open('wb'), format='pdf', bbox_inches='tight')
    plt.close(ax.figure)

    # ASSEMBLIES
    assemblies = mix.compute_circular_assemblies(
        annotate_homologies=show_overhangs_in_genbank)
    assemblies = sorted(
        [asm for (i, asm) in zip(range(max_assemblies), assemblies)],
        key=lambda asm: str(asm.seq))
    assemblies_data = []
    i_asm = list(zip(range(max_assemblies), assemblies))
    for i, asm in i_asm:
        if len(i_asm) == 1:
            name = assemblies_prefix
        else:
            name = '%s_%03d' % (assemblies_prefix, (i + 1))
        assemblies_data.append(
            dict(name=name,
                 parts=" & ".join([name_fragment(f_) for f_ in asm.fragments]),
                 number_of_parts=len(asm.fragments),
                 assembly_size=len(asm)))
        SeqIO.write(asm,
                    assemblies_dir._file(name + '.gb').open('w'), 'genbank')
        gr_record = AssemblyTranslator().translate_record(asm)
        ax, gr = gr_record.plot(figure_width=16)
        ax.set_title(name)
        ax.figure.savefig(assemblies_dir._file(name + '.pdf').open('wb'),
                          format='pdf',
                          bbox_inches='tight')
        plt.close(ax.figure)
    df = pandas.DataFrame.from_records(
        assemblies_data,
        columns=['name', 'number_of_parts', 'assembly_size', 'parts'])
    df.to_csv(report._file('report.csv'), index=False)
    n_constructs = len(df)
    if target == '@memory':
        return n_constructs, report._close()
    else:
        return n_constructs
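Called with target="@memory" the function returns both the construct count and the zip's raw bytes, which suits web backends; with a folder or zip path it returns the count only. A sketch (parts defined elsewhere as Biopython records):

n_constructs, zip_data = full_assembly_report(parts, "@memory",
                                              enzyme="BsmBI")
n_constructs = full_assembly_report(parts, "report_folder", enzyme="BsmBI")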
Example #14
def test_assembly_report(tmpdir):
    data_path = os.path.join("tests", "data", "assembly_report")
    root = flametree.file_tree(data_path)
    df = pandas.read_excel(root.example_picklist_xls.open('rb'))
    assembly_plan = AssemblyPlan(
        OrderedDict([(row[0],
                      [e for e in row[1:] if str(e) not in ['-', 'nan']])
                     for i, row in df.iterrows()
                     if row[0] not in ['nan', 'Construct name']]))
    parts_zip = flametree.file_tree(root.emma_parts_zip._path)

    def read(f):
        record = SeqIO.read(f.open('r'), 'genbank')
        record.id = f._name_no_extension
        return record

    parts_data = {
        f._name_no_extension: {
            'record': read(f)
        }
        for f in parts_zip._all_files if f._extension == 'gb'
    }
    assembly_plan.parts_data = parts_data
    source_plate = plate_from_content_spreadsheet(
        root.example_echo_plate_xlsx._path)

    source_plate.name = "Source"
    for well in source_plate.iter_wells():
        if not well.is_empty:
            content = well.content.components_as_string()
            well.content.quantities[content] *= 1e-3

    destination_plate = Plate4ti0960("Mixplate")

    picklist_generator = AssemblyPicklistGenerator(
        part_mol=1.3e-15,
        complement_to=1e-6,
        buffer_volume=300e-9,
        volume_rounding=2.5e-9,
        minimal_dispense_volume=5e-9)
    picklist, data = picklist_generator.make_picklist(
        assembly_plan,
        source_wells=source_plate.iter_wells(),
        destination_wells=destination_plate.iter_wells(direction='column'),
        complement_well=source_plate.wells.O24,
        buffer_well=source_plate.wells.P24)
    future_plates = picklist.execute(inplace=False)

    def text(w):
        txt = human_volume(w.content.volume)
        if 'construct' in w.data:
            txt = "\n".join([w.data.construct, txt])
        return txt

    plotter = PlateTextPlotter(text)
    ax, _ = plotter.plot_plate(future_plates[destination_plate],
                               figsize=(20, 8))

    ziproot = flametree.file_tree(os.path.join(str(tmpdir), 'a.zip'))
    ax.figure.savefig(ziproot._file("final_mixplate.pdf").open('wb'),
                      format="pdf",
                      bbox_inches="tight")
    plt.close(ax.figure)
    picklist_to_assembly_mix_report(
        picklist,
        ziproot._file("assembly_mix_picklist_report.pdf").open('wb'),
        data=data)
    assembly_plan.write_report(
        ziproot._file("assembly_plan_summary.pdf").open('wb'))
    picklist_to_labcyte_echo_picklist_file(
        picklist,
        ziproot._file("ECHO_picklist.csv").open('w'))
    ziproot._close()
Example #15
def test_directory(tmpdir):
    # CREATE AND POPULATE A DIRECTORY FROM SCRATCH
    dir_path = os.path.join(str(tmpdir), "test_dir")
    root = file_tree(dir_path)
    assert root._file_manager.__class__ == DiskFileManager
    root._file("Readme.md").write("This is a test zip")
    root._dir("texts")._dir("shorts")._file("bla.txt").write("bla bla bla")
    root.texts.shorts._file("bli.txt").write("bli bli bli")
    root.texts.shorts._file("blu.txt").write("blu blu blu")

    # READ AN EXISTING FILE (two ways)
    assert root.texts.shorts.bla_txt.read() == "bla bla bla"
    with root.texts.shorts.bla_txt.open("r") as f:
        assert f.read() == "bla bla bla"

    # TEST REPLACE BEHAVIOR (default replace=True, then replace=False)
    root._dir("trash")._file("bla.txt").write("bla bla bla")
    root._dir("trash")._file("blu.txt").write("blu blu blu")
    assert root.trash._filenames == ["blu.txt"]

    root.trash._delete()
    root._dir("trash")._file("bla.txt").write("bla bla bla")
    root._dir("trash", replace=False)._file("blu.txt").write("blu blu blu")
    assert set(root.trash._filenames) == set(["bla.txt", "blu.txt"])

    # READ AN EXISTING DIRECTORY
    root = file_tree(dir_path)
    assert set([f._name for f in root._all_files]) == ALL_FILES

    # APPEND TO AN EXISTING DIRECTORY
    root._dir("newdir")._file("new_file.txt").write("I am a new file")
    root = file_tree(dir_path)
    assert set([f._name for f in root._all_files
                ]) == ALL_FILES.union(set(["new_file.txt"]))

    # TEST DELETION
    path = root.newdir.new_file_txt._path
    assert os.path.exists(path)
    root.newdir.new_file_txt.delete()
    assert not os.path.exists(path)
    assert not any([f._path == path for f in root.newdir._files])

    # TEST DIRECTORY COPYING
    root._dir("new_folder")
    root.texts._copy(root.new_folder)
    assert [d._name for d in root.new_folder._dirs] == ["texts"]

    # TEST DIRECTORY MOVING
    root._dir("newer_folder")
    root.new_folder.texts._move(root.newer_folder)
    assert [d._name for d in root.new_folder._dirs] == []
    assert [d._name for d in root.newer_folder._dirs] == ["texts"]

    # TEST FILE COPYING
    root._dir("newest_folder")
    root.newer_folder.texts.shorts.bla_txt.copy(root.newest_folder)
    assert [f._name for f in root.newest_folder._files] == ["bla.txt"]

    # TEST FILE MOVING
    root._dir("newester_folder")
    root.newer_folder.texts.shorts.bla_txt.move(root.newester_folder)
    assert [f._name for f in root.newester_folder._files] == ["bla.txt"]
    remaining_files = set([d._name for d in root.newer_folder._all_files])
    assert remaining_files == set(["bli.txt", "blu.txt"])
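The same API also covers in-memory trees, as used throughout the other examples; a short sketch grounded in the calls above:

# "@memory" builds the tree in RAM; _close() returns the zip's raw bytes.
root = file_tree("@memory")
root._file("readme.txt").write("hello")
zip_bytes = root._close()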
Example #16
def parts_ids_from_geneart_records_dir(folder):
    records_root = flametree.file_tree(folder)
    return {
        dir_._name.split("_")[0]: "_".join(dir_._name.split("_")[1:])
        for dir_ in records_root._dirs
    }
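For a folder whose subdirectories follow an ID_NAME convention (names below are hypothetical), each directory name is split on its first underscore:

# Hypothetical layout: geneart_records/A1234_promoter/, .../A1235_terminator/
ids = parts_ids_from_geneart_records_dir("geneart_records")
# -> {"A1234": "promoter", "A1235": "terminator"}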
Example #17
def write_optimization_report(target,
                              problem,
                              project_name="unnammed",
                              constraints_evaluations=None,
                              objectives_evaluations=None,
                              figure_width=20,
                              max_features_in_plots=300):
    """Write an optimization report with a PDF summary, plots, and genbanks.

    Parameters
    ----------

    """
    if constraints_evaluations is None:
        constraints_evaluations = problem.constraints_evaluations()
    if objectives_evaluations is None:
        objectives_evaluations = problem.objectives_evaluations()
    if isinstance(target, str):
        root = flametree.file_tree(target, replace=True)
    else:
        root = target
    translator = SpecAnnotationsTranslator()
    # CREATE FIGURES AND GENBANKS

    with PdfPages(root._file("before_after.pdf").open("wb")) as pdf_io:

        figures_data = [
            ("Before", sequence_to_biopython_record(problem.sequence_before),
             problem.constraints_before, problem.objectives_before, []),
            ("After", sequence_to_biopython_record(problem.sequence),
             constraints_evaluations, objectives_evaluations,
             problem.sequence_edits_as_features())
        ]

        plot_height = None
        for (title, record, constraints, objectives, edits) in figures_data:

            full_title = (
                "{title}:        {nfailing} constraints failing (in red)"
                "        Total Score: {score:.01E} {bars}").format(
                    title=title,
                    score=objectives.scores_sum(),
                    nfailing=len(constraints.filter("failing").evaluations),
                    bars="" if
                    (title == "Before") else "       (bars indicate edits)")
            ax = None
            if title == "After":
                record.features += edits
                graphical_record = translator.translate_record(record)
                fig, ax = plt.subplots(1, figsize=(figure_width, plot_height))
                graphical_record.plot(ax=ax, level_offset=-0.3)
                record.features = []

            record.features += constraints.success_and_failures_as_features()
            record.features += objectives.success_and_failures_as_features()

            graphical_record = translator.translate_record(record)
            ax, _ = graphical_record.plot(ax=ax, figure_width=figure_width)
            ax.set_title(full_title, loc="left", fontdict=TITLE_FONTDICT)
            plot_height = ax.figure.get_size_inches()[1]
            pdf_io.savefig(ax.figure, bbox_inches="tight")
            plt.close(ax.figure)

            record.features += edits
            breaches_locations = \
                constraints.filter("failing") \
                           .locations_as_features(label_prefix="Breach from",
                                                  merge_overlapping=True)
            record.features += breaches_locations

            SeqIO.write(record,
                        root._file(title.lower() + ".gb").open("w"), "genbank")

            if breaches_locations != []:
                record.features = breaches_locations
                graphical_record = translator.translate_record(record)
                if len(graphical_record.features) > max_features_in_plots:
                    features = sorted(graphical_record.features,
                                      key=lambda f: f.start - f.end)
                    new_ft = features[:max_features_in_plots]
                    graphical_record.features = new_ft
                    message = "(only %d features shown)" % \
                              max_features_in_plots
                else:
                    message = ""
                ax, _ = graphical_record.plot(figure_width=figure_width)
                ax.set_title(title + ": Constraints breaches locations" +
                             message,
                             loc="left",
                             fontdict=TITLE_FONTDICT)
                pdf_io.savefig(ax.figure, bbox_inches="tight")
                plt.close(ax.figure)

    # CREATE PDF REPORT

    html = report_writer.pug_to_html(
        path=os.path.join(ASSETS_DIR, "optimization_report.pug"),
        project_name=project_name,
        problem=problem,
        constraints_evaluations=constraints_evaluations,
        objectives_evaluations=objectives_evaluations,
        edits=sum(len(f) for f in edits),
        sequenticons={
            label: sequenticon(seq, output_format="html_image", size=24)
            for label, seq in [(
                "before", problem.sequence_before), ("after",
                                                     problem.sequence)]
        })
    report_writer.write_report(html, root._file("Report.pdf"))

    problem.to_record(root._file("final_sequence.gb").open("w"),
                      with_constraints=False,
                      with_objectives=False)

    # returns zip data if target == '@memory'
    if isinstance(target, str):
        return root._close()
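A hedged sketch of the surrounding DnaChisel workflow (resolve_constraints and optimize are DnaChisel's public methods; the project name is made up):

problem.resolve_constraints()
problem.optimize()
zip_data = write_optimization_report("@memory", problem,
                                     project_name="demo_project")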
Example #18
def extract_from_input(
    filename=None,
    directory=None,
    construct_list=None,
    direct_sense=True,
    output_path=None,
    min_sequence_length=20,
):
    """Extract features from input and return in a dictionary.

    Optionally save the features in separate files.

    Parameters
    ==========

    filename
      Input sequence file (Genbank).

    directory
      Directory name containing input sequence files.

    construct_list
      A list of SeqRecords.

    direct_sense
      If True: make antisense features into direct-sense in the exported files.

    output_path
      Path for the exported feature and report files.

    min_sequence_length
      Discard sequences with length less than this integer.
    """
    genbank_id_limit = 20  # GenBank format hard limit for name
    if construct_list:
        pass
    elif filename:
        input_record = load_record(filename,
                                   record_id="auto",
                                   upperize=False,
                                   id_cutoff=genbank_id_limit)
        construct_list = [input_record]
    elif directory:
        construct_list = records_from_data_files(filepaths=None,
                                                 folder=directory)
    else:
        raise TypeError(
            "Specify one of 'construct_list', 'filename' or 'directory'.")

    records_dict = dict()
    recordname_list = []
    for input_record in construct_list:
        records = extract_features(input_record, direct_sense)
        record_name = input_record.name[0:genbank_id_limit]
        # This part makes the key (used as dir name) unique by appending a copynumber:
        number_of_name_occurrences = recordname_list.count(record_name)
        if number_of_name_occurrences:
            key = "%s_%s" % (record_name, number_of_name_occurrences + 1)
        else:
            key = record_name

        recordname_list.append(record_name)

        records_dict[key] = records

    parts_report = make_part_dict(records_dict,
                                  min_sequence_length=min_sequence_length)
    processed_report = process_report(parts_report[1])

    all_parts_dict = parts_report[0]
    records_dict["all_parts"] = list(all_parts_dict.values())

    if output_path is not None:
        root = flametree.file_tree(output_path)

        for key, records in records_dict.items():

            record_dir = root._dir(key)

            record_name_alnum_list = []
            for record in records:

                record_name_alnum = "".join(x if x.isalnum() else "_"
                                            for x in record.name)
                # This part makes the filename unique by appending a copynumber:
                number_of_occurrences = record_name_alnum_list.count(
                    record_name_alnum)
                if number_of_occurrences:
                    record_filename = "%s_%s.gb" % (
                        record_name_alnum,
                        number_of_occurrences + 1,
                    )
                else:
                    record_filename = record_name_alnum + ".gb"

                record_name_alnum_list.append(record_name_alnum)

                record_file_path = record_dir._file(record_filename)

                try:
                    write_record(record, record_file_path, fmt="genbank")

                except Exception as err:
                    print("Error writing", record_filename, str(err))

        processed_report.to_csv(root._file("report.csv").open("w"))

    records_dict["processed_report"] = processed_report

    return records_dict
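A usage sketch (file and folder names hypothetical): extract features from one Genbank construct and write one Genbank file per feature.

records_dict = extract_from_input(filename="construct.gb",
                                  output_path="extracted_parts")
report = records_dict["processed_report"]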
Example #19
import flametree
from dnacauldron import RestrictionLigationMix, load_record

data_root = flametree.file_tree(".").data.select_connectors

parts = [
    load_record(f._path, linear=False, id=f._name_no_extension[:15])
    for f in data_root.parts_missing_connectors._all_files
    if f._extension == "gb"
]
connectors = [
    load_record(f._path, linear=False, id=f._name_no_extension[:15])
    for f in data_root.connectors._all_files if f._extension == "gb"
]
mix = RestrictionLigationMix(parts, enzyme='BsmBI')
selected_connectors = mix.autoselect_connectors(connectors)
print("Selected connectors: ", ", ".join([c.id for c in selected_connectors]))
Example #20
def test_pandas(tmpdir):
    # Round-trip a CSV through an in-memory flametree file.
    root = file_tree("@memory")
    root._file('test.csv').write("A,B,C\n1,2,3\n4,5,6")
    dataframe = pandas.read_csv(root.test_csv.open('r'))
    assert list(dataframe.columns) == ["A", "B", "C"]
Example #21
def write_optimization_report(target, problem, project_name="unnammed",
                              constraints_evaluations=None,
                              objectives_evaluations=None,
                              figure_width=20, max_features_in_plots=300):
    """Write an optimization report with a PDF summary, plots, and genbanks.

    Parameters
    ----------
    target
      Path to a directory or zip file, or "@memory" for returning raw data of
      a zip file created in-memory.
    
    problem
      A DnaOptimizationProblem to be solved and optimized
    
    project_name
      Name of the project that will appear on the PDF report
    
    constraints_evaluations
      Precomputed constraints evaluations. If none are provided, they will be
      computed again from the problem.

    objectives_evaluations
      Precomputed objectives evaluations. If none are provided, they will be
      computed again from the problem.

    figure_width
      Width of the report's figure, in inches. The more annotations there will
      be in the figure, the wider it should be. The default should work for
      most cases.
    
    max_features_in_plots
      Limit on the number of features to plot (plots with thousands of
      features may take very long to render).

    """
    if not PDF_REPORTS_AVAILABLE:
        raise ImportError(install_extras_message("PDF Reports"))
    if not SEQUENTICON_AVAILABLE:
        raise ImportError(install_extras_message("Sequenticon"))
    if constraints_evaluations is None:
        constraints_evaluations = problem.constraints_evaluations()
    if objectives_evaluations is None:
        objectives_evaluations = problem.objectives_evaluations()
    if isinstance(target, str):
        root = flametree.file_tree(target, replace=True)
    else:
        root = target
    translator = SpecAnnotationsTranslator()
    # CREATE FIGURES AND GENBANKS
    diffs_figure_data = None
    sequence_before = sequence_to_biopython_record(problem.sequence_before)
    if GENEBLOCKS_AVAILABLE:
        sequence_after = problem.to_record()
        contract_under = max(3, int(len(sequence_after) / 10))
        diffs = DiffBlocks.from_sequences(sequence_before, sequence_after,
                                          use_junk_over=50,
                                          contract_under=contract_under)
        _, diffs_ax = diffs.plot()
        diffs_figure_data = pdf_tools.figure_data(diffs_ax.figure, fmt='svg')
        plt.close(diffs_ax.figure)

    with PdfPages(root._file("before_after.pdf").open("wb")) as pdf_io:

        figures_data = [
            (
                "Before",
                sequence_before,
                problem.constraints_before,
                problem.objectives_before,
                []
            ),
            (
                "After",
                sequence_to_biopython_record(problem.sequence),
                constraints_evaluations,
                objectives_evaluations,
                problem.sequence_edits_as_features()
            )
        ]

        plot_height = None
        for (title, record, constraints, objectives, edits) in figures_data:

            full_title = (
                "{title}:        {nfailing} constraints failing (in red)"
                "        Total Score: {score:.01E} {bars}").format(
                title=title, score=objectives.scores_sum(),
                nfailing=len(constraints.filter("failing").evaluations),
                bars="" if (title == "Before") else
                "       (bars indicate edits)"
            )
            ax = None
            if title == "After":
                record.features += edits
                graphical_record = translator.translate_record(record)
                fig, ax = plt.subplots(1, figsize=(figure_width, plot_height))
                graphical_record.plot(ax=ax, level_offset=-0.3)
                record.features = []

            record.features += constraints.success_and_failures_as_features()
            record.features += objectives.success_and_failures_as_features()

            graphical_record = translator.translate_record(record)
            ax, _ = graphical_record.plot(ax=ax, figure_width=figure_width)
            ax.set_title(full_title, loc="left", fontdict=TITLE_FONTDICT)
            plot_height = ax.figure.get_size_inches()[1]
            pdf_io.savefig(ax.figure, bbox_inches="tight")
            plt.close(ax.figure)

            record.features += edits
            breaches_locations = \
                constraints.filter("failing") \
                           .locations_as_features(label_prefix="Breach from",
                                                  merge_overlapping=True)
            record.features += breaches_locations

            SeqIO.write(record, root._file(title.lower() + ".gb").open("w"),
                        "genbank")

            if breaches_locations:
                record.features = breaches_locations
                graphical_record = translator.translate_record(record)
                if len(graphical_record.features) > max_features_in_plots:
                    features = sorted(graphical_record.features,
                                      key=lambda f: f.start - f.end)
                    new_ft = features[:max_features_in_plots]
                    graphical_record.features = new_ft
                    message = "(only %d features shown)" % \
                              max_features_in_plots
                else:
                    message = ""
                ax, _ = graphical_record.plot(figure_width=figure_width)
                ax.set_title(title + ": Constraints breaches locations"
                             + message, loc="left", fontdict=TITLE_FONTDICT)
                pdf_io.savefig(ax.figure, bbox_inches="tight")
                plt.close(ax.figure)

    # CREATE PDF REPORT
    html = report_writer.pug_to_html(
        path=os.path.join(ASSETS_DIR, "optimization_report.pug"),
        project_name=project_name,
        problem=problem,
        constraints_evaluations=constraints_evaluations,
        objectives_evaluations=objectives_evaluations,
        edits=sum(len(f) for f in edits),  # 'edits' leaks from the loop above (the "After" edit features)
        diffs_figure_data=diffs_figure_data,
        sequenticons={
            label: sequenticon(seq, output_format="html_image", size=24)
            for label, seq in [("before", problem.sequence_before),
                               ("after", problem.sequence)]
        }
    )
    problem.to_record(root._file("final_sequence.gb").open("w"),
                      with_constraints=False,
                      with_objectives=False)

    report_writer.write_report(html, root._file("Report.pdf"))
    if isinstance(target, str):
        return root._close()
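A hedged usage sketch for this function, assuming the surrounding module is DnaChisel and using names from its public API (`DnaOptimizationProblem`, `random_dna_sequence`, `AvoidPattern`, `EnforceGCContent`):

from dnachisel import (DnaOptimizationProblem, random_dna_sequence,
                       AvoidPattern, EnforceGCContent)

problem = DnaOptimizationProblem(
    sequence=random_dna_sequence(5000, seed=123),
    constraints=[AvoidPattern("BsaI_site"),
                 EnforceGCContent(mini=0.3, maxi=0.7, window=50)],
)
problem.resolve_constraints()
zip_data = write_optimization_report("@memory", problem, project_name="demo")
# zip_data holds the raw bytes of the report zip, per the docstring above.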
Example #22
0
    def work(self):
        self.logger(message="Reading Data...")
        data = self.data

        # Reading picklist

        picklist_filelike = file_to_filelike_object(data.picklist)
        if data.picklist.name.endswith(".csv"):
            csv_text = picklist_filelike.read().decode()
            rows = [line.split(",") for line in csv_text.split("\n") if len(line)]
        else:
            dataframe = pandas.read_excel(picklist_filelike)
            rows = [row for i, row in dataframe.iterrows()]
        assembly_plan = AssemblyPlan(
            OrderedDict(
                [
                    (
                        row[0],
                        [
                            str(e).strip()
                            for e in row[1:]
                            if str(e).strip() not in ["-", "nan", ""]
                        ],
                    )
                    for row in rows
                    if row[0]
                    not in ["nan", "Construct name", "constructs", "construct"]
                ]
            )
        )
        for assembly, parts in assembly_plan.assemblies.items():
            assembly_plan.assemblies[assembly] = [
                part.replace(" ", "_") for part in parts
            ]

        # Reading part infos

        if len(data.parts_infos):
            first_file = data.parts_infos[0]
            if first_file.name.endswith((".csv", ".xls", ".xlsx")):
                first_file_filelike = file_to_filelike_object(first_file)
                if first_file.name.endswith(".csv"):
                    dataframe = pandas.read_csv(first_file_filelike)
                else:
                    dataframe = pandas.read_excel(first_file_filelike)
                parts_data = {
                    row.part: {"size": row["size"]} for i, row in dataframe.iterrows()
                }
            else:
                records = records_from_data_files(data.parts_infos)
                if data.use_file_names_as_ids:
                    for r in records:
                        r.id = r.name = r.file_name
                parts_data = {
                    rec.id.replace(" ", "_"): {"record": rec} for rec in records
                }
            assembly_plan.parts_data = parts_data
            parts_without_data = assembly_plan.parts_without_data()
            if len(parts_without_data):
                return {
                    "success": False,
                    "message": "Some parts have no provided record or data.",
                    "missing_parts": parts_without_data,
                }

        # Reading protocol

        if data.quantity_unit == "fmol":
            part_mol = data.part_quantity * 1e-15
            part_g = None
        if data.quantity_unit == "nM":
            part_mol = data.part_quantity * data.total_volume * 1e-15
            part_g = None
        if data.quantity_unit == "ng":
            part_mol = None
            part_g = data.part_quantity * 1e-9
            # Backbone:part molar ratio calculation is not performed in this case.
            # This ensures no change regardless of form input:
            data.part_backbone_ratio = 1

        self.logger(message="Generating picklist")

        picklist_generator = AssemblyPicklistGenerator(
            part_mol=part_mol,
            part_g=part_g,
            complement_to=data.total_volume * 1e-6,
            buffer_volume=data.buffer_volume * 1e-6,
            volume_rounding=2.5e-9,
            minimal_dispense_volume=5e-9,
        )

        backbone_name_list = data.backbone_name.split(",")

        source_filelike = file_to_filelike_object(data.source_plate)
        source_plate = plate_from_content_spreadsheet(source_filelike)
        for well in source_plate.iter_wells():
            if well.is_empty:
                continue
            quantities = well.content.quantities
            part, quantity = list(quantities.items())[0]
            quantities.pop(part)

            if part in backbone_name_list:
                # This section multiplies the backbone concentration by the
                # part:backbone molar ratio, which tricks the calculator into
                # producing a picklist with the desired ratio. For example, a
                # part:backbone ratio of 2:1 multiplies the backbone
                # concentration by 2, so half as much of it will be added to
                # the well.
                quantities[part.replace(" ", "_")] = quantity * data.part_backbone_ratio
            else:
                quantities[part.replace(" ", "_")] = quantity

        source_plate.name = "Source"

        self.logger(message="Generating Picklist...")
        destination_plate = Plate4ti0960("Mixplate")
        if data.destination_plate:
            dest_filelike = file_to_filelike_object(data.destination_plate)
            destination_plate = plate_from_content_spreadsheet(dest_filelike)
        destination_wells = (
            well
            for well in destination_plate.iter_wells(direction="column")
            if well.is_empty
        )
        picklist, picklist_data = picklist_generator.make_picklist(
            assembly_plan,
            source_wells=source_plate.iter_wells(),
            destination_wells=destination_wells,
        )
        if picklist is None:
            return {
                "success": False,
                "message": "Some parts in the assembly plan have no "
                "corresponding well.",
                "picklist_data": picklist_data,
                "missing_parts": picklist_data.get("missing_parts", None),
            }
        future_plates = picklist.execute(inplace=False)

        def text(w):
            txt = human_volume(w.content.volume)
            if "construct" in w.data:
                txt = "\n".join([w.data.construct, txt])
            return txt

        plotter = PlateTextPlotter(text)
        ax, _ = plotter.plot_plate(future_plates[destination_plate], figsize=(20, 8))

        ziproot = flametree.file_tree("@memory", replace=True)

        # MIXPLATE MAP PLOT

        ax.figure.savefig(
            ziproot._file("final_mixplate.pdf").open("wb"),
            format="pdf",
            bbox_inches="tight",
        )
        plt.close(ax.figure)
        plate_to_platemap_spreadsheet(
            future_plates[destination_plate],
            lambda w: w.data.get("construct", ""),
            filepath=ziproot._file("final_mixplate.xls").open("wb"),
        )

        self.logger(message="Writing report...")

        # ASSEMBLY REPORT

        picklist_to_assembly_mix_report(
            picklist,
            ziproot._file("assembly_mix_picklist_report.pdf").open("wb"),
            data=picklist_data,
        )
        assembly_plan.write_report(
            ziproot._file("assembly_plan_summary.pdf").open("wb")
        )

        # MACHINE PICKLIST

        if data.dispenser_machine == "labcyte_echo":
            picklist_to_labcyte_echo_picklist_file(
                picklist, ziproot._file("ECHO_picklist.csv").open("w")
            )
        else:
            picklist_to_tecan_evo_picklist_file(
                picklist, ziproot._file("EVO_picklist.gwl").open("w")
            )
        raw = file_to_filelike_object(data.source_plate).read()
        f = ziproot._file(data.source_plate.name)
        f.write(raw, mode="wb")
        zip_data = ziproot._close()

        return {
            "file": {
                "data": data_to_html_data(zip_data, "zip"),
                "name": "picklist.zip",
                "mimetype": "application/zip",
            },
            "success": True,
        }
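The picklist-parsing step above boils down to: the first cell of each row is the construct name, the remaining non-empty cells are part names, and the header row is skipped. A self-contained sketch of just that step:

from collections import OrderedDict

csv_text = "Construct name,part 1,part 2\nasm_A,partA,partB\nasm_B,partC,-\n"
rows = [line.split(",") for line in csv_text.split("\n") if len(line)]
assemblies = OrderedDict(
    (row[0], [str(e).strip() for e in row[1:]
              if str(e).strip() not in ["-", "nan", ""]])
    for row in rows
    if row[0] not in ["nan", "Construct name", "constructs", "construct"]
)
assert assemblies == OrderedDict(
    [("asm_A", ["partA", "partB"]), ("asm_B", ["partC"])])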
Example #23
0
def write_optimization_report(
    target,
    problem,
    project_name="unnammed",
    plot_figure=True,
    constraints_evaluations=None,
    objectives_evaluations=None,
    figure_width=20,
    max_features_in_plots=300,
    file_path=None,
    file_content=None,
):
    """Write an optimization report with a PDF summary, plots, and genbanks.

    Parameters
    ----------
    target
      Path to a directory or zip file, or "@memory" for returning raw data of
      a zip file created in-memory.

    problem
      A DnaOptimizationProblem to be solved and optimized

    project_name
      Name of the project that will appear on the PDF report

    plot_figure
      Whether to generate the sequence-changes figure (requires the
      Geneblocks extra).

    constraints_evaluations
      Precomputed constraints evaluations. If none are provided, they will be
      computed again from the problem.

    objectives_evaluations
      Precomputed objectives evaluations. If none are provided, they will be
      computed again from the problem.

    figure_width
      Width of the report's figure, in inches. The more annotations there will
      be in the figure, the wider it should be. The default should work for
      most cases.

    max_features_in_plots
      Limit on the number of features to plot (plots with thousands of
      features may take very long to render).

    file_path
      Path to the file from which the problem was created. If provided, the
      original file is copied into the report, prefixed with a short hash of
      its content.

    file_content
      Raw content of the original file. If None and ``file_path`` is given,
      the file is read from disk.
    """
    if not PDF_REPORTS_AVAILABLE:
        raise ImportError(install_extras_message("PDF Reports"))
    if not SEQUENTICON_AVAILABLE:
        raise ImportError(install_extras_message("Sequenticon"))
    if constraints_evaluations is None:
        constraints_evaluations = problem.constraints_evaluations()
    if objectives_evaluations is None:
        objectives_evaluations = problem.objectives_evaluations()
    if isinstance(target, str):
        root = flametree.file_tree(target, replace=True)
    else:
        root = target

    # TRANSFER THE ORIGINAL FILE
    file_hash = None
    if file_path is not None:
        if file_content is None:
            with open(file_path, "rb") as f:
                file_content = f.read()
        basename = os.path.basename(file_path)
        file_hash = hashlib.md5(file_content).hexdigest()[:8]
        root._file("_".join([file_hash, basename])).write(file_content)

    # CREATE FIGURES AND GENBANKS
    diffs_figure_data = None
    if GENEBLOCKS_AVAILABLE and plot_figure:
        diffs_ax = plot_optimization_changes(problem)
        diffs_figure_data = pdf_tools.figure_data(diffs_ax.figure, fmt="svg")
        plt.close(diffs_ax.figure)

    # GENERATE AND SAVE THE CONSTRAINTS SUMMARY

    constraints_before_after = constraints_before_after_dataframe(
        problem=problem, constraints_evaluations=constraints_evaluations)
    filename = "constraints_before_and_after.csv"
    constraints_before_after.to_csv(root._file(filename).open("w"),
                                    index=False)

    # GENERATE AND SAVE THE OBJECTIVES SUMMARY

    objectives_before_after = objectives_before_after_dataframe(
        problem=problem, objectives_evaluations=objectives_evaluations)
    filename = "objectives_before_and_after.csv"
    objectives_before_after.to_csv(root._file(filename).open("w"), index=False)

    # CREATE PDF REPORT
    html = report_writer.pug_to_html(
        path=os.path.join(ASSETS_DIR, "optimization_report.pug"),
        project_name=project_name,
        problem=problem,
        constraints_evaluations=constraints_evaluations,
        objectives_evaluations=objectives_evaluations,
        constraints_before_after=constraints_before_after,
        objectives_before_after=objectives_before_after,
        edits=problem.sequence_edits_as_array().sum(),
        diffs_figure_data=diffs_figure_data,
        file_hash=file_hash,
        sequenticons={
            label: sequenticon(seq, output_format="html_image", size=24)
            for label, seq in [
                ("before", problem.sequence_before),
                ("after", problem.sequence),
            ]
        },
    )
    report_writer.write_report(html, root._file("Report.pdf"))

    # CREATE THE "SEQUENCE EDITS" REPORT

    record = problem.to_record(with_sequence_edits=True)
    breaches = problem.constraints_before.filter("failing")
    breaches_locations = breaches.locations_as_features(
        label_prefix="Breach from", merge_overlapping=True)
    record.features += breaches_locations
    SeqIO.write(record,
                root._file("final_sequence_with_edits.gb").open("w"),
                "genbank")

    # CREATE THE "FINAL SEQUENCE" REPORT

    problem.to_record(
        root._file("final_sequence.gb").open("w"),
        with_constraints=False,
        with_objectives=False,
    )

    if isinstance(target, str):
        return root._close()
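The original-file transfer above prefixes the archived copy with a short md5 digest, so identical content always maps to the same name. The scheme in isolation (hashlib and os are standard library; the path and content below are hypothetical):

import hashlib
import os

file_path = "my_sequence.gb"             # hypothetical input path
file_content = b"LOCUS my_sequence ..."  # would normally be read from file_path
file_hash = hashlib.md5(file_content).hexdigest()[:8]
archived_name = "_".join([file_hash, os.path.basename(file_path)])
print(archived_name)  # "<8 hex chars>_my_sequence.gb"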
Example #24
0
import flametree # for getting/writing files and folders
from dnacauldron import RestrictionLigationMix, load_record, write_record

root = flametree.file_tree('.')
parts = [
    load_record(f._path, linear=False)
    for f in root.data.assemblies._all_files
]
mix = RestrictionLigationMix(parts, enzyme='BsmBI')
assemblies_records = mix.compute_circular_assemblies()
output_folder = root._dir('output_data')._dir('combinatorial_assemblies')
for i, record in enumerate(assemblies_records):
    output = output_folder._file("assembly_%03d.gb" % i)
    write_record(record, output, "genbank")
print("%d combinatorial assembly genbanks written in "
      "output_data/combinatorial_assemblies" % (i + 1))
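One caveat with this example: if `compute_circular_assemblies()` yields no assembly, `i` is never bound and the final `print` raises a `NameError`. A counting variant of the same loop is safer:

n_written = 0
for record in assemblies_records:
    output = output_folder._file("assembly_%03d.gb" % n_written)
    write_record(record, output, "genbank")
    n_written += 1
print("%d combinatorial assembly genbanks written in "
      "output_data/combinatorial_assemblies" % n_written)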
Example #25
0
    def work(self):

        data = self.data

        self.logger(message="Generating report...")
        records = records_from_data_files(data.files)
        constraints = [
            dc.AvoidPattern("BsaI_site"),
            dc.AvoidPattern("BsmBI_site"),
            dc.AvoidPattern("BbsI_site"),
            dc.AvoidPattern("SapI_site"),
            dc.AvoidPattern("8x1mer"),
            dc.AvoidPattern("5x3mer"),
            dc.AvoidPattern("9x2mer"),
            dc.AvoidHairpins(stem_size=20, hairpin_window=200),
            dc.EnforceGCContent(mini=0.3, maxi=0.7, window=100),
            dc.EnforceGCContent(mini=0.1, maxi=0.9, window=100),
            dc.UniquifyAllKmers(k=15),
        ]

        dataframe = cr.constraints_breaches_dataframe(constraints, records)
        spreadsheet_io = BytesIO()
        dataframe.to_excel(spreadsheet_io)
        records = cr.records_from_breaches_dataframe(dataframe, records)
        zipped_records = flametree.file_tree("@memory")
        if data.include_genbanks:
            for record in records:
                target = zipped_records._file("%s.gb" % record.id)
                write_record(record, target)
        pdf_io = BytesIO()
        cr.breaches_records_to_pdf(records, pdf_io, logger=self.logger)

        return {
            "pdf_report": {
                "data": data_to_html_data(
                    pdf_io.getvalue(),
                    "pdf",
                    filename="manufacturability_report.pdf",
                ),
                "name": "manufacturability_report.pdf",
                "mimetype": "application/pdf",
            },
            "records": {
                "data": data_to_html_data(
                    zipped_records._close(),
                    "zip",
                    filename="manufacturability_annotated_records.zip",
                ),
                "name": "manufacturability_annotated_records.zip",
                "mimetype": "application/zip",
            },
            "spreadsheet": {
                "data": data_to_html_data(
                    spreadsheet_io.getvalue(),
                    "xlsx",
                    filename="manufacturability_report.xlsx",
                ),
                "name": "manufacturability_report.xlsx",
                "mimetype": "application/"
                "vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            },
        }
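The in-memory spreadsheet pattern used above needs no file on disk; pandas writes Excel data straight into a BytesIO buffer (an Excel engine such as openpyxl must be installed). A minimal sketch:

from io import BytesIO

import pandas

dataframe = pandas.DataFrame({"sequence": ["seq_1"], "breaches": [3]})
spreadsheet_io = BytesIO()
dataframe.to_excel(spreadsheet_io)
xlsx_bytes = spreadsheet_io.getvalue()  # raw .xlsx data, ready to zip or send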
Example #26
0
    def write_report(
        self,
        target,
        folder_name="auto",
        assembly_report_writer="default",
        logger="bar",
        include_original_parts_records=True,
    ):
        """Write a comprehensive report to a folder or zip file.

        Parameters
        ----------

        target
          Either a path to a folder, to a zip file, or ``"@memory"`` to write
          into a virtual zip file whose raw data is then returned.

        folder_name
          Name of the folder created inside the target to host the report (yes,
          it is a folder inside a folder, which can be very practical).

        assembly_report_writer
          Either the "default" or any AssemblyReportWriter instance.

        logger
          Either "bar" for a progress bar, or None, or any Proglog logger.

        include_original_parts_records
          If true, the original provided part records will be included in the
          report (creates larger file sizes, but better for traceability).
        """
        if assembly_report_writer == "default":
            # We'll write all records into one folder for the whole plan
            assembly_report_writer = AssemblyReportWriter(
                include_part_records=False)
        logger = proglog.default_bar_logger(logger)
        if folder_name == "auto":
            folder_name = self.assembly_plan.name + "_simulation"
        report_root = file_tree(target)._dir(folder_name, replace=True)
        self._write_assembly_reports(report_root,
                                     assembly_report_writer,
                                     logger=logger)
        self._write_errors_spreadsheet(report_root, error_type="error")
        self._write_errors_spreadsheet(report_root, error_type="warning")

        self._write_all_required_parts(report_root)
        self._write_construct_summary_spreadsheet(report_root)
        self._write_assembly_plan_spreadsheets(report_root)
        self._write_summary_stats(report_root)
        if len(self.cancelled):
            self._write_cancelled_assemblies(report_root)
        if include_original_parts_records:
            self._write_all_required_parts_records(report_root)
        if not self.has_single_level:
            self._plot_assembly_graph(report_root)

        if assembly_report_writer.include_pdf_report:
            if not PDF_REPORTS_AVAILABLE:
                raise ImportError(
                    "Could not load PDF Reports. Install with `pip install pdf_reports`"
                    " to generate a PDF report.")

            simulation_info = self._calculate_simulation_info()
            write_simulation_pdf_report(report_root._file("Report.pdf"),
                                        simulation_info)

        if target == "@memory":
            return report_root._close()
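A hedged call sketch, assuming `simulation` is an instance of the class this method belongs to (an assembly-plan simulation object):

# With "@memory", the raw zip bytes are returned instead of writing to disk:
zip_data = simulation.write_report("@memory", folder_name="my_plan_simulation")
with open("my_plan_simulation.zip", "wb") as f:
    f.write(zip_data)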
Example #27
0
import flametree  # for getting/writing files and folders
from dnacauldron import RestrictionLigationMix, load_record, write_record

root = flametree.file_tree(".")
parts = [
    load_record(f._path, topology="circular")
    for f in root.data.assemblies._all_files
]
mix = RestrictionLigationMix(parts, enzyme="BsmBI")
assemblies_records = mix.compute_circular_assemblies()
output_folder = root._dir("output_data")._dir("combinatorial_assemblies")
for i, record in enumerate(assemblies_records):
    output = output_folder._file("assembly_%03d.gb" % i)
    write_record(record, output, "genbank")
print(
    "%d combinatorial assembly genbanks written in output_data/assemblies"
    % (i + 1)
)
Example #28
0
def full_assembly_report(
    parts,
    target,
    enzyme="BsmBI",
    max_assemblies=40,
    connector_records=(),
    include_fragments_plots="on_failure",
    include_parts_plots="on_failure",
    include_fragments_connection_graph="on_failure",
    include_assembly_plots=True,
    n_expected_assemblies=None,
    no_skipped_parts=False,
    fragments_filters="auto",
    assemblies_prefix="assembly",
    show_overhangs_in_graph=True,
    show_overhangs_in_genbank=True,
    mix_class="restriction",
):
    """Write a full assembly report in a folder or a zip.

    The report contains the final sequence(s) of the assembly in Genbank format
    as well as a .csv report on all assemblies produced and PDF figures
    to allow a quick overview or diagnostic.

    Folder ``assemblies`` contains the final assemblies, ``assembly_graph``
    contains a schematic view of how the parts assemble together, folder
    ``fragments`` contains the details of all fragments produced by the enzyme
    digestion, and folder ``provided_parts`` contains the original input
    (genbanks of all parts provided for the assembly mix).

    Parameters
    ----------

    parts
      List of Biopython records representing the parts, potentially on entry
      vectors. All provided parts should have distinct ``name`` attributes,
      as the names are used to name the output files.

    target
      Either a path to a folder, or to a zip file, or ``@memory`` to return
      a string representing zip data (the latter is particularly useful for
      website backends).

    enzyme
      Name of the enzyme to be used in the assembly

    max_assemblies
      Maximal number of assemblies to consider; assemblies beyond this number
      won't be returned.

    fragments_filters
      Fragments filters to be used to filter out fragments before looking for
      assemblies. If left to auto, fragments containing the enzyme site will
      be filtered out.

    connector_records
      List of connector records (a connector is a part that can bridge a gap
      between two other parts), from which only the essential elements to form
      an assembly will be automatically selected and added to the other parts.

    assemblies_prefix
      Prefix for the file names of all assemblies. They will be named
      ``PRE_001.gb``, ``PRE_002.gb``, etc., where ``PRE`` is the prefix (a
      single assembly is simply named ``PRE.gb``).

    include_parts_plots, include_assembly_plots
      These two parameters control the rendering of extra figures which are
      great for troubleshooting, but not strictly necessary, and they slow
      down the report generation considerably. They can be True, False, or
      "on_failure" to be True only if the number of assemblies differs from
      n_expected_assemblies
    
    n_expected_assemblies
      Expected number of assemblies. No exception is raised if this number is
      not met; however, if ``include_parts_plots`` and
      ``include_assembly_plots`` are set to "on_failure", extra plots will be
      generated.
    """
    # Make prefix Genbank friendly
    assemblies_prefix = assemblies_prefix.replace(" ", "_")[:18]

    if mix_class == "restriction":
        mix_class = RestrictionLigationMix
    part_names = [p.name for p in parts]
    non_unique = [e for (e, count) in Counter(part_names).items() if count > 1]
    if len(non_unique) > 0:
        raise ValueError("All parts provided should have different names. "
                         "The assembly (%s) contains the part(s) %s more "
                         "than once." %
                         (" ".join(part_names), ", ".join(non_unique)))
    if fragments_filters == "auto":
        fragments_filters = [NoRestrictionSiteFilter(enzyme)]

    report = file_tree(target, replace=True)

    assemblies_dir = report._dir("assemblies")

    mix = mix_class(parts, enzyme, fragments_filters=fragments_filters)
    if len(connector_records):
        try:
            mix.autoselect_connectors(connector_records)
        except AssemblyError as err:
            ax = mix.plot_slots_graph(
                with_overhangs=show_overhangs_in_graph,
                show_missing=True,
                highlighted_parts=part_names,
            )
            f = report._file("parts_graph.pdf")
            ax.figure.savefig(f.open("wb"), format="pdf", bbox_inches="tight")
            plt.close(ax.figure)

            # PLOT CONNECTIONS GRAPH (BIGGER, MORE INFO)
            ax = mix.plot_connections_graph()
            f = report._file("connections_graph.pdf")
            ax.figure.savefig(f.open("wb"), format="pdf", bbox_inches="tight")
            plt.close(ax.figure)

            raise err

    # ASSEMBLIES
    filters = (FragmentSetContainsPartsFilter(part_names), )
    assemblies = mix.compute_circular_assemblies(
        annotate_homologies=show_overhangs_in_genbank,
        fragments_sets_filters=filters if no_skipped_parts else (),
    )
    assemblies = sorted(
        [asm for (i, asm) in zip(range(max_assemblies), assemblies)],
        key=lambda asm: str(asm.seq),
    )
    assemblies_data = []
    i_asm = list(zip(range(max_assemblies), assemblies))
    for i, asm in i_asm:
        if len(i_asm) == 1:
            name = assemblies_prefix
        else:
            name = "%s_%03d" % (assemblies_prefix, (i + 1))
        asm.name = asm.id = name
        assemblies_data.append(
            dict(
                assembly_name=name,
                parts=" & ".join([name_fragment(f_) for f_ in asm.fragments]),
                number_of_parts=len(asm.fragments),
                assembly_size=len(asm),
            ))
        write_record(asm, assemblies_dir._file(name + ".gb"), "genbank")
        if include_assembly_plots:
            gr_record = AssemblyTranslator().translate_record(asm)
            ax, gr = gr_record.plot(figure_width=16)
            ax.set_title(name)
            ax.set_ylim(top=ax.get_ylim()[1] + 1)
            ax.figure.savefig(
                assemblies_dir._file(name + ".pdf").open("wb"),
                format="pdf",
                bbox_inches="tight",
            )
            plt.close(ax.figure)

    is_failure = (len(assemblies) == 0) or (
        (n_expected_assemblies is not None)
        and (len(assemblies) != n_expected_assemblies)
    )
    if include_fragments_plots == "on_failure":
        include_fragments_plots = is_failure
    if include_parts_plots == "on_failure":
        include_parts_plots = is_failure
    if include_fragments_connection_graph == "on_failure":
        include_fragments_connection_graph = is_failure

    # PROVIDED PARTS
    if include_parts_plots:
        provided_parts_dir = report._dir("provided_parts")
        for part in parts:
            linear = record_is_linear(part, default=False)
            ax, gr = plot_cuts(part, enzyme, linear=linear)
            f = provided_parts_dir._file(part.name + ".pdf").open("wb")
            ax.figure.savefig(f, format="pdf", bbox_inches="tight")
            plt.close(ax.figure)
            gb_file = provided_parts_dir._file(part.name + ".gb")
            write_record(part, gb_file, "genbank")

    # FRAGMENTS
    if include_fragments_plots:
        fragments_dir = report._dir("fragments")
        seenfragments = defaultdict(int)
        for fragment in mix.fragments:
            gr = BiopythonTranslator().translate_record(fragment)
            ax, _ = gr.plot()
            name = name_fragment(fragment)
            seenfragments[name] += 1
            file_name = "%s_%02d.pdf" % (name, seenfragments[name])
            ax.figure.savefig(
                fragments_dir._file(file_name).open("wb"),
                format="pdf",
                bbox_inches="tight",
            )
            plt.close(ax.figure)

    # PLOT CONNECTIONS GRAPH (BIGGER, MORE INFO)
    if include_fragments_connection_graph:
        ax = mix.plot_connections_graph()
        f = report._file("connections_graph.pdf")
        ax.figure.savefig(f.open("wb"), format="pdf", bbox_inches="tight")
        plt.close(ax.figure)

    graph = mix.slots_graph(with_overhangs=False)
    slots_dict = {
        s: "|".join(list(pts))
        for s, pts in mix.compute_slots().items()
    }
    non_linear_slots = [(slots_dict[n],
                         "|".join([slots_dict[b] for b in graph.neighbors(n)]))
                        for n in graph.nodes() if graph.degree(n) != 2]

    # PLOT SLOTS GRAPH
    if len(connector_records):
        highlighted_parts = part_names
    else:
        highlighted_parts = []
    ax = mix.plot_slots_graph(
        with_overhangs=show_overhangs_in_graph,
        show_missing=True,
        highlighted_parts=highlighted_parts,
    )
    f = report._file("parts_graph.pdf")
    ax.figure.savefig(f.open("wb"), format="pdf", bbox_inches="tight")
    plt.close(ax.figure)

    if len(non_linear_slots):
        report._file("non_linear_nodes.csv").write(
            "\n".join(["part,neighbours"] + [
                "%s,%s" % (part, neighbours)
                for part, neighbours in non_linear_slots
            ]))

    df = pandas.DataFrame.from_records(
        assemblies_data,
        columns=["assembly_name", "number_of_parts", "assembly_size", "parts"],
    )
    df.to_csv(report._file("report.csv").open("w"), index=False)
    n_constructs = len(df)
    if target == "@memory":
        return n_constructs, report._close()
    else:
        if isinstance(target, str):
            report._close()
        return n_constructs
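A hedged usage sketch built from the signature above; `genbank_paths` is a hypothetical list of part files, and `load_record` is used as in the DNA Cauldron examples earlier in this document:

from dnacauldron import load_record

parts = [load_record(path, topology="circular") for path in genbank_paths]
n_constructs, zip_data = full_assembly_report(parts, "@memory", enzyme="BsmBI")
with open("assembly_report.zip", "wb") as f:
    f.write(zip_data)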
Example #29
0
def generate_report(records,
                    digestions,
                    ladder,
                    group_by="digestions",
                    show_band_sizes=False,
                    full_report=True):
    """Yeah !"""

    files_contents = []
    all_patterns = defaultdict(dict)
    zip_root = flametree.file_tree("@memory")
    with PdfPages(zip_root._file("Details.pdf").open("wb")) as details_pdf:
        for record in records:
            record_label = record.id
            for enzymes in digestions:

                enzymes_label = " + ".join(sorted(enzymes))
                basename = "%s--%s" % (record_label.replace(
                    " ", "_"), "+".join(enzymes))
                record_digestion = annotate_digestion_bands(
                    record, enzymes, ladder)
                files_contents.append([
                    ("genbanks", basename + ".gb"),
                    # Bind the current record via a default argument; a plain
                    # closure would make every lambda write the loop's last record:
                    lambda fh, rec=record_digestion: write_record(rec, fh, "genbank")
                ])
                if full_report:
                    (ax, _, _) = plot_record_digestion(record_digestion,
                                                       ladder, record_label,
                                                       enzymes_label)
                    details_pdf.savefig(ax.figure, bbox_inches="tight")
                    plt.close(ax.figure)
                bands = sorted([
                    f.qualifiers["band_size"]
                    for f in record_digestion.features
                    if f.qualifiers.get("band_size", False)
                ])
                if group_by == "digestions":
                    all_patterns[enzymes_label][record_label] = bands
                else:
                    all_patterns[record_label][enzymes_label] = bands

    Y = len(all_patterns)
    X = len(list(all_patterns.values())[0])
    fig, axes = plt.subplots(Y, 1, figsize=(0.9 * X, 3 * Y))
    if Y == 1:
        axes = [axes]
    bands_props = {"band_thickness": 2.5}
    if show_band_sizes:
        bands_props.update(dict(label='=size', label_fontdict=dict(size=6)))
    for ax, (cat1, cat2s) in zip(axes, sorted(all_patterns.items())):
        pattern_set = bw.BandsPatternsSet(
            patterns=[
                bw.BandsPattern(_bands,
                                ladder=ladder,
                                label=cat2 if (ax == axes[0]) else None,
                                label_fontdict=dict(rotation=70),
                                global_bands_props=bands_props)
                for cat2, _bands in cat2s.items()
            ],
            ladder=ladder,
            ladder_ticks=4,
            ticks_fontdict=dict(size=9),
            label=cat1)
        pattern_set.plot(ax)

    preview = matplotlib_figure_to_svg_base64_data(fig, bbox_inches="tight")
    if full_report:
        fig.savefig(zip_root._file("summary.pdf").open("wb"),
                    format="pdf",
                    bbox_inches="tight")
        report = zip_root._close()
    else:
        report = None
    return preview, report
Example #30
0
    def work(self):

        self.logger(message="Exploring possible digestions...")

        data = self.data
        ladder = LADDERS[data.ladder]
        enzymes = data.possible_enzymes
        records = records_from_data_files(data.files)
        for record in records:
            set_record_topology(record, data.topology)

        self.logger(message="Initializing...")

        if data.goal == "ideal":
            mini, maxi = data.bands_range
            problem = IdealDigestionsProblem(
                sequences=records,
                enzymes=enzymes,
                ladder=ladder,
                min_bands=mini,
                max_bands=maxi,
                max_enzymes_per_digestion=data.max_enzymes)
        else:
            problem = SeparatingDigestionsProblem(
                sequences=records,
                enzymes=enzymes,
                ladder=ladder,
                max_enzymes_per_digestion=data.max_enzymes,
            )
        self.logger(message="Selecting digestions...")
        score, selected_digestions = problem.select_digestions(
            max_digestions=data.max_digestions, search="full")
        bands_props = (None if not data.show_bands_sizes else dict(
            label="=size", label_fontdict=dict(size=6)))
        axes = problem.plot_digestions(
            selected_digestions,
            patterns_props={"label_fontdict": {"rotation": 35}},
            bands_props=bands_props,
        )
        figure_data = matplotlib_figure_to_svg_base64_data(axes[0].figure,
                                                           bbox_inches="tight")

        if data.plot_cuts:
            ladder = bandwagon.custom_ladder(None, ladder.bands)
            self.logger(message="Plotting cuts maps...")
            zip_root = flametree.file_tree("@memory")
            bandwagon.plot_records_digestions(
                target=zip_root._file("Details.pdf").open("wb"),
                ladder=ladder,
                records_and_digestions=[(rec, digestion) for rec in records
                                        for digestion in selected_digestions],
            )
            pdf_data = zip_root["Details.pdf"].read("rb")
            pdf_data = data_to_html_data(pdf_data, datatype="pdf")
        else:
            pdf_data = None

        return {
            "figure_data": figure_data,
            "digestions": selected_digestions,
            "score": score,
            "pdf_data": pdf_data,
        }