def test_constraints_reports():
    """Check the constraint-breach reports built from the example records.

    Every file of ``tests/data/10_emma_genbanks`` is loaded as a record, a
    fixed set of constraints is evaluated on all records, and the resulting
    breaches are turned into annotated records and then into PDF data. The
    test asserts the total number of features of the annotated records and
    the size range of the PDF data.
    """
    genbank_dir = os.path.join("tests", "data", "10_emma_genbanks")
    # os.listdir returns entries in arbitrary order; sorting the file names
    # makes the order in which records are processed reproducible.
    records = [
        dc.load_record(os.path.join(genbank_dir, filename), name=filename)
        for filename in sorted(os.listdir(genbank_dir))
    ]

    # DEFINE THE CONSTRAINTS TO BE CHECKED ON EACH RECORD

    constraints = [
        dc.AvoidPattern("BsaI_site"),
        dc.AvoidPattern("BsmBI_site"),
        dc.AvoidPattern("BbsI_site"),
        dc.AvoidPattern("8x1mer"),
        dc.AvoidPattern("5x3mer"),
        dc.AvoidPattern("9x2mer"),
        dc.AvoidHairpins(stem_size=20, hairpin_window=200),
        dc.EnforceGCContent(mini=0.3, maxi=0.7, window=100),
    ]

    # CREATE A SPREADSHEET AND PLOTS OF THE BREACHES

    dataframe = cr.constraints_breaches_dataframe(constraints, records)
    records = cr.records_from_breaches_dataframe(dataframe, records)
    assert sum(len(r.features) for r in records) == 157
    pdf_data = cr.breaches_records_to_pdf(records)

    assert 70000 < len(pdf_data) < 72000
Пример #2
0
def test_optimization_2():
    """Optimize a sequence for manufacturability with two DNA offers.

    The first 5500 characters of the test sequence are quoted by a Gibson
    assembly station supplied by two commercial offers. The test asserts the
    quote price is above 850 before optimization and below 580 after the
    sequence has been optimized with ``OptimizeManufacturability``.
    """
    fasta_path = os.path.join(
        "tests", "data", "test_optimization_sequence_2.fa"
    )
    full_sequence = str(load_record(fasta_path).seq)
    sequence = full_sequence[:5500]

    # The two commercial offers supplying the assembly station.
    premium_offer = CommercialDnaOffer(
        name="DeluxeDNA.com",
        sequence_constraints=[SequenceLengthConstraint(max_length=4000)],
        pricing=PerBasepairPricing(0.20),
        lead_time=10,
    )
    budget_constraints = [
        NoPatternConstraint(enzyme="BsaI"),
        EnforceGCContent(0.3, 0.7, window=60),
    ]
    budget_offer = CommercialDnaOffer(
        name="CheapDNA.com",
        sequence_constraints=budget_constraints,
        pricing=PerBasepairPricing(0.10),
        lead_time=15,
    )

    # BLOCKS TO CHUNKS ASSEMBLY

    gibson_method = GibsonAssemblyMethod(
        overhang_selector=FixedSizeSegmentSelector(10),
        min_segment_length=1000,
        max_segment_length=6000,
        duration=8,
        cost=16,
    )
    station = DnaAssemblyStation(
        name="Gibson Blocks Assembly",
        assembly_method=gibson_method,
        supplier=[premium_offer, budget_offer],
        coarse_grain=30,
        fine_grain=False,
        memoize=True,
        # a_star_factor="auto",
    )

    # Price of the unmodified sequence.
    price_before = station.get_quote(sequence).price
    assert price_before > 850

    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation(location=(0, 4998))],
        objectives=[OptimizeManufacturability(station)],
    )

    problem.randomization_threshold = 0  # Forces "random search" mode
    problem.max_random_iters = 5
    problem.optimize()

    print("OPTIMIZATION DONE, GENERATING REPORT")

    # Price of the optimized sequence.
    price_after = station.get_quote(problem.sequence).price
    assert price_after < 580
Пример #3
0
def test_cuba_example_1():
    """Resolve and optimize the problem defined by ``cuba_example_1.gbk``.

    The problem is built from the record. Its constraints must fail at
    first and pass once resolved. The sum of objective scores must be below
    -100 before optimization and above -0.1 after it.
    """
    genbank_path = os.path.join(
        'tests', 'tests_from_genbanks', 'genbanks', 'cuba_example_1.gbk'
    )
    problem = DnaOptimizationProblem.from_record(load_record(genbank_path))

    # Constraints: failing initially, satisfied after resolution.
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()

    # Objectives: score sum before and after optimization.
    assert problem.objective_scores_sum() < -100
    problem.optimize()
    assert problem.objective_scores_sum() > -0.1
Пример #4
0
def test_optimization_1():
    """Check manufacturability scores before and after optimization.

    A Gibson assembly station supplied by two commercial offers is used as
    the ``OptimizeManufacturability`` objective. The sum of objective scores
    must lie in (-367, -366) before optimization and in (-244, -243) after.
    """
    ingen_offer = CommercialDnaOffer(
        name="Company InGen",
        pricing=PerBasepairPricing(0.08),
        sequence_constraints=[NoPatternConstraint(enzyme="AarI")],
    )
    delux_offer = CommercialDnaOffer(
        name="Company Delux",
        pricing=PerBasepairPricing(0.66),
        sequence_constraints=[],
    )

    gibson_method = GibsonAssemblyMethod(
        overhang_selector=FixedSizeSegmentSelector(20),
        min_segment_length=200,
        max_segment_length=1200,
    )
    station = DnaAssemblyStation(
        name="Gibson Assembly Station",
        assembly_method=gibson_method,
        supplier=[ingen_offer, delux_offer],
        coarse_grain=20,
        # a_star_factor="auto",
    )

    fasta_path = os.path.join(
        "tests", "data", "test_optimization_sequence_1.fa"
    )
    objective = OptimizeManufacturability(station)
    problem = DnaOptimizationProblem(
        sequence=load_record(fasta_path), objectives=[objective]
    )

    # The quote is requested as in the original test; its value is not used.
    objective.get_quote(problem)

    initial_score = problem.objective_scores_sum()
    assert -367 < initial_score < -366

    problem.randomization_threshold = 0
    problem.max_random_iters = 5
    problem.optimize()

    final_score = problem.objective_scores_sum()
    assert -244 < final_score < -243
Пример #5
0
    DnaOptimizationProblem,
    load_record,
    Location,
    EnforceTranslation,
    EnforceGCContent,
    AvoidPattern,
)

# Script fragment: downloads a GenBank record and builds, for each suitable
# gene, a list of constraints.
# NOTE(review): `request` and `StringIO` are used below but their imports are
# not visible in this fragment — confirm they are imported above.
print("DOWNLOADING AND PARSING THE GENBANK DATA...")

# Query URL for nucleotide record 48994873, returned as GenBank text.
url = (
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"
    + "db=nucleotide&id=48994873&rettype=gb&retmode=txt"
)
# Download the record and decode the response body as UTF-8 text.
genbank_data = request.urlopen(url).read().decode("utf-8")
# Parse the downloaded text from an in-memory file object.
genbank_record = load_record(StringIO(genbank_data), file_format="genbank")

print("INITIALIZING THE PROBLEM WITH CONSTRAINTS FOR EACH GENE...")

constraints = []
for feature in genbank_record.features:
    # Only "gene" features whose location has a single part are considered.
    if feature.type == "gene" and len(feature.location.parts) == 1:
        location = Location.from_biopython_location(feature.location)
        # Keep locations whose length is a multiple of 3 and above 100.
        if (len(location) % 3 == 0) and len(location) > 100:
            # NOTE(review): gene_constraints is built but not used within the
            # visible lines, and `constraints` is never filled — the snippet
            # appears to be cut off here; confirm against the full script.
            gene_constraints = [
                EnforceTranslation(location = location),
                AvoidPattern("BsmBI_site", location),
                EnforceGCContent(
                    mini=0.40, maxi=0.60, window=150, location=location
                ),
            ]
"""Example of use of AvoidBlastMatches as a constraint to remove matches with
a set of avoided sequences."""

import os
from dnachisel import (AvoidBlastMatches, random_dna_sequence,
                       DnaOptimizationProblem, load_record)

# Relative path of the example .gbk record used by the test in this module.
sequence_path = os.path.join("tests", "data", "example_sequence.gbk")
# Upper-cased sequence of the loaded record, as a plain string.
sequence = str(load_record(sequence_path).seq.upper())


def test_avoid_blast_matches():
    """Check that AvoidBlastMatches breaches are found and then resolved.

    The constraint is built from two avoided sequences with a minimal
    alignment length of 8. On the module-level ``sequence`` it must fail at
    first with 10 breach locations, and pass after constraint resolution.
    """
    blast_constraint = AvoidBlastMatches(
        sequences=[
            "GTCCTCATGCGAAAGCTACGATCGCCAACCCTGT",
            "ACCCACCTCGTTACGTCCACGGCACGAGGAATGATCTCGAGTTGCTTT",
        ],
        min_align_length=8,
    )
    problem = DnaOptimizationProblem(
        sequence=sequence, constraints=[blast_constraint]
    )

    # Before resolution: the constraint fails at 10 locations.
    assert not problem.all_constraints_pass()
    evaluation = blast_constraint.evaluate(problem)
    assert len(evaluation.locations) == 10

    # After resolution: every constraint passes.
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
# NOTE(review): `sns` and `dc` are not imported in the visible lines —
# presumably seaborn and dnachisel; confirm against the top of the file.
sns.set()

# DEFINE HOW OPTIMIZATION PROBLEMS ARE CREATED

# Short label -> specification. create_problem() looks specifications up by
# these labels and copies them with a given boost and location.
specifications = {
    "~keep": dc.AvoidChanges(),
    "~no(CG)": dc.AvoidPattern("CG"),
    "~codon_optimize": dc.CodonOptimize(species="e_coli"),
    "~unique_kmers": dc.UniquifyAllKmers(20),
    "~gc(39%)": dc.EnforceGCContent(target=0.39, window=200),
}
# Reverse lookup: class of each specification -> its label.
class_to_label = {
    spec.__class__: label for label, spec in specifications.items()
}
# Record loaded from "record.gb"; passed as the sequence of every problem
# built by create_problem().
sequence = dc.load_record("record.gb")


def create_problem(boost_profile):
    """Build an optimization problem whose objectives follow a boost profile.

    ``boost_profile`` maps keys of the module-level ``specifications`` dict
    to boost values. Each selected specification is copied with that boost
    and restricted to the location (1000, 9247). The returned problem uses
    the module-level ``sequence`` and enforces translation over the same
    location.
    """
    region = dc.Location(1000, 9247)
    boosted_objectives = [
        specifications[label].copy_with_changes(boost=boost, location=region)
        for label, boost in boost_profile.items()
    ]
    return dc.DnaOptimizationProblem(
        sequence,
        constraints=[dc.EnforceTranslation(location=region)],
        objectives=boosted_objectives,
    )
Пример #8
0
    AvoidPattern,
    AvoidChanges,
    EnforceTranslation,
    HomopolymerPattern,
    EnforceGCContent,
    CodonOptimize,
    load_record,
)
from io import StringIO
import urllib

# DOWNLOAD THE PLASMID FROM THE WEB (it is a 7kb plasmid with 3 genes)
# `import urllib` alone does not load the `urllib.request` submodule, so it is
# imported explicitly here to make `urllib.request.urlopen` reliably available.
import urllib.request

url = "http://www.stevekellylab.com/constructs/pDex/pDex577-G.gb"
# The context manager closes the HTTP response once its body has been read.
with urllib.request.urlopen(url) as response:
    record_file = StringIO(response.read().decode("utf-8"))
record = load_record(record_file, fmt="genbank")

# (start, end, strand) of every CDS feature of the record, as plain integers.
CDS_list = [
    (int(f.location.start), int(f.location.end), int(f.location.strand))
    for f in record.features
    if f.type == "CDS"
]

# DEFINE CONSTRAINTS

dna_provider_constraints = [
    AvoidPattern("BsaI_site"),
    AvoidPattern("AarI_site"),
    AvoidPattern("9xA"),
    AvoidPattern("9xT"),
    AvoidPattern(HomopolymerPattern("6xG")),
    AvoidPattern(HomopolymerPattern("6xC")),
    EnforceGCContent(0.4, 0.65),
Пример #9
0
def test_genbank_import_from_record_unknown_specs():
    """``from_record`` must raise TypeError with an empty specifications dict.

    The example record is loaded and imported with ``specifications_dict={}``.
    Presumably the specifications annotated in the record are then unknown.
    """
    example_record = load_record(example_sequence_path)
    with pytest.raises(TypeError):
        DnaOptimizationProblem.from_record(
            example_record, specifications_dict={}
        )
Пример #10
0
def test_genbank_import_from_record():
    """The example record must import as 5 constraints and 3 objectives."""
    problem = DnaOptimizationProblem.from_record(
        load_record(example_sequence_path)
    )
    constraint_count = len(problem.constraints)
    assert constraint_count == 5
    objective_count = len(problem.objectives)
    assert objective_count == 3
Пример #11
0
from urllib import request
from io import StringIO
from dnachisel import (DnaOptimizationProblem, load_record, Location,
                       EnforceTranslation, EnforceGCContent, AvoidPattern)

# Download a GenBank record, then build a problem with constraints for each
# suitable gene.
print("DOWNLOADING AND PARSING THE GENBANK DATA...")

url = (
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"
    + "db=nucleotide&id=48994873&rettype=gb&retmode=txt"
)
genbank_data = request.urlopen(url).read().decode('utf-8')
genbank_record = load_record(StringIO(genbank_data), fmt="genbank")

print("INITIALIZING THE PROBLEM WITH CONSTRAINTS FOR EACH GENE...")

constraints = []
for feature in genbank_record.features:
    # Skip anything that is not a "gene" feature with a single-part location.
    if feature.type != 'gene' or len(feature.location.parts) != 1:
        continue
    location = Location.from_biopython_location(feature.location)
    # Skip locations whose length is not a multiple of 3 or is 100 or less.
    if len(location) % 3 != 0 or len(location) <= 100:
        continue
    constraints.extend([
        EnforceTranslation(location),
        AvoidPattern('BsmBI_site', location),
        EnforceGCContent(mini=0.40, maxi=0.60, window=150, location=location),
    ])
problem = DnaOptimizationProblem(genbank_record, constraints)

print("RESOLVING THE CONSTRAINTS...")
Пример #12
0
import dnachisel as dc
import dnachisel.reports.constraints_reports as cr
import os

# IMPORT THE 10 RECORDS FROM THE genbanks/ FOLDER
# os.listdir returns entries in arbitrary order; sorting the file names makes
# the order in which records are processed reproducible between runs.

records = [
    dc.load_record(os.path.join("genbanks", filename), name=filename)
    for filename in sorted(os.listdir("genbanks"))
]

# DEFINE THE CONSTRAINTS TO BE CHECKED ON EACH RECORD

constraints = [
    dc.AvoidPattern("BsaI_site"),
    dc.AvoidPattern("BsmBI_site"),
    dc.AvoidPattern("BbsI_site"),
    dc.AvoidPattern("8x1mer"),
    dc.AvoidPattern("5x3mer"),
    dc.AvoidPattern("9x2mer"),
    dc.AvoidHairpins(stem_size=20, hairpin_window=200),
    dc.EnforceGCContent(mini=0.3, maxi=0.7, window=100),
]

# CREATE A SPREADSHEET AND PLOTS OF THE BREACHES

# Evaluate every constraint on every record, save the table to an Excel
# file, then write the records rebuilt from the breaches to a PDF file.
dataframe = cr.constraints_breaches_dataframe(constraints, records)
dataframe.to_excel("breaches.xlsx")
records = cr.records_from_breaches_dataframe(dataframe, records)
cr.breaches_records_to_pdf(records, "breaches_plots.pdf")
Пример #13
0
much improved.

The final sequence (with the original annotations) is exported to Genbank.
"""

from dnachisel import (DnaOptimizationProblem, AvoidPattern, AvoidChanges,
                       EnforceTranslation, HomopolymerPattern,
                       EnforceGCContent, CodonOptimize, load_record)
from io import StringIO
import urllib

# DOWNLOAD THE PLASMID FROM THE WEB (it is a 7kb plasmid with 3 genes)
# `import urllib` alone does not load the `urllib.request` submodule, so it is
# imported explicitly here to make `urllib.request.urlopen` reliably available.
import urllib.request

url = "http://www.stevekellylab.com/constructs/pDex/pDex577-G.gb"
# The context manager closes the HTTP response once its body has been read.
with urllib.request.urlopen(url) as response:
    record_file = StringIO(response.read().decode('utf-8'))
record = load_record(record_file, fmt='genbank')

# (start, end, strand) of every CDS feature of the record, as plain integers.
CDS_list = [
    (int(f.location.start), int(f.location.end), int(f.location.strand))
    for f in record.features
    if f.type == "CDS"
]

# DEFINE CONSTRAINTS

dna_provider_constraints = [
    AvoidPattern("BsaI_site"),
    AvoidPattern("AarI_site"),
    AvoidPattern(HomopolymerPattern("A", 9)),
    AvoidPattern(HomopolymerPattern("T", 9)),
    AvoidPattern(HomopolymerPattern("G", 6)),
    AvoidPattern(HomopolymerPattern("C", 9)),
    EnforceGCContent(0.4, 0.65),