def experiment_1(seed=123):
    """Run a two-stage GC-content optimization on a random coding sequence.

    This is the DNA Chisel optimization whose results produced the file
    test_determinism.py.

    The numpy random generator is seeded with ``seed``, a 50-residue random
    protein is reverse-translated, and the resulting DNA is optimized twice
    under an ``EnforceTranslation`` constraint: first towards a GC content
    target of 1, then back towards a target of 0.5.

    Returns the sequence of the problem after the second optimization.
    """
    np.random.seed(seed)
    protein = dc.random_protein_sequence(50)
    current_sequence = dc.reverse_translate(protein)

    # First pass maximizes the GC content, second pass brings it back to 50%.
    for gc_target in (1, 0.5):
        stage = dc.DnaOptimizationProblem(
            sequence=current_sequence,
            constraints=[dc.EnforceTranslation()],
            objectives=[dc.EnforceGCContent(target=gc_target)],
            logger=None,
        )
        stage.optimize()
        current_sequence = stage.sequence

    return current_sequence
def test_whole_sequence_change_objective_100():
    """A default EnforceChanges objective yields 50 edits on a 50-nt sequence."""
    np.random.seed(123)
    starting_sequence = dc.random_dna_sequence(50)
    change_everything = dc.EnforceChanges()
    problem = dc.DnaOptimizationProblem(
        sequence=starting_sequence,
        objectives=[change_everything],
    )
    problem.optimize()
    edit_count = problem.number_of_edits()
    assert edit_count == 50
def test_parameterization():
    """Check how EnforceChanges resolves its minimum/amount parameters.

    The same three constraints and three objectives are built twice: once
    directly in Python and once from annotation labels on a Biopython record.
    Both problems must expose the same resolved parameter values.
    """

    def all_none(variables):
        return all(value is None for value in variables)

    poly_a = 200 * "A"

    # Problem 1: specifications instantiated directly.
    problem1 = dc.DnaOptimizationProblem(
        sequence=poly_a,
        constraints=[
            dc.EnforceChanges(),
            dc.EnforceChanges(minimum=20),
            dc.EnforceChanges(minimum_percent=5),
        ],
        objectives=[
            dc.EnforceChanges(),
            dc.EnforceChanges(amount=20),
            dc.EnforceChanges(amount_percent=5),
        ],
    )

    # Problem 2: the same specifications, expressed as record annotations.
    record = dc.sequence_to_biopython_record(200 * "A")
    annotation_labels = (
        "@change",
        "@change(minimum=20)",
        "@change(minimum=5%)",
        "~change",
        "~change(amount=20)",
        "~change(5%)",
    )
    for annotation_label in annotation_labels:
        dc.annotate_record(record, label=annotation_label)
    problem2 = dc.DnaOptimizationProblem.from_record(record)

    for problem in (problem1, problem2):

        # CHECK CONSTRAINTS

        full_constraint = problem.constraints[0]
        assert full_constraint.minimum == 200
        assert full_constraint.minimum_percent == 100
        assert all_none(
            [full_constraint.amount, full_constraint.amount_percent]
        )

        absolute_constraint = problem.constraints[1]
        assert absolute_constraint.minimum == 20
        assert all_none(
            [
                absolute_constraint.minimum_percent,
                absolute_constraint.amount,
                absolute_constraint.amount_percent,
            ]
        )

        percent_constraint = problem.constraints[2]
        assert percent_constraint.minimum == 10
        assert percent_constraint.minimum_percent == 5
        assert all_none(
            [percent_constraint.amount, percent_constraint.amount_percent]
        )

        # CHECK OBJECTIVES

        full_objective = problem.objectives[0]
        assert full_objective.amount == 200
        assert full_objective.amount_percent == 100
        assert all_none(
            [full_objective.minimum, full_objective.minimum_percent]
        )

        absolute_objective = problem.objectives[1]
        assert absolute_objective.amount == 20
        assert all_none(
            [
                absolute_objective.minimum_percent,
                absolute_objective.minimum,
                absolute_objective.amount_percent,
            ]
        )

        percent_objective = problem.objectives[2]
        assert percent_objective.amount == 10
        assert percent_objective.amount_percent == 5
        assert all_none(
            [percent_objective.minimum, percent_objective.minimum_percent]
        )
def make_restriction_part(part_length, left_overhang, right_overhang,
                          enzyme, forbidden_enzymes, assembly_enzyme='BsmBI'):
    """Build an annotated record: flank + overhang/core/overhang + flank.

    A sequence ``left_overhang + <random DNA> + right_overhang`` is created
    and its constraints are resolved so that:

    - both overhangs are left unchanged (``AvoidChanges``),
    - the pattern of ``enzyme`` is enforced inside the central region
      (``EnforcePatternOccurence``),
    - the sites of every enzyme in ``forbidden_enzymes`` and of
      ``assembly_enzyme`` are avoided (``AvoidPattern``).

    Parameters
    ----------
    part_length
      Length of the random central region, between the two overhangs.

    left_overhang, right_overhang
      Sequences placed at the start and at the end of the core sequence.

    enzyme
      Name of the enzyme whose site is enforced in the central region. It is
      also used as the label of the annotation marking that site.

    forbidden_enzymes
      Iterable of enzyme names whose sites must be avoided. Any iterable is
      accepted (list, tuple, set...); it is copied and never mutated.

    assembly_enzyme
      Name of the enzyme whose site is avoided in the core sequence and used
      to build the flanks. It is looked up in ``Restriction`` (presumably
      ``Bio.Restriction`` -- confirm against the file's imports).

    Returns
    -------
    record
      ``flank + core + reverse_complement(flank)``, where the flank is the
      assembly enzyme site followed by ``'A'``, annotated ``'flank'``. The
      overhangs are annotated ``'overhang'`` and the first match of the
      enforced site is annotated with the enzyme name.
    """
    l_left = len(left_overhang)
    l_right = len(right_overhang)
    core_end = l_left + part_length
    left_overhang_location = (0, l_left)
    right_overhang_location = (core_end, core_end + l_right)
    center_location = (l_left, core_end)

    core_sequence = (
        left_overhang
        + dc.random_dna_sequence(part_length)
        + right_overhang
    )

    enforce_enzyme = dc.EnforcePatternOccurence(
        enzyme=enzyme, location=center_location
    )

    # Copy into a list first: concatenating a tuple or a set with a list
    # would raise a TypeError.
    avoided_enzymes = list(forbidden_enzymes) + [assembly_enzyme]

    problem = dc.DnaOptimizationProblem(
        sequence=core_sequence,
        constraints=[
            dc.AvoidChanges(left_overhang_location),
            dc.AvoidChanges(right_overhang_location),
        ]
        + [enforce_enzyme]
        + [
            dc.AvoidPattern(enzyme=enzyme_name)
            for enzyme_name in avoided_enzymes
        ],
    )
    problem.resolve_constraints()

    # From here on, core_sequence is a Biopython record, not a string.
    core_sequence = dc.sequence_to_biopython_record(problem.sequence)
    for loc in [left_overhang_location, right_overhang_location]:
        dc.annotate_record(core_sequence, loc, 'overhang')

    # Annotate the first match reported by the enforced-site constraint.
    site_location = enforce_enzyme.evaluate(problem).data['matches'][0]
    dc.annotate_record(core_sequence, site_location.to_tuple(), enzyme)

    assembly_site = Restriction.__dict__[assembly_enzyme].site
    flank = dc.sequence_to_biopython_record(assembly_site + 'A')
    dc.annotate_record(flank, label='flank')
    return flank + core_sequence + flank.reverse_complement()
def test_whole_sequence_change_constraint_100():
    """A default EnforceChanges constraint passes without any solving step."""
    np.random.seed(123)
    starting_sequence = dc.random_dna_sequence(50)
    problem = dc.DnaOptimizationProblem(
        sequence=starting_sequence,
        constraints=[dc.EnforceChanges()],
    )
    # Passes right away, due to initial seq. constraining.
    constraints_ok = problem.all_constraints_pass()
    assert constraints_ok
    edit_count = problem.number_of_edits()
    assert edit_count == 50
def test_SequenceLengthBounds():
    """SequenceLengthBounds(500, 800) passes for 750 nt, fails for 400 and 1200."""
    expected_by_length = {750: True, 400: False, 1200: False}
    bounds = (500, 800)
    for length, expected in expected_by_length.items():
        random_sequence = dc.random_dna_sequence(length)
        problem = dc.DnaOptimizationProblem(
            sequence=random_sequence,
            constraints=[dc.SequenceLengthBounds(*bounds)],
            logger=None,
        )
        outcome = problem.all_constraints_pass()
        assert outcome == expected
def test_enforce_changes_with_indices_as_constraint():
    """EnforceChanges(indices=...) as a constraint yields one edit per index."""
    np.random.seed(123)
    # 2 isolated positions + a run of 10 + 3 consecutive positions = 15.
    indices = [10, 20, *range(30, 40), 44, 45, 46]
    starting_sequence = dc.random_dna_sequence(50)
    problem = dc.DnaOptimizationProblem(
        sequence=starting_sequence,
        constraints=[dc.EnforceChanges(indices=indices)],
    )
    edit_count = problem.number_of_edits()
    assert edit_count == 15
def test_insert_and_erase_pattern():
    """Raise the count of a pattern from 0 to 5, then lower it from 5 to 2.

    The translation of the sequence is constrained throughout. Before each
    resolution, the score of the occurence constraint must equal minus the
    number of occurences still to add or remove.
    """
    numpy.random.seed(123)
    protein = dc.random_protein_sequence(100)
    pattern = "ATGC"

    # CREATE A SEQUENCE WITH 0 PATTERN OCCURENCES

    sequence = dc.random_compatible_dna_sequence(
        sequence_length=300,
        constraints=[
            dc.EnforceTranslation(translation=protein),
            dc.AvoidPattern(pattern),
        ],
        logger=None,
    )

    # NOW INCREASE PATTERN OCCURENCES FROM 0 TO 5,
    # THEN DECREASE THE NUMBER OF OCCURENCES FROM 5 TO 2

    stages = ((5, -5), (2, -3))
    for target_occurences, expected_initial_score in stages:
        problem = dc.DnaOptimizationProblem(
            sequence=sequence,
            constraints=[
                dc.EnforcePatternOccurence(
                    pattern, occurences=target_occurences
                ),
                dc.EnforceTranslation(),
            ],
            logger=None,
        )
        initial_evaluation = problem.constraints[0].evaluate(problem)
        assert initial_evaluation.score == expected_initial_score
        problem.resolve_constraints()
        assert problem.all_constraints_pass()
        # The next stage starts from the sequence just obtained.
        sequence = problem.sequence
def test_whole_sequence_change_constraint_4():
    """EnforceChanges(minimum=4) fails at first, then resolves with 4-6 edits."""
    np.random.seed(123)
    starting_sequence = dc.random_dna_sequence(50)
    problem = dc.DnaOptimizationProblem(
        sequence=starting_sequence,
        constraints=[dc.EnforceChanges(minimum=4)],
    )
    print(problem.number_of_edits())
    initially_passing = problem.all_constraints_pass()
    assert not initially_passing
    problem.resolve_constraints()
    edit_count = problem.number_of_edits()
    assert 4 <= edit_count <= 6
def random_compatible_dna_sequence(sequence_length, constraints, probas=None,
                                   seed=None, max_random_iters=5000,
                                   logger='bar', **kwargs):
    """Return a random sequence after resolving the given constraints on it.

    A random sequence of ``sequence_length`` is drawn with
    ``dc.random_dna_sequence`` (``probas`` and ``seed`` are forwarded to it)
    and used as the starting point of a ``DnaOptimizationProblem`` carrying
    ``constraints`` and ``logger``. The problem's ``max_random_iters``
    attribute is set from the argument of the same name, and any extra
    keyword arguments are forwarded to ``resolve_constraints``.
    """
    initial_sequence = dc.random_dna_sequence(
        sequence_length, probas=probas, seed=seed
    )
    solver = dc.DnaOptimizationProblem(
        initial_sequence, constraints=constraints, logger=logger
    )
    solver.max_random_iters = max_random_iters
    solver.resolve_constraints(**kwargs)
    return solver.sequence
def test_whole_sequence_change_objective_20_going_down():
    """Optimization brings the edit count down to the requested amount of 20.

    Resolving the AvoidPattern("ATA") constraint on an "AT" repeat is expected
    to leave at least 24 edits; optimizing the EnforceChanges(amount=20)
    objective must then end at exactly 20.
    """
    np.random.seed(123)
    at_repeat = 20 * "AT"
    problem = dc.DnaOptimizationProblem(
        sequence=at_repeat,
        constraints=[dc.AvoidPattern("ATA")],
        objectives=[dc.EnforceChanges(amount=20)],
    )
    problem.mutations_per_iteration = 2

    problem.resolve_constraints()
    edits_after_resolution = problem.number_of_edits()
    assert edits_after_resolution >= 24

    problem.optimize()
    edits_after_optimization = problem.number_of_edits()
    assert edits_after_optimization == 20
def create_problem(boost_profile):
    """Build an optimization problem from a ``{spec_name: boost}`` mapping.

    Each name in ``boost_profile`` is looked up in the module-level
    ``specifications`` mapping; the matching specification is copied with the
    given boost and with the location (1000, 9247), and becomes an objective.
    The problem is built on the module-level ``sequence``, with the
    translation enforced over the same location.
    """
    location = dc.Location(1000, 9247)
    objectives = [
        specifications[spec_name].copy_with_changes(
            boost=boost, location=location
        )
        for spec_name, boost in boost_profile.items()
    ]
    translation_constraint = dc.EnforceTranslation(location=location)
    return dc.DnaOptimizationProblem(
        sequence,
        constraints=[translation_constraint],
        objectives=objectives,
    )
def test_maximal_protein_sequence_change():
    """Maximize edits on a coding sequence while keeping its translation.

    For the seeded 200-residue protein, the optimized sequence is expected to
    carry 238 edits and to still translate to the original protein.
    """
    np.random.seed(123)
    protein = dc.random_protein_sequence(200)
    coding_sequence = dc.reverse_translate(protein)
    problem = dc.DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=[dc.EnforceTranslation()],
        objectives=[dc.EnforceChanges()],
    )
    problem.resolve_constraints()
    problem.optimize()

    edit_count = problem.number_of_edits()
    assert edit_count == 238
    translated = dc.translate(problem.sequence)
    assert translated == protein
def test_enforce_changes_with_indices_vs_avoid_changes():
    """Pit EnforceChanges(indices=...) against AvoidChanges with two boosts.

    With an AvoidChanges boost of 0.5 all 15 indexed positions are expected
    to be edited; with a boost of 1.5 no edit at all is expected.
    """
    np.random.seed(123)
    indices = [10, 20, *range(30, 40), 44, 45, 46]
    sequence = dc.random_dna_sequence(50)

    # (boost of AvoidChanges, expected number of edits after optimization)
    scenarios = ((0.5, 15), (1.5, 0))
    for avoid_boost, expected_edits in scenarios:
        problem = dc.DnaOptimizationProblem(
            sequence=sequence,
            objectives=[
                dc.EnforceChanges(indices=indices),
                dc.AvoidChanges(boost=avoid_boost),
            ],
        )
        problem.optimize()
        assert problem.number_of_edits() == expected_edits
def experiment_2(seed=123):
    """Codon-optimize a random 1000-residue coding sequence for e_coli.

    The numpy random generator is seeded with ``seed``. The problem enforces
    the translation and a GC content between 0.4 and 0.6 over windows of 50;
    constraints are resolved first, then the CodonOptimize objective is
    optimized.

    Returns the sequence of the problem after optimization.
    """
    np.random.seed(seed)
    protein = dc.random_protein_sequence(1000)
    coding_sequence = dc.reverse_translate(protein)

    constraints = [
        dc.EnforceTranslation(),
        dc.EnforceGCContent(mini=0.4, maxi=0.6, window=50),
    ]
    objectives = [dc.CodonOptimize(species="e_coli")]
    problem = dc.DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=constraints,
        objectives=objectives,
        logger=None,
    )
    problem.resolve_constraints()
    problem.optimize()
    return problem.sequence
def random_compatible_dna_sequence(sequence_length, constraints, probas=None,
                                   seed=None, max_random_iters=5000,
                                   logger="bar", **kwargs):
    """Generate a random sequence that complies with some specifications.

    Parameters
    ----------

    sequence_length
      Length of the random sequence to generate.

    constraints
      List of all DnaChisel specifications that will be applied as
      constraints.

    probas
      Either None for a fully random initial sequence, or a dict of the form
      {"A": 0.5, "T": 0.2, ...} to tune the initial nucleotide
      representation.

    seed
      Optional seed for the random number generator, for reproducibility.

    max_random_iters
      Maximum number of random tries per location for the solver.

    logger
      Either 'bar' or None (no logger) or any proglog logger.

    **kwargs
      Extra keyword arguments, passed on to ``resolve_constraints``.
    """
    random_start = dc.random_dna_sequence(
        sequence_length,
        probas=probas,
        seed=seed,
    )
    constraint_problem = dc.DnaOptimizationProblem(
        random_start,
        constraints=constraints,
        logger=logger,
    )
    constraint_problem.max_random_iters = max_random_iters
    constraint_problem.resolve_constraints(**kwargs)
    return constraint_problem.sequence
supplier=[deluxe_dna, cheap_dna], coarse_grain=30, fine_grain=False, memoize=True, a_star_factor="auto", ) quote_before = gibson_blocks_assembly_station.get_quote( sequence, with_assembly_plan=True) print("LOCATING PRICE-DRIVING REGIONS AND OPTIMIZING... PLEASE WAIT") objective = OptimizeManufacturability(gibson_blocks_assembly_station) problem = dnachisel.DnaOptimizationProblem( sequence=sequence, constraints=[dnachisel.EnforceTranslation(location=(0, 9999))], objectives=[objective]) problem.randomization_threshold = 0 # Forces "random search" mode problem.max_random_iters = 5 problem.optimize() print("OPTIMIZATION DONE, GENERATING REPORT") quote_after = gibson_blocks_assembly_station.get_quote(problem.sequence, with_assembly_plan=True) fig, axes = plt.subplots(2, figsize=(6, 4)) for title, quote, ax in zip( ["Before, optimization", "After optimization"], [quote_before, quote_after],
def test_enforce_pattern_options():
    """Check EnforcePatternOccurence strand handling (Github issue #53).

    Six cases are covered: with and without an explicit location, each with
    three strand options.
    """
    sequence = "A" * 10
    pattern = "C" * 4

    def build_problem(**occurence_options):
        # One problem per case, all on the same poly-A starting sequence.
        constraint = dc.EnforcePatternOccurence(
            pattern, occurences=1, **occurence_options
        )
        return dc.DnaOptimizationProblem(
            sequence=sequence,
            constraints=[constraint],
            logger=None,
        )

    def resolve_and_expect(problem, expected_motif):
        problem.resolve_constraints()
        assert problem.all_constraints_pass()
        assert expected_motif in problem.sequence

    # location=None

    problem = build_problem(strand="from_location")
    resolve_and_expect(problem, pattern)

    problem = build_problem(strand="both")
    resolve_and_expect(problem, pattern)

    problem = build_problem(strand=-1)
    assert problem.constraints[0].evaluate(problem).score == -1
    # other strand used
    resolve_and_expect(problem, dc.reverse_complement(pattern))

    # location specified

    # Use -1 strand from location:
    problem = build_problem(
        strand="from_location",
        location=Location(1, 6, strand=-1),
    )
    resolve_and_expect(problem, dc.reverse_complement(pattern))

    # Overwrite -1 strand to "both" (uses +1 strand by default):
    problem = build_problem(
        strand="both",
        location=Location(1, 6, strand=-1),
    )
    resolve_and_expect(problem, pattern)

    # Overwrite -1 strand to +1 (uses +1 strand):
    problem = build_problem(
        strand=1,
        location=Location(1, 6, strand=-1),
    )
    resolve_and_expect(problem, pattern)
# Two spellings of the same regular expression.
regex = "(CCCTTT){3}C{3}"  # optimal pattern for i-motif formation
compact_regex = "(C{3}T{3}){3}C{3}"  # variant of the same regex

# Embed the motif between two random 50-nt stretches.
query_seq = (
    dnachisel.random_dna_sequence(length=50)
    + i_motif
    + dnachisel.random_dna_sequence(length=50)
)
print(query_seq)
seq = Bio.Seq.Seq(query_seq)

# Find first occurrence:
print(seq.find(i_motif))

# Find all, as a list of (start, end) tuples:
matches = [m.span() for m in re.finditer(i_motif, str(seq))]
first_start, first_end = matches[0]
print(seq[first_start:first_end])

# Find regex with DNA Chisel, once per spelling of the pattern:
for avoided_pattern in (regex, compact_regex):
    problem = dnachisel.DnaOptimizationProblem(
        sequence=query_seq,
        constraints=[dnachisel.AvoidPattern(pattern=avoided_pattern)],
    )
    print(problem.constraints_text_summary())