def test_EnforceTranslation_bacterial_valine():
    """EnforceTranslation(start_codon="keep") with the Bacterial table.

    For each tested start codon (ATG and GTG), the constraint's translation
    must begin with "M", and after optimization the start codon and the
    translated protein must match the expected values.
    """
    table_name = "Bacterial"
    protein = "LLTMMVTTTTVMVL"
    coding_body = reverse_translate(protein, table=table_name)

    # Pairs of (start codon before optimization, start codon expected after).
    start_codon_cases = (
        ("ATG", "ATG"),  # methionine stays the only methionine codon
        ("GTG", "GTG"),  # valine-start-codon stays the only valine-start-codon
    )
    for start_before, start_after in start_codon_cases:
        problem = DnaOptimizationProblem(
            sequence=start_before + coding_body,
            constraints=[
                EnforceTranslation(genetic_table="Bacterial", start_codon="keep")
            ],
            objectives=[EnforceChanges()],
            logger=None,
        )
        assert problem.constraints[0].translation == "MLLTMMVTTTTVMVL"

        problem.optimize()

        optimized_protein = translate(
            problem.sequence, table_name, assume_start_codon=True
        )
        assert optimized_protein == "M" + protein
        assert problem.sequence[:3] == start_after
def optimize(self, codon_table):
    """Re-optimize the generated vaccine codons and store the result.

    Runs ``optimize_frequent`` first, then turns the current codon list into
    a strand, optimizes it with a ``DnaOptimizationProblem`` (translation
    and GC-content constraints, ``use_best_codon`` codon optimization), and
    finally stores the optimized sequence back as a list of 3-letter codons.

    Parameters
    ----------
    codon_table
      Passed to ``self.optimize_frequent`` and to ``pct.get_codons_table``.

    Returns
    -------
    None
    """
    self.optimize_frequent(codon_table)

    # Snapshot the codons, then empty the existing list object in place.
    previous_codons = self.__vaccine_codons_gen.copy()
    self.__vaccine_codons_gen.clear()
    strand = self.get_strand(previous_codons)
    usage_table = pct.get_codons_table(codon_table)

    translation_spec = EnforceTranslation(genetic_table='Standard', start_codon='ATG')
    gc_spec = EnforceGCContent(mini=0.54, maxi=0.9, window=120)
    codon_objective = CodonOptimize(
        method="use_best_codon", codon_usage_table=usage_table
    )
    problem = DnaOptimizationProblem(
        sequence=strand,
        constraints=[translation_spec, gc_spec],
        objectives=[codon_objective],
    )
    problem.resolve_constraints()
    problem.optimize()

    # Split the optimized sequence into complete codons; an incomplete
    # trailing codon (1 or 2 leftover nucleotides) is not stored.
    optimized = problem.sequence
    self.__vaccine_codons_gen = [
        optimized[start:start + 3] for start in range(0, len(optimized) - 2, 3)
    ]
def test_optimization_2():
    """Manufacturability optimization should lower the assembly quote price.

    Quotes the first 5500 nucleotides of the test sequence on a Gibson
    assembly station with two suppliers, optimizes the sequence under an
    EnforceTranslation constraint, and checks the price before and after.
    """
    fasta_path = os.path.join("tests", "data", "test_optimization_sequence_2.fa")
    sequence = str(load_record(fasta_path).seq)[:5500]

    deluxe_dna = CommercialDnaOffer(
        name="DeluxeDNA.com",
        sequence_constraints=[SequenceLengthConstraint(max_length=4000)],
        pricing=PerBasepairPricing(0.20),
        lead_time=10,
    )
    cheap_dna = CommercialDnaOffer(
        name="CheapDNA.com",
        sequence_constraints=[
            NoPatternConstraint(enzyme="BsaI"),
            EnforceGCContent(0.3, 0.7, window=60),
        ],
        pricing=PerBasepairPricing(0.10),
        lead_time=15,
    )

    # Blocks-to-chunks assembly station.
    gibson_method = GibsonAssemblyMethod(
        overhang_selector=FixedSizeSegmentSelector(10),
        min_segment_length=1000,
        max_segment_length=6000,
        duration=8,
        cost=16,
    )
    station = DnaAssemblyStation(
        name="Gibson Blocks Assembly",
        assembly_method=gibson_method,
        supplier=[deluxe_dna, cheap_dna],
        coarse_grain=30,
        fine_grain=False,
        memoize=True,
    )

    price_before = station.get_quote(sequence).price
    assert price_before > 850

    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation(location=(0, 4998))],
        objectives=[OptimizeManufacturability(station)],
    )
    problem.randomization_threshold = 0  # Forces "random search" mode
    problem.max_random_iters = 5
    problem.optimize()

    print("OPTIMIZATION DONE, GENERATING REPORT")
    price_after = station.get_quote(problem.sequence).price
    assert price_after < 580
def test_AvoidNonUniqueSegments_as_objective():
    """AvoidNonUniqueSegments(8) used as an objective must pass after optimize()."""
    numpy.random.seed(123)
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(1000, seed=123),
        objectives=[AvoidNonUniqueSegments(8)],
    )
    problem.optimize()

    evaluation = problem.objectives[0].evaluate(problem)
    assert evaluation.passes
def test_UniquifyAllKmers_as_objective():
    """UniquifyAllKmers(8) used as an objective must pass after optimize()."""
    numpy.random.seed(123)
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(1000, seed=123),
        objectives=[UniquifyAllKmers(8)],
        logger=None,
    )
    problem.optimize()

    evaluation = problem.objectives[0].evaluate(problem)
    assert evaluation.passes
def test_codon_optimize_with_custom_table():
    """CodonOptimize with an explicit usage table reaches a score of 0.

    The table is taken from ``biotools.CODON_USAGE_TABLES['b_subtilis']``.
    """
    sequence = random_dna_sequence(1200, seed=123)
    translation = EnforceTranslation()
    codon_objective = CodonOptimize(
        codon_usage_table=biotools.CODON_USAGE_TABLES['b_subtilis']
    )
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[translation],
        objectives=[codon_objective],
    )

    score_before = problem.objective_scores_sum()
    assert score_before < -10

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert score_after == 0
def test_pattern_and_reverse():
    """Removing BsmBI sites from a site-saturated sequence takes < 70 edits.

    The sequence is built from repeats of the BsmBI site and of its reverse
    complement; AvoidChanges is the objective.
    """
    forward_site = "CGTCTC"
    reverse_site = "GAGACG"
    sequence = "".join(
        [
            10 * forward_site,
            25 * reverse_site,
            15 * forward_site,
            15 * reverse_site,
        ]
    )
    problem = DnaOptimizationProblem(
        sequence,
        constraints=[AvoidPattern('BsmBI_site')],
        objectives=[AvoidChanges()],
    )
    problem.resolve_constraints()
    problem.optimize()

    n_edits = sum(problem.sequence_edits_as_array())
    assert n_edits < 70
def test_codon_optimize_with_custom_table():
    """CodonOptimize with an explicit usage table reaches a score of 0.

    The table is obtained with ``get_codons_table("b_subtilis")``.
    """
    usage_table = get_codons_table("b_subtilis")
    sequence = random_dna_sequence(1200, seed=123)
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation()],
        objectives=[CodonOptimize(codon_usage_table=usage_table)],
        logger=None,
    )

    score_before = problem.objective_scores_sum()
    assert score_before < -10

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert score_after == 0
def test_AvoidRareCodons_as_objective():
    """AvoidRareCodons(0.11, "e_coli") as an objective on a six-codon CDS.

    The initial score must be about -0.09 and the optimized score exactly 0.
    """
    numpy.random.seed(123)
    # Written codon by codon; the adjacent literals form one string.
    sequence = (
        "ATG"
        "TTT"
        "ATA"
        "CCA"
        "CTT"
        "TAG"
    )
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation()],
        objectives=[AvoidRareCodons(0.11, "e_coli")],
    )

    initial_score = problem.objective_scores_sum()
    assert abs(initial_score + 0.09) < 0.001

    problem.optimize()

    final_score = problem.objective_scores_sum()
    assert final_score == 0
def test_codon_optimize_bestcodon():
    """CodonOptimize(species='e_coli') goes from a negative score to 0."""
    numpy.random.seed(123)
    sequence = reverse_translate(random_protein_sequence(3000, seed=123))
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation()],
        objectives=[CodonOptimize(species='e_coli')],
    )

    score_before = problem.objective_scores_sum()
    assert score_before < 0

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert score_after == 0
def test_codon_optimize_harmonized():
    """Harmonized e_coli codon optimization improves the objective score.

    The score must start between -700 and -600 and end above -350.
    """
    numpy.random.seed(123)
    sequence = reverse_translate(random_protein_sequence(500, seed=123))
    harmonization = CodonOptimize(species='e_coli', mode='harmonized')
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation()],
        objectives=[harmonization],
    )

    score_before = problem.objective_scores_sum()
    assert -700 < score_before < -600

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert -350 < score_after
def test_codon_optimize_harmonized_short_sequence():
    """Harmonized b_subtilis optimization on the 9-residue protein DDDKKKKKK.

    The score must start below -7 and end above -1.
    """
    sequence = reverse_translate("DDDKKKKKK")
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation()],
        objectives=[CodonOptimize(species='b_subtilis', mode='harmonized')],
    )

    score_before = problem.objective_scores_sum()
    assert score_before < -7

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert -1 < score_after
def test_AvoidRareCodons_as_objective_reversed():
    """AvoidRareCodons as an objective on a CDS given as its reverse complement.

    Both specifications use the location ``(0, len(sequence), -1)``. The
    initial score must be about -0.09 and the optimized score exactly 0.
    """
    numpy.random.seed(123)
    # Written codon by codon; the adjacent literals form one string.
    forward_cds = (
        "ATG"
        "TTT"
        "ATA"
        "CCA"
        "CTT"
        "TAG"
    )
    antisense = reverse_complement(forward_cds)
    minus_strand = (0, len(forward_cds), -1)

    problem = DnaOptimizationProblem(
        sequence=antisense,
        constraints=[EnforceTranslation(location=minus_strand)],
        objectives=[AvoidRareCodons(0.11, "e_coli", location=minus_strand)],
    )

    initial_score = problem.objective_scores_sum()
    assert abs(initial_score + 0.09) < 0.001

    problem.optimize()

    final_score = problem.objective_scores_sum()
    assert final_score == 0
def test_avoid_changes_with_indices_as_constraint():
    """AvoidChanges(indices=...) as a constraint protects only those indices.

    With EnforceChanges as the objective on a 50-nt sequence and 15 protected
    indices, the number of edits must be 50 - 15.
    """
    numpy.random.seed(123)
    protected_indices = [10, 20] + list(range(30, 40)) + [44, 45, 46]
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(50),
        constraints=[AvoidChanges(indices=protected_indices)],
        objectives=[EnforceChanges()],
        logger=None,
    )
    problem.optimize()

    edit_count = problem.number_of_edits()
    assert edit_count == 50 - 15
def test_codon_optimize_match_usage():
    """CodonOptimize(method="match_codon_usage") improves the objective score.

    The score must start between -600 and -550 and end above -350.
    """
    numpy.random.seed(123)
    sequence = reverse_translate(random_protein_sequence(500, seed=123))
    usage_matching = CodonOptimize(species="e_coli", method="match_codon_usage")
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation()],
        objectives=[usage_matching],
        logger=None,
    )

    score_before = problem.objective_scores_sum()
    assert -600 < score_before < -550

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert -350 < score_after
def create_new_primer(existing_primers):
    """Design one more primer given the primers created so far.

    Starts from a random 20-nt sequence, resolves the constraints
    (``AvoidHeterodimerization(existing_primers, tmax=3)`` and the patterns
    "3x3mer" and "4xG"), then optimizes towards a GC content target of 0.6.

    Parameters
    ----------
    existing_primers
      The primers created so far, passed to ``AvoidHeterodimerization``.

    Returns
    -------
    The sequence of the optimized problem.
    """
    seed_sequence = random_dna_sequence(length=20)
    primer_constraints = [
        AvoidHeterodimerization(existing_primers, tmax=3),
        AvoidPattern("3x3mer"),
        AvoidPattern("4xG"),
    ]
    primer_problem = DnaOptimizationProblem(
        sequence=seed_sequence,
        constraints=primer_constraints,
        objectives=[EnforceGCContent(target=0.6)],
        logger=None,
    )
    primer_problem.resolve_constraints()
    primer_problem.optimize()
    return primer_problem.sequence
def test_codon_optimize_harmonize_rca_short_sequence():
    """CodonOptimize(method="harmonize_rca") from e_coli to h_sapiens.

    The score must start below -123 and end above -74.
    """
    sequence = reverse_translate(random_protein_sequence(500, seed=123))
    rca_objective = CodonOptimize(
        species="h_sapiens",
        original_species="e_coli",
        method="harmonize_rca",
    )
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation()],
        objectives=[rca_objective],
        logger=None,
    )

    score_before = problem.objective_scores_sum()
    assert score_before < -123

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert -74 < score_after
def test_EnforceSequence_as_objective():
    """EnforceSequence as an objective alongside an AvoidPattern constraint.

    The starting sequence is 25 repeats of "ATGC". The constraint forbids
    "ATGC" and the objective asks for 15 "W" nucleotides at positions 50-65.
    The objective score must be negative at first and 0 after optimization.
    """
    numpy.random.seed(1234)
    segment_length = 15
    segment_start = 50
    segment = (segment_start, segment_start + segment_length)

    objective = EnforceSequence("W" * segment_length, location=segment)
    problem = DnaOptimizationProblem(
        sequence=25 * "ATGC",
        constraints=[AvoidPattern("ATGC")],
        objectives=[objective],
    )

    score_before = problem.objective_scores_sum()
    assert score_before < 0

    problem.resolve_constraints()
    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert score_after == 0
def test_codon_optimize_match_usage_short_sequence():
    """match_codon_usage for b_subtilis on the 9-residue protein DDDKKKKKK.

    Checks the score before and after optimization, prints the final score,
    and pins the exact optimized sequence.
    """
    numpy.random.seed(123)
    sequence = reverse_translate("DDDKKKKKK")
    usage_matching = CodonOptimize(
        species="b_subtilis", method="match_codon_usage"
    )
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation()],
        objectives=[usage_matching],
        logger=None,
    )

    score_before = problem.objective_scores_sum()
    assert score_before < -5.5

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert -0.6 < score_after
    print(problem.objective_scores_sum())
    assert problem.sequence == "GATGATGACAAGAAAAAGAAAAAAAAA"
def test_codon_optimize_match_usage_gfp_sequence():
    """match_codon_usage for s_cerevisiae on a fixed 144-nt coding sequence.

    The score must start below -61 and end above -16. The test then calls
    ``compare_frequencies`` in text mode and prints its output.
    """
    sequence = (
        "ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTG"
        "GTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGCGCGGC"
        "GAGGGCGAGGGCGATGCCACCAACGGCAAGCTGACCCTGAAGTTCATC"
    )
    usage_matching = CodonOptimize(
        species="s_cerevisiae", method="match_codon_usage"
    )
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation()],
        objectives=[usage_matching],
        logger=None,
    )

    score_before = problem.objective_scores_sum()
    assert score_before < -61

    problem.optimize()

    score_after = problem.objective_scores_sum()
    assert score_after > -16

    # Just for coverage, we run the compare_frequencies function in text mode
    problem_objective = problem.objectives[0]
    codons = problem_objective.get_codons(problem)
    print(problem_objective.compare_frequencies(codons, text_mode=True))
def test_avoid_change_as_objectives_basics():
    """A larger AvoidChanges boost must yield strictly fewer sequence edits.

    For each boost in (0, 0.1, 0.2, 1) the same random sequence is optimized
    for GC content with AvoidChanges as a passive objective. The number of
    differences must strictly decrease, from more than 40 down to 0.
    """
    numpy.random.seed(123)
    boosts = (0, 0.1, 0.2, 1)
    edit_counts = []
    for boost in boosts:
        gc_objective = EnforceGCContent(
            mini=0.45, maxi=0.55, window=80
        ).copy_with_changes(locations_span=300)
        keep_objective = AvoidChanges(boost=boost).as_passive_objective()
        problem = DnaOptimizationProblem(
            sequence=random_dna_sequence(1000, seed=123),
            objectives=[gc_objective, keep_objective],
        )
        problem.optimize()
        edit_counts.append(
            sequences_differences(problem.sequence, problem.sequence_before)
        )

    assert edit_counts[0] > 40
    assert edit_counts[0] > edit_counts[1] > edit_counts[2] > edit_counts[3]
    assert edit_counts[-1] == 0
def create_new_sequence(
    self,
    naive_target_sequence : str,
    codon_usage_table : Optional[str],
    existing_sequences : List[str]
) -> str:
    """Create a new codon-optimized DNA sequence with DNA Chisel.

    Parameters
    ----------
    naive_target_sequence
      Starting DNA sequence; its translation is enforced as a constraint.
    codon_usage_table
      Accepted but never read by this method; the objective always uses
      ``species="s_cerevisiae"``.
    existing_sequences
      Sequences that must not appear in the result; each one becomes an
      ``AvoidPattern`` constraint.

    Returns
    -------
    The sequence of the problem after constraint resolution and optimization.
    """
    constraints = [
        EnforceTranslation(),
        *(AvoidPattern(existing) for existing in existing_sequences),
    ]
    codon_objective = MatchTargetCodonUsage(species="s_cerevisiae")
    problem = DnaOptimizationProblem(
        sequence=naive_target_sequence,
        constraints=constraints,
        objectives=[codon_objective],
    )
    problem.resolve_constraints(final_check=True)
    problem.optimize()
    return problem.sequence
def test_optimization_1():
    """Manufacturability optimization must improve the objective score.

    Uses a Gibson assembly station with two suppliers, one of which rejects
    sequences containing AarI sites. The score must lie in (-367, -366)
    before optimization and in (-244, -243) after.
    """
    company_ingen = CommercialDnaOffer(
        name="Company InGen",
        pricing=PerBasepairPricing(0.08),
        sequence_constraints=[NoPatternConstraint(enzyme="AarI")],
    )
    company_delux = CommercialDnaOffer(
        name="Company Delux",
        pricing=PerBasepairPricing(0.66),
        sequence_constraints=[],
    )
    gibson_method = GibsonAssemblyMethod(
        overhang_selector=FixedSizeSegmentSelector(20),
        min_segment_length=200,
        max_segment_length=1200,
    )
    station = DnaAssemblyStation(
        name="Gibson Assembly Station",
        assembly_method=gibson_method,
        supplier=[company_ingen, company_delux],
        coarse_grain=20,
    )

    record_path = os.path.join("tests", "data", "test_optimization_sequence_1.fa")
    record = load_record(record_path)
    manufacturability = OptimizeManufacturability(station)
    problem = DnaOptimizationProblem(sequence=record, objectives=[manufacturability])

    # The returned quote is not inspected; the call itself is kept.
    manufacturability.get_quote(problem)

    initial_score = problem.objective_scores_sum()
    assert -367 < initial_score < -366

    problem.randomization_threshold = 0
    problem.max_random_iters = 5
    problem.optimize()

    final_score = problem.objective_scores_sum()
    assert -244 < final_score < -243
def domesticate(
    self,
    dna_sequence=None,
    protein_sequence=None,
    is_cds="default",
    codon_optimization=None,
    extra_constraints=(),
    extra_objectives=(),
    final_record_target=None,
    edit=False,
    barcode="",
    barcode_spacer="AA",
    report_target=None,
):
    """Domesticate a sequence.

    Parameters
    ----------

    dna_sequence
      The DNA sequence to domesticate, either a string or a ``SeqRecord``.
      When a ``SeqRecord`` is given, the constraints and objectives read
      from it with ``DnaOptimizationProblem.from_record`` are shifted by
      the length of the left flank and added to ``extra_constraints`` and
      ``extra_objectives`` respectively.

    protein_sequence
      Amino-acid sequence of the protein, which will be converted into
      a DNA sequence string. When provided, ``is_cds`` is forced to True
      and ``dna_sequence`` is replaced by the reverse translation.

    is_cds
      If True, sequence edits are restricted to synonymous mutations.
      The default value "default" means "use ``self.cds_by_default``".

    codon_optimization
      Either None for no codon optimization or the name of an organism
      supported by DnaChisel.

    extra_constraints
      List of extra constraints to apply to the domesticated sequences.
      Each constraint is either a DnaChisel constraint or a function
      (dna_sequence => DnaChisel constraint).

    extra_objectives
      List of extra optimization objectives to apply to the domesticated
      sequences. Each objective is either a DnaChisel objective or a
      function (dna_sequence => DnaChisel objective).

    final_record_target
      Path to the file where to write the final genbank.

    edit
      Turn to True to allow sequence edits. If it is False and the
      sequence is not a CDS, an ``AvoidChanges`` constraint is added, so
      that when not all constraints are originally satisfied, a failed
      domestication result (i.e. with attribute ``success`` set to False)
      will be returned.

    report_target
      Target for the sequence optimization report (a folder path, or a zip
      path).

    barcode
      A sequence of DNA that will be added to the left of the sequence once
      the domestication is done. NOTE(review): this parameter is not read
      in this method's body; presumably it is handled by the caller.

    barcode_spacer
      Nucleotides to be added between the barcode and the enzyme (optional,
      the idea here is that they will make sure to avoid the creation of
      unwanted cutting sites). NOTE(review): not read in this method's
      body either.

    Returns
    -------

    result
      A ``DomesticationResult`` built from (sequence before optimization,
      final_record, edits_record, report_data, success, message).
    """
    if is_cds == "default":
        is_cds = self.cds_by_default
    if isinstance(dna_sequence, SeqRecord):
        problem = DnaOptimizationProblem.from_record(dna_sequence)
        # The record's specifications are relative to the insert; shift them
        # so they remain valid once the left flank is prepended.
        for spec in problem.constraints + problem.objectives:
            spec.location += len(self.left_flank)
        extra_constraints = list(extra_constraints) + problem.constraints
        # Bug fix: this previously started from ``extra_constraints``, which
        # dropped the caller's extra objectives and turned every constraint
        # into an objective.
        extra_objectives = list(extra_objectives) + problem.objectives
    if protein_sequence is not None:
        is_cds = True
        dna_sequence = reverse_translate(protein_sequence)

    # Specifications given as callables are instantiated for this sequence.
    constraints = [
        c(dna_sequence) if hasattr(c, "__call__") else c
        for c in list(extra_constraints) + self.constraints
    ]
    # Location of the insert inside the flanked sequence.
    location = Location(
        len(self.left_flank), len(self.left_flank) + len(dna_sequence)
    )
    if is_cds:
        constraints.append(EnforceTranslation(location=location))
    objectives = [
        o(dna_sequence) if hasattr(o, "__call__") else o
        for o in list(extra_objectives) + self.objectives
    ]
    if codon_optimization:
        objectives.append(
            CodonOptimize(species=codon_optimization, location=location)
        )
    if self.minimize_edits:
        objectives.append(AvoidChanges())

    extended_sequence = self.left_flank + dna_sequence + self.right_flank

    if (not is_cds) and (not edit):
        constraints.append(AvoidChanges())
    problem = DnaOptimizationProblem(
        extended_sequence,
        constraints=constraints,
        objectives=objectives,
        logger=self.logger,
    )
    all_constraints_pass = problem.all_constraints_pass()
    # The AvoidChanges objective added for ``minimize_edits`` does not count
    # as a reason to optimize.
    no_objectives = (len(problem.objectives) - self.minimize_edits) == 0
    report_data = None
    optimization_successful = True
    message = ""
    if not (all_constraints_pass and no_objectives):
        problem.n_mutations = self.simultaneous_mutations

        if report_target is not None:
            (success, message, report_data) = problem.optimize_with_report(
                target=report_target, project_name=self.name
            )
            optimization_successful = success
        else:
            # Deliberately broad: any failure is reported through the
            # result's success flag and message instead of being raised.
            try:
                problem.resolve_constraints()
                problem.optimize()
            except Exception as err:
                message = str(err)
                optimization_successful = False
                report_data = None
    final_record = problem.to_record(
        with_original_features=True,
        with_original_spec_features=False,
        with_constraints=False,
        with_objectives=False,
    )
    edits_record = problem.to_record(
        with_original_features=True,
        with_original_spec_features=False,
        with_constraints=False,
        with_objectives=False,
        with_sequence_edits=True,
    )
    if final_record_target is not None:
        SeqIO.write(final_record, final_record_target, "genbank")

    return DomesticationResult(
        problem.sequence_before,
        final_record,
        edits_record,
        report_data,
        optimization_successful,
        message,
    )
random_protein_sequence, reverse_translate, CodonOptimize, EnforceTranslation, AvoidPattern, EnforceGCContent, ) protein = random_protein_sequence(1000, seed=123) sequence = reverse_translate(protein) problem = DnaOptimizationProblem( sequence=sequence, constraints=[ EnforceTranslation(), AvoidPattern("BsmBI_site"), EnforceGCContent(mini=0.4, maxi=0.6, window=60), ], objectives=[CodonOptimize(species="s_cerevisiae")], ) print("\nBefore optimization:\n") print(problem.constraints_text_summary()) print(problem.objectives_text_summary()) problem.resolve_constraints(final_check=True) problem.optimize() print("\nAfter optimization:\n") print(problem.constraints_text_summary()) print(problem.objectives_text_summary())