def load_user_options(args, location):
    """Translate parsed user options into DnaChisel objectives and constraints.

    Parameters
    ----------
    args
      Object exposing the user options as attributes (``harmonized``,
      ``species``, ``avoid_homopolymers``, ``avoid_hairpins``,
      ``avoid_patterns``, ``avoid_restriction_sites``, the various
      ``*_GC_content*`` options, ``constrain_CAI``, ``kmers``, ...).
    location
      ``Location`` instance to which the location-aware specifications
      are restricted.

    Returns
    -------
    objectives, constraints
      Two lists of specification objects. ``objectives`` always starts with
      a ``CodonOptimize`` and ``constraints`` always starts with an
      ``EnforceTranslation`` over ``location``.
    """
    assert isinstance(location, Location)

    opt_mode = "harmonized" if args.harmonized else "best_codon"
    objectives = [
        CodonOptimize(species=args.species, location=location, mode=opt_mode)
    ]
    # set enforce translation to the whole thing
    constraints = [EnforceTranslation(location=location)]

    if args.avoid_homopolymers:
        # One constraint per nucleotide; the option value is passed as the
        # second argument of HomopolymerPattern.
        constraints += [
            AvoidPattern(
                HomopolymerPattern(nucleotide, args.avoid_homopolymers),
                location=location,
            )
            for nucleotide in "ATGC"
        ]

    if args.avoid_hairpins:
        constraints += [AvoidHairpins(location=location)]

    if args.avoid_patterns:
        constraints += [
            AvoidPattern(pattern, location=location)
            for pattern in args.avoid_patterns
        ]

    # NOTE! Printing this to a template is broken
    if args.avoid_restriction_sites:
        constraints += [
            AvoidPattern(EnzymeSitePattern(enzy), location=location)
            for enzy in args.avoid_restriction_sites
        ]

    if args.constrain_global_GC_content:
        constraints += [
            EnforceGCContent(
                mini=args.global_GC_content_min,
                maxi=args.global_GC_content_max,
                location=location,
            )
        ]

    if args.constrain_local_GC_content:
        # The windowed constraint must use the *local* upper bound. The
        # original code passed the global maximum here; that value is kept
        # only as a fallback for option sets that define no local maximum.
        local_gc_max = getattr(
            args, "local_GC_content_max", args.global_GC_content_max
        )
        constraints += [
            EnforceGCContent(
                mini=args.local_GC_content_min,
                maxi=local_gc_max,
                window=args.local_GC_content_window,
                location=location,
            )
        ]

    if args.constrain_terminal_GC_content:
        constraints += [
            EnforceTerminalGCContent(
                mini=args.terminal_GC_content_min,
                maxi=args.terminal_GC_content_max,
                window_size=8,
                location=location,
            )
        ]

    if args.constrain_CAI:
        constraints += [
            ConstrainCAI(
                species=args.species,
                minimum=args.constrain_CAI_minimum,
                location=location,
            )
        ]

    if args.optimize_dicodon_frequency:
        # NOTE(review): this is the only specification built without
        # ``location`` -- confirm that this is intended.
        objectives += [MaximizeDicodonAdaptiveIndex()]

    if args.kmers:
        objectives += [
            MinimizeKmerScore(
                k=args.kmers,
                boost=args.avoid_kmers_boost,
                location=location,
            )
        ]

    if args.avoid_secondary_structure:
        objectives += [
            MinimizeSecondaryStructure(
                max_energy=args.avoid_secondary_structure_max_e,
                location=location,
                boost=args.avoid_secondary_structure_boost,
            )
        ]

    if args.avoid_initiator_secondary_structure:
        objectives += [
            MinimizeSecondaryStructure(
                max_energy=args.avoid_initiator_secondary_structure_max_e,
                location=location,
                optimize_initiator=True,
                boost=args.avoid_initiator_secondary_structure_boost,
            )
        ]

    return objectives, constraints
def optimize(self, codon_table):
    """Re-optimize the stored vaccine codons with a DnaChisel problem.

    Runs ``self.optimize_frequent(codon_table)`` first, then builds a strand
    from the resulting codons, optimizes it under translation and windowed
    GC-content constraints with a "use_best_codon" objective, and finally
    stores the optimized sequence back as a list of three-letter codons.
    A trailing group of fewer than three letters is not stored.

    Parameters
    ----------
    codon_table
      Passed to ``self.optimize_frequent`` and to ``pct.get_codons_table``.
    """
    self.optimize_frequent(codon_table)
    # return

    # Snapshot the current codons, then empty the stored list in place.
    previous_codons = self.__vaccine_codons_gen.copy()
    self.__vaccine_codons_gen.clear()
    strand = self.get_strand(previous_codons)
    # vir_strand = self.get_strand(self.__virus_codons)

    usage_table = pct.get_codons_table(codon_table)
    translation = EnforceTranslation(
        genetic_table="Standard", start_codon="ATG"
    )
    gc_content = EnforceGCContent(mini=0.54, maxi=0.9, window=120)
    best_codon = CodonOptimize(
        method="use_best_codon", codon_usage_table=usage_table
    )
    problem = DnaOptimizationProblem(
        sequence=strand,
        constraints=[translation, gc_content],
        objectives=[best_codon],
    )
    problem.resolve_constraints()
    problem.optimize()

    # Zipping one shared iterator with itself yields consecutive triplets
    # and silently drops an incomplete trailing group.
    self.__vaccine_codons_gen = []
    letters = iter(problem.sequence)
    self.__vaccine_codons_gen.extend(
        first + second + third
        for first, second, third in zip(letters, letters, letters)
    )
    return
def test_codon_optimize_match_usage_gfp_sequence():
    """Check "match_codon_usage" optimization on a fixed coding sequence.

    The summed objective score must be below -61 before optimization and
    above -16 afterwards.
    """
    sequence = (
        "ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTG"
        "GTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGCGCGGC"
        "GAGGGCGAGGGCGATGCCACCAACGGCAAGCTGACCCTGAAGTTCATC"
    )
    objective = CodonOptimize(
        species="s_cerevisiae", method="match_codon_usage"
    )
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation()],
        objectives=[objective],
        logger=None,
    )

    score_before = problem.objective_scores_sum()
    assert score_before < -61

    problem.optimize()
    score_after = problem.objective_scores_sum()
    assert score_after > -16

    # Just for coverage, we run the compare_frequency function in text mode
    objective = problem.objectives[0]
    codons = objective.get_codons(problem)
    print(objective.compare_frequencies(codons, text_mode=True))
def test_codon_optimize_with_custom_table():
    """CodonOptimize with an explicit usage table reaches a score of 0."""
    usage_table = biotools.CODON_USAGE_TABLES['b_subtilis']
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(1200, seed=123),
        constraints=[EnforceTranslation()],
        objectives=[CodonOptimize(codon_usage_table=usage_table)],
    )

    initial_score = problem.objective_scores_sum()
    assert initial_score < -10

    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert final_score == 0
def test_codon_optimize_with_custom_table():
    """CodonOptimize with a table from get_codons_table reaches score 0."""
    objective = CodonOptimize(
        codon_usage_table=get_codons_table("b_subtilis")
    )
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(1200, seed=123),
        constraints=[EnforceTranslation()],
        objectives=[objective],
        logger=None,
    )

    initial_score = problem.objective_scores_sum()
    assert initial_score < -10

    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert final_score == 0
def test_codon_optimize_as_hard_constraint():
    """CodonOptimize used as a constraint fails first, passes once resolved."""
    numpy.random.seed(123)
    sequence = random_dna_sequence(2000, seed=123)
    hard_constraints = [
        EnforceTranslation(location=Location(1000, 1300)),
        CodonOptimize(location=Location(1000, 1300), species='e_coli'),
    ]
    problem = DnaOptimizationProblem(
        sequence=sequence, constraints=hard_constraints
    )

    passes_initially = problem.all_constraints_pass()
    assert not passes_initially

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_codon_optimize_harmonized():
    """Harmonized codon optimization improves the score of a random CDS.

    The summed score must lie in (-700, -600) before optimization and be
    above -350 afterwards.
    """
    numpy.random.seed(123)
    protein = random_protein_sequence(500, seed=123)
    coding_sequence = reverse_translate(protein)
    harmonize = CodonOptimize(species='e_coli', mode='harmonized')
    problem = DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=[EnforceTranslation()],
        objectives=[harmonize],
    )

    initial_score = problem.objective_scores_sum()
    assert -700 < initial_score < -600

    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert -350 < final_score
def test_codon_optimize_bestcodon():
    """Default CodonOptimize drives the summed score from negative to 0."""
    numpy.random.seed(123)
    protein = random_protein_sequence(3000, seed=123)
    coding_sequence = reverse_translate(protein)
    problem = DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=[EnforceTranslation()],
        objectives=[CodonOptimize(species='e_coli')],
    )

    initial_score = problem.objective_scores_sum()
    assert initial_score < 0

    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert final_score == 0
def test_codon_optimize_harmonized_short_sequence():
    """Harmonized optimization on a 9-residue protein.

    The summed score must be below -7 before optimization and above -1
    afterwards.
    """
    coding_sequence = reverse_translate("DDDKKKKKK")
    objective = CodonOptimize(species='b_subtilis', mode='harmonized')
    problem = DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=[EnforceTranslation()],
        objectives=[objective],
    )

    initial_score = problem.objective_scores_sum()
    assert initial_score < -7

    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert -1 < final_score
def test_codon_optimize_harmonize_rca_short_sequence():
    """"harmonize_rca" optimization from e_coli to h_sapiens improves the score.

    The summed score must be below -123 before optimization and above -74
    afterwards.
    """
    coding_sequence = reverse_translate(
        random_protein_sequence(500, seed=123)
    )
    objective = CodonOptimize(
        species="h_sapiens",
        original_species="e_coli",
        method="harmonize_rca",
    )
    problem = DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=[EnforceTranslation()],
        objectives=[objective],
        logger=None,
    )

    initial_score = problem.objective_scores_sum()
    assert initial_score < -123

    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert -74 < final_score
def test_codon_optimize_match_usage():
    """"match_codon_usage" optimization improves the score of a random CDS.

    The summed score must lie in (-600, -550) before optimization and be
    above -350 afterwards.
    """
    numpy.random.seed(123)
    protein = random_protein_sequence(500, seed=123)
    coding_sequence = reverse_translate(protein)
    objective = CodonOptimize(species="e_coli", method="match_codon_usage")
    problem = DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=[EnforceTranslation()],
        objectives=[objective],
        logger=None,
    )

    initial_score = problem.objective_scores_sum()
    assert -600 < initial_score < -550

    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert -350 < final_score
def test_codon_optimize_match_usage_short_sequence():
    """"match_codon_usage" on a 9-residue protein yields a known sequence.

    The summed score must be below -5.5 before optimization and above -0.6
    afterwards, and the optimized sequence is compared to a fixed string.
    """
    numpy.random.seed(123)
    coding_sequence = reverse_translate("DDDKKKKKK")
    objective = CodonOptimize(
        species="b_subtilis", method="match_codon_usage"
    )
    problem = DnaOptimizationProblem(
        sequence=coding_sequence,
        constraints=[EnforceTranslation()],
        objectives=[objective],
        logger=None,
    )

    initial_score = problem.objective_scores_sum()
    assert initial_score < -5.5

    problem.optimize()
    assert -0.6 < problem.objective_scores_sum()
    print(problem.objective_scores_sum())
    assert problem.sequence == "GATGATGACAAGAAAAAGAAAAAAAAA"
random_protein_sequence, reverse_translate, CodonOptimize, EnforceTranslation, AvoidPattern, EnforceGCContent, ) protein = random_protein_sequence(1000, seed=123) sequence = reverse_translate(protein) problem = DnaOptimizationProblem( sequence=sequence, constraints=[ EnforceTranslation(), AvoidPattern("BsmBI_site"), EnforceGCContent(mini=0.4, maxi=0.6, window=60), ], objectives=[CodonOptimize(species="s_cerevisiae")], ) print("\nBefore optimization:\n") print(problem.constraints_text_summary()) print(problem.objectives_text_summary()) problem.resolve_constraints(final_check=True) problem.optimize() print("\nAfter optimization:\n") print(problem.constraints_text_summary()) print(problem.objectives_text_summary())
"""Example of use of the AvoidPattern specification"""

from dnachisel import (
    AvoidPattern,
    CodonOptimize,
    DnaOptimizationProblem,
    EnforceGCContent,
    EnforceTranslation,
    random_protein_sequence,
    reverse_translate,
)

# Start from the reverse translation of a seeded random protein.
protein = random_protein_sequence(1000, seed=123)
sequence = reverse_translate(protein)

problem = DnaOptimizationProblem(
    sequence=sequence,
    constraints=[
        EnforceTranslation(),
        AvoidPattern("BsmBI_site"),
        EnforceGCContent(mini=0.4, maxi=0.6, window=60),
    ],
    objectives=[CodonOptimize(species='s_cerevisiae')],
)

# Report the constraints and objectives before and after solving.
print("\nBefore optimization:\n")
print(problem.constraints_text_summary())
print(problem.objectives_text_summary())

problem.resolve_constraints(final_check=True)
problem.optimize()

print("\nAfter optimization:\n")
print(problem.constraints_text_summary())
print(problem.objectives_text_summary())
"""Example of use of the CodonOptimize specification as a hard constraint"""

from dnachisel import (
    CodonOptimize,
    DnaOptimizationProblem,
    EnforceTranslation,
    Location,
    random_dna_sequence,
)

# Both specifications are given as constraints over the same 1000-1300 span.
problem = DnaOptimizationProblem(
    sequence=random_dna_sequence(2000, seed=123),
    constraints=[
        EnforceTranslation(location=Location(1000, 1300)),
        CodonOptimize(location=Location(1000, 1300), species='e_coli'),
    ],
)

print("\nBefore resolution:\n")
print(problem.constraints_text_summary())

problem.resolve_constraints()

print("\nAfter resolution:\n")
print(problem.constraints_text_summary())
codon_table_11[k][k2] = RSCU_list[k2] print("\nOptimizing codons for input gene list") #Read gene fasta sequence and initiate optimizer problem = DnaOptimizationProblem( sequence=gene, constraints=[ EnforceTranslation(), AvoidPattern("BsmBI_site", "BamHI"), EnforceTranslation(), EnforceGCContent(mini=0.35, maxi=0.65, window=50), #TWIST: 25% and 65% GC ], objectives=[CodonOptimize(codon_usage_table=codon_table_11)], ) if taxid and not input_path: print("\nOptimizing codons for taxonomic ID: " + taxid) #Read gene fasta sequence and initiate optimizer if not protein_flag: problem = DnaOptimizationProblem( sequence=gene, constraints=[ #EnforceSequence(sequence = "ATG", location=(0, 2)), AvoidChanges(location=(0, 2)), AvoidPattern("BsmBI_site", "BamHI"), EnforceTranslation(), EnforceGCContent(mini=0.35, maxi=0.65, window=50), #TWIST: 25% and 65% GC
"""Example of use of the CodonOptimize specification."""

from dnachisel import (
    CodonOptimize,
    DnaOptimizationProblem,
    EnforceTranslation,
    random_protein_sequence,
    reverse_translate,
)

# Start from the reverse translation of a seeded random protein.
protein = random_protein_sequence(3000, seed=123)
sequence = reverse_translate(protein)

problem = DnaOptimizationProblem(
    sequence=sequence,
    constraints=[EnforceTranslation()],
    objectives=[CodonOptimize('e_coli')],
)

print("\nBefore optimization:\n")
print(problem.objectives_text_summary())

problem.optimize()

print("\nAfter optimization:\n")
print(problem.objectives_text_summary())
# For every CDS: freeze a 30-position flanking region and enforce the
# translation of the CDS itself.
CDS_constraints = []
for (start, end, strand) in CDS_list:
    # The frozen region sits just before `start` on strand 1 and just after
    # `end` otherwise.
    promoter_region = (
        (start - 30, start - 1) if strand == 1 else (end + 1, end + 30)
    )
    CDS_constraints.extend(
        [
            AvoidChanges(promoter_region),
            EnforceTranslation((start, end, strand)),
        ]
    )

# DEFINE OBJECTIVES

gc_objective = EnforceGCContent(0.51, boost=10000)
codon_objectives = [
    CodonOptimize("e_coli", location=(start, end, strand))
    for (start, end, strand) in CDS_list
]
objectives = [gc_objective] + codon_objectives

# DEFINE AND SOLVE THE PROBLEM

problem = DnaOptimizationProblem(
    sequence=record,
    constraints=dna_provider_constraints + CDS_constraints,
    objectives=objectives,
)

print("\n\n=== Initial Status ===")
print(problem.constraints_text_summary(failed_only=True))
print(problem.objectives_text_summary())
"""Example of use of the AvoidPattern specification"""

from dnachisel import (
    AvoidPattern,
    CodonOptimize,
    DnaOptimizationProblem,
    EnforceGCContent,
    EnforceTranslation,
    random_protein_sequence,
    reverse_translate,
)

# Start from the reverse translation of a seeded random protein.
protein = random_protein_sequence(1000, seed=123)
sequence = reverse_translate(protein)

problem = DnaOptimizationProblem(
    sequence=sequence,
    constraints=[
        EnforceTranslation(),
        AvoidPattern(enzyme="BsmBI"),
        EnforceGCContent(mini=0.4, maxi=0.6, window=60),
    ],
    objectives=[CodonOptimize(species='s_cerevisiae')],
)

# Report the constraints and objectives before and after solving.
print("\nBefore optimization:\n")
print(problem.constraints_text_summary())
print(problem.objectives_text_summary())

problem.resolve_constraints(final_check=True)
problem.optimize()

print("\nAfter optimization:\n")
print(problem.constraints_text_summary())
print(problem.objectives_text_summary())
def domesticate(
    self,
    dna_sequence=None,
    protein_sequence=None,
    is_cds="default",
    codon_optimization=None,
    extra_constraints=(),
    extra_objectives=(),
    final_record_target=None,
    edit=False,
    barcode="",
    barcode_spacer="AA",
    report_target=None,
):
    """Domesticate a sequence.

    Parameters
    ----------

    dna_sequence
      The DNA sequence string to domesticate. If a ``SeqRecord`` is given,
      the constraints and objectives read from it with
      ``DnaOptimizationProblem.from_record`` are shifted by the length of
      the left flank and added to the extra constraints and objectives.

    protein_sequence
      Amino-acid sequence of the protein, which will be converted into
      a DNA sequence string. Providing it forces ``is_cds`` to True.

    is_cds
      If True, sequence edits are restricted to synonymous mutations.
      The value "default" means "use ``self.cds_by_default``".

    codon_optimization
      Either None for no codon optimization or the name of an organism
      supported by DnaChisel.

    extra_constraints
      List of extra constraints to apply to the domesticated sequences.
      Each constraint is either a DnaChisel constraint or a function
      (dna_sequence => DnaChisel constraint).

    extra_objectives
      List of extra optimization objectives to apply to the domesticated
      sequences. Each objective is either a DnaChisel objective or a
      function (dna_sequence => DnaChisel objective).

    final_record_target
      Path to the file where to write the final genbank.

    edit
      Turn to True to allow sequence edits (if it is False and not all
      constraints are originally satisfied, a failed domestication result,
      i.e. with attribute ``success`` set to False, will be returned).

    report_target
      Target for the sequence optimization report (a folder path, or a zip
      path).

    barcode
      A sequence of DNA that will be added to the left of the sequence once
      the domestication is done.
      NOTE(review): not used anywhere in this method's body -- confirm
      where the barcode is applied.

    barcode_spacer
      Nucleotides to be added between the barcode and the enzyme (optional,
      the idea here is that they will make sure to avoid the creation of
      unwanted cutting sites).
      NOTE(review): not used anywhere in this method's body.

    Returns
    -------

    result
      A ``DomesticationResult`` built from the sequence before
      optimization, final_record, edits_record, report_data, success and
      message.
    """
    if is_cds == "default":
        is_cds = self.cds_by_default

    if isinstance(dna_sequence, SeqRecord):
        problem = DnaOptimizationProblem.from_record(dna_sequence)
        # Record specifications are relative to the insert; shift them so
        # that they stay aligned once the left flank is prepended.
        for spec in problem.constraints + problem.objectives:
            spec.location += len(self.left_flank)
        extra_constraints = list(extra_constraints) + problem.constraints
        # Fixed: this used to start from ``extra_constraints``, which turned
        # every constraint into an objective and dropped the caller's own
        # extra objectives.
        extra_objectives = list(extra_objectives) + problem.objectives

    if protein_sequence is not None:
        is_cds = True
        dna_sequence = reverse_translate(protein_sequence)

    # Entries may be ready-made specifications or factories taking the
    # sequence; factories are called here.
    constraints = [
        c(dna_sequence) if hasattr(c, "__call__") else c
        for c in list(extra_constraints) + self.constraints
    ]
    location = Location(
        len(self.left_flank), len(self.left_flank) + len(dna_sequence)
    )
    if is_cds:
        constraints.append(EnforceTranslation(location=location))

    objectives = [
        o(dna_sequence) if hasattr(o, "__call__") else o
        for o in list(extra_objectives) + self.objectives
    ]
    if codon_optimization:
        objectives.append(
            CodonOptimize(species=codon_optimization, location=location)
        )
    if self.minimize_edits:
        objectives.append(AvoidChanges())

    extended_sequence = self.left_flank + dna_sequence + self.right_flank

    # A non-CDS sequence that may not be edited is frozen entirely.
    if (not is_cds) and (not edit):
        constraints.append(AvoidChanges())

    problem = DnaOptimizationProblem(
        extended_sequence,
        constraints=constraints,
        objectives=objectives,
        logger=self.logger,
    )
    all_constraints_pass = problem.all_constraints_pass()
    # The AvoidChanges objective added for ``minimize_edits`` does not count
    # as a reason to run the optimizer.
    no_objectives = (len(problem.objectives) - self.minimize_edits) == 0
    report_data = None
    optimization_successful = True
    message = ""

    if not (all_constraints_pass and no_objectives):
        problem.n_mutations = self.simultaneous_mutations

        if report_target is not None:
            (success, message, report_data) = problem.optimize_with_report(
                target=report_target, project_name=self.name
            )
            optimization_successful = success
        else:
            report_data = None
            try:
                problem.resolve_constraints()
                problem.optimize()
            except Exception as err:
                # Deliberate best effort: any solver failure is reported
                # through the result rather than raised.
                message = str(err)
                optimization_successful = False
                report_data = None

    final_record = problem.to_record(
        with_original_features=True,
        with_original_spec_features=False,
        with_constraints=False,
        with_objectives=False,
    )
    edits_record = problem.to_record(
        with_original_features=True,
        with_original_spec_features=False,
        with_constraints=False,
        with_objectives=False,
        with_sequence_edits=True,
    )
    if final_record_target is not None:
        SeqIO.write(final_record, final_record_target, "genbank")

    return DomesticationResult(
        problem.sequence_before,
        final_record,
        edits_record,
        report_data,
        optimization_successful,
        message,
    )