def test_EnforceTranslation_error_location_smaller_than_translation():
    """A translation that does not fit the sequence raises a ValueError.

    The DNA sequence is reverse-translated from a random protein requested
    with length 15, while the constraint is given a translation requested
    with length 30. Building the problem must fail with an error message
    starting with "Window size".
    """
    numpy.random.seed(1234)
    short_protein = random_protein_sequence(15, seed=123)
    sequence = reverse_translate(short_protein)
    oversized_translation = random_protein_sequence(30, seed=111)

    with pytest.raises(ValueError) as err:
        # Only the construction matters, the problem itself is discarded.
        _ = DnaOptimizationProblem(
            sequence=sequence,
            constraints=[EnforceTranslation(translation=oversized_translation)],
            logger=None,
        )

    # Checked after the context manager exits, once the error is captured.
    error_message = str(err.value)
    assert error_message.startswith("Window size")
def experiment_1(seed=123):
    """Run two chained GC-content optimizations and return the final sequence.

    According to the original note, the results of this DNA Chisel
    optimization produced the file test_determinism.py.

    Parameters
    ----------
    seed
      Value passed to ``np.random.seed`` before anything else is generated.

    Returns
    -------
    The sequence held by the problem after the second optimization.
    """
    np.random.seed(seed)
    sequence = dc.reverse_translate(dc.random_protein_sequence(50))

    # First pass maximizes the GC content (target=1), the second pass brings
    # it back to 50% (target=0.5). Each pass starts from the previous result.
    for gc_target in (1, 0.5):
        problem = dc.DnaOptimizationProblem(
            sequence=sequence,
            constraints=[dc.EnforceTranslation()],
            objectives=[dc.EnforceGCContent(target=gc_target)],
            logger=None,
        )
        problem.optimize()
        sequence = problem.sequence

    return sequence
def test_EnforceTranlationError():
    """A location whose length is not a multiple of 3 raises an error.

    The constraint is given the location (0, 16); the expected ValueError
    message must mention "Location 0-16(+) has length 16".
    """
    numpy.random.seed(1234)
    sequence = reverse_translate(random_protein_sequence(50, seed=123))
    with pytest.raises(ValueError) as err:
        # The problem is never used afterwards: bind it to the throwaway
        # name instead of leaving an unused local behind.
        _ = DnaOptimizationProblem(
            sequence=sequence,
            constraints=[EnforceTranslation(location=(0, 16))],
        )
    # The message is inspected once the context manager has captured the
    # exception; inside the block this line would never be reached.
    assert "Location 0-16(+) has length 16" in str(err.value)
def test_EnforceTranlation():
    """Constraints fail on the initial sequence and pass once resolved.

    The problem combines ``AvoidPattern("AAA")`` with ``EnforceTranslation``
    on a reverse-translated random protein.
    """
    numpy.random.seed(1234)
    protein = random_protein_sequence(50, seed=123)
    sequence = reverse_translate(protein)
    constraints = [AvoidPattern("AAA"), EnforceTranslation()]
    problem = DnaOptimizationProblem(sequence=sequence, constraints=constraints)

    # The starting sequence is expected to break at least one constraint.
    passes_before = problem.all_constraints_pass()
    assert not passes_before

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_codon_optimize_bestcodon():
    """Codon optimization for e_coli brings a negative score up to zero.

    The objective score sum must be strictly negative before optimization
    and exactly 0 afterwards, with the translation enforced throughout.
    """
    numpy.random.seed(123)
    sequence = reverse_translate(random_protein_sequence(3000, seed=123))
    constraints = [EnforceTranslation()]
    objectives = [CodonOptimize(species="e_coli")]
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=constraints,
        objectives=objectives,
    )

    initial_score = problem.objective_scores_sum()
    assert initial_score < 0

    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert final_score == 0
def test_codon_optimize_harmonized():
    """Harmonized codon optimization for e_coli improves the score.

    The objective score sum must start strictly between -700 and -600 and
    end strictly above -350 after optimization.
    """
    numpy.random.seed(123)
    sequence = reverse_translate(random_protein_sequence(500, seed=123))
    constraints = [EnforceTranslation()]
    objectives = [CodonOptimize(species="e_coli", mode="harmonized")]
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=constraints,
        objectives=objectives,
    )

    initial_score = problem.objective_scores_sum()
    assert -700 < initial_score < -600

    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert -350 < final_score
def test_maximal_protein_sequence_change():
    """Maximize sequence edits while the encoded protein stays the same.

    With ``EnforceChanges`` as the objective and ``EnforceTranslation`` as
    the constraint, the optimized sequence must show 238 edits and still
    translate to the original protein.
    """
    np.random.seed(123)
    protein = dc.random_protein_sequence(200)
    original_sequence = dc.reverse_translate(protein)
    problem = dc.DnaOptimizationProblem(
        sequence=original_sequence,
        constraints=[dc.EnforceTranslation()],
        objectives=[dc.EnforceChanges()],
    )

    problem.resolve_constraints()
    problem.optimize()

    edit_count = problem.number_of_edits()
    assert edit_count == 238
    optimized_protein = dc.translate(problem.sequence)
    assert optimized_protein == protein
def test_EnforceTranlationReversed():
    """Constraints resolve when the translation is enforced on strand -1.

    The problem is built on the reverse complement of a reverse-translated
    protein, with ``AvoidPattern("AGC")`` and an ``EnforceTranslation``
    whose location covers the whole sequence with strand -1.
    """
    numpy.random.seed(1234)
    protein = random_protein_sequence(50, seed=123)
    forward_sequence = reverse_translate(protein)
    rev_sequence = reverse_complement(forward_sequence)

    # (start, end, strand) covering the full length on the reverse strand.
    full_reverse_location = (0, len(forward_sequence), -1)
    constraints = [
        AvoidPattern("AGC"),
        EnforceTranslation(location=full_reverse_location),
    ]
    problem = DnaOptimizationProblem(
        sequence=rev_sequence,
        constraints=constraints,
    )

    passes_before = problem.all_constraints_pass()
    assert not passes_before

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def experiment_2(seed=123):
    """Codon-optimize a random coding sequence under a GC-content window.

    Parameters
    ----------
    seed
      Value passed to ``np.random.seed`` before the protein is generated.

    Returns
    -------
    The problem's sequence after constraints are resolved and the e_coli
    codon optimization objective has been optimized.
    """
    np.random.seed(seed)
    protein = dc.random_protein_sequence(1000)
    sequence = dc.reverse_translate(protein)

    constraints = [
        dc.EnforceTranslation(),
        dc.EnforceGCContent(mini=0.4, maxi=0.6, window=50),
    ]
    objectives = [dc.CodonOptimize(species="e_coli")]
    problem = dc.DnaOptimizationProblem(
        sequence=sequence,
        constraints=constraints,
        objectives=objectives,
        logger=None,
    )

    # Constraints are resolved first, then the objective is optimized.
    problem.resolve_constraints()
    problem.optimize()
    return problem.sequence
def test_codon_optimize_harmonize_rca_short_sequence():
    """The "harmonize_rca" method improves the objective score.

    The objective targets h_sapiens with e_coli as the original species.
    The score sum must start below -123 and end above -74 once optimized.
    """
    sequence = reverse_translate(random_protein_sequence(500, seed=123))
    rca_objective = CodonOptimize(
        species="h_sapiens",
        original_species="e_coli",
        method="harmonize_rca",
    )
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation()],
        objectives=[rca_objective],
        logger=None,
    )

    initial_score = problem.objective_scores_sum()
    assert initial_score < -123

    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert -74 < final_score
def test_codon_optimize_match_usage():
    """The "match_codon_usage" method improves the objective score.

    The score sum for e_coli must start strictly between -600 and -550 and
    end strictly above -350 after optimization.
    """
    numpy.random.seed(123)
    sequence = reverse_translate(random_protein_sequence(500, seed=123))
    constraints = [EnforceTranslation()]
    objectives = [CodonOptimize(species="e_coli", method="match_codon_usage")]
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=constraints,
        objectives=objectives,
        logger=None,
    )

    initial_score = problem.objective_scores_sum()
    assert -600 < initial_score < -550

    problem.optimize()
    final_score = problem.objective_scores_sum()
    assert -350 < final_score
def test_insert_and_erase_pattern():
    """Raise, then lower, the number of occurrences of a pattern.

    Starting from a sequence generated with the pattern forbidden, the
    occurrence count is first enforced to 5, then to 2, with the translation
    enforced in both phases.
    """
    numpy.random.seed(123)
    protein = dc.random_protein_sequence(100)
    pattern = "ATGC"

    # Create a sequence coding for the protein with 0 pattern occurrences.
    sequence = dc.random_compatible_dna_sequence(
        sequence_length=300,
        constraints=[
            dc.EnforceTranslation(translation=protein),
            dc.AvoidPattern(pattern),
        ],
        logger=None,
    )

    # Phase 1 goes from 0 to 5 occurrences, phase 2 from 5 down to 2. The
    # expected score before resolution is -5, then -3; each phase starts
    # from the sequence produced by the previous one.
    phases = ((5, -5), (2, -3))
    for target_count, expected_initial_score in phases:
        problem = dc.DnaOptimizationProblem(
            sequence=sequence,
            constraints=[
                dc.EnforcePatternOccurence(pattern, occurences=target_count),
                dc.EnforceTranslation(),
            ],
            logger=None,
        )
        occurrence_constraint = problem.constraints[0]
        initial_score = occurrence_constraint.evaluate(problem).score
        assert initial_score == expected_initial_score

        problem.resolve_constraints()
        assert problem.all_constraints_pass()
        sequence = problem.sequence
"""Example of use of the AvoidPattern specification."""

from dnachisel import (
    DnaOptimizationProblem,
    random_protein_sequence,
    reverse_translate,
    CodonOptimize,
    EnforceTranslation,
    AvoidPattern,
    EnforceGCContent,
)

# Random protein (fixed seed) turned into a starting DNA sequence.
protein = random_protein_sequence(1000, seed=123)
sequence = reverse_translate(protein)

# Keep the translation, avoid BsmBI sites, and hold the GC content between
# 40% and 60% over 60-nucleotide windows, while codon-optimizing for
# s_cerevisiae.
problem = DnaOptimizationProblem(
    sequence=sequence,
    constraints=[
        EnforceTranslation(),
        AvoidPattern("BsmBI_site"),
        EnforceGCContent(mini=0.4, maxi=0.6, window=60),
    ],
    objectives=[CodonOptimize(species="s_cerevisiae")],
)

# Report the state of the constraints, then of the objectives, before
# anything is changed.
print("\nBefore optimization:\n")
for summarize in (
    problem.constraints_text_summary,
    problem.objectives_text_summary,
):
    print(summarize())

problem.resolve_constraints(final_check=True)
problem.optimize()
all_9mers = [sequence[i:i + 9] for i in range(len(sequence) - 9)] number_of_non_unique_9mers = sum([ count for ninemer, count in Counter(all_9mers).items() if count > 1 ]) score = -(9.0 * number_of_non_unique_9mers) / len(sequence) return SpecEvaluation(self, problem, score=score, locations=[Location(0, len(sequence))], message="Score: %.02f (%d non-unique ninemers)" % (score, number_of_non_unique_9mers)) def __str__(self): """String representation.""" return "MinimizeNinemersScore" sequence = reverse_translate(random_protein_sequence(300)) problem = DnaOptimizationProblem(sequence=sequence, constraints=[EnforceTranslation()], objectives=[MinimizeNinemersScore()]) print("\n=== Status before optimization ===") print(problem.objectives_text_summary()) problem.optimize() print("\n=== Status after optimization ===") print(problem.objectives_text_summary()) print(problem.constraints_text_summary(failed_only=True))