def test_codon_optimize_as_hard_constraint():
    """CodonOptimize passed as a constraint fails first, then gets resolved."""
    numpy.random.seed(123)
    cds_start, cds_end = 1000, 1300
    # The sequence is generated before the specifications are built, in the
    # same order as the arguments of the problem constructor.
    initial_sequence = random_dna_sequence(2000, seed=123)
    hard_constraints = [
        EnforceTranslation(location=Location(cds_start, cds_end)),
        CodonOptimize(location=Location(cds_start, cds_end), species='e_coli'),
    ]
    problem = DnaOptimizationProblem(
        sequence=initial_sequence,
        constraints=hard_constraints,
    )

    # The random sequence is not expected to satisfy the constraints yet.
    assert not problem.all_constraints_pass()

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_EnforceTranslation_bacterial_valine_antisense():
    """The start codon of an antisense bacterial CDS is kept by optimization.

    The CDS is placed on the -1 strand (the problem sequence is the reverse
    complement of the coding sequence) and ``start_codon="keep"`` is used, so
    both an ATG and a GTG start codon must still be in place afterwards.
    """
    table_name = "Bacterial"
    protein = "LLTMMVTTTTVMVL"
    protein_sequence = reverse_translate(protein, table=table_name)

    # In both cases the start codon after optimization must equal the one
    # the sequence started with.
    for start_codon in ("ATG", "GTG"):
        sequence = start_codon + protein_sequence
        antisense_cds = EnforceTranslation(
            genetic_table="Bacterial",
            start_codon="keep",
            location=Location(0, len(sequence), -1),
        )
        problem = DnaOptimizationProblem(
            sequence=reverse_complement(sequence),
            constraints=[antisense_cds],
            objectives=[EnforceChanges()],
            logger=None,
        )
        # The start codon is read as methionine, whichever codon encodes it.
        assert problem.constraints[0].translation == "MLLTMMVTTTTVMVL"

        problem.optimize()

        # Flip the optimized sequence back to the coding strand to check it.
        coding_strand = reverse_complement(problem.sequence)
        protein_after = translate(
            coding_strand, table_name, assume_start_codon=True
        )
        assert protein_after == "M" + protein
        assert coding_strand[:3] == start_codon
def test_enforce_pattern_basics():
    """EnforcePatternOccurence gets resolved inside a coding region.

    According to the original author's notes, the seeds were picked to cover
    different starting situations: seeds 2 and 3 start with no occurrence of
    the pattern (one is wanted), and seed 123456 starts with the pattern
    over-represented (4 occurrences instead of 1).
    """
    numpy.random.seed(123)
    for seed in (2, 3, 123456):
        sequence = random_dna_sequence(5000, seed=seed)
        first_cds = EnforceTranslation(location=Location(1000, 2500))
        second_cds = EnforceTranslation(location=Location(3000, 4500))
        pattern_spec = EnforcePatternOccurence(
            "ANANANANTT", location=Location(1100, 2150)
        )
        problem = DnaOptimizationProblem(
            sequence=sequence,
            constraints=[first_cds, second_cds, pattern_spec],
            logger=None,
        )

        assert not problem.all_constraints_pass()
        problem.resolve_constraints()
        assert problem.all_constraints_pass()
def evaluate(self, problem):
    """Return Gen9's ninemer score for the problem's sequence.

    The score is ``-9 * N / L`` where ``L`` is the sequence length and ``N``
    is the total number of 9-mer windows whose 9-mer appears more than once
    in the sequence (each repeated 9-mer contributes all its occurrences).

    Parameters
    ----------
    problem
      Object with a ``sequence`` attribute (the sequence to score).

    Returns
    -------
    SpecEvaluation
      Evaluation carrying the score, a single location spanning the whole
      sequence, and a human-readable message.
    """
    sequence = problem.sequence
    # A sequence of length L has L - 9 + 1 windows of size 9; the "+ 1"
    # ensures that the last window is not skipped.
    ninemer_counts = Counter(
        sequence[i:i + 9] for i in range(len(sequence) - 9 + 1)
    )
    number_of_non_unique_9mers = sum(
        count for count in ninemer_counts.values() if count > 1
    )
    if len(sequence) == 0:
        # No 9-mers at all: avoid a division by zero.
        score = 0.0
    else:
        score = -(9.0 * number_of_non_unique_9mers) / len(sequence)
    return SpecEvaluation(
        self,
        problem,
        score=score,
        locations=[Location(0, len(sequence))],
        message="Score: %.02f (%d non-unique ninemers)"
        % (score, number_of_non_unique_9mers),
    )
def __init__(
    self,
    left_overhang,
    right_overhang,
    left_addition="",
    right_addition="",
    enzyme="BsmBI",
    extra_avoided_sites=(),
    description="Golden Gate domesticator",
    name="unnamed_domesticator",
    cds_by_default=False,
    constraints=(),
    objectives=(),
):
    """Build the flanks and site-avoidance constraints of the domesticator.

    Parameters
    ----------
    left_overhang, right_overhang
      Overhang sequences; stored as given, then converted to annotated
      Biopython records to build the flanks.
    left_addition, right_addition
      Sequences inserted between each overhang and the domesticated part.
    enzyme
      Name of the enzyme, looked up in ``Restriction`` to get its site.
    extra_avoided_sites
      Other sites to avoid in the part. Each item is passed to
      ``EnzymeSitePattern``, so these are presumably enzyme names
      (TODO confirm).
    description, name, cds_by_default, objectives
      Forwarded unchanged to ``PartDomesticator.__init__``.
    constraints
      Extra constraints; the site-avoidance constraint factories created
      here are appended to them.
    """
    self.enzyme = enzyme
    self.left_overhang = left_overhang
    left_overhang = sequence_to_biopython_record(left_overhang)
    self.right_overhang = right_overhang
    right_overhang = sequence_to_biopython_record(right_overhang)
    for overhang_record in [left_overhang, right_overhang]:
        annotate_record(overhang_record, label=str(overhang_record.seq))

    enzyme_seq = Restriction.__dict__[enzyme].site
    enzyme_seq = sequence_to_biopython_record(enzyme_seq)
    annotate_record(enzyme_seq, label=enzyme)
    self.enzyme_seq = enzyme_seq

    # Left flank: site + "A" + overhang + addition. The right flank mirrors
    # it, with the (site + "A") part reverse-complemented.
    left_flank = self.enzyme_seq + "A" + left_overhang + left_addition
    right_flank = (
        right_addition
        + right_overhang
        + (self.enzyme_seq + "A").reverse_complement()
    )
    self.extra_avoided_sites = extra_avoided_sites

    def _avoid_site_factory(enzyme_name):
        """Return a (sequence -> AvoidPattern) function for one enzyme."""

        def _factory(seq):
            # Only the part itself (located after the left flank) is
            # protected; the flanks must keep their enzyme sites.
            return AvoidPattern(
                EnzymeSitePattern(enzyme_name),
                location=Location(
                    len(left_flank), len(left_flank) + len(seq)
                ),
            )

        return _factory

    # One factory per enzyme. Each factory is bound to its own enzyme name
    # so that the extra sites are avoided too, not only the main enzyme.
    constraints = list(constraints) + [
        _avoid_site_factory(enz)
        for enz in ([enzyme] + list(extra_avoided_sites))
    ]
    PartDomesticator.__init__(
        self,
        left_flank=left_flank,
        right_flank=right_flank,
        constraints=constraints,
        objectives=objectives,
        description=description,
        name=name,
        cds_by_default=cds_by_default,
    )
def test_EnforceSequence():
    """EnforceSequence with degenerate symbols, then a -1 strand pattern.

    First part: a 15-nucleotide window is forced to be all "W" (A/T) or all
    "S" (G/C) while several patterns are avoided. Second part: a pattern
    must occur twice on the -1 strand.
    """
    numpy.random.seed(1234)
    n_nucleotides = 15
    start = 50
    end = start + n_nucleotides
    for symbol, nucleotides in (("W", "AT"), ("S", "GC")):
        problem = DnaOptimizationProblem(
            sequence=25 * "ATGC",
            constraints=[
                AvoidPattern("ATGC"),
                AvoidPattern("AAA"),
                AvoidPattern("GGG"),
                EnforceSequence(n_nucleotides * symbol, location=(start, end)),
            ],
        )
        problem.max_random_iters = 10000
        problem.resolve_constraints()
        enforced_region = problem.sequence[start:end]
        assert all([base in nucleotides for base in enforced_region])

    # Test -1 strand:
    seq = "ATG" + "CAG" + "AGCAAGGTGCTGCT"
    minus_strand_spec = EnforcePatternOccurence(
        pattern="CTG",  # CAG on strand +1
        occurences=2,
        strand=-1,
        location=Location(start=0, end=50),
    )
    problem = DnaOptimizationProblem(
        sequence=seq,
        constraints=[minus_strand_spec],
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
# Download a GenBank record from NCBI, add constraints on each suitable gene,
# then resolve all the constraints.
print("DOWNLOADING AND PARSING THE GENBANK DATA...")
url = (
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"
    + "db=nucleotide&id=48994873&rettype=gb&retmode=txt"
)
# The context manager closes the HTTP response once the data has been read.
with request.urlopen(url) as response:
    genbank_data = response.read().decode("utf-8")
genbank_record = load_record(StringIO(genbank_data), file_format="genbank")

print("INITIALIZING THE PROBLEM WITH CONSTRAINTS FOR EACH GENE...")
constraints = []
for feature in genbank_record.features:
    # Only "gene" features with a single-part location are considered.
    if feature.type == "gene" and len(feature.location.parts) == 1:
        location = Location.from_biopython_location(feature.location)
        # Keep genes longer than 100 nucleotides whose length is a whole
        # number of codons.
        if (len(location) % 3 == 0) and len(location) > 100:
            gene_constraints = [
                EnforceTranslation(location=location),
                AvoidPattern("BsmBI_site", location),
                EnforceGCContent(
                    mini=0.40, maxi=0.60, window=150, location=location
                ),
            ]
            constraints.extend(gene_constraints)
problem = DnaOptimizationProblem(genbank_record, constraints)

print("RESOLVING THE CONSTRAINTS...")
problem.logger.ignore_bars_under = 50
problem.resolve_constraints()
from dnachisel import (EnforceTranslation, DnaOptimizationProblem,
                       random_dna_sequence, Location, EnforcePattern)

# Two coding regions, plus one pattern enforced inside the first coding
# region and one pattern enforced between the two coding regions.
sequence = random_dna_sequence(5000, seed=123)
coding_constraints = [
    EnforceTranslation(Location(1000, 2500)),
    EnforceTranslation(Location(3000, 4500)),
]
pattern_constraints = [
    EnforcePattern("ANANANANTT", location=Location(1100, 2150)),
    EnforcePattern("ATGATGCCTK", location=Location(2700, 2800)),
]
constraints = coding_constraints + pattern_constraints
problem = DnaOptimizationProblem(sequence=sequence, constraints=constraints)

# Summary before resolution: the constraints are expected to fail.
print(problem.constraints_text_summary())
assert not problem.all_constraints_pass()

# Summary after resolution: all the constraints must pass.
problem.resolve_constraints()
assert problem.all_constraints_pass()
print(problem.constraints_text_summary())
"""Example of use of CodonOptimize as a constraint on a coding region."""

from dnachisel import (DnaOptimizationProblem, random_dna_sequence,
                       CodonOptimize, Location, EnforceTranslation)

initial_sequence = random_dna_sequence(2000, seed=123)
# Both specifications cover the same 300-nucleotide region.
region_constraints = [
    EnforceTranslation(location=Location(1000, 1300)),
    CodonOptimize(location=Location(1000, 1300), species='e_coli'),
]
problem = DnaOptimizationProblem(
    sequence=initial_sequence,
    constraints=region_constraints,
)

print("\nBefore resolution:\n")
print(problem.constraints_text_summary())

problem.resolve_constraints()

print("\nAfter resolution:\n")
print(problem.constraints_text_summary())
base, ext = os.path.splitext(os.path.basename(args.vector)) output_filename = base + "_" + destination + ext naive_construct, objectives, constraints = load_template(args.vector, placeholder, destination) else: output_filename = destination + ".gb" objectives = [] constraints = [] naive_construct = placeholder whole_seq_feat = SeqFeature() whole_seq_feat.type = "misc_feature" whole_seq_feat.qualifiers['label'] = [destination] whole_seq_feat.location = FeatureLocation(0,len(placeholder),strand=1) naive_construct.features.append(whole_seq_feat) dest_feat = find_annotation(naive_construct, placeholder.name) dest_loc = Location.from_biopython_location(dest_feat.location) user_objectives, user_constraints = load_user_options(args, dest_loc) objectives += user_objectives constraints += user_constraints problem = DnaOptimizationProblem(str(naive_construct.seq), constraints=constraints, objectives=objectives) domesticator_record = problem.to_record() mature_construct = naive_construct mature_construct.features.extend(domesticator_record.features)
def domesticate(
    self,
    dna_sequence=None,
    protein_sequence=None,
    is_cds="default",
    codon_optimization=None,
    extra_constraints=(),
    extra_objectives=(),
    final_record_target=None,
    edit=False,
    barcode="",
    barcode_spacer="AA",
    report_target=None,
):
    """Domesticate a sequence.

    Parameters
    ----------

    dna_sequence
      The DNA sequence to domesticate, either a string or a SeqRecord. If
      a SeqRecord is given, the constraints and objectives read from it by
      ``DnaOptimizationProblem.from_record`` are shifted by the length of
      the left flank and added to the extra constraints/objectives.

    protein_sequence
      Amino-acid sequence of the protein, which will be converted into
      a DNA sequence string. Providing it forces ``is_cds`` to True.

    is_cds
      If True, sequence edits are restricted to synonymous mutations.
      The value "default" means "use ``self.cds_by_default``".

    codon_optimization
      Either None for no codon optimization or the name of an organism
      supported by DnaChisel.

    extra_constraints
      List of extra constraints to apply to the domesticated sequences.
      Each constraint is either a DnaChisel constraint or a function
      (dna_sequence => DnaChisel constraint).

    extra_objectives
      List of extra optimization objectives to apply to the domesticated
      sequences. Each objective is either a DnaChisel objective or a
      function (dna_sequence => DnaChisel objective).

    final_record_target
      Path to the file where to write the final genbank.

    edit
      Turn to True to allow sequence edits. If it is False and the
      sequence is not a CDS, an ``AvoidChanges`` constraint is added, so
      that when the constraints are not all originally satisfied a failed
      domestication result (i.e. with attribute ``success`` set to False)
      will be returned.

    report_target
      Target for the sequence optimization report (a folder path, or a zip
      path).

    barcode
      A sequence of DNA that will be added to the left of the sequence once
      the domestication is done.
      NOTE(review): this parameter is not referenced in this method's body.

    barcode_spacer
      Nucleotides to be added between the barcode and the enzyme (optional,
      the idea here is that they will make sure to avoid the creation of
      unwanted cutting sites).
      NOTE(review): this parameter is not referenced in this method's body.

    Returns
    -------
    DomesticationResult
      Built from (sequence_before, final_record, edits_record, report_data,
      success, message).
    """
    if is_cds == "default":
        is_cds = self.cds_by_default

    if isinstance(dna_sequence, SeqRecord):
        # Import the specifications annotated in the record and shift them
        # so that they stay aligned once the left flank is prepended.
        problem = DnaOptimizationProblem.from_record(dna_sequence)
        for spec in problem.constraints + problem.objectives:
            spec.location += len(self.left_flank)
        extra_constraints = list(extra_constraints) + problem.constraints
        # Objectives are built from the objectives only (and not from the
        # constraints list updated just above).
        extra_objectives = list(extra_objectives) + problem.objectives

    if protein_sequence is not None:
        is_cds = True
        dna_sequence = reverse_translate(protein_sequence)

    # Constraints/objectives may be given as factories taking the sequence.
    constraints = [
        c(dna_sequence) if hasattr(c, "__call__") else c
        for c in list(extra_constraints) + self.constraints
    ]
    # Location of the part itself inside the flanked sequence.
    location = Location(
        len(self.left_flank), len(self.left_flank) + len(dna_sequence)
    )
    if is_cds:
        constraints.append(EnforceTranslation(location=location))
    objectives = [
        o(dna_sequence) if hasattr(o, "__call__") else o
        for o in list(extra_objectives) + self.objectives
    ]
    if codon_optimization:
        objectives.append(
            CodonOptimize(species=codon_optimization, location=location)
        )
    if self.minimize_edits:
        objectives.append(AvoidChanges())

    extended_sequence = self.left_flank + dna_sequence + self.right_flank

    if (not is_cds) and (not edit):
        constraints.append(AvoidChanges())
    problem = DnaOptimizationProblem(
        extended_sequence,
        constraints=constraints,
        objectives=objectives,
        logger=self.logger,
    )
    all_constraints_pass = problem.all_constraints_pass()
    # The AvoidChanges objective added for ``minimize_edits`` is not
    # counted as a "real" objective here.
    no_objectives = (len(problem.objectives) - self.minimize_edits) == 0
    report_data = None
    optimization_successful = True
    message = ""
    # Optimization is skipped when the sequence already passes all the
    # constraints and there is nothing to optimize.
    if not (all_constraints_pass and no_objectives):
        problem.n_mutations = self.simultaneous_mutations

        if report_target is not None:
            (success, message, report_data) = problem.optimize_with_report(
                target=report_target, project_name=self.name
            )
            optimization_successful = success
        else:
            try:
                problem.resolve_constraints()
                problem.optimize()
            except Exception as err:
                # Best effort: a failure is reported through the result
                # (success flag and message) rather than raised.
                message = str(err)
                optimization_successful = False

    final_record = problem.to_record(
        with_original_features=True,
        with_original_spec_features=False,
        with_constraints=False,
        with_objectives=False,
    )
    edits_record = problem.to_record(
        with_original_features=True,
        with_original_spec_features=False,
        with_constraints=False,
        with_objectives=False,
        with_sequence_edits=True,
    )
    if final_record_target is not None:
        SeqIO.write(final_record, final_record_target, "genbank")

    return DomesticationResult(
        problem.sequence_before,
        final_record,
        edits_record,
        report_data,
        optimization_successful,
        message,
    )
def test_enforce_pattern_options():
    """Check the strand/location options of EnforcePatternOccurence.

    Regression test for Github issue #53. Six cases are covered: with or
    without a location, combined with the three kinds of strand option.
    """
    sequence = "A" * 10
    pattern = "C" * 4
    antisense_pattern = dc.reverse_complement(pattern)

    def build_problem(**spec_options):
        """Return an unresolved problem enforcing one pattern occurrence."""
        return dc.DnaOptimizationProblem(
            sequence=sequence,
            constraints=[
                dc.EnforcePatternOccurence(
                    pattern, occurences=1, **spec_options
                )
            ],
            logger=None,
        )

    def resolve(problem):
        """Resolve the problem, check it passes, return its sequence."""
        problem.resolve_constraints()
        assert problem.all_constraints_pass()
        return problem.sequence

    # --- No location provided ---
    assert pattern in resolve(build_problem(strand="from_location"))
    assert pattern in resolve(build_problem(strand="both"))

    antisense_problem = build_problem(strand=-1)
    assert antisense_problem.constraints[0].evaluate(
        antisense_problem
    ).score == -1
    # The other strand is used:
    assert antisense_pattern in resolve(antisense_problem)

    # --- Location provided (with strand -1) ---
    # Use the -1 strand from the location:
    from_location_problem = build_problem(
        strand="from_location", location=Location(1, 6, strand=-1)
    )
    assert antisense_pattern in resolve(from_location_problem)

    # Overwrite the -1 strand with "both" (uses the +1 strand by default):
    both_strands_problem = build_problem(
        strand="both", location=Location(1, 6, strand=-1)
    )
    assert pattern in resolve(both_strands_problem)

    # Overwrite the -1 strand with +1:
    sense_problem = build_problem(
        strand=1, location=Location(1, 6, strand=-1)
    )
    assert pattern in resolve(sense_problem)
def test_avoid_pattern_options():
    """Check the strand/location options of AvoidPattern.

    Regression test for Github issue #53. Each case lists the options given
    to AvoidPattern and whether the pattern should still be present in the
    sequence once the constraints are resolved.
    """
    pattern = "C" * 4
    sequence = "A" * 6 + pattern

    cases = [
        # No location provided:
        ({"strand": "from_location"}, False),
        ({"strand": "both"}, False),
        ({"strand": -1}, True),
        # Location provided.
        # Sequence not changed because the location's strand is -1:
        ({"location": Location(0, 10, -1), "strand": "from_location"}, True),
        # Sequence changed because the strand option overwrites the location:
        ({"location": Location(0, 10, -1), "strand": "both"}, False),
        # Sequence not changed because the strand option overwrites the
        # location's strand:
        ({"location": Location(0, 10, 1), "strand": -1}, True),
    ]

    for spec_options, pattern_remains in cases:
        problem = DnaOptimizationProblem(
            sequence=sequence,
            constraints=[AvoidPattern(pattern, **spec_options)],
            logger=None,
        )
        problem.resolve_constraints()
        assert problem.all_constraints_pass()
        if pattern_remains:
            assert pattern in problem.sequence
        else:
            assert pattern not in problem.sequence
def test_location_strand_gets_conserved():
    """Localizing a -1 strand AvoidPattern yields a -1 strand location."""
    original_spec = AvoidPattern("AACAAAT", Location(4, 1624, -1))
    localized_spec = original_spec.localized(Location(9, 10))
    expected_span = (4, 16, -1)
    assert localized_spec.location.to_tuple() == expected_span