def work(self):
    """Optimize the submitted sequence and return the report payload.

    The record is built in one of two ways:

    - ``data.editFeatures`` truthy: ``data.sequence`` is upper-cased, turned
      into a record, and every value of ``data.editedFeatures`` is added as a
      ``misc_feature`` annotation, in ``(start, end)`` order.
    - otherwise: the first record read from ``data.file`` is used.

    Returns a dict with the zipped report (``zip_file``), the ``success``
    flag and the ``summary`` produced by ``optimize_with_report``.
    """
    data = self.data
    self.logger(message="Initializing...")

    if not data.editFeatures:
        record = records_from_data_files([data.file])[0]
    else:
        record = sequence_to_biopython_record(data.sequence.upper())
        ordered_features = sorted(
            data.editedFeatures.values(),
            key=lambda feat: (feat.start, feat.end),
        )
        for feat in ordered_features:
            span = (feat.start, feat.end)
            annotate_record(
                record,
                feature_type="misc_feature",
                location=span,
                label=feat.label,
            )

    problem = DnaOptimizationProblem.from_record(record, logger=self.logger)
    problem.optimization_stagnation_tolerance = 30

    # "@memory" target: the report comes back as in-memory zip data.
    outcome = problem.optimize_with_report(
        target="@memory", project_name=record.id
    )
    success, summary, zip_data = outcome

    report_archive = {
        "data": data_to_html_data(zip_data, "zip"),
        "name": "optimization_report.zip",
        "mimetype": "application/zip",
    }
    return {
        "zip_file": report_archive,
        "success": success,
        "summary": summary,
    }
def work(self):
    """Optimize the submitted sequence and return the report payload.

    When ``data.editFeatures`` is truthy the record is created from the
    upper-cased ``data.sequence`` and annotated with every edited feature
    (as ``misc_feature``, sorted by ``(start, end)``); otherwise the first
    record parsed from ``data.file`` is used.

    Returns a dict holding the zipped report under ``zip_file`` together
    with the ``success`` flag and ``summary`` from
    ``optimization_with_report``.
    """
    data = self.data
    self.logger(message='Initializing...')

    if not data.editFeatures:
        # The second returned value (named ``fmt`` originally) is not used.
        parsed_records, _fmt = records_from_data_file(data.file)
        record = parsed_records[0]
    else:
        record = sequence_to_biopython_record(data.sequence.upper())
        ordered_features = sorted(
            data.editedFeatures.values(),
            key=lambda feat: (feat.start, feat.end),
        )
        for feat in ordered_features:
            span = (feat.start, feat.end)
            annotate_record(
                record,
                feature_type="misc_feature",
                location=span,
                label=feat.label,
            )

    problem = DnaOptimizationProblem.from_record(record)
    problem.max_random_iters = 1000
    problem.logger = self.logger

    outcome = optimization_with_report(
        target="@memory", problem=problem, project_name=record.id
    )
    success, summary, zip_data = outcome

    report_archive = {
        'data': data_to_html_data(zip_data, 'zip'),
        'name': 'optimization_report.zip',
        'mimetype': 'application/zip',
    }
    return {
        'zip_file': report_archive,
        'success': success,
        'summary': summary,
    }
def __init__(
    self,
    name="unnamed domesticator",
    left_flank="",
    right_flank="",
    constraints=(),
    objectives=(),
    cds_by_default=False,
    description=None,
    simultaneous_mutations=1,
    minimize_edits=True,
    logger=None,
):
    """Store the domesticator's flanks, specifications and settings.

    Parameters
    ----------
    name
        Name stored on the instance.
    left_flank, right_flank
        Either a record-like object (stored as-is) or a string. A string is
        converted with ``sequence_to_biopython_record`` and annotated with
        the label ``"left flank"`` / ``"right flank"``.
    constraints, objectives
        Iterables copied into new lists, so the caller's containers are
        never shared with the instance.
    cds_by_default, description, simultaneous_mutations, minimize_edits,
    logger
        Stored unchanged as attributes of the same name.
    """
    if isinstance(left_flank, str):
        left_flank = sequence_to_biopython_record(left_flank)
        annotate_record(left_flank, label="left flank")
    if isinstance(right_flank, str):
        right_flank = sequence_to_biopython_record(right_flank)
        annotate_record(right_flank, label="right flank")

    self.name = name
    self.left_flank = left_flank
    self.right_flank = right_flank
    # Always keep private list copies (the original also assigned the raw
    # ``constraints`` first, which was immediately overwritten).
    self.constraints = list(constraints)
    self.objectives = list(objectives)
    self.description = description
    self.logger = logger
    self.simultaneous_mutations = simultaneous_mutations
    self.minimize_edits = minimize_edits
    self.cds_by_default = cds_by_default
def test_feature_to_spec():
    """A feature label joining three specs with ``&`` yields three specs."""
    record = sequence_to_biopython_record(random_dna_sequence(100))
    annotate_record(record, label="@gc(40-60%/20bp) & @no(BsaI_site) & @keep")

    # The annotation above is the only feature on the record.
    multispec_feature = record.features[0]
    parsed_specs = Specification.list_from_biopython_feature(multispec_feature)

    assert len(parsed_specs) == 3
def test_record_with_multispec_feature():
    """A single multi-spec feature produces three constraints on the problem."""
    record = sequence_to_biopython_record(random_dna_sequence(100))
    annotate_record(record, label="@gc(40-60%/20bp) & @no(BsaI_site) & @keep")

    problem = DnaOptimizationProblem.from_record(record)
    assert len(problem.constraints) == 3

    # Constraints come out in the order the specs appear in the label.
    gc_constraint, site_constraint, _keep_constraint = problem.constraints
    assert gc_constraint.mini == 0.4
    assert site_constraint.pattern.name == "BsaI"
def test_that_constraints_in_records_are_accounted_for():
    """Domesticating a record annotated ``@cds`` must keep its translation."""
    from genedom import BUILTIN_STANDARDS

    sequence = "ATACGTCTCTAG"
    rec = sequence_to_biopython_record(sequence)
    annotate_record(rec, label="@cds")

    domesticator = BUILTIN_STANDARDS["EMMA"].domesticators["p7"]
    result = domesticator.domesticate(rec)

    # Strip the flanks added by the domesticator to recover the part itself.
    part_start = len(domesticator.left_flank)
    part_stop = -len(domesticator.right_flank)
    seq_after = str(result.record_after[part_start:part_stop].seq)

    assert translate(seq_after) == translate(sequence)
def __init__(
    self,
    left_overhang,
    right_overhang,
    left_addition="",
    right_addition="",
    enzyme="BsmBI",
    extra_avoided_sites=(),
    description="Golden Gate domesticator",
    name="unnamed_domesticator",
    cds_by_default=False,
    constraints=(),
    objectives=(),
):
    """Build the flanks and site-avoidance constraints, then initialize
    the underlying ``PartDomesticator``.

    Flank layout (as assembled below):

    - left flank:  enzyme site + ``"A"`` + left overhang + ``left_addition``
    - right flank: ``right_addition`` + right overhang + reverse complement
      of (enzyme site + ``"A"``)

    Parameters
    ----------
    left_overhang, right_overhang
        Overhang sequences; the raw values are kept on the instance and
        annotated record versions are used in the flanks.
    left_addition, right_addition
        Extra sequence placed between each overhang and the part.
    enzyme
        Name of the enzyme, looked up in ``Restriction`` to get its site.
    extra_avoided_sites
        Additional names to avoid inside the part. They are passed to
        ``EnzymeSitePattern`` exactly like ``enzyme`` (assumed to be enzyme
        names -- TODO confirm against callers).
    description, name, cds_by_default, objectives
        Forwarded unchanged to ``PartDomesticator.__init__``.
    constraints
        User constraints; one avoidance constraint per enzyme is appended.
    """
    self.enzyme = enzyme
    self.left_overhang = left_overhang
    left_overhang = sequence_to_biopython_record(left_overhang)
    self.right_overhang = right_overhang
    right_overhang = sequence_to_biopython_record(right_overhang)
    for seq in [left_overhang, right_overhang]:
        annotate_record(seq, label=str(seq.seq))

    enzyme_seq = Restriction.__dict__[enzyme].site
    enzyme_seq = sequence_to_biopython_record(enzyme_seq)
    annotate_record(enzyme_seq, label=enzyme)
    self.enzyme_seq = enzyme_seq

    left_flank = self.enzyme_seq + "A" + left_overhang + left_addition
    right_flank = (
        right_addition
        + right_overhang
        + (self.enzyme_seq + "A").reverse_complement()
    )
    self.extra_avoided_sites = extra_avoided_sites

    def avoid_site_in_part(enzyme_name):
        """Return a ``seq -> AvoidPattern`` factory for ``enzyme_name``.

        Binding the name through this function gives every constraint its
        own enzyme. The previous lambda always referenced ``enzyme``, so
        ``extra_avoided_sites`` were silently ignored.
        """

        def make_constraint(seq):
            # The constraint only covers the part, located after the left
            # flank in the final construct.
            return AvoidPattern(
                EnzymeSitePattern(enzyme_name),
                location=Location(
                    len(left_flank), len(left_flank) + len(seq)
                ),
            )

        return make_constraint

    constraints = list(constraints) + [
        avoid_site_in_part(enzyme_name)
        for enzyme_name in [enzyme] + list(extra_avoided_sites)
    ]

    PartDomesticator.__init__(
        self,
        left_flank=left_flank,
        right_flank=right_flank,
        constraints=constraints,
        objectives=objectives,
        description=description,
        name=name,
        cds_by_default=cds_by_default,
    )
def make_restriction_part(part_length, left_overhang, right_overhang,
                          enzyme, forbidden_enzymes,
                          assembly_enzyme='BsmBI'):
    """Build a flanked random part containing a site for ``enzyme``.

    A random core of ``part_length`` nucleotides is placed between the two
    overhangs, then constraints are resolved so that the overhangs are left
    unchanged, the ``enzyme`` pattern is enforced in the core, and the
    patterns of ``forbidden_enzymes`` and ``assembly_enzyme`` are avoided.
    The result is annotated and wrapped between a flank made of the
    assembly enzyme site + ``'A'`` and that flank's reverse complement.

    Parameters
    ----------
    part_length
        Number of random nucleotides between the overhangs.
    left_overhang, right_overhang
        Overhang sequences (strings) kept unchanged during resolution.
    enzyme
        Name of the enzyme whose pattern must occur in the core.
    forbidden_enzymes
        Enzyme names to avoid. Any iterable is accepted (list, tuple, ...);
        a single string is treated as one enzyme name.
    assembly_enzyme
        Name of the enzyme used for the flanks; its site is also avoided.

    Returns
    -------
    The record ``flank + annotated core + flank.reverse_complement()``.
    """
    # Normalise so that tuples/sets/generators (or a lone name) work; the
    # original ``forbidden_enzymes + [assembly_enzyme]`` required a list.
    if isinstance(forbidden_enzymes, str):
        forbidden_enzymes = [forbidden_enzymes]
    avoided_enzymes = list(forbidden_enzymes) + [assembly_enzyme]

    l_left = len(left_overhang)
    l_right = len(right_overhang)
    part_end = l_left + part_length
    left_overhang_location = (0, l_left)
    right_overhang_location = (part_end, part_end + l_right)
    center_location = (l_left, part_end)

    core_sequence = (
        left_overhang + dc.random_dna_sequence(part_length) + right_overhang
    )
    enforce_enzyme = dc.EnforcePatternOccurence(
        enzyme=enzyme, location=center_location
    )
    problem = dc.DnaOptimizationProblem(
        sequence=core_sequence,
        constraints=[
            dc.AvoidChanges(left_overhang_location),
            dc.AvoidChanges(right_overhang_location),
            enforce_enzyme,
        ] + [
            dc.AvoidPattern(enzyme=enzyme_name)
            for enzyme_name in avoided_enzymes
        ],
    )
    problem.resolve_constraints()

    core_sequence = dc.sequence_to_biopython_record(problem.sequence)
    # NOTE(review): 'overhang' and ``enzyme`` are passed as the third
    # positional argument of annotate_record -- confirm whether that slot is
    # the feature type or the label in the dnachisel version in use.
    for loc in [left_overhang_location, right_overhang_location]:
        dc.annotate_record(core_sequence, loc, 'overhang')
    # First match of the enforced pattern, used to annotate the site.
    site_location = enforce_enzyme.evaluate(problem).data['matches'][0]
    dc.annotate_record(core_sequence, site_location.to_tuple(), enzyme)

    assembly_site = Restriction.__dict__[assembly_enzyme].site
    flank = dc.sequence_to_biopython_record(assembly_site + 'A')
    dc.annotate_record(flank, label='flank')
    return flank + core_sequence + flank.reverse_complement()
def test_parameterization():
    """EnforceChanges parameters must resolve identically whether the specs
    are created directly or parsed from record annotations."""

    def all_none(values):
        return all(value is None for value in values)

    poly_a = 200 * "A"

    # Problem 1: specifications instantiated directly.
    direct_problem = dc.DnaOptimizationProblem(
        sequence=poly_a,
        constraints=[
            dc.EnforceChanges(),
            dc.EnforceChanges(minimum=20),
            dc.EnforceChanges(minimum_percent=5),
        ],
        objectives=[
            dc.EnforceChanges(),
            dc.EnforceChanges(amount=20),
            dc.EnforceChanges(amount_percent=5),
        ],
    )

    # Problem 2: the same specifications written as annotation labels.
    record = dc.sequence_to_biopython_record(poly_a)
    shorthand_labels = (
        "@change",
        "@change(minimum=20)",
        "@change(minimum=5%)",
        "~change",
        "~change(amount=20)",
        "~change(5%)",
    )
    for shorthand in shorthand_labels:
        dc.annotate_record(record, label=shorthand)
    annotated_problem = dc.DnaOptimizationProblem.from_record(record)

    for problem in (direct_problem, annotated_problem):
        # --- constraints ---
        full_constraint = problem.constraints[0]
        assert full_constraint.minimum == 200
        assert full_constraint.minimum_percent == 100
        assert all_none(
            [full_constraint.amount, full_constraint.amount_percent]
        )

        absolute_constraint = problem.constraints[1]
        assert absolute_constraint.minimum == 20
        assert all_none(
            [
                absolute_constraint.minimum_percent,
                absolute_constraint.amount,
                absolute_constraint.amount_percent,
            ]
        )

        percent_constraint = problem.constraints[2]
        assert percent_constraint.minimum == 10
        assert percent_constraint.minimum_percent == 5
        assert all_none(
            [percent_constraint.amount, percent_constraint.amount_percent]
        )

        # --- objectives ---
        full_objective = problem.objectives[0]
        assert full_objective.amount == 200
        assert full_objective.amount_percent == 100
        assert all_none(
            [full_objective.minimum, full_objective.minimum_percent]
        )

        absolute_objective = problem.objectives[1]
        assert absolute_objective.amount == 20
        assert all_none(
            [
                absolute_objective.minimum_percent,
                absolute_objective.minimum,
                absolute_objective.amount_percent,
            ]
        )

        percent_objective = problem.objectives[2]
        assert percent_objective.amount == 10
        assert percent_objective.amount_percent == 5
        assert all_none(
            [percent_objective.minimum, percent_objective.minimum_percent]
        )
def test_all_shorthands():
    """Compile every shorthand label as a smoke test that nothing is broken."""
    numpy.random.seed(123)
    record = sequence_to_biopython_record(random_dna_sequence(1000))

    # (location, label) pairs, annotated in this exact order.
    shorthand_annotations = [
        ((100, 900), "@no(CATG)"),
        ((100, 900), "@gc(40-60%)"),
        ((100, 900), "@insert(AarI_site)"),
        ((650, 752), "@cds"),
        ((100, 200), "@keep"),
        ((250, 273), "@primer"),
        ((250, 280), "@change"),
        ((943, 950), "@sequence(AKGNTKT)"),
        ((955, 958), "@sequence(ATT|ATC|GGG)"),
    ]
    for location, shorthand in shorthand_annotations:
        annotate_record(record, location, label=shorthand)

    problem = DnaOptimizationProblem.from_record(record)

    # AllowPrimer counts for 4 specs.
    assert len(problem.constraints) == 13

    # The random sequence violates some constraints until they are resolved.
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()