def test_design_with_overhang_primers(repeat, span_cost):
    """A design using PCR products amplified with overhang primers.

    The template ``r1`` is padded with 100bp of random sequence on both
    ends, so the expected assembly path relies on primer-anchored
    (non-template) endpoints.
    """
    goal = random_record(3000)
    make_circular_and_id([goal])

    # Template with random junk flanking the goal-derived region.
    r1 = random_record(100) + goal[1000:2000] + random_record(100)
    p1 = goal[970:1030]
    p2 = goal[1970:2030].reverse_complement()
    # Second template spans the origin of the circular goal.
    r2 = goal[2000:] + goal[:1000]
    p3 = goal[1970:2030]
    make_linear_and_id([r1, p1, p2, r2, p3])

    design = Design(span_cost)
    design.add_materials(
        fragments=[],
        primers=[p1, p2, p3],
        queries=[goal],
        templates=[r1, r2],
    )

    expected_path = [
        (970, False, "A", True),
        (2030, False, "B", True),
        (1970, False, "A", True),
        (4000, True, "B", True),
    ]
    check_design_result(design, expected_path)
def test_design_with_overlaps_with_templates(span_cost):
    """A design built from templates whose regions overlap each other."""
    goal = random_record(3000)
    make_circular_and_id([goal])

    # r1 wraps the origin; r2 and r3 overlap their neighbors.
    r1 = goal[-40:] + goal[:1000]
    r2 = goal[970:2000]
    r3 = goal[1950:]
    p1 = goal[-40:]
    make_linear_and_id([r1, r2, r3, p1])

    design = Design(span_cost)
    design.add_materials(
        fragments=[],
        primers=[p1],
        queries=[goal],
        templates=[r1, r2, r3],
    )

    expected_path = [
        (970, True, "A", True),
        (1950, True, "B", False),
        (1950, True, "A", False),
        (3000, True, "B", True),
        (3000 - 40, False, "A", True),
        (4000, True, "B", True),
    ]
    check_design_result(design, expected_path, check_path=True)
def test_design_task_with_gaps(span_cost):
    """A design whose templates leave small gaps that must be spanned.

    The three templates cover [0, 950), [1000, 2000) and [2050, 3000),
    leaving 50bp gaps between consecutive pieces.
    """
    goal = random_record(3000)
    make_circular_and_id([goal])

    r1 = goal[:950]
    r2 = goal[1000:2000]
    r3 = goal[2050:]
    make_linear_and_id([r1, r2, r3])

    design = Design(span_cost)
    design.add_materials(
        fragments=[],
        primers=[],
        queries=[goal],
        templates=[r1, r2, r3],
    )

    expected_path = [
        (0, True, "A", False),
        (950, True, "B", False),
        (1000, True, "A", False),
        (2000, True, "B", False),
        (2050, True, "A", False),
        (3000, True, "B", False),
    ]
    check_design_result(design, expected_path)
def test_single_fragment(span_cost):
    """A design solvable with a single template plus synthesis."""
    goal = random_record(3000)
    make_circular_and_id([goal])

    template = goal[177:2255]
    make_linear_and_id([template])

    design = Design(span_cost)
    design.add_materials(
        fragments=[],
        primers=[],
        queries=[goal],
        templates=[template],
    )

    expected_path = [
        (177, True, "A", False),
        (2255, True, "B", False),
    ]
    check_design_result(design, expected_path)
def test_a_reverse_pcr_fragment(span_cost):
    """A PCR product amplified off a reverse-complemented template."""
    goal = random_record(3000)
    make_circular_and_id([goal])

    # Both the template and its primer are on the reverse strand.
    t1 = goal[1000:2500].reverse_complement()
    p1 = goal[2500 - 20:2510].reverse_complement()
    make_linear_and_id([p1, t1])

    design = Design(span_cost)
    design.add_materials(
        fragments=[],
        primers=[p1],
        queries=[goal],
        templates=[t1],
    )

    expected_path = [
        (1000, True, "A", False),
        (2510, False, "B", False),
    ]
    check_design_result(design, expected_path)
def test_fully_overlapped(span_cost):
    """Primers that all bind inside a single small template region."""
    goal = random_record(2000)
    make_circular_and_id([goal])

    template = goal[1100:1300]
    # Forward primers at 1177 and 1188; reverse primer ending at 1225.
    p1 = goal[1177:1177 + 30]
    p2 = goal[1188:1188 + 30]
    p3 = goal[1225 - 30:1225].reverse_complement()
    make_linear_and_id([template, p1, p2, p3])

    design = Design(span_cost)
    design.add_materials(
        fragments=[],
        primers=[p1, p2, p3],
        queries=[goal],
        templates=[template],
    )

    expected_path = [
        (1177, False, "A", False),
        (1300, True, "B", False),
    ]
    check_design_result(design, expected_path)
def test_blast_has_same_results(span_cost):
    """Repeated compiles must produce identical alignment group counts.

    Runs the same compile 20 times and asserts the number of groups per
    container never varies, i.e. BLAST results are deterministic.
    """
    goal = random_record(3000)
    make_circular_and_id([goal])

    r1 = random_record(100) + goal[1000:2000] + random_record(100)
    p1 = goal[970:1030]
    p2 = goal[1970:2030].reverse_complement()
    r2 = goal[2000:] + goal[:1000]
    p3 = goal[1970:2030]
    make_linear_and_id([r1, p1, p2, r2, p3])

    size_of_groups = []
    for _ in range(20):
        design = Design(span_cost)
        design.logger.set_level("INFO")
        design.add_materials(
            fragments=[],
            primers=[p1, p2, p3],
            queries=[goal],
            templates=[r1, r2],
        )
        design.compile()
        for container in design.container_list:
            size_of_groups.append(len(container.groups()))

    assert len(size_of_groups) == 20
    # All 20 runs must agree on the group count.
    assert len(set(size_of_groups)) == 1
def test_num_groups_vs_endpoints(here, paths, query, span_cost):
    """Compare the number of groups against distinct endpoint products.

    Loads real fixture data, expands a single container, and prints the
    squared group count next to |unique starts| * |unique ends|.
    """
    primers = make_linear(load_fasta_glob(paths["primers"]))
    templates = load_genbank_glob(paths["templates"])
    query_path = join(here, "data/test_data/genbank/designs", query)
    queries = make_circular(load_genbank_glob(query_path))

    design = Design(span_cost)
    design.add_materials(primers=primers, templates=templates, queries=queries)
    design._blast()

    containers = design.container_list
    assert len(containers) == 1
    container = containers[0]
    container.expand()
    groups = container.groups()
    print(len(groups) ** 2)

    starts = set()
    ends = set()
    for group in groups:
        starts.add(group.query_region.a)
        ends.add(group.query_region.b)
    print(len(starts) * len(ends))
def test_add_special_partition_node(self, span_cost):
    """Inject a synthetic partition node into each assembly graph.

    After compiling, every B->A edge is split by a new unique node 'n3'
    placed at the midpoint of the edge's query region, with the material
    cost divided between the two new edges. This simulates adding a
    partitioning step for highly complex sequences, then verifies the
    optimizer routes through the expected coordinates.
    """
    goal = random_record(4000)
    make_circular_and_id([goal])
    r1 = goal[1000:2000]
    r2 = goal[200:500]
    make_linear_and_id([r1, r2])

    design = Design(span_cost)
    design.add_materials(
        primers=[], templates=[r1, r2], queries=[goal], fragments=[]
    )
    design.compile()

    import networkx as nx

    for qk, graph in design.graphs.items():
        query = design.seqdb[qk]
        graph_copy = nx.DiGraph(graph)
        for n1, n2, edge_data in graph.edges(data=True):
            region = Region(n1.index, n2.index, len(query.seq), cyclic=True)
            if n1.type == "B" and n2.type == "A":
                # Place the partition node at the region midpoint
                # (mapped back to query coordinates via Region.t).
                half_span = int(len(region) / 2)
                midpoint = region.t(half_span + n1.index)
                n3 = AssemblyNode(midpoint, False, str(uuid4()), overhang=True)

                data_left = dict(edge_data)
                data_right = dict(edge_data)
                # Split the material cost across the two new edges.
                data_left["material"] = edge_data["material"] / 10.0
                data_right["material"] = edge_data["material"] / 10.0
                data_left["span"] = 0
                graph_copy.add_edge(n1, n3, **data_left)
                graph_copy.add_edge(n3, n2, **data_right)
        design.graphs[qk] = graph_copy

    result = list(design.optimize().values())[0]
    assembly = result.assemblies[0]
    df = assembly.to_df()
    assert list(df["query_start"]) == [200, 500, 750, 1000, 2000, 3100]
    assert list(df["query_end"]) == [500, 750, 1000, 2000, 3100, 200]
def test_requires_synthesis_with_template_over_origin(span_cost):
    """Synthesis is required around a template that spans the origin."""
    goal = random_record(5000)
    make_circular_and_id([goal])

    r1 = goal[1000:2000]
    # Origin-spanning template: last 1500bp followed by first 500bp.
    r2 = goal[3500:] + goal[:500]
    make_linear_and_id([r1, r2])

    design = Design(span_cost)
    design.add_materials(
        fragments=[],
        primers=[],
        queries=[goal],
        templates=[r1, r2],
    )

    expected_path = [
        (500, True, "B", False),
        (1000, True, "A", False),
        (2000, True, "B", False),
        (3500, True, "A", False),
    ]
    check_design_result(design, expected_path)
def test_case(span_cost):
    """Regression test for a case that previously found no solution.

    Two small templates sit with a <10bp gap between them; the expected
    solution is to PCR amplify both fragments and synthesize the rest of
    the plasmid.
    """
    goal = random_record(2000)
    make_circular_and_id([goal])

    r1 = goal[1188:1230]
    r2 = goal[1238:1282]
    make_linear_and_id([r1, r2])

    design = Design(span_cost)
    design.add_materials(
        fragments=[],
        primers=[],
        queries=[goal],
        templates=[r1, r2],
    )

    expected_path = [
        (1238, True, "A", False),
        (1282, True, "B", False),
    ]
    check_design_result(design, expected_path)
def test_very_long_synthesizable_region(span_cost):
    """A tiny template inside a 10kb goal leaves an unsynthesizable span.

    Only r1 is registered as a template, so nearly the whole plasmid
    would need synthesis; the design is expected to raise NoSolution
    (the expected_path content is never reached).
    """
    goal = random_record(10000)
    make_circular_and_id([goal])

    r1 = goal[4177:4255]
    r2 = goal[4188:4225]
    make_linear_and_id([r1, r2])

    design = Design(span_cost)
    design.add_materials(
        fragments=[],
        primers=[],
        queries=[goal],
        templates=[r1],
    )

    expected_path = [
        (500, True, "B", False),
        (1000, True, "A", False),
        (2000, True, "B", False),
        (2500, True, "A", False),
    ]
    with pytest.raises(NoSolution):
        check_design_result(design, expected_path)
def test_large_pkl(span_cost):
    """Graphs, containers and the span cost model must round-trip pickle.

    Builds a sizable fake design and verifies each major artifact can be
    serialized and deserialized, timing each step at DEBUG level.
    """
    design = Design.fake(
        n_designs=3, n_cyclic_seqs=100, n_linear_seqs=100, n_primers=100
    )
    design.compile()

    with logger.timeit("DEBUG", "pickling graphs"):
        pickle.loads(pickle.dumps(design.graphs))
    with logger.timeit("DEBUG", "pickling containers"):
        pickle.loads(pickle.dumps(design.container_factory))
    with logger.timeit("DEBUG", "pickling span_cost"):
        pickle.loads(pickle.dumps(span_cost))
def test_design_near_origin(span_cost):
    """A design whose fragments overlap right at the circular origin."""
    goal = random_record(3000)
    make_circular_and_id([goal])

    # Both fragments cross the origin of the circular goal.
    r1 = goal[-40:] + goal[:1000]
    r3 = goal[950:] + goal[:1]
    make_linear_and_id([r1, r3])

    design = Design(span_cost)
    design.add_materials(
        fragments=[r1, r3],
        primers=[],
        queries=[goal],
        templates=[],
    )
    design.run()

    df = design.to_df()[1]
    print(df)
def test_cost_comparison_library():
    """Compare assembly costs between Design and LibraryDesign.

    Builds one fake library, runs both designer classes over the same
    materials, and prints status and per-query assembly costs side by
    side, finishing with the LibraryDesign coverage plot.
    """
    design1, library = Design.fake(
        n_designs=3,
        n_linear_seqs=50,
        n_cyclic_seqs=50,
        n_primers_from_templates=500,
        shared_length=500,
        return_with_library=True,
    )

    design2 = LibraryDesign(seqdb=design1.seqdb)
    queries = library["design"]
    plasmids = library["cyclic"]
    fragments = library["linear"]
    primers = library["short"]
    design2.add_materials(
        primers=primers,
        fragments=fragments,
        templates=plasmids,
        queries=queries,
    )

    design1.run()
    design2.run()

    print("#" * 10 + "\nDesign\n" + "#" * 10)
    print(json.dumps(design1.status, indent=2))
    print("#" * 10 + "\nLibraryDesign\n" + "#" * 10)
    print(json.dumps(design2.status, indent=2))

    print("%" * 10 + "\nDesign Cost\n" + "%" * 10)
    for qk, status in design1.status.items():
        print(status["assemblies"])

    print("%" * 10 + "\nLibraryDesign Cost\n" + "%" * 10)
    for qk, status in design2.status.items():
        print(status["assemblies"])

    design2.report().plot_coverage(show=True)
    print(design2.to_df()[1])
def test_reindex_invariant(reindex):
    """Rotating a circular query must not change the assembly cost.

    Runs the same design twice — once on the original queries and once
    on queries re-indexed (rotated) by ``reindex`` — and asserts the top
    assembly cost is identical.
    """
    design1, library = Design.fake(
        n_designs=1,
        n_linear_seqs=50,
        n_cyclic_seqs=50,
        n_primers_from_templates=500,
        shared_length=500,
        return_with_library=True,
    )
    queries = library["design"]
    plasmids = library["cyclic"]
    fragments = library["linear"]
    primers = library["short"]

    # Rotate each circular query by `reindex` base pairs.
    rotated = [q[reindex:] + q[:reindex] for q in queries]
    make_cyclic(rotated)

    design2 = Design()
    design2.add_materials(
        templates=plasmids,
        fragments=fragments,
        primers=primers,
        queries=rotated,
    )

    design1.run()
    design2.run()

    results1 = design1.out()
    results2 = design2.out()
    assembly1 = list(results1["designs"].values())[0]["assemblies"][0]
    assembly2 = list(results2["designs"].values())[0]["assemblies"][0]
    print(assembly1["cost"])
    print(assembly2["cost"])
    assert assembly1["cost"] == assembly2["cost"]
def example_design(self, span_cost):
    """Build, compile and optimize a small example design.

    Returns a ``(design, results)`` tuple for use by dependent tests.
    """
    goal = random_record(3000)
    make_circular_and_id([goal])

    r1 = random_record(100) + goal[1000:2000] + random_record(100)
    p1 = goal[970:1030]
    p2 = goal[1970:2030].reverse_complement()
    r2 = goal[2000:] + goal[:1000]
    p3 = goal[1970:2030]
    make_linear_and_id([r1, p1, p2, r2, p3])

    design = Design(span_cost)
    design.add_materials(
        fragments=[],
        primers=[p1, p2, p3],
        queries=[goal],
        templates=[r1, r2],
    )
    design.compile()
    results = design.optimize()
    return design, results
def test_design_optimize_cannot_run_before_compile(cached_span_cost):
    """optimize() before compile() must raise DasiDesignException."""
    design = Design.fake(n_designs=1)

    # Calling optimize first is an error...
    with pytest.raises(DasiDesignException):
        design.optimize()

    # ...but after compiling it succeeds.
    design.compile()
    design.optimize()
def test_example_0(args):
    """Run an end-to-end design over fixture GenBank files.

    ``args`` is ``(i0, i1, DesignCls)``: a slice of the goal list to
    design and the designer class to use.

    Bug fix: the original unpacked ``DesignCls`` from the parametrized
    ``args`` but then instantiated the hard-coded ``Design()``, silently
    ignoring the class parametrization. It now instantiates ``DesignCls``.
    """
    i0, i1, DesignCls = args
    # Seed both RNGs so the run is reproducible.
    random.seed(0)
    np.random.seed(0)

    def open_gb(path):
        # Parse a GenBank fixture file into a list of SeqRecords.
        with open(join(fixtures, path)) as f:
            return list(SeqIO.parse(f, format="genbank"))

    fragments = open_gb("fragments_0.gb")
    primers = open_gb("primers_0.gb")
    plasmids = open_gb("plasmids_0.gb")
    goals = open_gb("goals_0.gb")

    design = DesignCls()
    design.add_fragments(fragments)
    design.add_primers(primers)
    design.add_templates(plasmids)
    design.add_queries(goals[slice(i0, i1)])

    design.run(n_jobs=4)

    # assert successful runs
    print(design.status)
    for v in design.status.values():
        assert v["success"] is True

    # output JSON
    out = design.out()
    print(out)
    print(design.to_df()[1])
def run(self, n_jobs: int = 10):
    """Run a design job.

    :param n_jobs: number of parrallel jobs to run. (default: 10)
    :return:
    """
    import warnings

    # Silence noisy-but-harmless warnings from numpy and Biopython.
    warnings.simplefilter(action="ignore", category=RuntimeWarning)
    warnings.simplefilter(action="ignore", category=BiopythonParserWarning)

    self._logger.info("Loading sequence files")
    primers = make_linear(load_fasta_glob(self._primers))
    templates = make_circular(load_genbank_glob(self._templates))
    fragments = make_linear(load_genbank_glob(self._fragments))
    goals = make_circular(load_genbank_glob(self._goals))

    design = Design()
    design.n_jobs = n_jobs
    design.add_materials(
        primers=primers,
        templates=templates,
        fragments=fragments,
        queries=goals,
    )

    self._logger.info("Getting span cost model")
    design.span_cost = self._get_span_cost()

    self._logger.info("Compiling possible molecular assemblies")
    design.compile()

    self._logger.info("Optimizing molecular assemblies")
    design.optimize()

    self._logger.info("Designing assembly primers and fragments")
    df, adf, design_json = design.to_df()
    # NOTE(review): CSVs go to the current working directory while the
    # GenBank output goes to self._directory — confirm this is intended.
    adf.to_csv("summary.csv")
    df.to_csv("sequence_design.csv")

    # Collect the sequence of every molecule in each best assembly.
    records = []
    for result in design.results.values():
        if result.assemblies:
            best = result.assemblies[0]
            for _, _, molecule in best.molecules:
                records.append(molecule.sequence)
    SeqIO.write(
        records, os.path.join(self._directory, "sequences.gb"), "genbank"
    )
def design(self):
    """Return a small fake design that has already been run."""
    fake_design = Design.fake(n_designs=3)
    fake_design.run()
    return fake_design