def test_num_groups_vs_endpoints(here, paths, query, span_cost):
    """Print the squared group count next to the product of distinct endpoints.

    The query design is blasted against the primers and templates, the single
    resulting container is expanded, and two numbers are printed:
    ``len(groups) ** 2`` and ``(#distinct region starts) * (#distinct region ends)``.

    :param here: base directory used to locate the test data
    :param paths: mapping providing the ``"primers"`` and ``"templates"`` globs
    :param query: file name of the design inside ``genbank/designs``
    :param span_cost: span cost object handed to ``Design``
    """
    materials = {
        "primers": make_linear(load_fasta_glob(paths["primers"])),
        "templates": load_genbank_glob(paths["templates"]),
        "queries": make_circular(
            load_genbank_glob(join(here, "data/test_data/genbank/designs", query))
        ),
    }

    design = Design(span_cost)
    design.add_materials(**materials)
    design._blast()

    # Exactly one container is expected for the single query.
    containers = design.container_list
    assert len(containers) == 1
    container = containers[0]
    container.expand()

    groups = container.groups()
    print(len(groups) ** 2)

    # Collect the (a, b) endpoints once, then count the distinct values of each.
    endpoints = [(g.query_region.a, g.query_region.b) for g in groups]
    starts = {a for a, _ in endpoints}
    ends = {b for _, b in endpoints}
    print(len(starts) * len(ends))
def blast_factory(paths) -> BioBlastFactory:
    """Build a ``BioBlastFactory`` loaded with primers, templates and queries.

    Primers are made linear and queries are made circular before being
    registered. Templates are read from ``paths[REGISTRY]`` but registered
    under the ``TEMPLATES`` key.

    :param paths: mapping from the ``PRIMERS``, ``REGISTRY`` and ``QUERIES``
        keys to the file globs to load
    :return: the populated factory
    """
    factory = BioBlastFactory()

    primer_records = make_linear(load_fasta_glob(paths[PRIMERS]))
    template_records = load_genbank_glob(paths[REGISTRY])
    query_records = make_circular(load_genbank_glob(paths[QUERIES]))

    registrations = (
        (primer_records, PRIMERS),
        (template_records, TEMPLATES),
        (query_records, QUERIES),
    )
    for records, record_key in registrations:
        factory.add_records(records, record_key)

    return factory
def test_benchmark_blast(benchmark, here, paths, query):
    """Benchmark ``Design._blast`` for a single query design.

    ``_blast`` is invoked once directly and is then handed to the
    ``benchmark`` callable.

    :param benchmark: callable that receives the function to measure
        (presumably the pytest-benchmark fixture)
    :param here: base directory used to locate the test data
    :param paths: mapping providing the ``"primers"`` and ``"templates"`` globs
    :param query: file name of the design inside ``genbank/designs``
    """
    materials = {
        "primers": make_linear(load_fasta_glob(paths["primers"])),
        "templates": load_genbank_glob(paths["templates"]),
        "queries": make_circular(
            load_genbank_glob(join(here, "data/test_data/genbank/designs", query))
        ),
    }

    design = Design()
    design.add_materials(**materials)

    design._blast()
    benchmark(design._blast)
def make_blast():
    """Create a ``BioBlast`` of the test primers against one fixed design.

    ``here`` is not a parameter; it is resolved from the enclosing scope.
    Both record sets are loaded with ``force_unique_ids=True`` and the primer
    subjects are made linear.

    :return: ``BioBlast(subjects, queries)``
    """
    primer_file = join(here, "data/test_data/primers/primers.fasta")
    design_file = join(
        here, "data/test_data/genbank/designs/pmodkan-ho-pact1-z4-er-vpr.gb"
    )

    subjects = make_linear(load_fasta_glob(primer_file, force_unique_ids=True))
    queries = load_genbank_glob(design_file, force_unique_ids=True)
    return BioBlast(subjects, queries)
def run(self, n_jobs: int = 10):
    """Run a design job.

    Loads the sequence files, compiles and optimizes the design, then writes
    ``summary.csv``, ``sequence_design.csv`` and ``sequences.gb``.

    :param n_jobs: number of parallel jobs to run. (default: 10)
    :return: None
    """
    import warnings

    # Silence these warning categories for the duration of the process.
    for ignored_category in (RuntimeWarning, BiopythonParserWarning):
        warnings.simplefilter(action="ignore", category=ignored_category)

    self._logger.info("Loading sequence files")
    materials = {
        "primers": make_linear(load_fasta_glob(self._primers)),
        "templates": make_circular(load_genbank_glob(self._templates)),
        "fragments": make_linear(load_genbank_glob(self._fragments)),
        "queries": make_circular(load_genbank_glob(self._goals)),
    }

    design = Design()
    design.n_jobs = n_jobs
    design.add_materials(**materials)

    self._logger.info("Getting span cost model")
    design.span_cost = self._get_span_cost()

    self._logger.info("Compiling possible molecular assemblies")
    design.compile()

    self._logger.info("Optimizing molecular assemblies")
    design.optimize()

    self._logger.info("Designing assembly primers and fragments")
    sequence_df, summary_df, _ = design.to_df()
    # NOTE(review): both CSVs are written relative to the current working
    # directory, whereas sequences.gb below goes under self._directory.
    # Confirm whether that difference is intended.
    summary_df.to_csv("summary.csv")
    sequence_df.to_csv("sequence_design.csv")

    # Only the first assembly of each result contributes sequences.
    records = [
        molecule.sequence
        for result in design.results.values()
        if result.assemblies
        for _, _, molecule in result.assemblies[0].molecules
    ]

    SeqIO.write(records, os.path.join(self._directory, "sequences.gb"), "genbank")
def _get_results_func(n_jobs):
    """Run a design over the test queries and return it with its results.

    ``paths``, ``here``, ``cached_span_cost`` and ``LIM_NUM_DESIGNS`` are not
    parameters; they are resolved from the enclosing scope. At most
    ``LIM_NUM_DESIGNS`` queries are used.

    :param n_jobs: when greater than 1 the design is run through
        ``_run_with_pool(n_jobs, 1)``; otherwise ``run()`` is called
    :return: tuple of ``(design, design.results)``
    """
    print("PROCESSING!")

    primers = make_linear(load_fasta_glob(paths["primers"]))
    templates = load_genbank_glob(paths["templates"])

    query_path = join(here, "data/test_data/genbank/designs/*.gb")
    queries = make_circular(load_genbank_glob(query_path))[:LIM_NUM_DESIGNS]

    design = Design(span_cost=cached_span_cost)
    design.add_materials(primers=primers, templates=templates, queries=queries)

    if n_jobs > 1:
        design._run_with_pool(n_jobs, 1)
    else:
        design.run()

    return design, design.results
def test_library_design_to_df_2(paths, here, span_cost):
    """Run a ``LibraryDesign`` on the ``test_data_sd2`` data set and export it.

    Prints the data frame of the first assembly of every result, then writes
    ``library_design.csv``, ``library_summary.csv`` and ``designs.json`` to
    the current working directory.

    :param paths: unused by this test
    :param here: base directory used to locate the test data
    :param span_cost: span cost object handed to ``LibraryDesign``
    """
    data_dir = join(here, "data/test_data_sd2")
    primers_path = join(data_dir, "primers.fasta")
    fragments_path = join(data_dir, "fragments", "*.gb")
    plasmids_path = join(data_dir, "plasmids", "*.gb")
    designs_path = join(data_dir, "designs", "*.gb")

    primers = make_linear(load_fasta_glob(primers_path))
    templates = load_genbank_glob(plasmids_path)
    fragments = load_genbank_glob(fragments_path)
    print(fragments_path)
    queries = make_circular(load_genbank_glob(designs_path))

    design = LibraryDesign(span_cost=span_cost)
    design.n_jobs = 1
    # Templates are made circular and fragments linear as they are added.
    design.add_materials(
        primers=primers,
        templates=make_circular(templates),
        queries=queries,
        fragments=make_linear(fragments),
    )

    design.logger.set_level("DEBUG")
    design.compile()
    results = design.optimize()

    for result in results.values():
        print(result.assemblies[0].to_df())

    # Export the three values returned by to_df().
    design_df, summary_df, design_json = design.to_df()
    design_df.to_csv("library_design.csv")
    summary_df.to_csv("library_summary.csv")
    with open("designs.json", "w") as handle:
        json.dump(design_json, handle)

    for exported in (design_df, summary_df, design_json):
        print(exported)
def test_bioblast_factory_init(here):
    """Populate a ``BioBlastFactory`` and run both blasters it produces.

    Templates are registered as ``"subjects"``, designs as ``"queries"`` and
    linearised primers as ``"primers"``. The primer blaster runs
    ``blastn_short`` and the template blaster runs ``blastn``; the number of
    results of each is printed.

    :param here: base directory used to locate the test data
    """
    template_glob = join(here, "data/test_data/genbank/templates/*.gb")
    design_glob = join(here, "data/test_data/genbank/designs/*.gb")
    primer_glob = join(here, "data/test_data/primers/*.fasta")

    subjects = load_genbank_glob(template_glob, force_unique_ids=True)
    queries = load_genbank_glob(design_glob, force_unique_ids=True)
    primers = load_fasta_glob(primer_glob)

    factory = BioBlastFactory()
    factory.add_records(make_linear(primers), "primers")
    factory.add_records(queries, "queries")
    factory.add_records(subjects, "subjects")

    # Both blasters are created before either one is run.
    primer_blaster = factory("primers", "queries")
    template_blaster = factory("subjects", "queries")

    primer_results = primer_blaster.blastn_short()
    template_results = template_blaster.blastn()

    for blast_results in (primer_results, template_results):
        print(len(blast_results))
def test_library_design_to_df(paths, here, span_cost):
    """Run a ``LibraryDesign`` on the library test designs and export it.

    Compiles and optimizes the design, prints the optimization results, then
    writes ``library_design.csv``, ``library_summary.csv`` and
    ``designs.json`` to the current working directory.

    :param paths: mapping providing the ``"primers"`` and ``"templates"`` globs
    :param here: base directory used to locate the test data
    :param span_cost: span cost object handed to ``LibraryDesign``
    """
    primers = make_linear(load_fasta_glob(paths["primers"]))
    templates = load_genbank_glob(paths["templates"])

    query_path = join(here, "data/test_data/genbank/library_designs/*.gb")
    queries = make_circular(load_genbank_glob(query_path))

    design = LibraryDesign(span_cost=span_cost)
    design.n_jobs = 1
    design.add_materials(primers=primers, templates=templates, queries=queries)

    design.logger.set_level("DEBUG")
    design.compile()
    results = design.optimize()
    print(results)

    # Export the three values returned by to_df().
    a, b, c = design.to_df()
    a.to_csv("library_design.csv")
    b.to_csv("library_summary.csv")
    with open("designs.json", "w") as f:
        json.dump(c, f)

    print(a)
    print(b)
    print(c)