Пример #1
0
def test_num_groups_vs_endpoints(here, paths, query, span_cost):
    """Print the squared group count next to the distinct-endpoint product.

    Loads primers, templates and the requested query design, runs BLAST
    through a ``Design``, expands the single resulting container and prints
    two numbers for comparison: ``len(groups) ** 2`` and the number of
    distinct ``query_region.a`` values times the number of distinct
    ``query_region.b`` values.

    :param here: base directory the test-data path is joined onto
    :param paths: mapping providing the ``"primers"`` and ``"templates"``
        locations
    :param query: path component (file name or glob) appended to
        ``data/test_data/genbank/designs``
    :param span_cost: span cost model passed to ``Design``
    """
    primer_records = make_linear(load_fasta_glob(paths["primers"]))
    template_records = load_genbank_glob(paths["templates"])

    design_glob = join(here, "data/test_data/genbank/designs", query)
    query_records = make_circular(load_genbank_glob(design_glob))

    design = Design(span_cost)
    design.add_materials(
        primers=primer_records,
        templates=template_records,
        queries=query_records,
    )
    design._blast()

    # Exactly one container is expected from this set of queries.
    all_containers = design.container_list
    assert len(all_containers) == 1
    only_container = all_containers[0]
    only_container.expand()

    groups = only_container.groups()
    print(len(groups)**2)

    # Collect both endpoints of every group in one pass, then de-duplicate
    # each side independently.
    endpoints = [(g.query_region.a, g.query_region.b) for g in groups]
    distinct_a = {a for a, _ in endpoints}
    distinct_b = {b for _, b in endpoints}

    print(len(distinct_a) * len(distinct_b))
Пример #2
0
def blast_factory(paths) -> BioBlastFactory:
    """Build a ``BioBlastFactory`` loaded with primers, templates and queries.

    Primers are read as FASTA and passed through ``make_linear``; templates
    are read as GenBank from ``paths[REGISTRY]``; queries are read as GenBank
    and passed through ``make_circular``. Each record set is registered
    under its own key (``PRIMERS``, ``TEMPLATES``, ``QUERIES``).

    :param paths: mapping indexed by ``PRIMERS``, ``REGISTRY`` and ``QUERIES``
    :return: the populated factory
    """
    blast_factory_ = BioBlastFactory()

    primer_records = make_linear(load_fasta_glob(paths[PRIMERS]))
    template_records = load_genbank_glob(paths[REGISTRY])
    query_records = make_circular(load_genbank_glob(paths[QUERIES]))

    # Register in the same order as the records were loaded.
    keyed_records = (
        (primer_records, PRIMERS),
        (template_records, TEMPLATES),
        (query_records, QUERIES),
    )
    for records, key in keyed_records:
        blast_factory_.add_records(records, key)

    return blast_factory_
Пример #3
0
def test_benchmark_blast(benchmark, here, paths, query):
    """Benchmark ``Design._blast`` for a single query design.

    :param benchmark: callable that receives the function to be timed
    :param here: base directory the test-data path is joined onto
    :param paths: mapping providing the ``"primers"`` and ``"templates"``
        locations
    :param query: path component (file name or glob) appended to
        ``data/test_data/genbank/designs``
    """
    design_glob = join(here, "data/test_data/genbank/designs", query)

    primer_records = make_linear(load_fasta_glob(paths["primers"]))
    template_records = load_genbank_glob(paths["templates"])
    query_records = make_circular(load_genbank_glob(design_glob))

    design = Design()
    design.add_materials(
        primers=primer_records,
        templates=template_records,
        queries=query_records,
    )

    # One un-timed call first, then hand the bound method to the benchmark.
    design._blast()
    benchmark(design._blast)
Пример #4
0
    def make_blast():
        """Build a ``BioBlast`` of the test primers against one design file.

        Subjects are the FASTA primers (passed through ``make_linear``);
        queries are the records of a single GenBank design. Both are loaded
        with ``force_unique_ids=True``.

        :return: ``BioBlast(subjects, queries)``
        """
        primers_fasta = join(here, "data/test_data/primers/primers.fasta")
        design_genbank = join(
            here,
            "data/test_data/genbank/designs/pmodkan-ho-pact1-z4-er-vpr.gb",
        )

        subject_records = make_linear(
            load_fasta_glob(primers_fasta, force_unique_ids=True))
        query_records = load_genbank_glob(design_genbank,
                                          force_unique_ids=True)

        return BioBlast(subject_records, query_records)
Пример #5
0
    def run(self, n_jobs: int = 10):
        """Run a design job from sequence files through to written results.

        Loads primers, templates, fragments and goals from the locations
        stored on the instance, compiles and optimizes a ``Design``, then
        writes ``summary.csv``, ``sequence_design.csv`` and ``sequences.gb``.

        :param n_jobs: number of parallel jobs to run. (default: 10)
        :return: None
        """
        import warnings

        # Silence both warning categories for the remainder of the process.
        for ignored_category in (RuntimeWarning, BiopythonParserWarning):
            warnings.simplefilter(action="ignore", category=ignored_category)

        log = self._logger

        log.info("Loading sequence files")
        primer_records = make_linear(load_fasta_glob(self._primers))
        template_records = make_circular(load_genbank_glob(self._templates))
        fragment_records = make_linear(load_genbank_glob(self._fragments))
        goal_records = make_circular(load_genbank_glob(self._goals))

        design = Design()
        design.n_jobs = n_jobs
        design.add_materials(
            primers=primer_records,
            templates=template_records,
            fragments=fragment_records,
            queries=goal_records,
        )

        log.info("Getting span cost model")
        design.span_cost = self._get_span_cost()

        log.info("Compiling possible molecular assemblies")
        design.compile()

        log.info("Optimizing molecular assemblies")
        design.optimize()

        log.info("Designing assembly primers and fragments")
        design_df, summary_df, _design_json = design.to_df()
        # NOTE(review): both CSV files are written relative to the current
        # working directory, while sequences.gb goes to self._directory --
        # confirm this is intended.
        summary_df.to_csv("summary.csv")
        design_df.to_csv("sequence_design.csv")

        # Only the first assembly of each result contributes sequences;
        # results without assemblies are skipped.
        sequence_records = [
            molecule.sequence
            for result in design.results.values()
            if result.assemblies
            for _index, _role, molecule in result.assemblies[0].molecules
        ]

        genbank_path = os.path.join(self._directory, "sequences.gb")
        SeqIO.write(sequence_records, genbank_path, "genbank")
Пример #6
0
    def _get_results_func(n_jobs):
        """Run a design over the first ``LIM_NUM_DESIGNS`` test queries.

        :param n_jobs: when greater than 1 the design is run through
            ``design._run_with_pool(n_jobs, 1)``; otherwise ``design.run()``
            is used
        :return: tuple ``(design, design.results)``
        """
        print("PROCESSING!")

        all_designs_glob = join(here, "data/test_data/genbank/designs/*.gb")

        primer_records = make_linear(load_fasta_glob(paths["primers"]))
        template_records = load_genbank_glob(paths["templates"])
        # Cap the number of query designs that are processed.
        circular_queries = make_circular(load_genbank_glob(all_designs_glob))
        query_records = circular_queries[:LIM_NUM_DESIGNS]

        design = Design(span_cost=cached_span_cost)
        design.add_materials(
            primers=primer_records,
            templates=template_records,
            queries=query_records,
        )

        if n_jobs > 1:
            design._run_with_pool(n_jobs, 1)
        else:
            design.run()

        return design, design.results
Пример #7
0
def test_library_design_to_df_2(paths, here, span_cost):
    """Run a ``LibraryDesign`` on the ``test_data_sd2`` set and dump its output.

    Compiles and optimizes the design, prints the dataframe of the first
    assembly of every result, then writes ``library_design.csv``,
    ``library_summary.csv`` and ``designs.json`` to the current working
    directory and prints the same three objects.

    :param paths: not used by this test
    :param here: base directory the test-data paths are joined onto
    :param span_cost: span cost model passed to ``LibraryDesign``
    """
    data_dir = join(here, "data/test_data_sd2")
    primers_fasta = join(data_dir, "primers.fasta")
    fragments_glob = join(data_dir, "fragments", "*.gb")
    plasmids_glob = join(data_dir, "plasmids", "*.gb")
    designs_glob = join(data_dir, "designs", "*.gb")

    primer_records = make_linear(load_fasta_glob(primers_fasta))
    template_records = load_genbank_glob(plasmids_glob)
    fragment_records = load_genbank_glob(fragments_glob)
    print(fragments_glob)
    query_records = make_circular(load_genbank_glob(designs_glob))

    design = LibraryDesign(span_cost=span_cost)
    design.n_jobs = 1
    # Templates are marked circular and fragments linear only at this point.
    design.add_materials(
        primers=primer_records,
        templates=make_circular(template_records),
        queries=query_records,
        fragments=make_linear(fragment_records),
    )

    design.logger.set_level("DEBUG")
    design.compile()

    optimized = design.optimize()

    for query_result in optimized.values():
        first_assembly = query_result.assemblies[0]
        print(first_assembly.to_df())

    design_df, summary_df, design_json = design.to_df()
    design_df.to_csv("library_design.csv")
    summary_df.to_csv("library_summary.csv")
    with open("designs.json", "w") as json_file:
        json.dump(design_json, json_file)

    for output in (design_df, summary_df, design_json):
        print(output)
Пример #8
0
def test_bioblast_factory_init(here):
    """Build a ``BioBlastFactory`` and run primer and template BLASTs.

    Registers primers, queries and subjects with the factory, creates one
    blaster for primers-vs-queries and one for subjects-vs-queries, runs
    ``blastn_short`` on the former and ``blastn`` on the latter, and prints
    the number of results from each.

    :param here: base directory the test-data paths are joined onto
    """
    templates_glob = join(here, "data/test_data/genbank/templates/*.gb")
    designs_glob = join(here, "data/test_data/genbank/designs/*.gb")
    primers_glob = join(here, "data/test_data/primers/*.fasta")

    subject_records = load_genbank_glob(templates_glob, force_unique_ids=True)
    query_records = load_genbank_glob(designs_glob, force_unique_ids=True)
    primer_records = load_fasta_glob(primers_glob)

    factory = BioBlastFactory()
    factory.add_records(make_linear(primer_records), "primers")
    factory.add_records(query_records, "queries")
    factory.add_records(subject_records, "subjects")

    primers_vs_queries = factory("primers", "queries")
    subjects_vs_queries = factory("subjects", "queries")

    primer_hits = primers_vs_queries.blastn_short()
    template_hits = subjects_vs_queries.blastn()

    print(len(primer_hits))
    print(len(template_hits))
Пример #9
0
def test_library_design_to_df(paths, here, span_cost):
    """Run a ``LibraryDesign`` on the library test designs and dump its output.

    Compiles and optimizes the design, prints the optimization results, then
    writes ``library_design.csv``, ``library_summary.csv`` and
    ``designs.json`` to the current working directory and prints the same
    three objects.

    :param paths: mapping providing the ``"primers"`` and ``"templates"``
        locations
    :param here: base directory the test-data path is joined onto
    :param span_cost: span cost model passed to ``LibraryDesign``
    """
    library_glob = join(here, "data/test_data/genbank/library_designs/*.gb")

    primer_records = make_linear(load_fasta_glob(paths["primers"]))
    template_records = load_genbank_glob(paths["templates"])
    query_records = make_circular(load_genbank_glob(library_glob))

    design = LibraryDesign(span_cost=span_cost)
    design.n_jobs = 1
    design.add_materials(
        primers=primer_records,
        templates=template_records,
        queries=query_records,
    )

    design.logger.set_level("DEBUG")
    design.compile()
    print(design.optimize())

    design_df, summary_df, design_json = design.to_df()
    design_df.to_csv("library_design.csv")
    summary_df.to_csv("library_summary.csv")
    with open("designs.json", "w") as json_file:
        json.dump(design_json, json_file)

    for output in (design_df, summary_df, design_json):
        print(output)