Пример #1
0
    def test_minimizers(self):
        """
        Build one index with window size 23 (minimizers) and one with window
        size 0 (no minimizers) and check that the first .ibf file is smaller.
        """
        params = self.default_params.copy()
        ibf_files = []
        # Same params dict is reused for both runs, only prefix and window change
        for prefix, window_size in (("test_minimizers", 23),
                                    ("test_without_minimizers", 0)):
            params["db_prefix"] = self.results_dir + prefix
            params["window_size"] = window_size
            # Build config from params and run
            cfg = Config("build", **params)
            finished_ok = ganon.main(cfg=cfg)
            self.assertTrue(finished_ok, "ganon build exited with an error")
            # General sanity check of results
            parsed = build_sanity_check_and_parse(vars(cfg))
            self.assertIsNotNone(parsed,
                                 "ganon build has inconsistent results")
            ibf_files.append(params["db_prefix"] + ".ibf")

        ibf_with_minimizers, ibf_without_minimizers = ibf_files
        # Compare file sizes of both filters
        size_with = os.path.getsize(ibf_with_minimizers)
        size_without = os.path.getsize(ibf_without_minimizers)
        self.assertTrue(size_with < size_without,
                        "Filter with minimizers should be smaller")
Пример #2
0
    def test_header(self):
        """
        Run ganon table with "lineage" and "taxid" headers and check the
        format of the resulting column names.
        """
        params = self.default_params.copy()
        params["output_file"] = self.results_dir + "test_header.tsv"

        def run_with_header(header):
            # Run ganon table with the given header mode, return column names
            params["header"] = header
            cfg = Config("table", **params)
            self.assertTrue(ganon.main(cfg=cfg),
                            "ganon table exited with an error")
            # General sanity check of results
            parsed = table_sanity_check_and_parse(vars(cfg))
            self.assertIsNotNone(parsed,
                                 "ganon table has inconsistent results")
            return parsed["out_pd"].columns.values

        # lineage: every column name must contain the "|" separator
        lineage_flags = ["|" in col for col in run_with_header("lineage")]
        self.assertTrue(all(lineage_flags),
                        "ganon table headers are wrong (lineage)")

        # taxid: every column name must be numeric (for this specific test)
        taxid_flags = [col.isdigit() for col in run_with_header("taxid")]
        self.assertTrue(all(taxid_flags),
                        "ganon table headers are wrong (taxid)")
Пример #3
0
def main(which: str = None, cfg=None, **kwargs):
    """
    Run one ganon sub-command and return its result.

    Three entry points:
      - main() without args: Config(None) is built (per the original notes,
        the configuration is then parsed from sys.argv)
      - main(which, **kwargs), e.g. main("build", db_prefix="test", ...):
        a Config is generated from the arguments and executed
      - main(cfg=cfg): run directly with an already built Config()

    Returns False when validation or path setup fails, or when cfg.which is
    not one of the commands handled here; otherwise returns whatever the
    selected sub-command function returns.
    """
    if cfg is None:
        cfg = Config(which, **kwargs)

    # Validate
    if not cfg.validate():
        return False

    # Set paths
    if not cfg.set_paths():
        return False

    tx_total = time.time()

    print_log("- - - - - - - - - -", cfg.quiet)
    print_log("   _  _  _  _  _   ", cfg.quiet)
    print_log("  (_|(_|| |(_)| |  ", cfg.quiet)
    print_log("   _|   v. " + str(cfg.version), cfg.quiet)
    print_log("- - - - - - - - - -", cfg.quiet)

    if cfg.which == 'build':
        ret = build(cfg)
    elif cfg.which == 'update':
        ret = update(cfg)
    elif cfg.which == 'classify':
        ret = classify(cfg)
    elif cfg.which == 'report':
        ret = report(cfg)
    else:
        # Without this branch `ret` stayed unbound and the final return
        # raised UnboundLocalError instead of reporting a failure
        print_log("Unknown command: " + str(cfg.which), cfg.quiet)
        ret = False

    print_log(
        "Total elapsed time: " + str("%.2f" % (time.time() - tx_total)) +
        " seconds.", cfg.quiet)
    return ret
Пример #4
0
    def test_update_complete_add_remove(self):
        """
        Test run update complete adding and removing sequences (reusing same bins for new sequences)

        After the update, the updated index is used to classify simulated
        reads and the classification output is checked as well.
        """
        params = self.default_params.copy()
        params[
            "output_db_prefix"] = self.results_dir + "test_update_complete_add_remove"
        params["update_complete"] = True
        params[
            "seq_info_file"] = data_dir + "update/bacteria_half_virus_seqinfo.txt"
        # copy() is shallow: extending the list in place would also change
        # the list held by self.default_params and leak into other tests,
        # so a new list is built instead
        params["input_files"] = list(params["input_files"]) + [
            data_dir + "build/bacteria_NC_010333.1.fasta.gz",
            data_dir + "build/bacteria_NC_017164.1.fasta.gz"
        ]
        # Build config from params
        cfg = Config("update", **params)
        # Run
        self.assertTrue(ganon.main(cfg=cfg),
                        "ganon update exited with an error")
        # General sanity check of results
        res = update_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon update has inconsistent results")
        # Specific test - should have 6 taxid targets (2 bacteria, 4 viruses)
        self.assertEqual(res["bins_pd"]["taxid"].drop_duplicates().shape[0], 6,
                         "update failed to add new sequences")
        # Should re-use bins and reach max 18 (41 before)
        self.assertEqual(res["bins_pd"]["binid"].max(), 18,
                         "bins were not re-used")
        self.assertEqual(res["map_pd"]["binid"].max(), 18,
                         "bins were not re-used")

        # Classify against updated index
        params_classify = {
            "db_prefix": params["output_db_prefix"],
            "single_reads":
            [data_dir + "vir.sim.1.fq", data_dir + "bac.sim.1.fq"],
            "abs_cutoff": 0,
            "output_lca": True,
            "output_all": True,
            "quiet": True,
            # Own prefix so the output does not overwrite files written by
            # other tests using the "test_default" prefix
            "output_prefix":
            self.results_dir + "test_update_complete_add_remove"
        }

        # Build config from params
        cfg_classify = Config("classify", **params_classify)
        # Run
        self.assertTrue(ganon.main(cfg=cfg_classify),
                        "ganon classify exited with an error")
        # General sanity check of results
        res = classify_sanity_check_and_parse(vars(cfg_classify))
        self.assertIsNotNone(res, "ganon classify has inconsistent results")
        # Specific - no matches on the removed entries (taxid 1052684)
        self.assertFalse(res["all_pd"]["target"].isin(["1052684"]).any(),
                         "ganon classify has inconsistent results")
        # should return Viruses and Bacteria matches on the updated index
        self.assertTrue(
            res["tre_pd"][res["tre_pd"]["rank"] == "superkingdom"]
            ["name"].isin(["Bacteria", "Viruses"]).all(),
            "classification on updated index failed")
Пример #5
0
    def test_update_complete_add(self):
        """
        Test run update complete adding sequences only

        After the update, the updated index is used to classify simulated
        reads and the classification output is checked as well.
        """
        params = self.default_params.copy()
        params[
            "output_db_prefix"] = self.results_dir + "test_update_complete_add"
        params["update_complete"] = True
        params[
            "seq_info_file"] = data_dir + "update/bacteria_virus_seqinfo.txt"
        # copy() is shallow: extending the list in place would also change
        # the list held by self.default_params and leak into other tests,
        # so a new list is built instead
        params["input_files"] = list(params["input_files"]) + [
            data_dir + "build/bacteria_NC_010333.1.fasta.gz",
            data_dir + "build/bacteria_NC_017164.1.fasta.gz",
            data_dir + "build/bacteria_NC_017163.1.fasta.gz",
            data_dir + "build/bacteria_NC_017543.1.fasta.gz"
        ]

        # Build config from params
        cfg = Config("update", **params)
        # Run
        self.assertTrue(ganon.main(cfg=cfg),
                        "ganon update exited with an error")
        # General sanity check of results
        res = update_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon update has inconsistent results")
        # Specific - check if number of bins increased
        self.assertTrue(res["map_pd"].binid.max() > 41, "no bins were added")

        # Classify simulated reads against updated index
        params_classify = {
            "db_prefix": params["output_db_prefix"],
            "single_reads":
            [data_dir + "vir.sim.1.fq", data_dir + "bac.sim.1.fq"],
            "abs_cutoff": 0,
            "output_lca": True,
            "output_all": True,
            "quiet": True,
            "output_prefix": self.results_dir + "test_update_complete_add"
        }
        # Build config from params
        cfg_classify = Config("classify", **params_classify)
        # Run
        self.assertTrue(ganon.main(cfg=cfg_classify),
                        "ganon classify exited with an error")
        # General sanity check of results
        res = classify_sanity_check_and_parse(vars(cfg_classify))
        self.assertIsNotNone(res, "ganon classify has inconsistent results")
        # Specific test - should return Viruses and Bacteria matches on the updated index
        self.assertTrue(
            res["tre_pd"][res["tre_pd"]["rank"] == "superkingdom"]
            ["name"].isin(["Bacteria", "Viruses"]).all(),
            "classification on updated index failed")
Пример #6
0
    def test_update_complete_remove(self):
        """
        Test run update complete removing sequences only

        After the update, the reduced index is used to classify simulated
        reads and the classification output is checked as well.
        """
        params = self.default_params.copy()
        params[
            "output_db_prefix"] = self.results_dir + "test_update_complete_remove"
        params["update_complete"] = True
        params["seq_info_file"] = data_dir + "update/bacteria_half_seqinfo.txt"
        # Replaces (does not extend) the default input files
        params["input_files"] = [
            data_dir + "build/bacteria_NC_010333.1.fasta.gz",
            data_dir + "build/bacteria_NC_017164.1.fasta.gz"
        ]

        # Build config from params
        cfg = Config("update", **params)
        # Run
        self.assertTrue(ganon.main(cfg=cfg),
                        "ganon update exited with an error")
        # General sanity check of results
        res = update_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon update has inconsistent results")
        # Specific - keep only two entries
        self.assertEqual(res["bins_pd"]["seqid"].drop_duplicates().shape[0], 2,
                         "sequences not removed from bins")
        self.assertEqual(res["map_pd"]["target"].drop_duplicates().shape[0], 2,
                         "sequences not removed from .map")

        # Classify against reduced updated index
        params_classify = {
            "db_prefix": params["output_db_prefix"],
            # List form, consistent with the other classify tests
            "single_reads": [data_dir + "bac.sim.1.fq"],
            "abs_cutoff": 0,
            "output_lca": True,
            "output_all": True,
            "quiet": True,
            # Own prefix so the output does not overwrite files written by
            # other tests using the "test_default" prefix
            "output_prefix": self.results_dir + "test_update_complete_remove"
        }

        # Build config from params
        cfg_classify = Config("classify", **params_classify)
        # Run
        self.assertTrue(ganon.main(cfg=cfg_classify),
                        "ganon classify exited with an error")
        # General sanity check of results
        res = classify_sanity_check_and_parse(vars(cfg_classify))
        self.assertIsNotNone(res, "ganon classify has inconsistent results")
        # Specific - only matches on remaining sequences (taxids 366602 and 470)
        self.assertTrue(res["all_pd"]["target"].isin(["366602", "470"]).all(),
                        "ganon classify has inconsistent results")
Пример #7
0
    def test_no_rank_no_root_unclassified(self):
        """
        Run ganon table with an empty rank, lineage headers, no_root enabled
        and an "unclassified" label, then check columns and total counts.
        """
        params = self.default_params.copy()
        params.update({
            "output_file": self.results_dir + "test_no_rank_no_root_unc.tsv",
            "rank": "",
            "header": "lineage",
            "no_root": True,
            "unclassified_label": "unclassified",
        })

        # Build config from params and run
        cfg = Config("table", **params)
        finished_ok = ganon.main(cfg=cfg)
        self.assertTrue(finished_ok, "ganon table exited with an error")
        # General sanity check of results
        res = table_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon table has inconsistent results")

        # Expected number of output columns
        self.assertEqual(res["out_pd"].columns.values.size, 50,
                         "ganon table without rank failed")
        # Some column names that must be present (lineages start without root)
        for expected_column in ("2", "2|1239|909932|1843489|31977",
                                "unclassified"):
            self.assertTrue(expected_column in res["out_pd"].columns.values,
                            "ganon table without rank failed")

        # Sum of counts should be total of all reads with remaining root matches counted as unclassified
        total_counts = res["out_pd"].sum().sum()
        self.assertEqual(total_counts, 3786439,
                         "ganon table without rank failed")
Пример #8
0
    def test_duplicated(self):
        """
        Test duplicated entries on update

        Runs an update and checks that .tax has no duplicated taxids and that
        no target from the previous .map shows up again in newly added bins.
        """
        params = self.default_params.copy()

        params["output_db_prefix"] = self.results_dir + "test_duplicated"
        # copy() is shallow: extending the list in place would also change
        # the list held by self.default_params and leak into other tests,
        # so a new list is built instead
        params["input_files"] = list(params["input_files"]) + [
            data_dir + "build/bacteria_NC_010333.1.fasta.gz",
            data_dir + "build/bacteria_NC_017164.1.fasta.gz",
            data_dir + "build/bacteria_NC_017163.1.fasta.gz",
            data_dir + "build/bacteria_NC_017543.1.fasta.gz"
        ]
        params[
            "seq_info_file"] = data_dir + "update/bacteria_virus_seqinfo.txt"

        # Build config from params
        cfg = Config("update", **params)
        # Run
        self.assertTrue(ganon.main(cfg=cfg),
                        "ganon update exited with an error")
        # General sanity check of results
        res = update_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon update has inconsistent results")
        # Specific test - check if there are no duplicates in the datastructure
        self.assertFalse(res["tax_pd"]["taxid"].duplicated().any(),
                         "duplicated entries on .tax after update")
        # Check if new map has any target from before in new bins
        # (new bins are those with a binid above the previous maximum)
        map_before = parse_map(params["db_prefix"] + ".map")
        new_targets = res["map_pd"][
            res["map_pd"]["binid"] > map_before.binid.max()]
        self.assertFalse(
            map_before["target"].isin(new_targets["target"]).any(),
            "duplicated entries on .map after update")
Пример #9
0
    def test_specialization_file_single(self):
        """
        ganon build --specialization file, with all default input files
        merged into one single file
        """
        params = self.default_params.copy()

        # Merge all input files into one and use it as the only input
        merged_file = self.results_dir + "merged_input_files.fasta.gz"
        merge_gz(params["input_files"], merged_file)
        params["input_files"] = merged_file
        params["db_prefix"] = (self.results_dir +
                               "test_specialization_file_single")
        params["specialization"] = "file"

        # Build config from params and run
        cfg = Config("build", **params)
        finished_ok = ganon.main(cfg=cfg)
        self.assertTrue(finished_ok, "ganon build exited with an error")
        # General sanity check of results
        res = build_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon build has inconsistent results")

        # Specific test - count entries with rank "file" in .tax
        n_file_ranks = sum(res["tax_pd"]["rank"] == "file")
        self.assertEqual(n_file_ranks, 4,
                         "failed to use file name as specialization")
        # Check if all targets starts with "NC_" - fails to use file specialization and replaces it with sequence accession
        starts_with_nc = res["map_pd"]["target"].map(
            lambda target: target.startswith("NC_"))
        self.assertTrue(starts_with_nc.all(),
                        "failed to use sequence accession as specialization")
Пример #10
0
    def test_multiple_rep_files_split_hierachy(self):
        """
        Run ganon report on two .rep files with split_hierarchy enabled and
        check the number of generated reports.
        """
        params = self.default_params.copy()
        params.update({
            "rep_files": [
                data_dir + "report/results.rep",
                data_dir + "report/results2.rep",
            ],
            "output_prefix":
            self.results_dir + "test_multiple_rep_files_split_hierachy_",
            "split_hierarchy": True,
        })

        # Build config from params and run
        cfg = Config("report", **params)
        finished_ok = ganon.main(cfg=cfg)
        self.assertTrue(finished_ok, "ganon report exited with an error")
        # General sanity check of results
        res = report_sanity_check_and_parse(vars(cfg),
                                            sum_full_percentage=False)
        self.assertIsNotNone(res, "ganon report has inconsistent results")

        # should have 2+4 outputs (6 hierarchies)
        n_reports = len(res)
        self.assertEqual(
            n_reports, 6,
            "ganon report did not generate multiple report files")
Пример #11
0
    def test_split_hierachy(self):
        """
        Run ganon report with split_hierarchy enabled and check that the root
        percentages of all split reports plus one unclassified percentage add
        up to 100 (after truncation to int).
        """
        params = self.default_params.copy()
        params.update({
            "output_prefix": self.results_dir + "test_split_hierachy",
            "rep_files": [data_dir + "report/results2.rep"],
            "split_hierarchy": True,
        })

        # Build config from params and run
        cfg = Config("report", **params)
        finished_ok = ganon.main(cfg=cfg)
        self.assertTrue(finished_ok, "ganon report exited with an error")
        # General sanity check of results
        res = report_sanity_check_and_parse(vars(cfg),
                                            sum_full_percentage=False)
        self.assertIsNotNone(res, "ganon report has inconsistent results")

        # Add up the root percentage of every split report
        total_root_split = 0
        for report in res.values():
            tre_pd = report["tre_pd"]
            root_rows = tre_pd[tre_pd['rank'] == "root"]
            total_root_split += root_rows["cumulative_perc"].values[0]
        # Add unclassified only once, taken from the last report iterated
        last_tre_pd = report["tre_pd"]
        unclassified_rows = last_tre_pd[last_tre_pd['rank'] == "unclassified"]
        total_root_split += unclassified_rows["cumulative_perc"].values[0]
        # values reported on root of split reports should equal 100
        self.assertEqual(int(total_root_split), 100,
                         "ganon report with wrong root counts")
Пример #12
0
    def test_no_rank(self):
        """
        Run ganon table with an empty rank and lineage headers, then check
        the resulting columns and the total count.
        """
        params = self.default_params.copy()
        params.update({
            "output_file": self.results_dir + "test_no_rank.tsv",
            "rank": "",
            "header": "lineage",
        })

        # Build config from params and run
        cfg = Config("table", **params)
        finished_ok = ganon.main(cfg=cfg)
        self.assertTrue(finished_ok, "ganon table exited with an error")
        # General sanity check of results
        res = table_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon table has inconsistent results")

        # Expected number of output columns
        self.assertEqual(res["out_pd"].columns.values.size, 50,
                         "ganon table without rank failed")
        # Some lineages that must be present as column names
        for expected_column in ("1|2", "1|2|1239|909932|1843489|31977"):
            self.assertTrue(expected_column in res["out_pd"].columns.values,
                            "ganon table without rank failed")

        # Sum of counts should be total of all reads
        total_counts = res["out_pd"].sum().sum()
        self.assertEqual(total_counts, 1973568,
                         "ganon table without rank failed")
Пример #13
0
    def test_bin_fragment_overlap_length(self):
        """
        Build with custom bin, fragment and overlap lengths and check that
        fragments and bins respect the configured maximum sizes.
        """
        params = self.default_params.copy()
        params.update({
            "db_prefix":
            self.results_dir + "test_bin_fragment_overlap_length",
            "bin_length": 5692,
            "fragment_length": 667,
            "overlap_length": 349,
        })

        # Build config from params and run
        cfg = Config("build", **params)
        finished_ok = ganon.main(cfg=cfg)
        self.assertTrue(finished_ok, "ganon build exited with an error")
        # General sanity check of results
        res = build_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon build has inconsistent results")

        # A fragment can be at most fragment length plus overlap length
        longest_fragment = max(res["bins_pd"]["length"])
        fragment_limit = params["fragment_length"] + params["overlap_length"]
        self.assertTrue(longest_fragment <= fragment_limit,
                        "Fragment greater than max.")
        # Summed length per bin can be at most the bin length
        length_per_bin = res["bins_pd"].groupby("binid").sum()["length"]
        self.assertTrue(
            max(length_per_bin) <= params["bin_length"],
            "Bin length greater than max.")
Пример #14
0
    def test_specialization_custom(self):
        """
        ganon build --specialization custom: every specialization listed in
        seq_info must show up in bins, .map and .tax
        """
        params = self.default_params.copy()
        params.update({
            "db_prefix": self.results_dir + "test_specialization_custom",
            "specialization": "custom",
        })

        # Build config from params and run
        cfg = Config("build", **params)
        finished_ok = ganon.main(cfg=cfg)
        self.assertTrue(finished_ok, "ganon build exited with an error")
        # General sanity check of results
        res = build_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon build has inconsistent results")

        # Specific test
        # Check if all assembly ids are on bins and map and tax
        in_bins = res["seq_info"]["specialization"].isin(
            res["bins_pd"]["specialization"])
        self.assertTrue(in_bins.all(), "Missing assembly ids on bins")

        in_map = res["seq_info"]["specialization"].isin(
            res["map_pd"]["target"].drop_duplicates())
        self.assertTrue(in_map.all(), "Missing assembly ids on .map")

        in_tax = res["seq_info"]["specialization"].isin(
            res["tax_pd"]["taxid"].drop_duplicates())
        self.assertTrue(in_tax.all(), "Missing assembly ids on .tax")
Пример #15
0
    def test_min_count_and_percentages(self):
        """
        Run ganon report with min_percentage and min_count and check that the
        reported entries respect both thresholds.
        """
        params = self.default_params.copy()
        params.update({
            "output_prefix":
            self.results_dir + "test_min_count_and_percentages",
            "min_percentage": 0.2,
            "min_count": 50,
        })

        # Build config from params and run
        cfg = Config("report", **params)
        finished_ok = ganon.main(cfg=cfg)
        self.assertTrue(finished_ok, "ganon report exited with an error")
        # General sanity check of results
        res = report_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon report has inconsistent results")

        # check if none of the rows outside idx_base is lower than min_count
        above_count = (res["tre_pd"][~res["idx_base"]]["cumulative"] >=
                       params["min_count"])
        self.assertTrue(above_count.all(),
                        "ganon report failed filtering with --min-count")
        # check if none of the rows outside idx_base is lower than min_percentage
        above_perc = (res["tre_pd"][~res["idx_base"]]["cumulative_perc"] >=
                      params["min_percentage"])
        self.assertTrue(above_perc.all(),
                        "ganon report failed filtering with --min-percentage")
Пример #16
0
    def test_minimizers(self):
        """
        Build an index with window size 23 (minimizers) and classify against
        it with relative cutoff and filter values.
        """
        # Step 1: build the base index with minimizers
        build_files = [
            data_dir + "build/bacteria_NC_010333.1.fasta.gz",
            data_dir + "build/bacteria_NC_017164.1.fasta.gz",
            data_dir + "build/bacteria_NC_017163.1.fasta.gz",
            data_dir + "build/bacteria_NC_017543.1.fasta.gz",
        ]
        build_params = dict(
            taxdump_file=[
                data_dir + "mini_nodes.dmp", data_dir + "mini_names.dmp"
            ],
            input_files=build_files,
            seq_info_file=data_dir + "build/bacteria_seqinfo.txt",
            write_seq_info_file=True,
            window_size=23,
            rank="species",
            quiet=True,
            db_prefix=self.results_dir + "base_build_minimizers",
        )
        cfg_build = Config("build", **build_params)
        build_ok = ganon.main(cfg=cfg_build)
        self.assertTrue(build_ok, "ganon build exited with an error")
        # General sanity check of results
        res_build = build_sanity_check_and_parse(vars(cfg_build))
        self.assertIsNotNone(res_build, "ganon build has inconsistent results")

        # Step 2: classify against the index built above
        params = self.default_params.copy()
        params.update({
            "output_prefix": self.results_dir + "test_minimizers",
            "db_prefix": self.results_dir + "base_build_minimizers",
            "rel_cutoff": 0.75,
            "rel_filter": 0.1,
        })
        cfg = Config("classify", **params)
        classify_ok = ganon.main(cfg=cfg)
        self.assertTrue(classify_ok, "ganon classify exited with an error")
        # General sanity check of results
        res = classify_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon classify has inconsistent results")
Пример #17
0
    def test_default(self):
        """
        ganon update with default parameters and an empty taxdump_file list
        (online: eutils, taxdump), followed by a classify run on the result
        """
        params = self.default_params.copy()
        params.update({
            "output_db_prefix": self.results_dir + "test_default",
            "taxdump_file": [],
        })
        # Build config from params and run
        cfg = Config("update", **params)
        update_ok = ganon.main(cfg=cfg)
        self.assertTrue(update_ok, "ganon update exited with an error")
        # General sanity check of results
        res = update_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon update has inconsistent results")

        # Classify simulated reads against updated index
        simulated_reads = [
            data_dir + "vir.sim.1.fq", data_dir + "bac.sim.1.fq"
        ]
        params_classify = dict(
            db_prefix=params["output_db_prefix"],
            single_reads=simulated_reads,
            max_error=0,
            output_all=True,
            quiet=True,
            output_lca=True,
            output_prefix=self.results_dir + "test_default",
        )

        # Build config from params and run
        cfg_classify = Config("classify", **params_classify)
        classify_ok = ganon.main(cfg=cfg_classify)
        self.assertTrue(classify_ok, "ganon classify exited with an error")
        # General sanity check of results
        res = classify_sanity_check_and_parse(vars(cfg_classify))
        self.assertIsNotNone(res, "ganon classify has inconsistent results")
        # Specific test - should return Viruses and Bacteria matches on the updated index
        superkingdoms = res["tre_pd"][res["tre_pd"]["rank"] == "superkingdom"]
        only_expected = superkingdoms["name"].isin(["Bacteria", "Viruses"])
        self.assertTrue(only_expected.all(),
                        "classification on updated index failed")
Пример #18
0
 def test_default_offline(self):
     """
     ganon build with the default parameters, only the db prefix is set
     """
     params = self.default_params.copy()
     params.update({"db_prefix": self.results_dir + "test_default"})
     # Build config from params and run
     cfg = Config("build", **params)
     finished_ok = ganon.main(cfg=cfg)
     self.assertTrue(finished_ok, "ganon build exited with an error")
     # General sanity check of results
     parsed = build_sanity_check_and_parse(vars(cfg))
     self.assertIsNotNone(parsed, "ganon build has inconsistent results")
Пример #19
0
def classify(cfg):
    """
    Run the ganon-classify executable for ``cfg`` and, when an output prefix
    is set, call ``report`` on the resulting ``<output_prefix>.rep`` file.

    The command line is assembled from the attributes of ``cfg``. Options that
    are not set contribute an empty string, so the final command may contain
    consecutive spaces.

    Returns the value returned by ``report`` when ``cfg.output_prefix`` is
    set, otherwise True.
    """

    def list_opt(flag, values, to_text=None):
        # "<flag> v1,v2,..." or "" when there is nothing to pass
        if not values:
            return ""
        if to_text is not None:
            values = [to_text(v) for v in values]
        return flag + " " + ",".join(values)

    def value_opt(flag, value, enabled):
        # "<flag> <value>" or "" when the option is disabled
        if not enabled:
            return ""
        return flag + " " + str(value)

    def switch(flag, enabled):
        # Bare flag or "" when the switch is off
        return flag if enabled else ""

    def db_files(extension):
        # Comma-separated database files sharing the given extension
        return ",".join([prefix + extension for prefix in cfg.db_prefix])

    print_log("Classifying reads (ganon-classify)", cfg.quiet)

    command_parts = [
        cfg.path_exec['classify'],
        list_opt("--single-reads", cfg.single_reads),
        list_opt("--paired-reads", cfg.paired_reads),
        "--ibf " + db_files(".ibf"),
        "--map " + db_files(".map"),
        "--tax " + db_files(".tax"),
        list_opt("--hierarchy-labels", cfg.hierarchy_labels),
        list_opt("--max-error", cfg.max_error, str),
        list_opt("--min-kmers", cfg.min_kmers, str),
        list_opt("--max-error-unique", cfg.max_error_unique, str),
        list_opt("--strata-filter", cfg.strata_filter, str),
        value_opt("--offset", cfg.offset, cfg.offset),
        "--output-prefix " + cfg.output_prefix if cfg.output_prefix else "",
        switch("--output-all", cfg.output_all),
        switch("--output-unclassified", cfg.output_unclassified),
        switch("--output-single", cfg.output_single),
        value_opt("--threads", cfg.threads, cfg.threads),
        # n_reads / n_batches are passed whenever they are not None
        value_opt("--n-reads", cfg.n_reads, cfg.n_reads is not None),
        value_opt("--n-batches", cfg.n_batches, cfg.n_batches is not None),
        switch("--verbose", cfg.verbose),
        switch("--quiet", cfg.quiet),
    ]
    run_ganon_classify = " ".join(command_parts)

    stdout, stderr = run(run_ganon_classify)
    # Without an output prefix the captured stdout is printed instead
    if not cfg.output_prefix:
        print(stdout)
    print_log(stderr, cfg.quiet)

    if not cfg.output_prefix:
        return True

    # Build the report configuration from the classification output
    report_cfg = Config("report",
                        db_prefix=cfg.db_prefix,
                        rep_file=cfg.output_prefix + ".rep",
                        output_prefix=cfg.output_prefix,
                        ranks=cfg.ranks,
                        output_format="tsv",
                        verbose=cfg.verbose,
                        quiet=cfg.quiet)
    return report(report_cfg)
Пример #20
0
 def test_default(self):
     """
     ganon table with the default parameters
     """
     table_args = self.default_params.copy()
     table_args["output_file"] = self.results_dir + "test_default.tsv"
     table_cfg = Config("table", **table_args)

     # ganon must report a successful run
     ran_ok = ganon.main(cfg=table_cfg)
     self.assertTrue(ran_ok, "ganon table exited with an error")

     # The table sanity check must not return None
     parsed = table_sanity_check_and_parse(vars(table_cfg))
     self.assertIsNotNone(parsed, "ganon table has inconsistent results")
Пример #21
0
    def test_specialization_on_default(self):
        """
        ganon update --specialization custom on a previously generated index
        without specialization (the run is expected to fail)
        """
        params = self.default_params.copy()
        params["db_prefix"] = data_dir + "bacteria_default"
        params["output_db_prefix"] = (self.results_dir +
                                      "test_specialization_on_default")
        params["specialization"] = "custom"

        # Build config from params
        cfg = Config("update", **params)
        # Should not run: this assertion only fails when the update succeeds,
        # so the message describes the unexpected success
        self.assertFalse(ganon.main(cfg=cfg),
                         "ganon update ran but it should fail")
Пример #22
0
    def test_default(self):
        """
        ganon report with the default parameters
        """
        report_args = self.default_params.copy()
        report_args.update(output_prefix=self.results_dir + "test_default")
        report_cfg = Config("report", **report_args)

        # The report run must succeed
        ran_ok = ganon.main(cfg=report_cfg)
        self.assertTrue(ran_ok, "ganon report exited with an error")

        # The report sanity check must not return None
        parsed = report_sanity_check_and_parse(vars(report_cfg))
        self.assertIsNotNone(parsed, "ganon report has inconsistent results")
Пример #23
0
    def test_invalid_rank(self):
        """
        ganon build --rank xyz (a rank name described as invalid)
        """
        # NOTE(review): the rank is labelled invalid, yet the run is asserted
        # to succeed below - confirm that this is the intended behavior
        build_args = self.default_params.copy()
        build_args["db_prefix"] = self.results_dir + "test_invalid_rank"
        build_args["rank"] = "xyz"
        build_cfg = Config("build", **build_args)

        # The build must report success
        build_ok = ganon.main(cfg=build_cfg)
        self.assertTrue(build_ok, "ganon build exited with an error")

        # The build sanity check must not return None
        parsed = build_sanity_check_and_parse(vars(build_cfg))
        self.assertIsNotNone(parsed, "ganon build has inconsistent results")
Пример #24
0
    def test_min_percentage(self):
        """
        ganon table --min-percentage
        """
        table_args = self.default_params.copy()
        table_args["output_file"] = self.results_dir + "test_min_percentage.tsv"
        table_args["min_percentage"] = 0.01
        table_cfg = Config("table", **table_args)

        # The table run must succeed
        ran_ok = ganon.main(cfg=table_cfg)
        self.assertTrue(ran_ok, "ganon table exited with an error")

        # The table sanity check must not return None
        parsed = table_sanity_check_and_parse(vars(table_cfg))
        self.assertIsNotNone(parsed, "ganon table has inconsistent results")

        # Every value has to be either zero or at least min_percentage
        is_zero = parsed["out_pd"] == 0
        reaches_minimum = parsed["out_pd"] >= table_args["min_percentage"]
        self.assertTrue((is_zero | reaches_minimum).all(axis=None),
                        "ganon table min count filter failed")
Пример #25
0
    def test_rank(self):
        """
        ganon table restricted to a single rank (superkingdom)
        """
        table_args = self.default_params.copy()
        table_args["output_file"] = self.results_dir + "test_rank.tsv"
        table_args["rank"] = "superkingdom"
        table_cfg = Config("table", **table_args)

        # The table run must succeed
        ran_ok = ganon.main(cfg=table_cfg)
        self.assertTrue(ran_ok, "ganon table exited with an error")

        # The table sanity check must not return None
        parsed = table_sanity_check_and_parse(vars(table_cfg))
        self.assertIsNotNone(parsed, "ganon table has inconsistent results")

        # Exactly one column is expected at this rank (just bacteria)
        column_count = parsed["out_pd"].columns.values.size
        self.assertEqual(column_count, 1, "ganon table rank selection failed")
Пример #26
0
 def test_input_directory(self):
     """
     ganon table reading its inputs from a directory, selected by extension
     """
     table_args = self.default_params.copy()
     table_args["output_file"] = self.results_dir + "test_input_directory.tsv"
     # Replace the explicit file list with a directory + extension pair
     table_args.pop("tre_files")
     table_args["input_directory"] = data_dir + "table/"
     table_args["input_extension"] = ".tre"
     table_cfg = Config("table", **table_args)

     # The table run must succeed
     ran_ok = ganon.main(cfg=table_cfg)
     self.assertTrue(ran_ok, "ganon table exited with an error")

     # The table sanity check must not return None
     parsed = table_sanity_check_and_parse(vars(table_cfg))
     self.assertIsNotNone(parsed, "ganon table has inconsistent results")
Пример #27
0
    def test_names_with(self):
        """
        ganon table --names-with
        """
        table_args = self.default_params.copy()
        table_args["output_file"] = self.results_dir + "test_names_with.tsv"
        table_args["names_with"] = "Prevotella"
        table_cfg = Config("table", **table_args)

        # The table run must succeed
        ran_ok = ganon.main(cfg=table_cfg)
        self.assertTrue(ran_ok, "ganon table exited with an error")

        # The table sanity check must not return None
        parsed = table_sanity_check_and_parse(vars(table_cfg))
        self.assertIsNotNone(parsed, "ganon table has inconsistent results")

        # Every reported column name has to contain "Prevotella"
        column_names = parsed["out_pd"].columns.values
        only_matching = all("Prevotella" in name for name in column_names)
        self.assertTrue(only_matching, "ganon table names with filter failed")
Пример #28
0
    def test_names(self):
        """
        Test ganon table with --names
        """
        params = self.default_params.copy()
        params["output_file"] = self.results_dir + "test_names.tsv"
        params["names"] = "Veillonella tobetsuensis"

        # Build config from params
        cfg = Config("table", **params)
        # Run
        self.assertTrue(ganon.main(cfg=cfg), "ganon table exited with an error")
        # General sanity check of results
        res = table_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon table has inconsistent results")
        # should output only the species named Veillonella tobetsuensis.
        # Compare as a plain list: comparing the column array directly with a
        # string is element-wise, so a column count other than one would not
        # produce a clean assertion failure
        self.assertEqual(list(res["out_pd"].columns.values),
                         ["Veillonella tobetsuensis"],
                         "ganon table names filter failed")
Пример #29
0
    def test_taxids_relative(self):
        """
        ganon table --taxids with a taxid that is not on the chosen rank
        """
        table_args = self.default_params.copy()
        table_args["output_file"] = self.results_dir + "test_taxids_relative.tsv"
        table_args["taxids"] = "838"  # genus: Prevotella
        table_cfg = Config("table", **table_args)

        # The table run must succeed
        ran_ok = ganon.main(cfg=table_cfg)
        self.assertTrue(ran_ok, "ganon table exited with an error")

        # The table sanity check must not return None
        parsed = table_sanity_check_and_parse(vars(table_cfg))
        self.assertIsNotNone(parsed, "ganon table has inconsistent results")

        # Only species of the genus 838 (Prevotella) are expected as columns
        column_names = parsed["out_pd"].columns.values
        only_prevotella = all("Prevotella" in name for name in column_names)
        self.assertTrue(only_prevotella, "ganon table taxids filter failed")
Пример #30
0
 def test_input_directory(self):
     """
     ganon build reading its inputs from a directory, selected by extension
     """
     build_args = self.default_params.copy()
     build_args["db_prefix"] = self.results_dir + "test_input_directory"
     # Replace the explicit file list with a directory + extension pair
     build_args.pop("input_files")
     build_args["input_directory"] = data_dir + "build/"
     build_args["input_extension"] = ".fasta.gz"
     build_cfg = Config("build", **build_args)

     # The build must report success
     build_ok = ganon.main(cfg=build_cfg)
     self.assertTrue(build_ok, "ganon build exited with an error")

     # The build sanity check must not return None
     parsed = build_sanity_check_and_parse(vars(build_cfg))
     self.assertIsNotNone(parsed, "ganon build has inconsistent results")