Пример #1
0
class TestScreen(unittest.TestCase):
    """Exercise ``Screen.screenshot`` and ``Screen._import_libs`` on scratch files."""

    def setUp(self):
        """Build the scratch folder tree, a FASTA fixture and the ``Screen`` under test."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.output = os.path.join(self.test_folder, "output")
        self.tex_wig = os.path.join(self.test_folder, "tex")
        self.frag_wig = os.path.join(self.test_folder, "frag")
        # exist_ok=True also repairs a partially built tree left behind by an
        # interrupted run; guarding only on the root folder skipped every
        # sub-folder whenever the root already existed.
        for folder in (self.tex_wig, self.frag_wig, self.output):
            os.makedirs(folder, exist_ok=True)
        self.fasta = os.path.join(self.test_folder, "aaa.fa")
        gen_file(self.fasta, self.example.fasta)
        args = self.mock_args.mock()
        args.output_folder = self.output
        args.fasta = self.fasta
        self.screen = Screen(args)

    def tearDown(self):
        """Remove the scratch folder tree created by ``setUp``."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_screenshot(self):
        """Run ``screenshot`` and check the output folders and the reverse batch text."""
        wig_fixtures = (
            (self.tex_wig, "tex_1_f.wig", self.example.wig_f),
            (self.tex_wig, "notex_1_f.wig", self.example.wig_f),
            (self.frag_wig, "frag_f.wig", self.example.wig_f),
            (self.tex_wig, "tex_1_r.wig", self.example.wig_r),
            (self.tex_wig, "notex_1_r.wig", self.example.wig_r),
            (self.frag_wig, "frag_r.wig", self.example.wig_r),
        )
        for folder, name, content in wig_fixtures:
            gen_file(os.path.join(folder, name), content)
        args = self.mock_args.mock()
        args.fasta = self.fasta
        args.main_gff = os.path.join(self.test_folder, "main.gff")
        gen_file(args.main_gff, self.example.main_gff)
        side_gff = os.path.join(self.test_folder, "side.gff")
        args.side_gffs = [side_gff]
        gen_file(side_gff, self.example.side_gff)
        args.frag_wigs = self.frag_wig
        args.tex_wigs = self.tex_wig
        args.height = 1000
        args.tlibs = ["tex_1_f.wig:tex:1:a:+", "tex_1_r.wig:tex:1:a:-",
                      "notex_1_f.wig:notex:1:a:+", "notex_1_r.wig:notex:1:a:-"]
        args.flibs = ["frag_f.wig:frag:1:a:+", "frag_r.wig:frag:1:a:-"]
        args.present = "expand"
        args.output_folder = self.output
        self.screen.screenshot(args)
        screenshot_dir = os.path.join(self.output, "screenshots", "aaa")
        self.assertTrue(os.path.exists(os.path.join(screenshot_dir, "forward")))
        self.assertTrue(os.path.exists(os.path.join(screenshot_dir, "reverse")))
        # The forward batch file is still loaded so that a missing or
        # unreadable file surfaces here, but its content is no longer bound to
        # a name that was immediately overwritten by the reverse data.
        # NOTE(review): no expected forward fixture is visible in this file;
        # add a content assertion once one exists.
        import_data(os.path.join(screenshot_dir, "forward.txt"))
        reverse_lines = import_data(os.path.join(screenshot_dir, "reverse.txt"))
        self.assertEqual("\n".join(reverse_lines), self.example.out_r)

    def test_import_libs(self):
        """Check that "+"-strand tex/notex libraries land in the ``ft``/``fn`` lists."""
        texs = [["tex_1.wig", "tex", "1", "a", "+"],
                ["notex_1.wig", "notex", "1", "a", "+"]]
        lib_dict = {"ft": [], "fn": [], "rt": [], "rn": [], "ff": [], "rf": []}
        self.screen._import_libs(texs, "+", self.tex_wig, lib_dict)
        # Expected paths are derived from self.tex_wig instead of hard-coding
        # "/" separators (identical strings on POSIX).
        # NOTE(review): assumes _import_libs joins with os.path.join - confirm.
        expected = {
            "ft": [os.path.join(self.tex_wig, "tex_1.wig")],
            "fn": [os.path.join(self.tex_wig, "notex_1.wig")],
            "rt": [], "rn": [], "ff": [], "rf": [],
        }
        self.assertDictEqual(lib_dict, expected)
Пример #2
0
class TestExtractRBS(unittest.TestCase):
    """Tests for ``er.detect_site`` and ``er.extract_seq``."""

    def setUp(self):
        """Load fixtures and make sure the scratch folder exists."""
        self.example = Example()
        self.test_folder = "test_folder"
        self.mock_args = MockClass()
        scratch_missing = not os.path.exists(self.test_folder)
        if scratch_missing:
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch folder when it is present."""
        scratch = self.test_folder
        if not os.path.exists(scratch):
            return
        shutil.rmtree(scratch)

    def test_detect_site(self):
        """Only the second of the three candidate sequences is expected back."""
        sequences = ("ATGGTGACCCAGGAGGTTGATCCCAGACGTAGGACCTGTTT",
                     "TTAGGACGTACTCCTCGAATGATCAACTGATACTTA",
                     "TTTTTTTTTAAAAAAAAAATATATATTTTTTTTTTT")
        candidates = [{"seq": sequence} for sequence in sequences]
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.end_rbs = 14
        args.start_rbs = 5
        args.fuzzy_rbs = 2
        detected = er.detect_site(candidates, args)
        self.assertListEqual(detected, [{'seq': sequences[1]}])

    def test_extract_seq(self):
        """The first two extracted regions differ only in their ``start``."""
        er.helper = Mock_Helper
        regions = er.extract_seq(self.example.gffs, self.example.seq,
                                 self.example.tsss, self.example.tas, 5, 300)
        shared = {'protein': 'AAA_00001', 'strain': 'aaa',
                  'seq': 'AAAATTAT', 'end': 3, 'strand': '+'}
        for index, start in enumerate((2, 1)):
            self.assertDictEqual(regions[index], dict(shared, start=start))
Пример #3
0
class TestGenScreenshots(unittest.TestCase):
    """Tests for the ``gs`` helpers ``set_data_range``, ``print_batch`` and ``gen_batch``."""

    def setUp(self):
        """Load fixtures and make sure the scratch folder exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _make_cds(self):
        """Build the single CDS entry (positions 3-6, "+" strand) shared by the tests."""
        gff_dict = {"seq_id": "aaa", "source": "Refseq",
                    "feature": "CDS", "start": 3,
                    "end": 6, "phase": ".", "strand": "+", "score": "."}
        attributes_gff = {"ID": "CDS0", "Name": "CDS_0",
                          "locus_tag": "AAA_00001"}
        return Create_generator(gff_dict, attributes_gff, "gff")

    def test_set_data_range(self):
        """Check the ``setDataRange`` line written for the low and high wig fixtures."""
        gff = self._make_cds()
        cases = ((self.example.wigs_low, "setDataRange 0,20\n"),
                 (self.example.wigs_high, "setDataRange 0,510\n"))
        for wigs, expected in cases:
            # A fresh buffer per case, closed even when the assertion fails.
            with StringIO() as out:
                gs.set_data_range(out, gff, wigs, "+")
                self.assertEqual(out.getvalue(), expected)

    def test_print_batch(self):
        """Compare the text written by ``print_batch`` with the ``out`` fixture."""
        lib_t = "wig1 wig2"
        lib_n = "wig3 wig4"
        lib_f = "wig5"
        args = self.mock_args.mock()
        args.fasta = "fasta"
        args.main_gff = "main_gff"
        args.present = "expend"
        args.height = 1000
        args.side_gffs = ["test_folder/side1", "test_folder/side2"]
        gen_file("test_folder/side1", "test")
        gen_file("test_folder/side2", "test")
        args.output_folder = self.test_folder
        with StringIO() as out:
            gs.print_batch(args, out, "+", lib_t, lib_n, lib_f, "test")
            self.assertEqual(out.getvalue(), self.example.out)

    def test_gen_batch(self):
        """Compare the text written by ``gen_batch`` with the ``out_print_wig`` fixture."""
        from unittest import mock

        # Replace gs.import_wig only for this test; a bare assignment would
        # leave the mock installed for every later test in the same process.
        patcher = mock.patch.object(
            gs, "import_wig", Mock_func().mock_import_wig, create=True)
        patcher.start()
        self.addCleanup(patcher.stop)
        lib_t = "wig1 wig2"
        lib_n = "wig3 wig4"
        lib_f = "wig5"
        gff = self._make_cds()
        seq = {"aaa": "ATATGGCCGACGAGTTCGACGATACAACCCGTGGGG"}
        with StringIO() as out:
            gs.gen_batch(lib_t, lib_n, lib_f, "+", [gff], out, seq)
            self.assertEqual(out.getvalue(), self.example.out_print_wig)
Пример #4
0
class TestOptimizeTSS(unittest.TestCase):
    """Test ``opt.optimize_tss`` with its collaborators replaced by mocks."""

    def setUp(self):
        """Create the scratch tree: fasta/wigs/gffs/manuals, each with a ``tmp`` folder."""
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.fastas = os.path.join(self.test_folder, "fasta")
        self.wigs = os.path.join(self.test_folder, "wigs")
        self.gffs = os.path.join(self.test_folder, "gffs")
        self.manuals = os.path.join(self.test_folder, "manuals")
        # makedirs(exist_ok=True) builds the parents too and repairs a partial
        # tree; guarding only on the root skipped all sub-folders whenever the
        # root was left over from an earlier run.
        for folder in (self.fastas, self.wigs, self.gffs, self.manuals):
            os.makedirs(os.path.join(folder, "tmp"), exist_ok=True)

    def tearDown(self):
        """Remove the scratch tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_optimize_tss(self):
        """Run ``optimize_tss`` and expect ``test.csv`` in the output folder."""
        from unittest import mock

        # Patch the module attributes for this test only and restore them
        # afterwards so the mocks cannot leak into other tests.
        replacements = (("Helper", Mock_helper),
                        ("Multiparser", Mock_multiparser),
                        ("optimization", Mock_func().mock_optimization))
        for name, fake in replacements:
            patcher = mock.patch.object(opt, name, fake, create=True)
            patcher.start()
            self.addCleanup(patcher.stop)
        gen_file(os.path.join(self.gffs, "tmp", "test.gff"), "test")
        gen_file(os.path.join(self.fastas, "tmp", "test.fa"), "test")
        args = self.mock_args.mock()
        args.fastas = self.fastas
        args.gffs = self.gffs
        args.wigs = self.wigs
        args.tsspredator_path = "test"
        args.manuals = self.manuals
        gen_file(os.path.join(self.manuals, "tmp", "test.gff"), "test")
        args.output_folder = self.test_folder
        args.project_strain = "test"
        args.height = 9
        args.height_reduction = 9
        args.factor = 9
        args.factor_reduction = 9
        args.base_height = 9
        args.enrichment = 9
        args.processing = 9
        args.utr = 200
        args.libs = "test"
        args.replicate_name = "test"
        args.cluster = 2
        args.strain_lengths = {"test": 100}
        args.cores = 4
        args.program = "TSS"
        args.replicate = 2
        args.steps = 2000
        # The context manager closes the log even if the call or the
        # assertion raises.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            opt.optimize_tss(args, log)
            self.assertTrue(os.path.exists(os.path.join(
                self.test_folder, "test.csv")))
Пример #5
0
class TestOptimizeTSS(unittest.TestCase):
    """Test ``opt.optimize_tss`` with its collaborators replaced by mocks."""

    def setUp(self):
        """Create the scratch tree: fasta/wigs/gffs/manuals, each with a ``tmp`` folder."""
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.fastas = os.path.join(self.test_folder, "fasta")
        self.wigs = os.path.join(self.test_folder, "wigs")
        self.gffs = os.path.join(self.test_folder, "gffs")
        self.manuals = os.path.join(self.test_folder, "manuals")
        # Create every folder independently: guarding only on the root left
        # the sub-folders missing whenever a stale root already existed.
        for folder in (self.fastas, self.wigs, self.gffs, self.manuals):
            os.makedirs(os.path.join(folder, "tmp"), exist_ok=True)

    def tearDown(self):
        """Remove the scratch tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _install_mock(self, name, fake):
        """Replace ``opt.<name>`` with ``fake`` until the current test finishes."""
        from unittest import mock

        patcher = mock.patch.object(opt, name, fake, create=True)
        patcher.start()
        # Restoring the attribute keeps the mock from leaking into other tests.
        self.addCleanup(patcher.stop)

    def test_optimize_tss(self):
        """Run ``optimize_tss`` and expect ``test.csv`` in the output folder."""
        self._install_mock("Helper", Mock_helper)
        self._install_mock("Multiparser", Mock_multiparser)
        self._install_mock("optimization", Mock_func().mock_optimization)
        gen_file(os.path.join(self.gffs, "tmp", "test.gff"), "test")
        gen_file(os.path.join(self.fastas, "tmp", "test.fa"), "test")
        args = self.mock_args.mock()
        args.fastas = self.fastas
        args.gffs = self.gffs
        args.wigs = self.wigs
        args.tsspredator_path = "test"
        args.manuals = self.manuals
        gen_file(os.path.join(self.manuals, "tmp", "test.gff"), "test")
        args.output_folder = self.test_folder
        args.project_strain = "test"
        args.height = 9
        args.height_reduction = 9
        args.factor = 9
        args.factor_reduction = 9
        args.base_height = 9
        args.enrichment = 9
        args.processing = 9
        args.utr = 200
        args.libs = "test"
        args.replicate_name = "test"
        args.cluster = 2
        args.strain_lengths = {"test": 100}
        args.cores = 4
        args.program = "TSS"
        args.replicate = 2
        args.steps = 2000
        log_path = os.path.join(self.test_folder, "test.log")
        # "with" guarantees the handle is closed when the call or the
        # assertion fails.
        with open(log_path, "w") as log:
            opt.optimize_tss(args, log)
            self.assertTrue(
                os.path.exists(os.path.join(self.test_folder, "test.csv")))
Пример #6
0
class TestMEME(unittest.TestCase):
    """Tests for ``MEME._move_and_merge_fasta`` and ``MEME._split_fasta_by_strain``."""

    def setUp(self):
        """Create the scratch folders and the ``MEME`` instance under test."""
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.out_folder = "test_folder/output"
        self.tss_folder = os.path.join(self.test_folder, "tss_folder")
        self.gff_folder = os.path.join(self.test_folder, "gff_folder")
        self.fa_folder = os.path.join(self.test_folder, "fa_folder")
        # makedirs(exist_ok=True) creates missing parents and tolerates
        # leftovers; the previous root-only guard skipped the output folders
        # whenever test_folder already existed.
        os.makedirs(os.path.join(self.out_folder, "fasta_output"),
                    exist_ok=True)
        for folder in (self.tss_folder, self.gff_folder, self.fa_folder):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.tsss = self.tss_folder
        args.fastas = self.fa_folder
        args.gffs = self.gff_folder
        args.output_folder = self.out_folder
        self.meme = MEME(args)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_move_and_merge_fasta(self):
        """Expect one merged FASTA per TSS type plus the two combined files."""
        from unittest import mock

        # Patch me.del_repeat_fasta for this test only and restore it after.
        patcher = mock.patch.object(
            me, "del_repeat_fasta", Mock_func().mock_del_repeat_fasta,
            create=True)
        patcher.start()
        self.addCleanup(patcher.stop)
        if not os.path.exists("tmp"):
            os.mkdir("tmp")
            # Only remove "tmp" when this test created it, so a pre-existing
            # folder in the working directory is never deleted.
            self.addCleanup(shutil.rmtree, "tmp", ignore_errors=True)
        for tss_type in ("primary", "secondary", "internal",
                         "antisense", "orphan"):
            gen_file("tmp/" + tss_type + ".fa", tss_type)
        self.meme._move_and_merge_fasta(self.test_folder, "test")
        for suffix in ("all_types", "primary", "secondary", "internal",
                       "antisense", "orphan", "without_orphan"):
            merged = os.path.join(
                self.test_folder, "test_allstrain_" + suffix + ".fa")
            self.assertTrue(os.path.exists(merged))

    def test_split_fasta_by_strain(self):
        """Expect ``aaa.fa`` and ``bbb.fa`` after splitting ``allstrain.fa``."""
        records = "\n".join([">aaa_aaa_aaa", "ATTATATATA",
                             ">bbb_bbb_bbb", "AATTAATTAA"])
        with open(os.path.join(self.fa_folder, "allstrain.fa"), "w") as fh:
            fh.write(records)
        self.meme._split_fasta_by_strain(self.fa_folder)
        # assertTrue on the joined path alone is always true (a non-empty
        # string); the files must be checked for existence.
        self.assertTrue(
            os.path.exists(os.path.join(self.fa_folder, "aaa.fa")))
        self.assertTrue(
            os.path.exists(os.path.join(self.fa_folder, "bbb.fa")))
Пример #7
0
class TestGetPolyT(unittest.TestCase):
    """Test ``GoTermFinding._merge_files``.

    NOTE(review): the class name says "GetPolyT" but the object under test is
    ``GoTermFinding``; the name is kept so existing test selectors still match.
    """

    def setUp(self):
        """Create the scratch folders and the ``GoTermFinding`` instance."""
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.gffs = os.path.join(self.test_folder, "gff_folder")
        self.go_folder = os.path.join(self.test_folder, "go_folder")
        self.all_strain = "all_genomes_uniprot.csv"
        self.trans = os.path.join(self.test_folder, "tran_folder")
        for folder in (self.test_folder, self.gffs,
                       self.go_folder, self.trans):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.out_folder = self.test_folder
        args.gffs = self.gffs
        args.trans = self.trans
        self.go = GoTermFinding(args)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_merge_files(self):
        """Merge two per-genome CSV files and check the combined file's full text."""
        gff_folder = os.path.join(self.gffs, "test.gff_folder")
        test1_folder = os.path.join(self.go_folder, "test1")
        test2_folder = os.path.join(self.go_folder, "test2")
        for folder in (gff_folder, test1_folder, test2_folder):
            os.makedirs(folder, exist_ok=True)
        fixtures = ((gff_folder, "test1.gff", "test1"),
                    (gff_folder, "test2.gff", "test2"),
                    (test1_folder, "test1_uniprot.csv", "test1"),
                    (test2_folder, "test2_uniprot.csv", "test2"))
        for folder, name, content in fixtures:
            with open(os.path.join(folder, name), "w") as fh:
                fh.write(content)
        # The context manager closes the log even when _merge_files raises;
        # previously a failure before log.close() leaked the handle.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.go._merge_files(self.gffs, self.go_folder,
                                 self.test_folder, log)
        out_file = os.path.join(self.go_folder, "test", self.all_strain)
        self.assertTrue(os.path.exists(out_file))
        with open(out_file) as fh:
            merged = fh.read()
        self.assertEqual(
            merged,
            "Genome	Strand	Start	End	Protein_id	Go_term\ntest1\ntest2\n")
Пример #8
0
class TestGetPolyT(unittest.TestCase):
    """Test ``GoTermFinding._merge_files``.

    NOTE(review): despite the "GetPolyT" name, the object under test is
    ``GoTermFinding``; the name is left unchanged for existing test selectors.
    """

    def setUp(self):
        """Create the scratch folders and the ``GoTermFinding`` instance."""
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.gffs = os.path.join(self.test_folder, "gff_folder")
        self.go_folder = os.path.join(self.test_folder, "go_folder")
        self.all_strain = "all_genomes_uniprot.csv"
        self.trans = os.path.join(self.test_folder, "tran_folder")
        for folder in (self.test_folder, self.gffs,
                       self.go_folder, self.trans):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.out_folder = self.test_folder
        args.gffs = self.gffs
        args.trans = self.trans
        self.go = GoTermFinding(args)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _write(self, folder, name, content):
        """Write ``content`` to ``folder/name``, creating ``folder`` if needed."""
        os.makedirs(folder, exist_ok=True)
        with open(os.path.join(folder, name), "w") as fh:
            fh.write(content)

    def test_merge_files(self):
        """Merge two per-genome CSV files and check the combined file's full text."""
        gff_folder = os.path.join(self.gffs, "test.gff_folder")
        test1_folder = os.path.join(self.go_folder, "test1")
        test2_folder = os.path.join(self.go_folder, "test2")
        self._write(gff_folder, "test1.gff", "test1")
        self._write(gff_folder, "test2.gff", "test2")
        self._write(test1_folder, "test1_uniprot.csv", "test1")
        self._write(test2_folder, "test2_uniprot.csv", "test2")
        log_path = os.path.join(self.test_folder, "test.log")
        # "with" closes the log on every path, including a failing
        # _merge_files call, instead of relying on a trailing log.close().
        with open(log_path, "w") as log:
            self.go._merge_files(self.gffs, self.go_folder,
                                 self.test_folder, log)
        out_file = os.path.join(self.go_folder, "test", self.all_strain)
        self.assertTrue(os.path.exists(out_file))
        with open(out_file) as fh:
            merged = fh.read()
        self.assertEqual(merged, "Genome	Strand	Start	End	Protein_id	Go_term\ntest1\ntest2\n")
Пример #9
0
class TestPotentialTarget(unittest.TestCase):
    """Tests for the ``pt`` functions that read inputs and write ``aaa_target.fa``."""

    def setUp(self):
        """Load fixtures and make sure the scratch folder exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _write_inputs(self):
        """Write the sequence and GFF fixtures; return ``(seq_file, gff_file)``."""
        seq_file = os.path.join(self.test_folder, "seq")
        gff_file = os.path.join(self.test_folder, "gff")
        gen_file(seq_file, self.example.seq_file)
        gen_file(gff_file, self.example.gff_file)
        return seq_file, gff_file

    def _target_text(self):
        """Return the content of ``aaa_target.fa`` as newline-joined text."""
        data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
        return "\n".join(data)

    def test_read_file(self):
        """Check the sequence, per-strand CDS lists and genes returned by ``read_file``."""
        seq_file, gff_file = self._write_inputs()
        fasta, cdss_f, cdss_r, genes = pt.read_file(
            seq_file, gff_file, "test", ["CDS"])
        self.assertEqual(
            fasta,
            "AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC")
        self.assertEqual(cdss_f[0].start, 1)
        self.assertEqual(cdss_f[0].feature, "CDS")
        self.assertEqual(cdss_r[0].start, 14)
        self.assertEqual(cdss_r[0].feature, "CDS")
        self.assertEqual(len(genes), 2)
        self.assertEqual(genes[0].start, 1)
        self.assertEqual(genes[1].start, 14)

    def test_deal_cds_forward(self):
        """Compare the forward-strand target file with the ``cdsf_result`` fixture."""
        pt.deal_cds_forward(self.example.cdss_f, self.test_folder,
                            self.example.fasta, self.example.genes, 2, 10)
        # assertTrue(a, b) treats b as the failure message and only checks
        # that a is truthy; assertEqual performs the intended comparison.
        self.assertEqual(self._target_text(), self.example.cdsf_result)

    def test_deal_cds_reverse(self):
        """Check that ``deal_cds_reverse`` writes a non-empty target file."""
        pt.deal_cds_reverse(self.example.cdss_r, self.test_folder,
                            self.example.fasta, self.example.genes, 2, 10)
        # NOTE(review): this test used to pass the *forward* fixture
        # (cdsf_result) as assertTrue's message, so only non-emptiness was
        # ever checked. No reverse-strand fixture is visible from here, so
        # the truthiness check is kept explicit; switch to assertEqual once
        # the expected reverse output is available.
        self.assertTrue(self._target_text(),
                        "deal_cds_reverse produced an empty target file")

    def test_potential_target(self):
        """Compare the target file written by ``potential_target`` with ``all_result``."""
        seq_file, gff_file = self._write_inputs()
        args = self.mock_args.mock()
        args.tar_start = 2
        args.tar_end = 10
        args.features = ["CDS"]
        pt.potential_target(gff_file, seq_file, self.test_folder, args)
        # assertEqual replaces an assertTrue call that never compared values.
        self.assertEqual(self._target_text(), self.example.all_result)
Пример #10
0
class TestGenScreenshots(unittest.TestCase):
    """Tests for the ``gs`` helpers ``set_data_range``, ``print_batch`` and ``gen_batch``."""

    def setUp(self):
        """Load fixtures and make sure the scratch folder exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _make_cds(self):
        """Build the single CDS entry (positions 3-6, "+" strand) shared by the tests."""
        gff_dict = {"seq_id": "aaa", "source": "Refseq", "feature": "CDS", "start": 3,
                    "end": 6, "phase": ".", "strand": "+", "score": "."}
        attributes_gff = {"ID": "CDS0", "Name": "CDS_0", "locus_tag": "AAA_00001"}
        return Create_generator(gff_dict, attributes_gff, "gff")

    def test_set_data_range(self):
        """Check the ``setDataRange`` line written for the low and high wig fixtures."""
        gff = self._make_cds()
        cases = ((self.example.wigs_low, "setDataRange 0,20\n"),
                 (self.example.wigs_high, "setDataRange 0,510\n"))
        for wigs, expected in cases:
            # One buffer per case; "with" closes it even if the assertion fails.
            with StringIO() as out:
                gs.set_data_range(out, gff, wigs, "+")
                self.assertEqual(out.getvalue(), expected)

    def test_print_batch(self):
        """Compare the text written by ``print_batch`` with the ``out`` fixture."""
        lib_t = "wig1 wig2"
        lib_n = "wig3 wig4"
        lib_f = "wig5"
        args = self.mock_args.mock()
        args.fasta = "fasta"
        args.main_gff = "main_gff"
        args.present = "expend"
        args.height = 1000
        args.side_gffs = "side1 side2"
        args.output_folder = self.test_folder
        with StringIO() as out:
            gs.print_batch(args, out, "+", lib_t, lib_n, lib_f, "test")
            self.assertEqual(out.getvalue(), self.example.out)

    def test_gen_batch(self):
        """Compare the text written by ``gen_batch`` with the ``out_print_wig`` fixture."""
        from unittest import mock

        # Install the import_wig mock for this test only; assigning to the
        # module attribute directly left it in place for all later tests.
        patcher = mock.patch.object(
            gs, "import_wig", Mock_func().mock_import_wig, create=True)
        patcher.start()
        self.addCleanup(patcher.stop)
        lib_t = "wig1 wig2"
        lib_n = "wig3 wig4"
        lib_f = "wig5"
        gff = self._make_cds()
        seq = {"aaa": "ATATGGCCGACGAGTTCGACGATACAACCCGTGGGG"}
        with StringIO() as out:
            gs.gen_batch(lib_t, lib_n, lib_f, "+", [gff], out, seq)
            self.assertEqual(out.getvalue(), self.example.out_print_wig)
Пример #11
0
class TestPotentialTarget(unittest.TestCase):
    """Tests for the ``pt`` functions that read inputs and write ``aaa_target.fa``."""

    def setUp(self):
        """Load fixtures and make sure the scratch folder exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_read_file(self):
        """Check the sequence, per-strand CDS lists and genes returned by ``read_file``."""
        seq_file = os.path.join(self.test_folder, "seq")
        gff_file = os.path.join(self.test_folder, "gff")
        gen_file(seq_file, self.example.seq_file)
        gen_file(gff_file, self.example.gff_file)
        fasta, cdss_f, cdss_r, genes = pt.read_file(seq_file, gff_file, "test",
                                                    ["CDS"])
        self.assertEqual(
            fasta, "AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC")
        self.assertEqual(cdss_f[0].start, 1)
        self.assertEqual(cdss_f[0].feature, "CDS")
        self.assertEqual(cdss_r[0].start, 14)
        self.assertEqual(cdss_r[0].feature, "CDS")
        self.assertEqual(len(genes), 2)
        self.assertEqual(genes[0].start, 1)
        self.assertEqual(genes[1].start, 14)

    def test_deal_cds_forward(self):
        """Compare the forward-strand target file with the ``cdsf_result`` fixture."""
        pt.deal_cds_forward(self.example.cdss_f, self.test_folder,
                            self.example.fasta, self.example.genes, 2, 10)
        data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
        # The second argument of assertTrue is only a failure message, so the
        # old call compared nothing; assertEqual checks the actual content.
        self.assertEqual("\n".join(data), self.example.cdsf_result)

    def test_deal_cds_reverse(self):
        """Check that ``deal_cds_reverse`` writes a non-empty target file."""
        pt.deal_cds_reverse(self.example.cdss_r, self.test_folder,
                            self.example.fasta, self.example.genes, 2, 10)
        data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
        # NOTE(review): the original passed the *forward* fixture
        # (cdsf_result) as assertTrue's message, so only non-emptiness was
        # verified. The expected reverse output is not visible here; the
        # truthiness check is kept explicit until a reverse fixture is
        # confirmed, at which point this should become assertEqual.
        self.assertTrue("\n".join(data),
                        "deal_cds_reverse produced an empty target file")

    def test_potential_target(self):
        """Compare the target file written by ``potential_target`` with ``all_result``."""
        seq_file = os.path.join(self.test_folder, "seq")
        gff_file = os.path.join(self.test_folder, "gff")
        gen_file(seq_file, self.example.seq_file)
        gen_file(gff_file, self.example.gff_file)
        args = self.mock_args.mock()
        args.tar_start = 2
        args.tar_end = 10
        args.features = ["CDS"]
        pt.potential_target(gff_file, seq_file, self.test_folder, args)
        data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
        # assertEqual replaces an assertTrue call that never compared values.
        self.assertEqual("\n".join(data), self.example.all_result)
Пример #12
0
class TestGetPolyT(unittest.TestCase):
    """Test ``GoTermFinding._merge_files``.

    NOTE(review): the class name says "GetPolyT" but the object under test is
    ``GoTermFinding``; the name is kept so existing test selectors still match.
    """

    def setUp(self):
        """Create the scratch folders and the ``GoTermFinding`` instance."""
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.gffs = os.path.join(self.test_folder, "gff_folder")
        self.go_folder = os.path.join(self.test_folder, "go_folder")
        self.all_strain = "all_strains_uniprot.csv"
        self.trans = os.path.join(self.test_folder, "tran_folder")
        for folder in (self.test_folder, self.gffs,
                       self.go_folder, self.trans):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.out_folder = self.test_folder
        args.gffs = self.gffs
        args.trans = self.trans
        self.go = GoTermFinding(args)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_merge_files(self):
        """Merge two per-strain CSV files and check the combined file's full text."""
        gff_folder = os.path.join(self.gffs, "test.gff_folder")
        test1_folder = os.path.join(self.go_folder, "test1")
        test2_folder = os.path.join(self.go_folder, "test2")
        for folder in (gff_folder, test1_folder, test2_folder):
            os.makedirs(folder, exist_ok=True)
        fixtures = ((gff_folder, "test1.gff", "test1"),
                    (gff_folder, "test2.gff", "test2"),
                    (test1_folder, "test1_uniprot.csv", "test1"),
                    (test2_folder, "test2_uniprot.csv", "test2"))
        for folder, name, content in fixtures:
            with open(os.path.join(folder, name), "w") as fh:
                fh.write(content)
        self.go._merge_files(self.gffs, self.go_folder, self.test_folder)
        out_file = os.path.join(self.go_folder, "test", self.all_strain)
        self.assertTrue(os.path.exists(out_file))
        # Compare the whole file at once: asserting inside "for line in fh"
        # never ran for an empty file, so an empty merge result passed.
        with open(out_file) as fh:
            merged = fh.read()
        self.assertEqual(merged, "test1test2")
Пример #13
0
class TestGetPolyT(unittest.TestCase):
    """Test ``GoTermFinding._merge_files``.

    NOTE(review): despite the "GetPolyT" name, the object under test is
    ``GoTermFinding``; the name is left unchanged for existing test selectors.
    """

    def setUp(self):
        """Create the scratch folders and the ``GoTermFinding`` instance."""
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.gffs = os.path.join(self.test_folder, "gff_folder")
        self.go_folder = os.path.join(self.test_folder, "go_folder")
        self.all_strain = "all_strains_uniprot.csv"
        self.trans = os.path.join(self.test_folder, "tran_folder")
        for folder in (self.test_folder, self.gffs,
                       self.go_folder, self.trans):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.out_folder = self.test_folder
        args.gffs = self.gffs
        args.trans = self.trans
        self.go = GoTermFinding(args)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _write(self, folder, name, content):
        """Write ``content`` to ``folder/name``, creating ``folder`` if needed."""
        os.makedirs(folder, exist_ok=True)
        with open(os.path.join(folder, name), "w") as fh:
            fh.write(content)

    def test_merge_files(self):
        """Merge two per-strain CSV files and check the combined file's full text."""
        gff_folder = os.path.join(self.gffs, "test.gff_folder")
        test1_folder = os.path.join(self.go_folder, "test1")
        test2_folder = os.path.join(self.go_folder, "test2")
        self._write(gff_folder, "test1.gff", "test1")
        self._write(gff_folder, "test2.gff", "test2")
        self._write(test1_folder, "test1_uniprot.csv", "test1")
        self._write(test2_folder, "test2_uniprot.csv", "test2")
        self.go._merge_files(self.gffs, self.go_folder, self.test_folder)
        out_file = os.path.join(self.go_folder, "test", self.all_strain)
        self.assertTrue(os.path.exists(out_file))
        # A per-line assertion inside the read loop is skipped entirely when
        # the file is empty; reading the full content closes that gap.
        with open(out_file) as fh:
            merged = fh.read()
        self.assertEqual(merged, "test1test2")
class TestCoverageTerminator(unittest.TestCase):
    """Tests for the coverage-based terminator helpers exposed by ``dct``."""

    def setUp(self):
        """Create the fixture objects and make sure the scratch folder exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        folder_missing = not os.path.exists(self.test_folder)
        if folder_missing:
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder when it is still on disk."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_compare_ta(self):
        """Check the ``express`` value of each example terminator after ``compare_ta``."""
        transcripts = read_dict(
            3, self.example.tran_dict, self.example.attributes_tran)
        dct.compare_ta(self.example.term_dict, transcripts, 5)
        flags = [entry["express"] for entry in self.example.term_dict]
        self.assertListEqual(flags, ["True", "True", "False"])

    def test_compare_transtermhp(self):
        """Check the positions and methods of the entries from ``compare_transtermhp``."""
        hps = read_dict(3, self.example.hp_dict, self.example.attributes_term)
        merged = sorted(
            dct.compare_transtermhp(hps, self.example.term_dict),
            key=lambda entry: (entry["strain"], entry["start"]))
        spans = ["_".join([str(entry["start"]), str(entry["end"])])
                 for entry in merged]
        methods = [entry["method"] for entry in merged]
        self.assertListEqual(
            spans, ['30_40', '350_367', '420_432', '1420_2429'])
        self.assertListEqual(
            methods, ['TransTermHP', 'forward_reverse&TransTermHP',
                      'forward_reverse&TransTermHP', 'forward_reverse'])

    def test_compare_replicates(self):
        """Run ``compare_replicates`` for a tex/notex pair, then for one frag library."""
        texs = {"track_tex_track_notex": 0}
        args = self.mock_args.mock()
        args.replicates = {"tex": 1, "frag": 1}
        args.tex_notex = 2

        # First scenario: one tex track and one notex track.
        pair_covers = [
            {"track": "track_tex", "high": 300, "low": 50,
             "detect": "True", "diff": 250, "type": "tex"},
            {"track": "track_notex", "high": 200, "low": 50,
             "detect": "True", "diff": 150, "type": "notex"},
        ]
        diff_cover, diff, term_datas, detect_num = dct.compare_replicates(
            pair_covers, texs, "texnotex", args)
        self.assertEqual(diff_cover, 250)
        self.assertDictEqual(
            diff, {'track': 'track_tex', 'detect': 'True', 'high': 300,
                   'low': 50, 'type': 'tex', 'diff': 250})
        ref_datas = [
            {'track': 'track_notex', 'detect': 'True', 'high': 200,
             'low': 50, 'type': 'notex', 'diff': 150},
            {'track': 'track_tex', 'detect': 'True', 'high': 300,
             'low': 50, 'type': 'tex', 'diff': 250},
        ]
        for index in (0, 1):
            self.assertDictEqual(ref_datas[index], term_datas[index])
        self.assertEqual(detect_num, 1)

        # Second scenario: a single fragmented track marked as not detected.
        args.replicates = {"tex": 1, "frag": 1}
        frag_covers = [{"track": "frag", "high": 10, "low": 0,
                        "detect": "False", "diff": 10, "type": "frag"}]
        diff_cover, diff, term_datas, detect_num = dct.compare_replicates(
            frag_covers, texs, "frag", args)
        self.assertEqual(diff_cover, 10)
        self.assertDictEqual(
            diff, {'detect': 'False', 'type': 'frag', 'low': 0,
                   'diff': 10, 'track': 'frag', 'high': 10})
        self.assertDictEqual(
            term_datas[0], {'detect': 'False', 'type': 'frag', 'low': 0,
                            'diff': 10, 'track': 'frag', 'high': 10})
        self.assertEqual(detect_num, 1)

    def test_coverage2term(self):
        """Check the entry appended to ``term_covers`` by ``coverage2term``."""
        # NOTE(review): this replaces ``dct.coverage_comparison`` for the rest
        # of the process and is never restored; later tests may depend on it.
        dct.coverage_comparison = Mock_coverage().coverage_comparison
        hl_covers = {"low": 20, "high": 30}
        hl_poss = {"low": 1, "high": 2}
        term = {"start": 2, "end": 4}
        covers = [
            {"coverage": 100, "pos": 1, "type": "frag"},
            {"coverage": 30, "pos": 2, "type": "frag"},
            {"coverage": 23, "pos": 3, "type": "frag"},
            {"coverage": 21, "pos": 4, "type": "frag"},
            {"coverage": 21, "pos": 5, "type": "frag"},
        ]
        collected = []
        args = self.mock_args.mock()
        args.fuzzy = 1
        args.decrease = 0.5
        dct.coverage2term(covers, term, hl_covers, hl_poss, "+",
                          collected, "track_1", args)
        self.assertDictEqual(
            collected[0], {'diff': 70, 'track': 'track_1', 'type': 'frag',
                           'high': 100, 'low': 30, 'detect': 'True'})

    def test_get_coverage(self):
        """Check the four values returned by ``get_coverage`` for one frag track."""
        term = {"start": 2, "end": 4, "strain": "aaa", "strand": "+"}
        texs = {"track_tex_track_notex": 0}
        track_values = [
            {"pos": 1, "coverage": 100, "type": "frag"},
            {"pos": 2, "coverage": 30, "type": "frag"},
            {"pos": 3, "coverage": 23, "type": "frag"},
            {"pos": 4, "coverage": 21, "type": "frag"},
            {"pos": 5, "coverage": 21, "type": "frag"},
        ]
        wigs = {"aaa": {"frag_1": {"track_1": track_values}}}
        args = self.mock_args.mock()
        args.fuzzy = 1
        args.decrease = 0.5
        args.replicates = {"tex": 1, "frag": 1}
        args.tex_notex = 2
        diff_cover, diff, term_datas, detect_nums = dct.get_coverage(
            term, wigs, "+", texs, args)
        self.assertEqual(diff_cover, 70)
        self.assertDictEqual(
            diff, {'track': 'track_1', 'high': 100, 'type': 'frag',
                   'detect': 'True', 'diff': 70, 'low': 30})
        self.assertDictEqual(
            term_datas["frag_1"][0],
            {'track': 'track_1', 'high': 100, 'type': 'frag',
             'detect': 'True', 'diff': 70, 'low': 30})
        self.assertDictEqual(detect_nums, {'frag_1': 1})

    def test_compare_term(self):
        """Feed candidates to ``compare_term`` one by one and check the first kept entry."""
        kept = []
        # Each step is (candidate, expected first entry); ``None`` means the
        # candidate object itself is expected at index 0.
        steps = [
            ({"miss": 5, "diff_cover": 30, "ut": 4}, None),
            ({"miss": 4, "diff_cover": 30, "ut": 4}, None),
            ({"miss": 6, "diff_cover": 80, "ut": 4},
             {"miss": 4, "diff_cover": 30, "ut": 4}),
            ({"miss": 4, "diff_cover": 80, "ut": 4}, None),
            ({"miss": 4, "diff_cover": 80, "ut": 6}, None),
        ]
        candidate = None
        for candidate, expected in steps:
            kept = dct.compare_term(candidate, kept)
            if expected is None:
                expected = candidate
            self.assertDictEqual(kept[0], expected)
        # Submitting the last candidate a second time must yield it twice.
        kept = dct.compare_term(candidate, kept)
        self.assertDictEqual(kept[0], candidate)
        self.assertDictEqual(kept[1], candidate)

    def test_first_term(self):
        """Check the return value and bucket chosen by ``first_term`` per strand."""
        detect_terms = {"detect": [], "undetect": []}
        term = {"detect_p": True, "detect_m": False}

        found = dct.first_term("+", term, detect_terms, False)
        self.assertTrue(found)
        self.assertDictEqual(detect_terms["detect"][0], term)

        found = dct.first_term("-", term, detect_terms, False)
        self.assertFalse(found)
        self.assertDictEqual(detect_terms["undetect"][0], term)

    def test_print_table(self):
        """Check the text written by ``print_table`` for four term/option combinations."""
        args = self.mock_args.mock()
        args.cutoff_coverage = 5
        args.table_best = True

        def build_term(express, diff_cover):
            # Fresh fixture on every call; only the two arguments vary.
            return {
                "express": express, "diff_cover": diff_cover,
                "diff": {"high": 100, "low": 30, "track": "track_1"},
                "datas": {"data": [
                    {"track": "track_1", "diff": 70, "high": 100, "low": 30},
                    {"track": "track_2", "diff": 39, "high": 99, "low": 60}]}}

        def written_lines(term):
            # Capture the output in memory and return the set of its lines.
            buffer = StringIO()
            dct.print_table(term, buffer, args)
            lines = set(buffer.getvalue().split("\n"))
            buffer.close()
            return lines

        expressed = build_term("True", 70)
        self.assertEqual(
            written_lines(expressed),
            {"\tTrue\ttrack_1(diff=70;high=100;low=30)"})

        # Same term object again, now with ``table_best`` switched off.
        args.table_best = False
        self.assertEqual(
            written_lines(expressed),
            {"\tTrue\ttrack_1(diff=70;high=100;low=30);"
             "track_2(diff=39;high=99;low=60)"})

        self.assertEqual(
            written_lines(build_term("False", 70)), {"\tFalse\tNA"})

        self.assertEqual(
            written_lines(build_term("True", -1)),
            {"\tFalse\tNo_coverage_decreasing"})

    def test_print2file(self):
        """Compare both outputs of ``print2file`` with the example GFF line and table row."""
        gff_out = StringIO()
        table_out = StringIO()
        term = {
            "strain": "aaa", "express": "True", "diff_cover": 70,
            "strand": "+", "start": 2, "end": 4, "method": "TransTermHP",
            "diff": {"high": 100, "low": 30, "track": "track_1"},
            "datas": {"data": [
                {"track": "track_1", "diff": 70, "high": 100, "low": 30},
                {"track": "track_2", "diff": 39, "high": 99, "low": 60}]}}
        args = self.mock_args.mock()
        args.cutoff_coverage = 5
        args.table_best = True
        dct.print2file(0, term, "70", "test", gff_out, table_out,
                       "test_method", args)
        # The element after the last newline is dropped before comparing.
        gff_lines = set(gff_out.getvalue().split("\n")[:-1])
        table_lines = set(table_out.getvalue().split("\n")[:-1])
        self.assertEqual(gff_lines, set([self.example.gff_file]))
        self.assertEqual(table_lines, set([self.example.table]))
        gff_out.close()
        table_out.close()
Пример #15
0
class TestMergesRNA(unittest.TestCase):
    """Tests for the sRNA merging helpers exposed by ``ms``."""

    def setUp(self):
        """Create the fixture objects and make sure the scratch folder exists."""
        self.mock_args = MockClass()
        self.example = Example()
        self.test_folder = "test_folder"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder when it is still on disk."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    @staticmethod
    def _fields(start, end, feature="sRNA"):
        """Return a fresh dict of the GFF column values shared by the fixtures."""
        return {
            "seq_id": "aaa",
            "source": "Refseq",
            "feature": feature,
            "start": start,
            "end": end,
            "phase": ".",
            "strand": "+",
            "score": "."
        }

    def test_modify_attributes(self):
        """Check the ``sRNA_type`` values left behind by ``modify_attributes``."""
        pre_dict = self._fields(3, 33)
        attributes_pre = {"ID": "sRNA0", "Name": "srna_0", "sRNA_type": "5utr"}
        tar1_dict = self._fields(3, 33)
        attributes_tar1 = {
            "ID": "sRNA0",
            "Name": "srna_0",
            "sRNA_type": "3utr"
        }
        tar2_dict = self._fields(3, 33)
        attributes_tar2 = {
            "ID": "sRNA0",
            "Name": "srna_0",
            "sRNA_type": "5utr"
        }
        # Differing types with mode "pre": the first entry gets both types.
        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar1 = Create_generator(tar1_dict, attributes_tar1, "gff")
        ms.modify_attributes(pre, tar1, "UTR", "pre")
        self.assertEqual(pre.attributes["sRNA_type"], "3utr,5utr")
        # Identical types with mode "pre": the type stays as it was.
        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar2 = Create_generator(tar2_dict, attributes_tar2, "gff")
        ms.modify_attributes(pre, tar2, "UTR", "pre")
        self.assertEqual(pre.attributes["sRNA_type"], "5utr")
        # Differing types with mode "current": the second entry gets both.
        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar1 = Create_generator(tar1_dict, attributes_tar1, "gff")
        ms.modify_attributes(pre, tar1, "UTR", "current")
        self.assertEqual(pre.attributes["sRNA_type"], "5utr")
        self.assertEqual(tar1.attributes["sRNA_type"], "3utr,5utr")

    def test_detect_overlap(self):
        """Check ``detect_overlap`` for an identical span and for a distant span."""
        pre_dict = self._fields(3, 33)
        attributes_pre = {"ID": "sRNA0", "Name": "srna_0", "sRNA_type": "5utr"}
        tar1_dict = self._fields(3, 33)
        attributes_tar1 = {
            "ID": "sRNA0",
            "Name": "srna_0",
            "sRNA_type": "3utr"
        }
        tar2_dict = self._fields(53, 233)
        attributes_tar2 = {
            "ID": "sRNA0",
            "Name": "srna_0",
            "sRNA_type": "5utr"
        }
        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar1 = Create_generator(tar1_dict, attributes_tar1, "gff")
        tar2 = Create_generator(tar2_dict, attributes_tar2, "gff")
        overlap = ms.detect_overlap(tar1, pre, "UTR", False)
        self.assertTrue(overlap)
        overlap = ms.detect_overlap(tar2, pre, "UTR", False)
        self.assertFalse(overlap)

    def test_modify_overlap(self):
        """Check the attributes and span of the entry returned by ``modify_overlap``."""
        pre_dict = self._fields(3, 33)
        attributes_pre = {
            "ID": "sRNA0",
            "Name": "srna_0",
            "sRNA_type": "5utr",
            "with_TSS": "NA",
            "start_cleavage": "cleavage_1,cleavage_2",
            "end_cleavage": "NA"
        }
        tar_dict = self._fields(5, 30)
        attributes_tar = {
            "ID": "sRNA0",
            "Name": "srna_0",
            "sRNA_type": "3utr",
            "with_TSS": "TSS_1",
            "start_cleavage": "cleavage3",
            "end_cleavage": "cleavage10"
        }
        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar = Create_generator(tar_dict, attributes_tar, "gff")
        pre_srna = ms.modify_overlap(pre, tar)
        self.assertEqual(pre_srna.attributes["with_TSS"], "TSS_1")
        self.assertEqual(pre_srna.attributes["start_cleavage"],
                         "cleavage_1,cleavage_2,cleavage3")
        self.assertEqual(pre_srna.attributes["end_cleavage"], "cleavage10")
        self.assertEqual(pre_srna.start, 3)
        self.assertEqual(pre_srna.end, 33)

    def test_merge_srna(self):
        """Check ``merge_srna`` on the example UTR and intergenic sRNA lists."""
        srnas = ms.merge_srna(self.example.srnas_utr, "UTR")
        self.assertEqual(len(srnas), 2)
        self.assertEqual(srnas[0].start, 3)
        self.assertEqual(srnas[1].start, 54)
        self.assertEqual(srnas[0].attributes["with_TSS"], "TSS_1")
        self.assertEqual(srnas[1].attributes["with_TSS"], "TSS_3")
        self.assertEqual(srnas[0].attributes["start_cleavage"],
                         "cleavage_1,cleavage_2,cleavage_3")
        self.assertEqual(srnas[1].attributes["start_cleavage"], "cleavage_4")
        srnas = ms.merge_srna(self.example.srnas_int, "inter")
        self.assertEqual(srnas[0].attributes["with_TSS"], "TSS_1")
        self.assertEqual(srnas[1].attributes["with_TSS"], "NA")

    def test_merge_srna_gff(self):
        """Check the GFF file written by ``merge_srna_gff``."""
        out_file = os.path.join(self.test_folder, "test_out")
        gen_file(os.path.join(self.test_folder, "aaa.gff"),
                 self.example.gff_file)
        # NOTE(review): ``ms.read_gff`` stays replaced after this test.
        ms.read_gff = Mock_func().mock_read_gff
        gffs = {"merge": out_file, "utr": "UTR", "normal": "inter"}
        ms.merge_srna_gff(gffs, False, 0.5,
                          os.path.join(self.test_folder, "aaa.gff"))
        datas, attributes = extract_info(out_file, "file")
        self.assertListEqual(datas, [
            'aaa\tANNOgesic\tncRNA\t54\t254\t.\t+\t.',
            'aaa\tANNOgesic\tncRNA\t54\t254\t.\t+\t.'
        ])
        self.assertEqual(
            set(attributes[0]),
            set([
                'overlap_percent=NA', 'end_cleavage=cleavage_40',
                'start_cleavage=cleavage_4', 'Name=sRNA_00000',
                'with_TSS=TSS_3', 'ID=srna0', 'sRNA_type=interCDS',
                'overlap_cds=NA'
            ]))
        self.assertEqual(
            set(attributes[1]),
            set([
                'overlap_percent=NA', 'end_cleavage=NA', 'Name=sRNA_00001',
                'with_TSS=NA', 'ID=srna1', 'sRNA_type=intergenic',
                'overlap_cds=NA'
            ]))

    def test_compare_table(self):
        """Check the table row written by ``compare_table`` for one UTR sRNA."""
        # NOTE(review): ``ms.replicate_comparison`` stays replaced after this test.
        ms.replicate_comparison = Mock_func().mock_replicate_comparison
        wigs = {"aaa": {"frag_1": {"track_1|+|frag": [100, 30, 23, 21, 21]}}}
        tables = [{
            "strain": "aaa",
            "name": "sRNA_1",
            "start": 3,
            "end": 4,
            "strand": "+",
            "libs": "track_1",
            "detect": "True",
            "avg": 30,
            "high": 100,
            "low": 20,
            "detail": "detail"
        }]
        srna_dict = self._fields(3, 4)
        attributes_srna = {
            "ID": "sRNA0",
            "Name": "srna_0",
            "sRNA_type": "3utr",
            "with_TSS": "TSS_1",
            "start_cleavage": "cleavage3",
            "end_cleavage": "cleavage10",
            "overlap_cds": "CDS1",
            "overlap_percent": "0.01415"
        }
        srna = Create_generator(srna_dict, attributes_srna, "gff")
        tss_dict = self._fields(3, 3, feature="TSS")
        attributes_tss = {"ID": "tss0", "Name": "TSS_0", "type": "Orphan"}
        tss = Create_generator(tss_dict, attributes_tss, "gff")
        out = StringIO()
        gen_file("tmp_median", "aaa\t3utr\ttrack_1\t10")
        # ``tmp_median`` lives in the working directory, outside the folder
        # that tearDown removes.  Registering the removal right away keeps it
        # from being left behind when the call or the assertion below fails.
        self.addCleanup(os.remove, "tmp_median")
        texs = {"track_tex_track_notex": 0}
        args = self.mock_args.mock()
        args.replicates = {"tex": 1, "frag": 1}
        args.texs = texs
        args.out_folder = os.getcwd()
        args.table_best = True
        args.tex_notex = 2
        ms.compare_table(srna, tables, "utr", wigs, wigs, texs, out, [tss],
                         args)
        self.assertEqual(
            out.getvalue(),
            "aaa\tsrna_0\t3\t4\t+\tfrag_1\t1\tTSS_1;cleavage3\tcleavage10\t22.0\t23\t21\ttrack_1(avg=22.0;high=23;low=21)\tCDS1\t0.01415\n"
        )

    def test_get_coverage(self):
        """Check the coverage summary returned by ``get_coverage`` for one sRNA."""
        wigs = {"aaa": {"frag_1": {"track_1|+|frag": [100, 30, 23, 21, 21]}}}
        srna_cover = ms.get_coverage(wigs, self.example.srnas_int[0])
        self.assertEqual(srna_cover["frag_1"], [{
            'low': 21,
            'track': 'track_1',
            'avg': 1.3548387096774193,
            'final_end': 33,
            'high': 21,
            'pos': 0,
            'final_start': 3,
            'type': 'frag'
        }])

    def test_get_tss_pro(self):
        """Check the pair of strings returned by ``get_tss_pro`` for a UTR sRNA."""
        srna_dict = self._fields(3, 4)
        attributes_srna = {
            "ID": "sRNA0",
            "Name": "srna_0",
            "sRNA_type": "3utr",
            "with_TSS": "TSS_1",
            "start_cleavage": "cleavage3",
            "end_cleavage": "cleavage10"
        }
        srna = Create_generator(srna_dict, attributes_srna, "gff")
        tss_pro = ms.get_tss_pro("utr", srna)
        self.assertEqual(tss_pro, ('TSS_1;cleavage3', 'cleavage10'))
Пример #16
0
class TestsTSSpredator(unittest.TestCase):
    """Tests for the private helpers of the ``TSSpredator`` wrapper."""

    def setUp(self):
        """Create the scratch folder tree and the ``TSSpredator`` under test."""
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.mock_parser = Mock_Multiparser()
        self.example = Example()
        self.test_folder = "test_folder"
        self.trans = "test_folder/trans"
        self.out = "test_folder/output"
        self.wigs = "test_folder/wigs"
        self.gffs = "test_folder/gffs"
        self.tsss = "test_folder/tsss"
        self.fastas = "test_folder/fastas"
        self.manual = "test_folder/manuals"
        # Ensure every folder individually: a ``test_folder`` left over from
        # an interrupted run must not cause the sub-folders to be skipped.
        for folder in (self.test_folder, self.trans, self.out, self.wigs,
                       self.gffs, self.tsss, self.fastas, self.manual):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.ta_files = self.trans
        args.gffs = self.gffs
        args.wig_folder = self.wigs
        args.fastas = self.fastas
        args.manual = self.manual
        self.tss = TSSpredator(args)

    def tearDown(self):
        """Remove the scratch folder and the ``tmp`` folder in the working directory."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        if os.path.exists("tmp"):
            shutil.rmtree("tmp")

    @staticmethod
    def _libs(tex_first=False):
        """Return a fresh list of the four library strings used by the tests."""
        notex = ["test1_forward.wig:notex:1:a:+",
                 "test1_reverse.wig:notex:1:a:-"]
        tex = ["test1_TEX_forward.wig:tex:1:a:+",
               "test1_TEX_reverse.wig:tex:1:a:-"]
        return tex + notex if tex_first else notex + tex

    def _open_log(self):
        """Open ``test.log`` in the scratch folder for writing; the caller closes it."""
        return open(os.path.join(self.test_folder, "test.log"), "w")

    def _write_plain_wigs(self):
        """Write the four ``test1_*.wig`` files directly into the wig folder."""
        for name, content in (("test1_forward.wig", "test_f"),
                              ("test1_reverse.wig", "test_r"),
                              ("test1_TEX_forward.wig", "test_f"),
                              ("test1_TEX_reverse.wig", "test_r")):
            gen_file(os.path.join(self.wigs, name), content)

    def _write_strain_wigs(self, prefix=""):
        """Write the four ``*_STRAIN_test.wig`` files under ``<wigs>/<prefix>``."""
        for lib in ("test1_forward", "test1_reverse",
                    "test1_TEX_forward", "test1_TEX_reverse"):
            gen_file(os.path.join(
                self.wigs, prefix + lib + ".wig_STRAIN_test.wig"), "test")

    def _config_args(self):
        """Return mock arguments carrying the settings shared by the config tests."""
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.program = "TSS"
        args.height = 0.3
        args.height_reduction = 0.2
        args.factor = 2.0
        args.factor_reduction = 0.5
        args.base_height = 0.00
        args.enrichment_factor = 2.0
        args.processing_factor = 1.5
        args.utr_length = 300
        args.cluster = 3
        args.repmatch = ["all_2"]
        args.libs = self._libs()
        args.output_prefixs = ["test1"]
        args.specify_strains = None
        return args

    def test_print_lib(self):
        """Check the two library lines written by ``_print_lib``."""
        out = StringIO()
        lib_list = [{"condition": 1, "replicate": "a", "wig": "test_1.wig"},
                    {"condition": 2, "replicate": "a", "wig": "test_2.wig"}]
        self.tss._print_lib(2, lib_list, out, self.wigs, "test", ["a"])
        self.assertEqual(out.getvalue(),
                         ("test_1a = test_folder/wigs/test_1.wig\n"
                          "test_2a = test_folder/wigs/test_2.wig\n"))

    def test_import_lib(self):
        """Check the configuration lines written by ``_import_lib``."""
        out = StringIO()
        self._write_strain_wigs()
        self.tss._import_lib(
            self._libs(), self.wigs, "test", out, "test.gff", "TSS", "test.fa")
        self.assertListEqual(
            out.getvalue().split("\n"), [
                "annotation_1 = test.gff",
                "fivePrimeMinus_1a = test_folder/wigs/test1_TEX_reverse.wig",
                "fivePrimePlus_1a = test_folder/wigs/test1_TEX_forward.wig",
                "genome_1 = test.fa", ""])

    def test_gen_config(self):
        """Compare the file written by ``_gen_config`` with the example config."""
        os.mkdir(os.path.join(self.out, "MasterTables"))
        os.mkdir(os.path.join(self.wigs, "tmp"))
        config_file = os.path.join(self.test_folder, "config")
        args = self._config_args()
        # The with-block closes the log handle before the folder is removed.
        with self._open_log() as log:
            self.tss._gen_config(
                "test", args, self.gffs + "/tmp/test.gff",
                self.wigs + "/tmp", self.fastas + "/tmp/test.fa",
                config_file, log)
        datas = import_data(config_file)
        self.assertEqual("\n".join(datas), self.example.config)

    def test_set_gen_config(self):
        """Compare ``config_test.ini`` from ``_set_gen_config`` with the example config."""
        os.mkdir(os.path.join(self.fastas, "tmp"))
        os.mkdir(os.path.join(self.gffs, "tmp"))
        os.mkdir(os.path.join(self.wigs, "tmp"))
        os.mkdir(os.path.join(self.out, "MasterTables"))
        gen_file(os.path.join(self.fastas, "tmp/test.fa"), "test")
        gen_file(os.path.join(self.gffs, "tmp/test.gff"), "test")
        self._write_strain_wigs("tmp/")
        args = self._config_args()
        with self._open_log() as log:
            self.tss._set_gen_config(args, self.test_folder, log)
        datas = import_data(os.path.join(self.test_folder, "config_test.ini"))
        self.assertEqual("\n".join(datas), self.example.config)

    def test_convert_gff(self):
        """Compare the GFF produced from the example master table with the example GFF."""
        os.mkdir(os.path.join(self.out, "gffs"))
        os.mkdir(os.path.join(self.out, "MasterTables"))
        os.mkdir(os.path.join(self.out, "MasterTables/MasterTable_test"))
        gen_file(os.path.join(
            self.out,
            "MasterTables/MasterTable_test/MasterTable.tsv"),
                 self.example.master)
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.program = "TSS"
        with self._open_log() as log:
            self.tss._convert_gff(["test"], args, log)
        datas = import_data(os.path.join(self.out, "gffs/test_TSS.gff"))
        self.assertEqual("\n".join(datas), self.example.master_gff)

    def test_merge_wigs(self):
        """Check the merged forward and reverse wig files written to ``tmp``."""
        self._write_plain_wigs()
        self.tss._merge_wigs(self.wigs, "test", self._libs())
        datas = import_data(os.path.join("tmp", "merge_forward.wig"))
        self.assertEqual("\n".join(datas), "test_ftest_f")
        datas = import_data(os.path.join("tmp", "merge_reverse.wig"))
        self.assertEqual("\n".join(datas), "test_rtest_r")
        shutil.rmtree("tmp")

    def test_check_orphan(self):
        """Check that ``_check_orphan`` leaves ``test_TSS.gff`` in the output folder."""
        os.mkdir(os.path.join(self.out, "gffs"))
        self._write_plain_wigs()
        # NOTE(review): ``ts.check_orphan`` stays replaced after this test.
        ts.check_orphan = self.mock.mock_check_orphan
        args = self.mock_args.mock()
        args.program = "TSS"
        args.gffs = self.gffs
        args.libs = self._libs(tex_first=True)
        self.tss._check_orphan(["test"], self.wigs, args)
        self.assertTrue(os.path.exists(
            os.path.join(self.out, "gffs/test_TSS.gff")))

    def test_low_expression(self):
        """Check the cutoff statistics file written by ``_low_expression``."""
        # NOTE(review): ``ts.filter_low_expression`` stays replaced after this test.
        ts.filter_low_expression = self.mock.mock_filter_low_expression
        self._write_plain_wigs()
        gen_file(os.path.join(self.gffs, "test_TSS.gff"),
                 self.example.tss_file)
        os.mkdir(os.path.join(self.out, "statistics"))
        os.mkdir(os.path.join(self.out, "statistics/test"))
        args = self.mock_args.mock()
        args.manual = "manual"
        args.libs = self._libs(tex_first=True)
        args.wig_folder = self.wigs
        args.program = "TSS"
        args.cluster = 3
        self.tss._low_expression(args, self.gffs)
        shutil.rmtree("tmp")
        datas = import_data(os.path.join(
            self.out, "statistics/test/stat_test_low_expression_cutoff.csv"))
        self.assertEqual("\n".join(datas),
                         "Genome\tCutoff_coverage\ntest\t100")

    def test_merge_manual(self):
        """Check the statistics file and GFF left behind by ``_merge_manual``."""
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.tss_file)
        os.mkdir(os.path.join(self.out, "statistics"))
        os.mkdir(os.path.join(self.out, "statistics/test"))
        os.mkdir(os.path.join(self.out, "gffs"))
        # NOTE(review): ``ts.merge_manual_predict_tss`` stays replaced after this test.
        ts.merge_manual_predict_tss = self.mock.mock_merge_manual_predict_tss
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.manual = "test_folder/manuals/tmp"
        os.mkdir(args.manual)
        gen_file("test_folder/manuals/tmp/test.gff", "test")
        args.wig_folder = self.wigs
        args.out_folder = self.out
        args.program = "TSS"
        args.utr_length = 300
        args.libs = "libs"
        args.cluster = 3
        self.tss._merge_manual(["test"], args)
        self.assertTrue(os.path.exists(os.path.join(
            self.out,
            "statistics/test/stat_compare_TSSpredator_manual_test.csv")))
        self.assertTrue(os.path.exists(
            os.path.join(self.out, "gffs/test_TSS.gff")))
        shutil.rmtree(args.manual)

    def test_deal_with_overlap(self):
        """Check that ``test_TSS.gff`` still exists after ``_deal_with_overlap``."""
        # NOTE(review): ``ts.filter_tss_pro`` stays replaced after this test.
        ts.filter_tss_pro = self.mock.mock_filter_tss_pro
        gen_file(os.path.join(self.out, "test_TSS.gff"), self.example.tss_file)
        gen_file(os.path.join(self.test_folder, "test_processing.gff"),
                 self.example.tss_file)
        args = self.mock_args.mock()
        args.overlap_feature = "overlap"
        args.program = "TSS"
        args.cluster = 3
        args.overlap_gffs = self.test_folder
        self.tss._deal_with_overlap(self.out, args)
        self.assertTrue(os.path.exists(os.path.join(self.out, "test_TSS.gff")))

    def test_stat_tss(self):
        """Check that ``_stat_tss`` leaves the venn and class plots in place."""
        # NOTE(review): both ``ts`` attributes stay replaced after this test.
        ts.stat_tsspredator = self.mock.mock_stat_tsspredator
        ts.plot_venn = self.mock.mock_plot_venn
        os.mkdir(os.path.join(self.out, "statistics"))
        os.mkdir(os.path.join(self.out, "statistics/test"))
        with self._open_log() as log:
            self.tss._stat_tss(["test"], "TSS", log)
        self.assertTrue(os.path.exists(os.path.join(
            self.out, "statistics/test/test_venn.png")))
        self.assertTrue(os.path.exists(os.path.join(
            self.out, "statistics/test/test_class.png")))

    def test_validate(self):
        """Run ``_validate`` with a mocked ``validate_gff``; only checks it does not raise."""
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.tss_file)
        os.mkdir(os.path.join(self.out, "gffs"))
        # NOTE(review): ``ts.validate_gff`` stays replaced after this test.
        ts.validate_gff = self.mock.mock_validate_gff
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.utr_length = 300
        args.out_folder = self.out
        args.program = "tss"
        with self._open_log() as log:
            self.tss._validate(["test"], args, log)

    def test_compare_ta(self):
        """Check the transcript and TSS files left behind by ``_compare_ta``."""
        self.tss.multiparser = self.mock_parser
        # NOTE(review): ``ts.stat_ta_tss`` stays replaced after this test.
        ts.stat_ta_tss = self.mock.mock_stat_ta_tss
        ta_path = os.path.join(self.trans, "tmp")
        os.mkdir(ta_path)
        os.mkdir(os.path.join(self.out, "gffs"))
        gen_file(os.path.join(ta_path, "test_transcript.gff"),
                 self.example.tran_file)
        args = self.mock_args.mock()
        args.fuzzy = 3
        args.trans = self.trans
        args.gffs = self.gffs
        with self._open_log() as log:
            self.tss._compare_ta(["test"], args, log)
        self.assertTrue(os.path.exists(os.path.join(
            self.trans, "test_transcript.gff")))
        self.assertTrue(os.path.exists(os.path.join(
            self.out, "gffs/test_TSS.gff")))
Пример #17
0
class TestsRNATargetPrediction(unittest.TestCase):
    """Tests for the helper methods of ``sRNATargetPrediction``.

    All files are written below ``test_folder``, which ``setUp`` creates and
    ``tearDown`` removes again.
    """

    def setUp(self):
        """Create the scratch folders and the instance under test."""
        self.mock_args = MockClass()
        self.example = Example()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.gffs = "test_folder/gffs"
        self.srnas = "test_folder/srnas"
        self.out = "test_folder/output"
        self.fastas = "test_folder/fastas"
        self.seq = "test_folder/output/sRNA_seqs"
        self.rnaup = "test_folder/output/RNAup"
        self.rnaplex = "test_folder/output/RNAplex"
        self.merge = "test_folder/output/merge"
        # exist_ok keeps the fixture complete even when an aborted run left a
        # partial ``test_folder`` behind.
        for folder in (self.test_folder, self.gffs, self.out, self.srnas,
                       self.fastas, self.rnaup, self.rnaplex, self.seq,
                       self.merge, os.path.join(self.rnaup, "test")):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.srnas = self.srnas
        args.fastas = self.fastas
        args.gffs = self.gffs
        self.star = sRNATargetPrediction(args)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_sort_srna_fasta(self):
        """``_sort_srna_fasta`` writes the records to ``tmp_test_sRNA.fa``."""
        fasta = os.path.join(self.fastas, "test.fa")
        gen_file(fasta, ">aaa\nAAAAAAAA\n>bbb\nCCCC\n>ccc\nGGGGGGGGGGGG")
        self.star._sort_srna_fasta(fasta, "test", self.test_folder)
        datas = import_data(
            os.path.join(self.test_folder, "tmp_test_sRNA.fa"))
        self.assertListEqual(
            datas,
            ['>bbb', 'CCCC', '>aaa', 'AAAAAAAA', '>ccc', 'GGGGGGGGGGGG'])

    def test_read_fasta(self):
        """``_read_fasta`` returns the sequence of a one-record fasta."""
        fasta = os.path.join(self.fastas, "test.fa")
        gen_file(fasta, ">aaa\nAAAAAAAA")
        seq = self.star._read_fasta(fasta)
        self.assertEqual(seq, "AAAAAAAA")

    def test_get_specific_seq(self):
        """``_get_specific_seq`` writes the queried sRNA as fasta."""
        srna_file = os.path.join(self.test_folder, "aaa_sRNA.gff")
        seq_file = os.path.join(self.test_folder, "aaa.fa")
        srna_out = os.path.join(self.test_folder, "out")
        gen_file(srna_file, self.example.srna_file)
        gen_file(seq_file, self.example.seq_file)
        self.star._get_specific_seq(srna_file, seq_file, srna_out,
                                    ["aaa:+:5:8"])
        datas = import_data(srna_out)
        self.assertEqual("\n".join(datas), '>srna0|aaa|5|8|+\nTAAT')

    def test_gen_seq(self):
        """``_gen_seq`` produces the sRNA and target fasta files."""
        srna_seq = os.path.join(self.out, "sRNA_seqs")
        tar_seq = os.path.join(self.out, "target_seqs")
        os.mkdir(os.path.join(self.srnas, "tmp"))
        os.mkdir(os.path.join(self.fastas, "tmp"))
        os.mkdir(os.path.join(self.gffs, "tmp"))
        os.mkdir(tar_seq)
        gen_file(os.path.join(self.srnas, "tmp", "aaa_sRNA.gff"),
                 self.example.srna_file)
        gen_file(os.path.join(self.fastas, "tmp", "aaa.fa"),
                 self.example.seq_file)
        gen_file(os.path.join(self.gffs, "tmp", "aaa.gff"),
                 self.example.gff_file)
        args = self.mock_args.mock()
        args.query = ["aaa:+:5:8"]
        args.features = ["CDS"]
        args.tar_start = 3
        args.tar_end = 5
        self.star._gen_seq(["aaa"], args)
        datas = import_data(os.path.join(srna_seq, "aaa_sRNA.fa"))
        self.assertEqual("\n".join(datas), '>srna0|aaa|5|8|+\nTAAT')
        datas = import_data(os.path.join(tar_seq, "aaa_target_1.fa"))
        self.assertEqual("\n".join(datas),
                         '>AAA_000001|CDS_00000\nTAAATTCC')

    def test_rna_plex(self):
        """``_rna_plex`` runs with the external tool wrappers mocked."""
        # Instance-level patches; ``self.star`` is rebuilt for every test.
        self.star._run_rnaplex = self.mock.mock_run_rnaplex
        self.star._run_rnaplfold = self.mock.mock_run_rnaplfold
        os.mkdir("test_folder/test")
        gen_file("test_folder/test/test_RNAplex.txt", "test")
        gen_file(os.path.join(self.test_folder, "aaa_RNAplex.txt"),
                 self.example.rnaplex)
        args = self.mock_args.mock()
        args.vienna_path = "test"
        args.win_size_s = 5
        args.win_size_t = 5
        args.span_s = 5
        args.span_t = 5
        args.unstr_region_rnaplex_s = 5
        args.unstr_region_rnaplex_t = 5
        self.star._rna_plex(["test"], args)
        datas = import_data("test_folder/test/test_RNAplex.txt")
        self.assertEqual("\n".join(datas), "test")

    def test_get_continue(self):
        """``_get_continue`` returns the sRNA ids found in an RNAup file."""
        out_rnaup = os.path.join(self.test_folder, "rnaup.txt")
        gen_file(out_rnaup, self.example.rnaup)
        srnas = self.star._get_continue(out_rnaup)
        self.assertListEqual(srnas, ["srna437"])

    def test_rnaup(self):
        """``_rnaup`` writes ``tmp1.fa`` when the RNAup call is mocked."""
        self.star._run_rnaup = self.mock.mock_run_rnaup
        record = ">srna0|aaa|5|8|+\nAAATTAATTAAATTCCGGCCGGCCGG"
        gen_file(os.path.join(self.out, "sRNA_seqs/tmp_test_sRNA.fa"),
                 record)
        gen_file(os.path.join(self.gffs, "test_target.fa"),
                 ">AAA_000001|CDS_00000\nAAATTAATTAAATTCCGGCCGGCCGG")
        args = self.mock_args.mock()
        args.srnas = self.srnas
        args.fastas = self.fastas
        args.gffs = self.gffs
        args.vienna_path = "test"
        args.out_folder = self.out
        args.core_up = 4
        self.star._rnaup(["test"], args)
        datas = import_data(os.path.join(self.out, "tmp1.fa"))
        self.assertEqual("\n".join(datas), record)

    def test_merge_rnaplex_rnaup(self):
        """``_merge_rnaplex_rnaup`` hands both result files to the merger."""
        # Restore the real function afterwards so the mock cannot leak into
        # tests that run later.
        self.addCleanup(setattr, st, "merge_srna_target",
                        st.merge_srna_target)
        st.merge_srna_target = self.mock.mock_merge_srna_target
        args = self.mock_args.mock()
        args.srnas = self.srnas
        args.fastas = self.fastas
        args.gffs = self.gffs
        args.program = "both"
        args.out_folder = self.out
        args.top = "top"
        self.star._merge_rnaplex_rnaup(["test"], args)
        datas = import_data(os.path.join(self.test_folder, "out"))
        self.assertEqual(
            "\n".join(datas),
            "test_folder/output/RNAplex/test/test_RNAplex.txt"
            "test_folder/output/RNAup/test/test_RNAup.txt")
Пример #18
0
class TestTerminator(unittest.TestCase):
    """Tests for the ``Terminator`` pipeline and its private helpers.

    Files live below ``test_folder``; a few helpers also use the
    ``tmp_transterm``, ``tmp_term_table`` and ``tmp_merge_gff`` folders in the
    current working directory, which ``tearDown`` removes as well.
    """

    def setUp(self):
        """Create the scratch folders and the instance under test."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.mock_parser = Mock_Multiparser()
        self.test_folder = "test_folder"
        self.out = "test_folder/output"
        self.fastas = "test_folder/fastas"
        self.gffs = "test_folder/gffs"
        self.srnas = "test_folder/srnas"
        self.trans = "test_folder/trans"
        # exist_ok keeps the fixture complete even when an aborted run left a
        # partial ``test_folder`` behind.
        for folder in (self.test_folder, self.out, self.fastas, self.gffs,
                       self.srnas, self.trans,
                       os.path.join(self.out, "tables"),
                       os.path.join(self.out, "gffs"),
                       os.path.join(self.gffs, "tmp"),
                       os.path.join(self.fastas, "tmp")):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.trans = self.trans
        args.out_folder = self.out
        args.srnas = self.srnas
        self.term = Terminator(args)

    def tearDown(self):
        """Remove the scratch tree and the working-directory temp folders."""
        for folder in (self.test_folder, "tmp_transterm",
                       "tmp_term_table", "tmp_merge_gff"):
            if os.path.exists(folder):
                shutil.rmtree(folder)

    def _patch(self, module, name, replacement):
        """Replace ``module.name`` and restore the old value on cleanup."""
        self.addCleanup(setattr, module, name, getattr(module, name))
        setattr(module, name, replacement)

    def test_convert_gff2rntptt(self):
        """``_convert_gff2rntptt`` reports the file type and the prefixes."""
        os.mkdir(os.path.join(self.srnas, "tmp"))
        gen_file(os.path.join(self.gffs, "aaa.gff"), self.example.gff_file)
        gen_file(os.path.join(self.srnas, "aaa_sRNA.gff"),
                 self.example.srna_file)
        gen_file(os.path.join(self.fastas, "aaa.fa"), self.example.fasta_file)
        file_types, prefixs = self.term._convert_gff2rntptt(
            self.gffs, self.fastas, self.srnas)
        self.assertDictEqual(file_types, {'aaa': 'srna'})
        self.assertListEqual(prefixs, ['aaa'])

    def test_combine_annotation(self):
        """Combining two ptt files keeps each body minus its first 3 lines."""
        test1 = os.path.join(self.test_folder, "test1.ptt")
        test2 = os.path.join(self.test_folder, "test2.ptt")
        gen_file(test1, self.example.ptt)
        gen_file(test2, self.example.ptt)
        combine_file = os.path.join(self.test_folder, "combine")
        self.term._combine_annotation(combine_file, [test1, test2])
        datas = import_data(combine_file)
        result = self.example.ptt.split("\n")[3:]
        self.assertEqual("\n".join(datas), "\n".join(result + result))

    def test_run_TransTermHP(self):
        """``_run_TransTermHP`` creates a per-genome output folder."""
        self.term._TransTermHP = self.mock.mock_TransTermHP
        os.mkdir(os.path.join(self.gffs, "tmp/combine"))
        gen_file(os.path.join(self.gffs, "tmp/combine/aaa.ptt"),
                 self.example.ptt)
        gen_file(os.path.join(self.fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.hp_folder = self.out
        self.term._run_TransTermHP(args)
        self.assertTrue(os.path.exists(os.path.join(self.out, "aaa")))

    def test_convert_to_gff(self):
        """``_convert_to_gff`` turns the TransTermHP bag file into gff."""
        self.term.multiparser = self.mock_parser
        hp_folder = os.path.join(self.out, "aaa")
        os.mkdir(hp_folder)
        gen_file(os.path.join(hp_folder, "aaa_best_terminator_after_gene.bag"),
                 self.example.bag)
        os.mkdir("tmp_transterm")
        args = self.mock_args.mock()
        args.hp_folder = self.out
        args.gffs = self.gffs
        self.term._convert_to_gff(["aaa"], args)
        # Read from the ``tmp_transterm`` folder created above instead of a
        # machine-specific absolute path, so the test runs from any checkout.
        datas = import_data(
            os.path.join("tmp_transterm", "aaa_transtermhp.gff"))
        self.assertEqual("\n".join(datas), self.example.gff_bag)

    def test_merge_sRNA(self):
        """``_merge_sRNA`` returns the path of the ``tmp_merge_gff`` folder."""
        os.mkdir(os.path.join(self.srnas, "tmp"))
        self.term.multiparser = self.mock_parser
        gen_file(os.path.join(self.gffs, "aaa.gff"), self.example.gff_file)
        gen_file(os.path.join(self.srnas, "tmp/aaa_sRNA.gff"),
                 self.example.srna_file)
        merge = self.term._merge_sRNA(self.srnas, ["aaa"], self.gffs)
        self.assertEqual(merge.split("/")[-1], "tmp_merge_gff")
        shutil.rmtree("tmp_merge_gff")

    def test_move_file(self):
        """``_move_file`` places the gff and csv under ``all_candidates``."""
        term_outfolder = self.gffs
        csv_outfolder = self.out
        gen_file(os.path.join(term_outfolder, "aaa_term.gff"),
                 self.example.term_file)
        if not os.path.exists("tmp_term_table"):
            os.mkdir("tmp_term_table")
        gen_file("tmp_term_table/aaa_term_raw.csv", "test")
        self.term._move_file(term_outfolder, csv_outfolder)
        shutil.rmtree("tmp_term_table")
        # assertTrue() on a bare, non-empty path string can never fail, so
        # the existence of each file is checked explicitly.
        self.assertTrue(os.path.exists(
            "test_folder/output/gffs/all_candidates/aaa_term_all.gff"))
        self.assertTrue(os.path.exists(
            "test_folder/output/tables/all_candidates/aaa_term_all.csv"))

    def test_compute_intersection_forward_reverse(self):
        """Run the intersection step with every collaborator mocked."""
        self.term.multiparser = self.mock_parser
        self._patch(te, "intergenic_seq", self.mock.mock_intergenic_seq)
        self._patch(te, "poly_t", self.mock.mock_poly_t)
        self._patch(te, "detect_coverage", self.mock.mock_detect_coverage)
        self.term._run_rnafold = self.mock.mock_run_rnafold
        args = self.mock_args.mock()
        args.trans = self.trans
        args.fastas = self.fastas
        args.tex_notex = "tex_notex"
        args.libs = "libs"
        args.replicates = "rep"
        args.RNAfold_path = "test"
        self.term._compute_intersection_forward_reverse(["aaa"],
                                                        self.test_folder,
                                                        "wig_path",
                                                        "merge_wigs", args)
        # NOTE(review): assertTrue() on a non-empty path string always
        # passes. These probably want os.path.exists(); left as-is because
        # every collaborator that would create the files is mocked here --
        # confirm the mocks write them before tightening.
        self.assertTrue(os.path.join(self.out, "inter_seq_aaa"))
        self.assertTrue(os.path.join(self.out, "inter_sec_aaa"))

    def test_compute_stat(self):
        """``_compute_stat`` leaves per-class csv tables in ``tables``."""
        term_outfolder = os.path.join(self.out, "gffs")
        csv_outfolder = os.path.join(self.out, "tables")
        self._patch(te, "stat_term", self.mock.mock_stat_term)
        gen_file(
            os.path.join(term_outfolder, "all_candidates/aaa_term_all.gff"),
            self.example.term_file)
        gen_file(os.path.join(term_outfolder, "best/aaa_term.csv"),
                 self.example.term_file)
        gen_file(os.path.join(term_outfolder, "express/aaa_term.csv"),
                 self.example.term_file)
        gen_file(os.path.join(term_outfolder, "non_express/aaa_term.csv"),
                 self.example.term_file)
        args = self.mock_args.mock()
        args.stat = True
        args.out_folder = self.out
        self.term._compute_stat(args)
        for table in ("express/aaa_term.csv", "best/aaa_term.csv",
                      "non_express/aaa_term.csv"):
            self.assertTrue(
                os.path.exists(os.path.join(csv_outfolder, table)))

    def test_run_terminator(self):
        """``run_terminator`` creates the gff and table output folders."""
        self._patch(te, "stat_term", self.mock.mock_stat_term)
        self._patch(te, "intergenic_seq", self.mock.mock_intergenic_seq)
        self._patch(te, "poly_t", self.mock.mock_poly_t)
        self._patch(te, "detect_coverage", self.mock.mock_detect_coverage)
        # Instance-level patches; ``self.term`` is rebuilt for every test.
        self.term.multiparser = self.mock_parser
        self.term._run_rnafold = self.mock.mock_run_rnafold
        self.term._TransTermHP = self.mock.mock_TransTermHP
        self.term._compare_term_tran = self.mock.mock_compare_term_tran
        self.term._remove_tmp_file = self.mock.mock_remove_tmp_file
        os.mkdir(os.path.join(self.srnas, "tmp"))
        os.mkdir(os.path.join(self.trans, "tmp"))
        gen_file(os.path.join(self.gffs, "tmp/aaa.gff"), self.example.gff_file)
        gen_file(os.path.join(self.fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.srnas, "tmp/aaa_sRNA.gff"),
                 self.example.srna_file)
        gen_file(os.path.join(self.trans, "tmp/aaa_transcript.gff"),
                 self.example.tran_file)
        tex_wigs = os.path.join(self.test_folder, "tex")
        frag_wigs = os.path.join(self.test_folder, "frag")
        os.mkdir(tex_wigs)
        os.mkdir(frag_wigs)
        gen_file(os.path.join(frag_wigs, "frag.wig"), "text")
        gen_file(os.path.join(tex_wigs, "tex.wig"), "text")
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.fastas = self.fastas
        args.gffs = self.gffs
        args.trans = self.trans
        args.srnas = self.srnas
        args.tex_wigs = tex_wigs
        args.frag_wigs = frag_wigs
        args.hp_folder = self.test_folder
        args.tex_notex = "tex_notex"
        args.wig_path = self.test_folder
        args.merge_wigs = self.test_folder
        args.RNAfold_path = "RNAfold_path"
        args.stat = True
        args.fuzzy_up_ta = 2
        args.fuzzy_up_gene = 2
        args.fuzzy_down_ta = 2
        args.fuzzy_down_gene = 2
        self.term.run_terminator(args)
        for folder in ("tables/all_candidates", "tables/express",
                       "tables/best", "gffs/all_candidates",
                       "gffs/express", "gffs/best"):
            self.assertTrue(os.path.exists(os.path.join(self.out, folder)))
Пример #19
0
class TestsRNAIntergenic(unittest.TestCase):
    """Tests for the functions of the intergenic sRNA module (``si``).

    Module-level collaborators are replaced through ``_patch``, which puts
    the previous object back when the test finishes, so no mock survives
    into another test.
    """

    def setUp(self):
        """Create the fixtures and the scratch folders."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.wig_folder = "test_folder/wigs"
        # exist_ok keeps the fixture complete even when an aborted run left a
        # partial ``test_folder`` behind.
        for folder in (self.test_folder, self.wig_folder):
            os.makedirs(folder, exist_ok=True)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _patch(self, module, name, replacement):
        """Replace ``module.name`` and restore the old value on cleanup."""
        self.addCleanup(setattr, module, name, getattr(module, name))
        setattr(module, name, replacement)

    def test_read_data(self):
        """``read_data`` parses annotation, transcript and processing gffs."""
        gff_file = os.path.join(self.test_folder, "anno.gff")
        tran_file = os.path.join(self.test_folder, "tran.gff")
        pro_file = os.path.join(self.test_folder, "pro.gff")
        gen_file(gff_file, self.example.gff_file)
        gen_file(tran_file, self.example.gff_file)
        gen_file(pro_file, self.example.gff_file)
        args = self.mock_args.mock()
        args.gff_file = gff_file
        args.tran_file = tran_file
        args.pro_file = pro_file
        args.ex_srna = False
        nums, cdss, tas, pros, _genes, _ncs = si.read_data(args)
        self.assertDictEqual(nums, {'ta': 3, 'cds': 3, 'pro': 3, 'uni': 0})
        self.assertEqual(cdss[0].start, 140)
        self.assertEqual(tas[0].start, 140)
        self.assertEqual(pros[0].start, 140)

    def test_read_tss(self):
        """``read_tss`` returns the parsed entries of the TSS gff."""
        tss_file = os.path.join(self.test_folder, "tss.gff")
        gen_file(tss_file, self.example.gff_file)
        tsss, _num_tss = si.read_tss(tss_file)
        self.assertEqual(tsss[0].start, 140)

    def test_compare_ta_cds(self):
        """``compare_ta_cds`` sets the ``overlap`` flag in ``detects``."""
        detects = {"overlap": False}
        gffs = copy.deepcopy(self.example.gffs)
        tas = copy.deepcopy(self.example.tas)
        si.compare_ta_cds(gffs, tas[0], detects)
        self.assertDictEqual(detects, {'overlap': True})

    def test_compare_ta_tss(self):
        """``compare_ta_tss`` writes one gff line and one table row."""
        out_table = StringIO()
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        output = StringIO()
        detects = {"overlap": False, "uni_with_tss": False}
        # Registered as a cleanup so the real function is restored even when
        # one of the assertions below fails.
        self._patch(si, "get_coverage", self.mock.mock_get_coverage)
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.nums = nums
        args.fuzzy = 20
        args.detects = detects
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.output = output
        args.out_table = out_table
        tas = copy.deepcopy(self.example.tas)
        tsss = copy.deepcopy(self.example.tsss)
        si.compare_ta_tss(10, 2, 15, tas[0], tsss[0], 50, "cutoff",
                          20, "", args)
        self.assertEqual(output.getvalue(),
                         ("aaa\tANNOgesic\tncRNA\t10\t15\t.\t+\t.\t"
                          "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
                          "with_TSS=TSS:170_+\n"))
        self.assertEqual(out_table.getvalue(),
                         ("aaa\t00000\t10\t15\t+\tNA\tNA\t"
                          "NA\tNA\tNA\tTSS:170_+\n"))

    def test_print_file(self):
        """``print_file`` formats the table row and the gff line."""
        string = "aaa\tintergenic\tsRNA\t10\t15\t.\t+\t."
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        srna_datas = {"high": 20, "low": 5, "best": 13,
                      "conds": {"cond1": "test1"},
                      "detail": [{"track": "test1", "high": 30,
                                  "low": 10, "avg": 15},
                                 {"track": "test2", "high": 25,
                                  "low": 13, "avg": 20}]}
        args = self.mock_args.mock()
        args.nums = nums
        args.out_table = out_table
        args.output = output
        args.table_best = False
        si.print_file(string, "TSS_160+", srna_datas, "intergenic",
                      args, "aaa")
        self.assertEqual(out_table.getvalue(),
                         ("aaa\t00000\t10\t15\t+\tcond1\ttest1\t13\t20\t5\t"
                          "TSS_160+\ttest1(avg=15;high=30;low=10);"
                          "test2(avg=20;high=25;low=13)\n"))
        self.assertEqual(output.getvalue(),
                         ("aaa\tintergenic\tsRNA\t10\t15\t.\t+\t.\t"
                          "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
                          "with_TSS=TSS_160+;best_avg_coverage=13;"
                          "best_high_coverage=20;best_low_coverage=5\n"))

    def test_detect_include_tss(self):
        """``detect_include_tss`` reports the sRNA starting at the TSS."""
        self._patch(si, "get_coverage", self.mock.mock_get_coverage)
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        detects = {"overlap": False, "uni_with_tss": False}
        coverage = {"primary": 0, "secondary": 0, "internal": 0,
                    "antisense": 50, "orphan": 10}
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.nums = nums
        args.fuzzy = 20
        args.detects = detects
        args.cutoff_coverage = coverage
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.notex = coverage
        args.file_type = "frag"
        args.break_tran = False
        args.output = output
        args.out_table = out_table
        tas = copy.deepcopy(self.example.tas)
        si.detect_include_tss(tas[0], args, None, args.wigs_f, args.wigs_r)
        self.assertEqual(output.getvalue(),
                         ("aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\t"
                          "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
                          "with_TSS=TSS:170_+\n"))
        self.assertEqual(out_table.getvalue(),
                         ("aaa\t00000\t170\t230\t+\tNA\tNA\tNA"
                          "\tNA\tNA\tTSS:170_+\n"))

    def test_get_differential_cover(self):
        """``get_differential_cover`` updates cover sets and stop point."""
        checks = {"detect_diff": True, "first": True}
        cover_sets = {"diff": 30, "low": 5, "high": 35}
        cover = 20
        poss = {"stop_point": 100}
        args = self.mock_args.mock()
        args.fuzzy_inter = 10
        args.decrease_inter = 200
        si.get_differential_cover(0, checks, cover_sets, poss, cover, args, 80)
        self.assertDictEqual(cover_sets, {'diff': 20, 'low': 20, 'high': 35})
        cover = 50
        poss = {"stop_point": 100}
        num = 20
        args.fuzzy_inter = 20
        si.get_differential_cover(num, checks, cover_sets, poss,
                                  cover, args, 80)
        self.assertDictEqual(poss, {"stop_point": 80})

    def test_check_coverage_pos(self):
        """``check_coverage_pos`` returns False and leaves ``poss`` as is."""
        self._patch(si, "coverage_comparison",
                    self.mock.mock_coverage_comparison)
        cover_sets = {"low": 20, "high": 30, "total": 90, "diff": 50}
        poss = {"high": 20, "low": 70, "stop_point": 70}
        checks = {"detect_diff": True, "first": True}
        cover = {"coverage": 50, "pos": 80}
        detect = si.check_coverage_pos(30, 100, cover, 80, cover_sets,
                                       checks, poss, "+", 5)
        self.assertFalse(detect)
        self.assertDictEqual(poss, {'high': 20, 'stop_point': 70, 'low': 70})

    def test_get_best(self):
        """``get_best`` returns one coverage record per condition."""
        # NOTE(review): mocked explicitly because this test used to run
        # against a ``coverage_comparison`` mock leaked by
        # ``test_check_coverage_pos``; the expected low/high of -1 look like
        # they depend on it -- confirm.
        self._patch(si, "coverage_comparison",
                    self.mock.mock_coverage_comparison)
        args = self.mock_args.mock()
        args.tolerance = 5
        args.fuzzy_inter = 5
        args.decrease_inter = 50
        datas = si.get_best(self.example.wigs, "aaa", "+", 2, 20,
                            "normal", args, 10)
        self.assertDictEqual(datas, {'frag_1': [
            {'low': -1, 'high': -1, 'avg': 30.7, 'pos': 21,
             'type': 'frag', 'track': 'track_1'}]})

    def test_get_attribute_string(self):
        """``get_attribute_string`` builds the gff attribute column."""
        srna_datas = {'best': 23, 'low': 20, 'high': 35}
        data = si.get_attribute_string(srna_datas, "TSS_100+;Cleavage_150+",
                                       1, "sRNA_00001", "3utr", "aaa")
        self.assertEqual(data,
                         ("ID=aaa_srna1;Name=sRNA_sRNA_00001;sRNA_type=3utr;"
                          "with_TSS=TSS_100+;end_cleavage=Cleavage_150+;"
                          "best_avg_coverage=23;best_high_coverage=35;"
                          "best_low_coverage=20"))

    def test_check_pro(self):
        """``check_pro`` returns the cleavage position and coverage data."""
        self._patch(si, "replicate_comparison",
                    self.mock.mock_replicate_comparison)
        srna_datas = {"pos": 50}
        # NOTE(review): this is a set literal, not a dict -- confirm that is
        # what ``check_pro`` expects for ``texs``.
        texs = {"track_1@AND@track_2"}
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.replicates = "rep"
        args.texs = texs
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        pro_pos, new_srna_datas, detect_pro = si.check_pro(
            tas[0], 20, 70, srna_datas, "within", 5,
            self.example.wigs, 20, args)
        self.assertEqual(pro_pos, 190)
        self.assertDictEqual(new_srna_datas, {
            'best': 40, 'high': 50, 'low': 10, "pos": 5,
            "conds": {"cond1": "test1"}, "detail": None})
        self.assertEqual(detect_pro, "Cleavage:190_+")

    def test_exchange_to_pro(self):
        """``exchange_to_pro`` moves the sRNA end to the cleavage site."""
        srna_datas = {"pos": 50, "best": 10, "high": 12}
        args = self.mock_args.mock()
        args.max_len = 300
        args.min_len = 30
        args.table_best = True
        args.replicates = "rep"
        args.tex_notex = "tex_notex"
        args.texs = "texs"
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.tolerance = 5
        self._patch(si, "replicate_comparison",
                    self.mock.mock_replicate_comparison)
        detect, srna_datas, pro = si.exchange_to_pro(
            args, srna_datas, tas[0], 20, 70, 10, self.example.wigs, 20)
        self.assertTrue(detect)
        self.assertDictEqual(srna_datas, {
            'best': 40, 'high': 50, 'low': 10, 'pos': 190,
            "conds": {"cond1": "test1"}, "detail": None})
        self.assertEqual(pro, "Cleavage:190_+")

    def test_get_tss_type(self):
        """``get_tss_type`` returns the cutoff of the TSS's class."""
        coverage = {"primary": 0, "secondary": 0, "internal": 0,
                    "antisense": 50, "orphan": 10}
        self._patch(si, "check_break_tran", self.mock.mock_check_break_tran)
        cover = si.get_tss_type(self.example.tsss[0], coverage,
                                None, None, None, False)
        self.assertEqual(cover, 10)

    def test_detect_wig_pos(self):
        """``detect_wig_pos`` prints the sRNA ending at the cleavage site."""
        self._patch(si, "replicate_comparison",
                    self.mock.mock_replicate_comparison)
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        args = self.mock_args.mock()
        args.texs = "texs"
        args.replicates = "rep"
        args.max_len = 300
        args.min_len = 30
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tex_notex = "tex_notex"
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.table_best = True
        args.nums = nums
        args.out_table = out_table
        args.output = output
        args.tolerance = 5
        si.detect_wig_pos(self.example.wigs, tas[0], 20, 70, "TSS_160+",
                          10, 20, args)
        self.assertEqual(output.getvalue(),
                         ("aaa\tANNOgesic\tncRNA\t20\t190\t.\t+\t.\t"
                          "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
                          "with_TSS=TSS_160+;end_cleavage=Cleavage:190_+;"
                          "best_avg_coverage=40;best_high_coverage=50;"
                          "best_low_coverage=10\n"))
        self.assertEqual(out_table.getvalue(),
                         ("aaa\t00000\t20\t190\t+\tcond1\t"
                          "test1\t40\t50\t10\t\n"))

    def test_detect_longer(self):
        """``detect_longer`` reports the sRNA for a long transcript."""
        self._patch(si, "replicate_comparison",
                    self.mock.mock_replicate_comparison)
        self._patch(si, "coverage_comparison",
                    self.mock.mock_coverage_comparison)
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        detects = {"overlap": False, "uni_with_tss": False}
        coverage = {"primary": 0, "secondary": 0, "internal": 0,
                    "antisense": 50, "orphan": 10}
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.nums = nums
        args.fuzzy = 20
        args.file_type = "frag"
        args.break_tran = False
        args.detects = detects
        args.cutoff_coverage = coverage
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.notex = 20
        args.output = output
        args.out_table = out_table
        # Restored on cleanup; otherwise ``test_get_tss_type`` would end up
        # exercising this mock instead of the real function.
        self._patch(si, "get_tss_type", self.mock.mock_get_tss_type)
        si.detect_longer(tas[0], args, None, args.wigs_f, args.wigs_r)
        self.assertEqual(output.getvalue(),
                         ("aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\t"
                          "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
                          "with_TSS=TSS:170_+\n"))
        self.assertEqual(out_table.getvalue(),
                         ("aaa\t00000\t170\t230\t+\tNA\tNA\tNA"
                          "\tNA\tNA\tTSS:170_+\n"))

    def test_get_proper_tss(self):
        """``get_proper_tss`` returns the parsed entries of the TSS gff."""
        tss_file = os.path.join(self.test_folder, "tss.gff")
        gen_file(tss_file, self.example.gff_file)
        coverage = {"primary": 0, "secondary": 0, "internal": 0,
                    "antisense": 50, "orphan": 10}
        tsss, _num_tss = si.get_proper_tss(tss_file, coverage)
        self.assertEqual(tsss[0].start, 140)

    def test_check_srna_condition(self):
        """``check_srna_condition`` reports the sRNA for the transcript."""
        self._patch(si, "replicate_comparison",
                    self.mock.mock_replicate_comparison)
        self._patch(si, "coverage_comparison",
                    self.mock.mock_coverage_comparison)
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        detects = {"overlap": False, "uni_with_tss": False}
        notex = {"primary": 0, "secondary": 0, "internal": 0,
                 "antisense": 30, "orphan": 10}
        coverage = {"primary": 0, "secondary": 0, "internal": 0,
                    "antisense": 50, "orphan": 10}
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.nums = nums
        args.fuzzy = 20
        args.detects = detects
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.file_type = "frag"
        args.break_tran = False
        args.notex = notex
        args.output = output
        args.cutoff_coverage = coverage
        args.out_table = out_table
        si.check_srna_condition(tas[0], args, None, args.wigs_f, args.wigs_r)
        self.assertEqual(output.getvalue(),
                         ("aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\t"
                          "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
                          "with_TSS=TSS:170_+\n"))
        self.assertEqual(out_table.getvalue(),
                         ("aaa\t00000\t170\t230\t+\tNA\tNA\t"
                          "NA\tNA\tNA\tTSS:170_+\n"))

    def test_intergenic_srna(self):
        """``intergenic_srna`` writes the output gff and table files."""
        self._patch(si, "read_libs", self.mock.mock_read_libs)
        self._patch(si, "read_wig", self.mock.mock_read_wig)
        gff_file = os.path.join(self.test_folder, "aaa.gff")
        tss_file = os.path.join(self.test_folder, "aaa_TSS.gff")
        tran_file = os.path.join(self.test_folder, "aaa_tran.gff")
        pro_file = os.path.join(self.test_folder, "aaa_processing.gff")
        wig_f_file = os.path.join(self.wig_folder, "wig_f.wig")
        wig_r_file = os.path.join(self.wig_folder, "wig_r.wig")
        gen_file(gff_file, self.example.gff_file)
        gen_file(tss_file, self.example.gff_file)
        gen_file(tran_file, self.example.gff_file)
        gen_file(pro_file, self.example.gff_file)
        output_file = os.path.join(self.test_folder, "output")
        output_table = os.path.join(self.test_folder, "table")
        coverage = [0, 0, 0, 50, 10]
        self._patch(si, "replicate_comparison",
                    self.mock.mock_replicate_comparison)
        self._patch(si, "coverage_comparison",
                    self.mock.mock_coverage_comparison)
        args = self.mock_args.mock()
        args.gff_file = gff_file
        args.tran_file = tran_file
        args.pro_file = pro_file
        args.tss_file = tss_file
        args.table_best = True
        args.cutoffs = coverage
        args.out_folder = self.test_folder
        args.file_type = "frag"
        args.cut_notex = coverage
        args.input_libs = "input_libs"
        args.wig_folder = self.wig_folder
        args.wig_f_file = wig_f_file
        args.wig_r_file = wig_r_file
        args.tss_source = True
        args.output_file = output_file
        args.output_table = output_table
        args.in_cds = False
        args.wigs_f = None
        args.wigs_r = None
        args.ex_srna = False
        si.intergenic_srna(args, args.input_libs, None,
                           args.wigs_f, args.wigs_r, tss_file)
        self.assertTrue(os.path.exists(output_file))
        self.assertTrue(os.path.exists(output_table))
Пример #20
0
class TestRibos(unittest.TestCase):
    """Tests for ``Ribos._scan_extract_rfam`` and ``Ribos._merge_results``."""

    def setUp(self):
        """Build a fresh folder tree under ``test_folder`` and a ``Ribos``."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.gffs = os.path.join(self.test_folder, "gffs")
        self.fastas = os.path.join(self.test_folder, "fastas")
        self.out_folder = os.path.join(self.test_folder, "output")
        self.database = os.path.join(self.test_folder, "database")
        self.seq_path = os.path.join(self.test_folder, "seqs")
        self.tables = os.path.join(self.out_folder, "tables")
        self.stat = os.path.join(self.out_folder, "statistics")
        self.scan = os.path.join(self.test_folder, "scan")
        self.tsss = os.path.join(self.test_folder, "tsss")
        self.trans = os.path.join(self.test_folder, "trans")
        self.out_gff = os.path.join(self.out_folder, "gffs")
        # Always start from an empty tree. If a previous run was aborted and
        # left ``test_folder`` behind, creating the sub-folders only when the
        # root is missing would skip all of them and the tests would fail on
        # missing directories (or pass against stale files).
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        # Parents are listed before their children because os.mkdir does not
        # create intermediate directories.
        folders = (
            self.test_folder,
            self.tsss,
            os.path.join(self.tsss, "tmp"),
            self.trans,
            os.path.join(self.trans, "tmp"),
            self.gffs,
            os.path.join(self.gffs, "tmp"),
            self.fastas,
            os.path.join(self.fastas, "tmp"),
            self.out_folder,
            self.database,
            self.seq_path,
            os.path.join(self.out_folder, "tmp_table"),
            os.path.join(self.out_folder, "tmp_scan"),
            os.path.join(self.out_folder, "tmp_fasta"),
            os.path.join(self.out_folder, "scan_Rfam"),
            self.tables,
            self.scan,
            self.stat,
            self.out_gff,
        )
        for folder in folders:
            os.mkdir(folder)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.ribos_out_folder = self.out_folder
        args.database = self.database
        args.tsss = self.tsss
        args.trans = self.trans
        # NOTE(review): 'riboswtich' looks like a typo of 'riboswitch'. It is
        # left untouched because the Ribos constructor may branch on this
        # value -- confirm against Ribos.__init__ before correcting it.
        args.program = 'riboswtich'
        self.ribo = Ribos(args)

    def tearDown(self):
        """Remove ``test_folder`` if it is still present."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _stub_rb(self, name, stub):
        """Replace ``rb.<name>`` with *stub* until the current test ends.

        The original attribute is restored through ``addCleanup`` so the stub
        cannot leak into tests that run afterwards.
        """
        self.addCleanup(setattr, rb, name, getattr(rb, name))
        setattr(rb, name, stub)

    def test_scan_extract_rfam(self):
        """``_scan_extract_rfam`` records the prefix and the regenerated fasta.

        The cmscan runner on the instance and three ``rb`` helpers are
        stubbed. The test asserts that ``prefixs`` ends up as ``["test"]``
        and that ``tmp_fasta/test_regenerate.fa`` exists in the output folder.
        """
        # Instance-level stub: it disappears together with ``self.ribo``.
        self.ribo._run_cmscan = self.mock.mock_run_cmscan
        self._stub_rb("modify_table", self.mock.mock_modify_table)
        self._stub_rb("regenerate_seq", self.mock.mock_regenerate_seq)
        self._stub_rb("reextract_rbs", self.mock.mock_reextract_rbs)
        prefixs = []
        gen_file(os.path.join(self.gffs, "tmp/test.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.fastas, "tmp/test.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.seq_path, "test.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.tsss, "tmp/test_TSS.gff"),
                 self.example.tss_file)
        gen_file(os.path.join(self.trans, "tmp/test_transcript.gff"),
                 self.example.tran_file)
        gen_file(os.path.join(self.out_folder, "tmp_fasta", "test.fa"),
                 self.example.fasta_file)
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.fastas = self.fastas
        args.out_folder = self.out_folder
        args.gffs = self.gffs
        args.fuzzy = 5
        args.fuzzy_rbs = 2
        args.utr = True
        args.output_all = "test"
        tmp_files = {
            "fasta": os.path.join(self.out_folder, "tmp_fasta"),
            # Bare folder name here, unlike the other two entries.
            "scan": "tmp_scan",
            "table": os.path.join(self.out_folder, "tmp_table")
        }
        rfam = "Rfam_.cm"
        suffixs = {
            "csv": "test.csv",
            "txt": "test_prescan.txt",
            "re_txt": "test_scan.txt",
            "re_csv": "test_scan.csv"
        }
        self.ribo._scan_extract_rfam(prefixs, args, tmp_files, suffixs, "test",
                                     rfam)
        self.assertListEqual(prefixs, ["test"])
        self.assertTrue(
            os.path.exists(
                os.path.join(self.out_folder, "tmp_fasta",
                             "test_regenerate.fa")))

    def test_merge_results(self):
        """Smoke test: ``_merge_results`` runs on prepared inputs.

        No assertion is made on the produced files; the test passes as long
        as the call does not raise.
        """
        self._stub_rb("stat_and_covert2gff",
                      self.mock.mock_stat_and_covert2gff)
        # Temporary folders are derived from ``self.out_folder`` so they
        # follow the fixture if its location is ever changed.
        tmp_table = os.path.join(self.out_folder, "tmp_table")
        tmp_scan = os.path.join(self.out_folder, "tmp_scan")
        tmp_fasta = os.path.join(self.out_folder, "tmp_fasta")
        scan_rfam = os.path.join(self.out_folder, "scan_Rfam")

        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gen_file(os.path.join(tmp_table, "test_riboswitch.csv"),
                 self.example.table)
        gen_file(os.path.join(tmp_scan, "test_riboswitch_prescan.txt"),
                 self.example.rescan_file)
        gen_file(os.path.join(tmp_scan, "test_riboswitch_scan.txt"),
                 self.example.rescan_file)
        gen_file(os.path.join(self.test_folder, "ids"), self.example.ids)
        gen_file(os.path.join(self.tables, "test_riboswitch.csv"),
                 self.example.table)
        gen_file(os.path.join(tmp_table, "test_test_scan.csv"), "test")
        gen_file(os.path.join(tmp_fasta, "test_regenerate.fa"), "test")
        gen_file(os.path.join(tmp_scan, "test_test_prescan.txt"), "test")
        gen_file(os.path.join(tmp_scan, "test_test_scan.txt"), "test")
        test_csv = os.path.join(tmp_table, "test_test.csv")
        if not os.path.exists(test_csv):
            gen_file(test_csv, "test")
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.fastas = self.fastas
        args.out_folder = self.out_folder
        args.gffs = self.gffs
        args.ribos_id = os.path.join(self.test_folder, "ids")
        args.fuzzy = 3
        suffixs = {
            "csv": "test.csv",
            "txt": "test_prescan.txt",
            "re_txt": "test_scan.txt",
            "re_csv": "test_scan.csv"
        }
        tmp_files = {
            "fasta": tmp_fasta,
            "scan": tmp_scan,
            "table": tmp_table
        }
        self.ribo._merge_results(args, tmp_scan, suffixs, tmp_files,
                                 tmp_scan, scan_rfam, scan_rfam,
                                 self.out_gff, "riboswitch")
class TestCoverageTerminator(unittest.TestCase):
    """Tests for the terminator/coverage helper functions of ``dct``."""

    def setUp(self):
        """Create the fixtures and make sure ``test_folder`` exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove ``test_folder`` if it is still present."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_compare_ta(self):
        """``compare_ta`` sets the ``express`` flag on the example terms."""
        trans = read_dict(3, self.example.tran_dict,
                          self.example.attributes_tran)
        dct.compare_ta(self.example.term_dict, trans, 5)
        express = [term["express"] for term in self.example.term_dict]
        self.assertListEqual(express, ["True", "True", "False"])

    def test_compare_transtermhp(self):
        """``compare_transtermhp`` returns the expected positions/methods."""
        hps = read_dict(3, self.example.hp_dict, self.example.attributes_term)
        terms = dct.compare_transtermhp(hps, self.example.term_dict)
        # Sort so the comparison does not depend on the returned order.
        terms = sorted(terms, key=lambda x: (x["strain"], x["start"]))
        poss = ["_".join([str(term["start"]), str(term["end"])])
                for term in terms]
        methods = [term["method"] for term in terms]
        self.assertListEqual(poss, [
            '30_40', '350_367', '420_432', '1420_2429'])
        self.assertListEqual(methods, [
            'TransTermHP', 'forward_reverse,TransTermHP',
            'forward_reverse,TransTermHP', 'forward_reverse'])

    def test_compare_replicates(self):
        """``compare_replicates`` for a tex/notex pair and for a frag track."""
        texs = {"track_tex_track_notex": 0}
        args = self.mock_args.mock()
        args.replicates = {"tex": ["all_1"], "frag": ["all_1"]}
        args.tex_notex = 2

        # Scenario 1: one tex track and one notex track.
        cond = "texnotex"
        term_covers = [{"track": "track_tex", "high": 300,
                        "low": 50, "detect": "True",
                        "diff": 250, "type": "tex"},
                       {"track": "track_notex", "high": 200,
                        "low": 50, "detect": "True",
                        "diff": 150, "type": "notex"}]
        diff_cover, diff, term_datas, detect_num = \
            dct.compare_replicates(term_covers, texs, cond, args)
        self.assertEqual(diff_cover, 250)
        self.assertDictEqual(diff, {'track': 'track_tex', 'detect': 'True',
                                    'high': 300, 'low': 50,
                                    'type': 'tex', 'diff': 250})
        ref_datas = [{'track': 'track_notex', 'detect': 'True', 'high': 200,
                      'low': 50, 'type': 'notex', 'diff': 150},
                     {'track': 'track_tex', 'detect': 'True', 'high': 300,
                      'low': 50, 'type': 'tex', 'diff': 250}]
        # Index into term_datas (rather than zip) so a result that is too
        # short still makes the test fail.
        for index, ref in enumerate(ref_datas):
            self.assertDictEqual(ref, term_datas[index])
        self.assertEqual(detect_num, 1)

        # Scenario 2: a single fragmented track. The replicate setting is
        # assigned again with the same value before the second call.
        args.replicates = {"tex": ["all_1"], "frag": ["all_1"]}
        cond = "frag"
        term_covers = [{"track": "frag", "high": 10,
                        "low": 0, "detect": "False",
                        "diff": 10, "type": "frag"}]
        diff_cover, diff, term_datas, detect_num = \
            dct.compare_replicates(term_covers, texs, cond, args)
        self.assertEqual(diff_cover, 10)
        self.assertDictEqual(diff, {'detect': 'False', 'type': 'frag',
                                    'low': 0, 'diff': 10,
                                    'track': 'frag', 'high': 10})
        self.assertDictEqual(term_datas[0], {
            'detect': 'False', 'type': 'frag', 'low': 0,
            'diff': 10, 'track': 'frag', 'high': 10})
        self.assertEqual(detect_num, 1)

    def test_coverage2term(self):
        """``coverage2term`` appends the expected record to ``term_covers``."""
        # Put the real comparison function back when the test ends so the
        # mock does not stay installed on ``dct`` for later tests.
        self.addCleanup(setattr, dct, "coverage_comparison",
                        dct.coverage_comparison)
        dct.coverage_comparison = Mock_coverage().coverage_comparison
        hl_covers = {"low": 20, "high": 30}
        hl_poss = {"low": 1, "high": 2}
        term = {"start": 2, "end": 4}
        covers = [100, 30, 23, 21, 21]
        term_covers = []
        args = self.mock_args.mock()
        args.fuzzy = 1
        args.decrease = 0.5
        dct.coverage2term(covers, term, hl_covers, hl_poss, "+",
                          term_covers, "track_1", args, 0, 4, "frag")
        self.assertDictEqual(term_covers[0], {
            'diff': 70, 'track': 'track_1', 'type': 'frag',
            'high': 100, 'low': 30, 'detect': 'True'})

    def test_get_coverage(self):
        """``get_coverage`` on a single fragmented track of one strain."""
        term = {"start": 2, "end": 4, "strain": "aaa", "strand": "+"}
        texs = {"track_tex_track_notex": 0}
        wigs = {"aaa": {"frag_1": {"track_1|+|frag": [100, 30, 23, 21, 21]}}}
        args = self.mock_args.mock()
        args.fuzzy = 1
        args.decrease = 0.5
        args.replicates = {"tex": ["all_1"], "frag": ["all_1"]}
        args.tex_notex = 2
        diff_cover, diff, term_datas, detect_nums = dct.get_coverage(
            term, wigs, "+", texs, args)
        self.assertEqual(diff_cover, 70)
        self.assertDictEqual(diff, {
            'track': 'track_1', 'high': 100, 'type': 'frag',
            'detect': 'True', 'diff': 70, 'low': 30})
        self.assertDictEqual(term_datas["frag_1"][0],
                             {'track': 'track_1', 'high': 100, 'type': 'frag',
                              'detect': 'True', 'diff': 70, 'low': 30})
        self.assertDictEqual(detect_nums, {'frag_1': 1})

    def test_compare_term(self):
        """``compare_term`` against a growing list of candidate terms."""
        terms = []
        term = {"miss": 5, "diff_cover": 30, "ut": 4}
        terms = dct.compare_term(term, terms)
        self.assertDictEqual(terms[0], term)
        term = {"miss": 4, "diff_cover": 30, "ut": 4}
        terms = dct.compare_term(term, terms)
        self.assertDictEqual(terms[0], term)
        term = {"miss": 6, "diff_cover": 80, "ut": 4}
        terms = dct.compare_term(term, terms)
        # The previous best is expected to stay in front here.
        self.assertDictEqual(terms[0], {"miss": 4, "diff_cover": 30, "ut": 4})
        term = {"miss": 4, "diff_cover": 80, "ut": 4}
        terms = dct.compare_term(term, terms)
        self.assertDictEqual(terms[0], term)
        term = {"miss": 4, "diff_cover": 80, "ut": 6}
        terms = dct.compare_term(term, terms)
        self.assertDictEqual(terms[0], term)
        # Passing the same term again: both entries are expected to be kept.
        terms = dct.compare_term(term, terms)
        self.assertDictEqual(terms[0], term)
        self.assertDictEqual(terms[1], term)

    def test_first_term(self):
        """``first_term`` files a term as detected or undetected by strand."""
        detect_terms = {"detect": [], "undetect": []}
        detect = False
        term = {"detect_p": True, "detect_m": False}
        detect = dct.first_term("+", term, detect_terms, detect)
        self.assertTrue(detect)
        self.assertDictEqual(detect_terms["detect"][0], term)
        detect = False
        detect = dct.first_term("-", term, detect_terms, detect)
        self.assertFalse(detect)
        self.assertDictEqual(detect_terms["undetect"][0], term)

    def test_print_table(self):
        """``print_table`` output for expressed and non-expressed terms."""
        args = self.mock_args.mock()
        args.cutoff_coverage = 5
        args.table_best = True

        def make_term(express, diff_cover):
            # The scenarios differ only in these two fields.
            return {"express": express, "diff_cover": diff_cover,
                    "diff": {"high": 100, "low": 30, "track": "track_1"},
                    "datas": {"data": [
                        {"track": "track_1", "diff": 70,
                         "high": 100, "low": 30},
                        {"track": "track_2", "diff": 39,
                         "high": 99, "low": 60}]}}

        def printed_lines(term):
            # Close the buffer even when print_table raises.
            out_t = StringIO()
            try:
                dct.print_table(term, out_t, args)
                return set(out_t.getvalue().split("\n"))
            finally:
                out_t.close()

        detail_line = ("\tTrue\ttrack_1(diff=70;high=100;low=30);"
                       "track_2(diff=39;high=99;low=60)"
                       "track_1(diff=70;high=100;low=30)")
        term = make_term("True", 70)
        self.assertEqual(printed_lines(term), {detail_line})
        # The same expected line is asserted with table_best switched off.
        args.table_best = False
        self.assertEqual(printed_lines(term), {detail_line})
        # Tabs are written as escapes so an editor cannot silently turn them
        # into spaces.
        self.assertEqual(printed_lines(make_term("False", 70)),
                         {"\tFalse\tNA"})
        self.assertEqual(printed_lines(make_term("True", -1)),
                         {"\tFalse\tNo_coverage_decreasing"})

    def test_print2file(self):
        """``print2file`` writes the example GFF line and table line."""
        out = StringIO()
        out_t = StringIO()
        # Close both buffers even when an assertion below fails.
        self.addCleanup(out.close)
        self.addCleanup(out_t.close)
        term = {"strain": "aaa", "express": "True", "diff_cover": 70,
                "strand": "+", "start": 2, "end": 4, "method": "TransTermHP",
                "diff": {"high": 100, "low": 30, "track": "track_1"},
                "datas": {"data": [
                    {"track": "track_1", "diff": 70, "high": 100, "low": 30},
                    {"track": "track_2", "diff": 39, "high": 99, "low": 60}]}}
        args = self.mock_args.mock()
        args.cutoff_coverage = 5
        args.table_best = True
        dct.print2file(0, term, "70", "test", out, out_t, "test_method", args)
        # [:-1] drops the last element of the split before comparing.
        self.assertEqual(set(out.getvalue().split("\n")[:-1]),
                         {self.example.gff_file})
        self.assertEqual(set(out_t.getvalue().split("\n")[:-1]),
                         {self.example.table})
class TestMergeRNAplexRNAup(unittest.TestCase):
    """Tests for the ``mrr`` helpers that merge RNAplex and RNAup results."""

    def setUp(self):
        """Recreate an empty ``test_project`` folder and build the fixtures."""
        self.test_folder = "test_project"
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        os.mkdir(self.test_folder)
        self.example = Example()
        self.mock_args = MockClass()

    def tearDown(self):
        """Delete ``test_project`` when it is still there."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def _target_args(self):
        """Return mock arguments with the target settings shared by tests."""
        args_tar = self.mock_args.mock()
        args_tar.top = 2
        args_tar.tar_start = 20
        args_tar.tar_end = 15
        return args_tar

    def test_detect_energy(self):
        """``detect_energy`` updates the ``energy`` entry of the sRNA dict."""
        first = {"energy": -2}
        mrr.detect_energy(self.example.out_rna_txt, first)
        self.assertDictEqual(first, {'energy': -5.3})
        second = {"energy": -8}
        mrr.detect_energy(self.example.out_rna_txt, second)
        self.assertDictEqual(second, {'energy': -8.0})

    def test_print_rank_one(self):
        """``print_rank_one`` output matches the example after conversion."""
        out = StringIO()
        mrr.print_rank_one(self.example.srnas, out, "RNAplex",
                           self.example.gffs, self.example.srna_gffs,
                           self._target_args())
        produced = convert_dict(out.getvalue().split("\n"))
        expected = convert_dict(self.example.out_print.split("\n"))
        self.assertDictEqual(produced, expected)

    def test_read_table(self):
        """``read_table`` parses the RNAplex and RNAup example files."""
        rnaplex = os.path.join(self.test_folder, "rnaplex")
        rnaup = os.path.join(self.test_folder, "rnaup")
        gen_file(rnaplex, self.example.rnaplex)
        gen_file(rnaup, self.example.rnaup)
        expected = {
            'RNAup': {
                'srna352': [
                    {'target': 'srna1023', 'energy': 0},
                    {'tar_pos': '571,576', 'target': 'SAOUHSC_00001|dnaA',
                     'energy': -4.87, 'srna_pos': '20,25'},
                    {'tar_pos': '14,30', 'target': 'SAOUHSC_00002',
                     'energy': -5.91, 'srna_pos': '11,26'},
                ],
            },
            'RNAplex': {
                'srna1023': [
                    {'tar_pos': '571,576', 'target': 'SAOUHSC_00001|dnaA',
                     'energy': -5.3, 'srna_pos': '20,25'},
                ],
                'srna352': [
                    {'tar_pos': '163,170', 'target': 'SAOUHSC_00001|dnaA',
                     'energy': -1.91, 'srna_pos': '24,31'},
                ],
            },
        }
        self.assertDictEqual(
            mrr.read_table(self.example.gffs, rnaplex, rnaup), expected)

    def test_get_srna_name(self):
        """``get_srna_name`` returns the name and entry for ``srna0``."""
        found = mrr.get_srna_name(self.example.srna_gffs, "srna0")
        self.assertEqual(found[0], 'sRNA_0')
        self.assertEqual(found[1].start, 6)

    def test_get_target_info(self):
        """``get_target_info`` returns the entry for ``AAA_00001``."""
        target = mrr.get_target_info(self.example.gffs, "AAA_00001")
        self.assertEqual(target.start, 100)

    def test_merge_base_rnaplex(self):
        """``merge_base_rnaplex`` yields the four expected rows twice.

        The rows are looked for both in the returned overlap list and in the
        ``merges`` list that is handed to the function.
        """
        merges = []
        overlap = mrr.merge_base_rnaplex(
            self.example.srnas, self.example.srna_gffs, self._target_args(),
            self.example.gffs, merges)
        expected_rows = [
            ['sRNA_0', 'aaa', '6-15', '7-15', '7-15', '+', 'AAA_00001',
             '100-150', '89-94', '89-94', '+', '-6.5', '1', '-6.5', '1'],
            ['sRNA_0', 'aaa', '6-15', '7-12', '7-15', '+',
             'AAA_00002|dnaA', '2348-2934', '2330-2342', '2337-2342', '+',
             '-3.5', '2', '-3.5', '2'],
            ['sRNA_1', 'aaa', '1258-2234', '1259-1267', '1259-1267', '+',
             'AAA_00003', '5544-5597', '5550-5545', '5550-5545', '-',
             '-10.5', '1', '-10.5', '1'],
            ['sRNA_2', 'aaa', '3544-6517', '6508-6516', '6508-6516', '-',
             'AAA_00001', '100-150', '89-94', '89-94', '+',
             '-23.5', '1', '-23.5', '1'],
        ]
        # Count every (expected row, produced row) pair that is equal.
        hits = sum(1 for row in expected_rows
                   for data in overlap if row == data)
        self.assertEqual(hits, 4)
        hits = sum(1 for row in expected_rows
                   for data in merges if row == data)
        self.assertEqual(hits, 4)

    def test_merge_base_rnaup(self):
        """``merge_base_rnaup`` fills ``merges`` with the four expected rows."""

        def hit(target, energy, rank):
            # Every interaction in this test uses the same positions.
            return {"target": target, "energy": energy, "rank": rank,
                    "srna_pos": "2,10", "tar_pos": "10,15"}

        srnas = {
            "RNAplex": {
                "srna0": [hit("AAA_00001", -6.5, 1),
                          hit("AAA_00002|dnaA", -3.5, 2)],
                "srna1": [hit("AAA_00003", -10.5, 1)],
                "srna2": [hit("AAA_00001", -23.5, 1),
                          hit("AAA_00002|dnaA", -3.43, 3),
                          hit("AAA_00003", -6.5, 2)],
            },
            "RNAup": {
                "srna0": [hit("AAA_00001", -6.5, 1),
                          hit("AAA_00002|dnaA", -3.5, 2)],
                "srna1": [hit("AAA_00003", -10.5, 1)],
                "srna2": [hit("AAA_00001", -23.5, 1)],
            },
        }
        merges = []
        mrr.merge_base_rnaup(srnas, self.example.srna_gffs,
                             self._target_args(), self.example.gffs, merges)
        expected_rows = [
            ['sRNA_1', 'aaa', '1258-2234', '1259-1267', '1259-1267', '+',
             'AAA_00003', '5544-5597', '5550-5545', '5550-5545', '-',
             '-10.5', '1', '-10.5', '1'],
            ['sRNA_2', 'aaa', '3544-6517', '6508-6516', '6508-6516', '-',
             'AAA_00001', '100-150', '89-94', '89-94', '+',
             '-23.5', '1', '-23.5', '1'],
            ['sRNA_0', 'aaa', '6-15', '7-15', '7-15', '+', 'AAA_00001',
             '100-150', '89-94', '89-94', '+', '-6.5', '1', '-6.5', '1'],
            ['sRNA_0', 'aaa', '6-15', '7-15', '7-15', '+',
             'AAA_00002|dnaA', '2348-2934', '2337-2342', '2337-2342', '+',
             '-3.5', '2', '-3.5', '2'],
        ]
        hits = sum(1 for row in expected_rows
                   for data in merges if row == data)
        self.assertEqual(hits, 4)
# Example #23
# 0
class TestsRNAClass(unittest.TestCase):
    """Tests for the sRNA classification helpers of ``sc``."""

    def setUp(self):
        """Build the fixtures and create ``test_folder`` when it is absent."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        folder_missing = not os.path.exists(self.test_folder)
        if folder_missing:
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete ``test_folder`` when it is still there."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    @staticmethod
    def _class_index():
        """Return a fresh copy of the class-name to class-number mapping."""
        return {'sRNA_hit': 5, '2d_energy': 1, 'sRNA_no_hit': 4,
                'nr_no_hit': 3, 'with_TSS': 2}

    def test_initiate(self):
        """``initiate`` writes the numbered class description line."""
        out = StringIO()
        sc.initiate("test", ["test"], "test_name", 0, {}, out, "testtest")
        self.assertEqual(out.getvalue(), "1testtest\n")

    def test_print_stat_title(self):
        """``print_stat_title`` prints the legend and returns the index."""
        out_stat = StringIO()
        checks = {"limit": False, "first": True, "utr": False, "inter": False}
        srna_datas = {"aaa": self.example.srnas, "all": self.example.srnas}
        args = self.mock_args.mock()
        args.energy = 0
        args.nr_hits_num = 0
        args.import_info = ["tss", "sec_str", "blast_nr", "blast_srna"]
        class_num, index = sc.print_stat_title(
            checks, out_stat, "aaa", srna_datas, 1, args)
        expected_title = (
            "1 - the normalized(by length of sRNA) free energy change of "
            "secondary structure below to 0\n"
            "2 - sRNA candidates start with TSS (3'UTR derived and interCDS "
            "sRNA also includes the sRNA candidates which start with "
            "processing site.)\n"
            "3 - blast can not find the homology from nr database "
            "(the cutoff is 0).\n"
            "4 - blast can not find the homology from sRNA database.\n"
            "5 - blast can find the homology from sRNA database.\n"
            "All strains:\n"
        )
        self.assertEqual(out_stat.getvalue(), expected_title)
        self.assertEqual(class_num, 5)
        self.assertDictEqual(index, self._class_index())

    def test_import_class(self):
        """``import_class`` fills ``datas_srna`` and returns the new count."""
        collected = {}
        per_strain = {"aaa": self.example.srnas}
        num = sc.import_class(5, collected, per_strain, self._class_index(),
                              0, "aaa", "UTR_derived", "5utr", 0, 0)
        self.assertEqual(num, 1)
        self.assertEqual(collected["class_4"][0].start, 230)

    def test_import_data(self):
        """``import_data`` groups the example sRNAs by type and class."""
        per_strain = {"aaa": self.example.srnas, "all": self.example.srnas}
        counts = {"total": 0, "intergenic": 0, "5'UTR_derived": 0,
                  "3'UTR_derived": 0, "interCDS": 0, "in_CDS": 0}
        checks = {"limit": False, "first": True, "utr": True,
                  "inter": True, "in_CDS": True, "antisense": False}
        grouped = sc.import_data(5, per_strain, self._class_index(), counts,
                                 "aaa", checks, 0, 0)
        # (sRNA type, class key, expected start of the first entry)
        expectations = (
            ("5'UTR_derived", "class_4", 230),
            ("interCDS", "class_1", 140),
            ("in_CDS", "class_1", 6166),
            ("intergenic", "class_5", 5166),
        )
        for srna_type, class_key, start in expectations:
            self.assertEqual(grouped[srna_type][class_key][0].start, start)

    def test_print_intersection(self):
        """``print_intersection`` writes the statistic line and a GFF file."""
        gff_name = os.path.join(self.test_folder, "test")
        out_stat = StringIO()
        keys = ["class_1", "class_4", "class_2", "class_3", "class_5"]
        # Every class is backed by the same example sRNA collection.
        datas = {"class_%d" % number: self.example.srnas
                 for number in range(1, 6)}
        sc.print_intersection(datas, keys, 3, gff_name, "total", out_stat)
        self.assertEqual(
            out_stat.getvalue(),
            "\tclass_1 and class_4 and class_2 and class_3 and class_5 = "
            "4(1.3333333333333333)\n")
        results, _ = extract_info(gff_name, "file")
        self.assertEqual("\n".join(results), self.example.gff_info)

    def test_read_file(self):
        """``read_file`` returns per-strain sRNAs, strain names and checks."""
        srna_file = os.path.join(self.test_folder, "srna.gff")
        gen_file(srna_file, self.example.gff_file)
        srna_datas, strains, checks = sc.read_file(srna_file)
        for strain, position, start in (("aaa", 0, 140),
                                        ("aaa", 1, 230),
                                        ("bbb", 0, 5166)):
            self.assertEqual(srna_datas[strain][position].start, start)
        self.assertListEqual(strains, ['all', 'aaa', 'bbb'])
        self.assertDictEqual(checks, {
            'inter': True, 'limit': False, 'utr': True,
            'antisense': False, 'in_CDS': True, 'first': True})

    def test_sort_keys(self):
        """``sort_keys`` orders the class keys by their number."""
        ordered = sc.sort_keys(["class_3", "class_1", "class_5"])
        self.assertListEqual(ordered, ['class_1', 'class_3', 'class_5'])

    def test_classify_srna(self):
        """Smoke test: ``classify_srna`` runs on the example GFF file.

        Nothing is asserted; the test passes when the call does not raise.
        """
        out_stat_file = os.path.join(self.test_folder, "stat")
        srna_file = os.path.join(self.test_folder, "srna.gff")
        gen_file(srna_file, self.example.gff_file)
        args = self.mock_args.mock()
        args.energy = 0
        args.nr_hits_num = 0
        args.in_cds = True
        args.import_info = ["tss", "sec_str"]
        sc.classify_srna(srna_file, self.test_folder, out_stat_file, args)
# Example #24
# 0
class TestTranscripSNP(unittest.TestCase):

    def setUp(self):
        """Create the helper objects and the scratch folder for each test."""
        self.mock_args = MockClass()
        self.example = Example()
        self.test_folder = "test_folder"
        if os.path.exists(self.test_folder):
            return
        os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch folder when it is still present."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_import_data(self):
        """import_data should return the maximum qualities and parsed SNPs."""
        vcf_path = os.path.join(self.test_folder, "snp")
        coverage_path = os.path.join(self.test_folder, "depth")
        gen_file(vcf_path, self.example.snp_file)
        gen_file(coverage_path, self.example.depth_file)

        options = self.mock_args.mock()
        settings = {
            "depth_s": "n_10",
            "depth_b": "a_2",
            "dp4_sum": "n_10",
            "dp4_frac": 0.5,
            "idv": "n_10",
            "imf": 0.5,
            "filters": ["VDB_s0.1"],
            "min_sample": 2,
        }
        for name, value in settings.items():
            setattr(options, name, value)

        expected_quals = {'NC_007795.1': 98.0, 'All_genome': 98.0}
        expected_snps = [
            {'dp4_frac': 1.0, 'strain': 'NC_007795.1', 'filter': '.',
             'indel': -1, 'pos': 1, 'id': '.',
             'all_info': ("NC_007795.1\t1\t.\tC\tA\t98\t.\tDP=89;DP4=0,0,"
                          "60,9;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87"),
             'qual': 98.0,
             'info': ['DP=89', 'DP4=0,0,60,9', 'VDB=8.46526e-15'],
             'alt': 'A', 'ref': 'C', 'frac': -1, 'depth': 89, 'dp4_sum': 69},
            {'dp4_frac': 1.0, 'strain': 'NC_007795.1', 'filter': '.',
             'indel': 22, 'pos': 6, 'id': '.',
             'all_info': ("NC_007795.1\t6\t.\tA\tAA\t26.9515\t.\tINDEL;IDV=22;"
                          "IMF=0.536585;DP=41;VDB=9.36323e-14;DP4=0,0,40,0\t"
                          "GT:PL:DP\t0/1:60,0,55:40"),
             'qual': 26.9515,
             'info': ['INDEL', 'IDV=22', 'IMF=0.536585', 'DP=41',
                      'VDB=9.36323e-14', 'DP4=0,0,40,0'],
             'alt': 'AA', 'ref': 'A', 'frac': 0.536585, 'depth': 41,
             'dp4_sum': 40},
        ]

        # Only the first two return values are checked by this test.
        max_quals, snps, dess, raw_snps = ts.import_data(
            vcf_path, options, 2, coverage_path, 2)
        self.assertDictEqual(max_quals, expected_quals)
        self.assertListEqual(snps, expected_snps)

    def test_check_overlap(self):
        """check_overlap should flag each overlap and store it in snps."""
        snp_groups = {"test": []}
        overlapping = [{"test": []}]
        ts.check_overlap(snp_groups, overlapping)
        # The overlap entry gains 'print': True and is appended under "test".
        flagged = {'test': [], 'print': True}
        self.assertListEqual(overlapping, [flagged])
        self.assertDictEqual(snp_groups, {'test': [flagged]})

    def test_overlap_position(self):
        """overlap_position should split SNPs into conflicts and sets.

        The SNPs at 22181 (ref 'CA') and 22182 are expected to be reported
        as one conflict group; each non-overlapping set combines one of
        them with the SNP at 30000.
        """
        qual_snps = [{'filter': '.', 'pos': 22181, 'alt': 'A',
                      'frac': -1, 'depth': 89, 'indel': -1,
                      'info': 'MQ=20', 'id': '.', 'qual': 98.0,
                      'ref': 'CA', 'strain': 'NC_007795.1',
                      'all_info': ("NC_007795.1\t22181\t.\tC\tA\t98\t.\t"
                                   "DP=89;VDB=8.46526e-15;SGB=-0.693147\t"
                                   "GT:PL:DP\t1/1:125,184,0:87")},
                     {'filter': '.', 'pos': 22182, 'alt': 'C',
                      'frac': -1, 'depth': 89, 'indel': -1,
                      'info': 'MQ=20', 'id': '.', 'qual': 98.0,
                      'ref': 'A', 'strain': 'NC_007795.1',
                      'all_info': ("NC_007795.1\t22182\t.\tC\tA\t98\t.\t"
                                   "DP=89;VDB=8.46526e-15;SGB=-0.693147\t"
                                   "GT:PL:DP\t1/1:125,184,0:87")},
                     {'filter': '.', 'pos': 30000, 'alt': 'A',
                      'frac': -1, 'depth': 89, 'indel': -1,
                      'info': 'MQ=20', 'id': '.', 'qual': 98.0,
                      'ref': 'C', 'strain': 'NC_007795.1',
                      'all_info': ("NC_007795.1\t30000\t.\tC\tA\t98\t.\t"
                                   "DP=89;VDB=8.46526e-15;SGB=-0.693147\t"
                                   "GT:PL:DP\t1/1:125,184,0:87")}]
        # Every expected record equals its input record plus 'print': True.
        # The copies are taken before the call so they are independent of
        # anything overlap_position does to its argument.
        snp_22181, snp_22182, snp_30000 = [
            dict(snp, **{'print': True}) for snp in qual_snps]
        conflicts, nooverlap = ts.overlap_position(qual_snps)
        self.assertListEqual(conflicts, [[snp_22181, snp_22182]])
        self.assertDictEqual(nooverlap, {1: [snp_22181, snp_30000],
                                         2: [snp_22182, snp_30000]})

    def test_stat(self):
        """stat should write a "<stat_file>_best.csv" table matching the example."""
        stat_prefix = os.path.join(self.test_folder, "stat")
        best_quals = {'NC_007795.1': 98.0, 'All_genome': 98.0}
        snp_record = {'filter': '.', 'pos': 22181, 'alt': 'A',
                      'frac': -1, 'depth': 89, 'indel': -1,
                      'info': 'MQ=20', 'id': '.', 'qual': 98.0,
                      'ref': 'C', 'strain': 'NC_007795.1',
                      'all_info': ("NC_007795.1\t22181\t.\tC\tA\t98\t.\t"
                                   "DP=89;VDB=8.46526e-15\tGT:PL:DP\t"
                                   "1/1:125,184,0:87")}
        options = self.mock_args.mock()
        options.depth = 50
        options.fraction = 0.3
        options.quality = 20
        ts.stat(best_quals, [snp_record], 2, stat_prefix,
                self.test_folder + "/test", options, "best.csv")
        table_lines = import_data(stat_prefix + "_best.csv")
        self.assertEqual("\n".join(table_lines), self.example.stat)

    def test_plot_bar(self):
        """plot_bar should create the SNP quality PNG for the genome."""
        plot_prefix = self.test_folder + "/test"
        ts.plot_bar([3, 10, 30, 45, 50], "NC_007795.1", plot_prefix,
                    "best.png")
        expected_png = os.path.join(
            self.test_folder, "test_NC_007795.1_SNP_QUAL_best.png")
        self.assertTrue(os.path.exists(expected_png))

    def test_read_fasta(self):
        """read_fasta should return a list with one {name: sequence} dict."""
        fasta_path = os.path.join(self.test_folder, "NC_007795.1.fa")
        gen_file(fasta_path, self.example.fasta_file)
        expected = [{'NC_007795.1': 'AAATATATCAGCACCGTAGACGATAGAGTAGTAC'}]
        self.assertListEqual(ts.read_fasta(fasta_path), expected)

    def test_gen_ref(self):
        """gen_ref should build the reference labels over two rounds.

        The list returned by the first call is passed into the second call.
        """
        def fresh_snps():
            # A new list of new dicts on every call, so the second gen_ref
            # call never sees objects that were handed to the first one.
            return [{'filter': '.', 'pos': 22181, 'alt': 'A',
                     'frac': -1, 'depth': 89, 'indel': -1,
                     'info': 'MQ=20', 'id': '.', 'qual': 98.0,
                     'ref': 'C', 'strain': 'NC_007795.1',
                     'all_info': ("NC_007795.1\t22181\t.\tC\tA\t98\t.\t"
                                  "DP=89;VDB=8.46526e-15\tGT:PL:DP\t"
                                  "1/1:125,184,0:87")},
                    {'filter': '.', 'pos': 22500, 'alt': 'A',
                     'frac': -1, 'depth': 89, 'indel': -1,
                     'info': 'MQ=20', 'id': '.', 'qual': 98.0,
                     'ref': 'C', 'strain': 'NC_007795.1',
                     'all_info': ("NC_007795.1\t22500\t.\tC\tA\t98\t.\t"
                                  "DP=89;VDB=8.46526e-15\tGT:PL:DP\t"
                                  "1/1:125,184,0:87")}]

        refs = ts.gen_ref(fresh_snps(), 1, [], 1)
        self.assertListEqual(refs, ['1:A', '1:A'])

        refs = ts.gen_ref(fresh_snps(), 1, refs, 2)
        self.assertListEqual(refs, ['1:A_1:A'] * 4)

    def test_change(self):
        """change should modify the sequence dict in place for the SNP."""
        variant = {'filter': '.', 'pos': 1, 'alt': 'A',
                   'frac': -1, 'depth': 89, 'indel': -1,
                   'info': 'MQ=20', 'id': '.', 'qual': 98.0,
                   'ref': 'C', 'strain': 'NC_007795.1',
                   'all_info': ("NC_007795.1\t1\t.\tC\tA\t98\t.\tDP=89;"
                                "VDB=8.46526e-15\tGT:PL:DP\t"
                                "1/1:125,184,0:87")}
        sequence = {"num_mod": 3, "seq": "CCCCATATCAGCACCGTAGACGATAGAGTAGTAC"}
        ts.change(variant, sequence)
        expected = {'num_mod': 3, 'seq': 'CCCaATATCAGCACCGTAGACGATAGAGTAGTAC'}
        self.assertDictEqual(sequence, expected)

    def test_print_file(self):
        """print_file should write the reference row and both FASTA files."""
        def multi_allele_snp():
            # Returned fresh each time: the record appears once in the
            # conflict group and once in the value list as separate objects.
            return {'all_info': ("NC_007795.1\t1\t.\tCA\tA,GT\t98\t.\tDP=89;"
                                 "VDB=8.46526e-15\tGT:PL:DP\t"
                                 "1/1:125,184,0:87"),
                    'filter': '.', 'id': '.', 'frac': -1, 'indel': -1,
                    'alt': 'A,GT', 'info': 'VDB=8.46526e-15', 'qual': 98.0,
                    'ref': 'CA', 'strain': 'NC_007795.1', 'depth': 89,
                    'pos': 1, 'print': True}

        indel_snp = {'all_info': ("NC_007795.1\t2\t.\tA\tAA\t26.9515\t.\t"
                                  "INDEL;IDV=22;IMF=0.536585;DP=41;"
                                  "VDB=9.36323e-14 GT:PL:DP\t0/1:60,0,"
                                  "55:40"),
                     'filter': '.', 'id': '.', 'frac': 0.536585, 'indel': 22,
                     'alt': 'AA', 'info': 'VDB=9.36323e-14 GT:PL:DP',
                     'qual': 26.9515, 'ref': 'A', 'strain': 'NC_007795.1',
                     'depth': 41, 'pos': 2, 'print': True}
        single_snp = {'all_info': ("NC_007795.1\t7\t.\tC\tA\t98\t.\tDP=89;"
                                   "VDB=8.46526e-15\tGT:PL:DP\t"
                                   "1/1:125,184,0:87"),
                      'filter': '.', 'id': '.', 'frac': -1, 'indel': -1,
                      'alt': 'A', 'info': 'VDB=8.46526e-15', 'qual': 98.0,
                      'ref': 'C', 'strain': 'NC_007795.1', 'depth': 89,
                      'pos': 7, 'print': True}

        refs = {'NC_007795.1': ['1:A', '1:GT']}
        conflicts = [[multi_allele_snp(), indel_snp]]
        values = [multi_allele_snp(), single_snp]
        mod_seq_init = {'genome': 'NC_007795.1', 'num_mod': 0,
                        'seq': 'CAGTACCCTCAGCACCGTAGACGATAGAGTAGTAC'}
        mod_seqs = [{'genome': 'NC_007795.1', 'num_mod': -1,
                     'seq': 'aGTACaCTCAGCACCGTAGACGATAGAGTAGTAC'},
                    {'genome': 'NC_007795.1', 'num_mod': 0,
                     'seq': 'gtGTACaCTCAGCACCGTAGACGATAGAGTAGTAC'}]
        ref_table = StringIO()
        seq_prefix = os.path.join(self.test_folder, "seq")

        ts.print_file(refs, ref_table, conflicts, 1, values, mod_seq_init,
                      mod_seqs, seq_prefix, "NC_007795.1")

        self.assertEqual(ref_table.getvalue(),
                         "1\t1\t1\tNC_007795.1\tNC_007795.1\n")
        for fasta_name in ("seq_NC_007795.1_1_1.fa",
                           "seq_NC_007795.1_1_2.fa"):
            self.assertTrue(os.path.exists(
                os.path.join(self.test_folder, fasta_name)))

    def test_gen_new_fasta(self):
        """gen_new_fasta should write the reference table and FASTA files."""
        # Each factory returns a new dict per call, so the records placed in
        # nooverlap and conflicts are equal but never the same object.
        def multi_allele_snp():
            return {'strain': 'NC_007795.1', 'print': True, 'id': '.',
                    'alt': 'A,GT', 'filter': '.', 'frac': -1,
                    'ref': 'CA', 'depth': 89, 'info': 'VDB=8.46526e-15',
                    'indel': -1, 'qual': 98.0, 'pos': 1,
                    'all_info': ("NC_007795.1\t1\t.\tCA\tA,GT\t98\t.\t"
                                 "DP=89;VDB=8.46526e-15\tGT:PL:DP\t"
                                 "1/1:125,184,0:87")}

        def indel_snp():
            return {'strain': 'NC_007795.1', 'print': True, 'id': '.',
                    'alt': 'AA', 'filter': '.', 'frac': 0.536585,
                    'ref': 'A', 'depth': 41,
                    'info': 'VDB=9.36323e-14 GT:PL:DP',
                    'indel': 22, 'qual': 26.9515, 'pos': 2,
                    'all_info': ("NC_007795.1\t2\t.\tA\tAA\t26.9515\t.\t"
                                 "INDEL;IDV=22;IMF=0.536585;DP=41;"
                                 "VDB=9.36323e-14 GT:PL:DP\t0/1:60,"
                                 "0,55:40")}

        def single_snp():
            return {'strain': 'NC_007795.1', 'print': True, 'id': '.',
                    'alt': 'A', 'filter': '.', 'frac': -1, 'ref': 'C',
                    'depth': 89, 'info': 'VDB=8.46526e-15',
                    'indel': -1, 'qual': 98.0, 'pos': 7,
                    'all_info': ("NC_007795.1\t7\t.\tC\tA\t98\t.\tDP=89;"
                                 "VDB=8.46526e-15\tGT:PL:DP\t1/1:125,"
                                 "184,0:87")}

        ref_table = StringIO()
        seq_prefix = os.path.join(self.test_folder, "seq")
        nooverlap = {1: [multi_allele_snp(), single_snp()],
                     2: [indel_snp(), single_snp()]}
        seqs = [{'NC_007795.1': 'CAGTACCCTCAGCACCGTAGACGATAGAGTAGTAC'}]
        conflicts = [[multi_allele_snp(), indel_snp()]]

        ts.gen_new_fasta(nooverlap, seqs, ref_table, conflicts, seq_prefix)

        self.assertEqual(ref_table.getvalue(),
                         ("1\t1\t1\t1:A\tNC_007795.1\n"
                          "1\t1\t2\t1:GT\tNC_007795.1\n"
                          "2\t2\t1\tAll\tNC_007795.1\n"))
        for fasta_name in ("seq_NC_007795.1_1_1.fa",
                           "seq_NC_007795.1_1_2.fa",
                           "seq_NC_007795.1_2_1.fa"):
            self.assertTrue(os.path.exists(
                os.path.join(self.test_folder, fasta_name)))

    def test_snp_detect(self):
        """snp_detect should produce the FASTA, CSV, VCF and PNG outputs."""
        def in_test_folder(name):
            return os.path.join(self.test_folder, name)

        depth_file = in_test_folder("depth")
        fasta_file = in_test_folder("NC_007795.1.fa")
        snp_file = in_test_folder("NC_007795.1.snp")
        gen_file(depth_file, self.example.depth_file)
        gen_file(fasta_file, self.example.fasta_final)
        gen_file(snp_file, self.example.snp_final)

        options = self.mock_args.mock()
        settings = {
            "depth": 5,
            "fraction": 0.3,
            "quality": 5,
            "depth_s": "n_10",
            "depth_b": "a_2",
            "dp4_sum": "n_10",
            "dp4_frac": 0.5,
            "idv": "n_10",
            "imf": 0.5,
            "filters": ["VDB_s0.1"],
            "min_sample": 2,
        }
        for name, value in settings.items():
            setattr(options, name, value)

        ts.snp_detect(fasta_file, snp_file, depth_file,
                      in_test_folder("snp"), in_test_folder("seq"),
                      2, in_test_folder("stat"), options, 2)

        expected_outputs = (
            "seq_NC_007795.1_1_1.fa",
            "seq_NC_007795.1_1_2.fa",
            "seq_NC_007795.1_2_1.fa",
            "snp_seq_reference.csv",
            "snp_best.vcf",
            "snp_NC_007795.1_SNP_QUAL_best.png",
            "snp_NC_007795.1_SNP_QUAL_raw.png",
        )
        for output_name in expected_outputs:
            self.assertTrue(os.path.exists(in_test_folder(output_name)))
Пример #25
0
class TestsRNAIntergenic(unittest.TestCase):
    def setUp(self):
        """Create the fixtures and the scratch folders used by every test.

        The wig folder is created with ``os.makedirs(..., exist_ok=True)``
        so it is present even when "test_folder" was left behind by an
        interrupted run; the previous code skipped creating it in that case.
        """
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.wig_folder = "test_folder/wigs"
        # makedirs also creates the parent "test_folder" when it is missing.
        os.makedirs(self.wig_folder, exist_ok=True)

    def tearDown(self):
        """Delete the scratch folder when it is still present."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_read_data(self):
        """read_data should count and return the entries of its input files."""
        options = self.mock_args.mock()
        # The same example GFF content is written to all three input files.
        options.gff_file = os.path.join(self.test_folder, "anno.gff")
        options.tran_file = os.path.join(self.test_folder, "tran.gff")
        options.pro_file = os.path.join(self.test_folder, "pro.gff")
        for path in (options.gff_file, options.tran_file, options.pro_file):
            gen_file(path, self.example.gff_file)

        nums, cdss, tas, pros, genes, ncs = si.read_data(options)

        self.assertDictEqual(nums, {'ta': 3, 'cds': 3, 'pro': 3, 'uni': 0})
        for entries in (cdss, tas, pros):
            self.assertEqual(entries[0].start, 140)

    def test_read_tss(self):
        """read_tss should return entries whose first start is 140."""
        tss_path = os.path.join(self.test_folder, "tss.gff")
        gen_file(tss_path, self.example.gff_file)
        entries, entry_count = si.read_tss(tss_path)
        self.assertEqual(entries[0].start, 140)

    def test_compare_ta_cds(self):
        """compare_ta_cds should set detects["overlap"] to True."""
        state = {"overlap": False}
        annotations = copy.deepcopy(self.example.gffs)
        transcripts = copy.deepcopy(self.example.tas)
        si.compare_ta_cds(annotations, transcripts[0], state)
        self.assertDictEqual(state, {'overlap': True})

    def test_compare_ta_tss(self):
        """compare_ta_tss should write the sRNA GFF line and the table row.

        si.get_coverage is replaced by a mock while the test runs.
        """
        # Register the restore before patching so si.get_coverage is put
        # back even when the call or an assertion below fails; previously
        # the restore was the last statement and was skipped on failure.
        self.addCleanup(setattr, si, "get_coverage", get_coverage)
        si.get_coverage = self.mock.mock_get_coverage

        out_table = StringIO()
        output = StringIO()
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        detects = {"overlap": False, "uni_with_tss": False}

        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.nums = nums
        args.fuzzy = 20
        args.detects = detects
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.output = output
        args.out_table = out_table

        tas = copy.deepcopy(self.example.tas)
        # A separate deep copy from args.tsss, as in the original fixture.
        tsss = copy.deepcopy(self.example.tsss)
        si.compare_ta_tss(10, 2, 15, tas[0], tsss[0], 50, "cutoff", 20, "",
                          args)

        self.assertEqual(
            output.getvalue(),
            "aaa\tANNOgesic\tncRNA\t10\t15\t.\t+\t.\tID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;with_TSS=TSS:170_+\n"
        )
        self.assertEqual(
            out_table.getvalue(),
            "aaa\t00000\t10\t15\t+\tNA\tNA\tNA\tNA\tNA\tTSS:170_+\n")

    def test_print_file(self):
        """print_file should emit the table row and the GFF line."""
        gff_prefix = "aaa\tintergenic\tsRNA\t10\t15\t.\t+\t."
        table_buffer = StringIO()
        gff_buffer = StringIO()
        srna_datas = {
            "high": 20,
            "low": 5,
            "best": 13,
            "conds": {"cond1": "test1"},
            "detail": [
                {"track": "test1", "high": 30, "low": 10, "avg": 15},
                {"track": "test2", "high": 25, "low": 13, "avg": 20},
            ],
        }
        options = self.mock_args.mock()
        options.nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        options.out_table = table_buffer
        options.output = gff_buffer
        options.table_best = False

        si.print_file(gff_prefix, "TSS_160+", srna_datas, "intergenic",
                      options, "aaa")

        expected_row = "aaa\t00000\t10\t15\t+\tcond1\ttest1\t13\t20\t5\tTSS_160+\ttest1(avg=15;high=30;low=10);test2(avg=20;high=25;low=13)\n"
        expected_gff = "aaa\tintergenic\tsRNA\t10\t15\t.\t+\t.\tID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;with_TSS=TSS_160+;best_avg_coverage=13;best_high_coverage=20;best_low_coverage=5\n"
        self.assertEqual(table_buffer.getvalue(), expected_row)
        self.assertEqual(gff_buffer.getvalue(), expected_gff)

    def test_detect_include_tss(self):
        """detect_include_tss should write the sRNA GFF line and table row.

        si.get_coverage is replaced by a mock while the test runs.
        """
        # Register the restore before patching so si.get_coverage is put
        # back even when detect_include_tss raises; previously the restore
        # only ran if the call returned normally.
        self.addCleanup(setattr, si, "get_coverage", get_coverage)
        si.get_coverage = self.mock.mock_get_coverage

        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        detects = {"overlap": False, "uni_with_tss": False}
        coverage = {
            "primary": 0,
            "secondary": 0,
            "internal": 0,
            "antisense": 50,
            "orphan": 10
        }

        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.nums = nums
        args.fuzzy = 20
        args.detects = detects
        args.cutoff_coverage = coverage
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        # The same coverage dict is used for both cutoff_coverage and notex.
        args.notex = coverage
        args.file_type = "frag"
        args.break_tran = False
        args.output = output
        args.out_table = out_table

        tas = copy.deepcopy(self.example.tas)
        si.detect_include_tss(tas[0], args, None, args.wigs_f, args.wigs_r)

        self.assertEqual(
            output.getvalue(),
            "aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\tID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;with_TSS=TSS:170_+\n"
        )
        self.assertEqual(
            out_table.getvalue(),
            "aaa\t00000\t170\t230\t+\tNA\tNA\tNA\tNA\tNA\tTSS:170_+\n")

    def test_get_differential_cover(self):
        """get_differential_cover should update cover_sets, then poss.

        The checks and cover_sets dicts are shared between the two calls.
        """
        flags = {"detect_diff": True, "first": True}
        coverage_state = {"diff": 30, "low": 5, "high": 35}
        options = self.mock_args.mock()
        options.fuzzy_inter = 10
        options.decrease_inter = 200

        # First call: coverage 20 at index 0.
        positions = {"stop_point": 100}
        si.get_differential_cover(0, flags, coverage_state, positions, 20,
                                  options, 80)
        self.assertDictEqual(coverage_state,
                             {'diff': 20, 'low': 20, 'high': 35})

        # Second call: coverage 50 at index 20 with fuzzy_inter raised to 20.
        positions = {"stop_point": 100}
        options.fuzzy_inter = 20
        si.get_differential_cover(20, flags, coverage_state, positions, 50,
                                  options, 80)
        self.assertDictEqual(positions, {"stop_point": 80})

    def test_check_coverage_pos(self):
        """check_coverage_pos should return a falsy value and keep poss."""
        # NOTE(review): si.coverage_comparison stays patched after this test;
        # nothing here restores it, so later tests see the mock.
        si.coverage_comparison = self.mock.mock_coverage_comparison
        coverage_state = {"low": 20, "high": 30, "total": 90, "diff": 50}
        positions = {"high": 20, "low": 70, "stop_point": 70}
        flags = {"detect_diff": True, "first": True}
        current = {"coverage": 50, "pos": 80}
        found = si.check_coverage_pos(30, 100, current, 80, coverage_state,
                                      flags, positions, "+", 5)
        self.assertFalse(found)
        self.assertDictEqual(positions,
                             {'high': 20, 'stop_point': 70, 'low': 70})

    def test_get_best(self):
        """get_best should return the per-condition coverage summary."""
        options = self.mock_args.mock()
        options.tolerance = 5
        options.fuzzy_inter = 5
        options.decrease_inter = 50
        expected = {
            'frag_1': [{
                'low': -1,
                'high': -1,
                'avg': 30.7,
                'pos': 21,
                'type': 'frag',
                'track': 'track_1',
            }],
        }
        summary = si.get_best(self.example.wigs, "aaa", "+", 2, 20, "normal",
                              options, 10)
        self.assertDictEqual(summary, expected)

    def test_get_attribute_string(self):
        """get_attribute_string should build the GFF attribute column."""
        coverage = {'best': 23, 'low': 20, 'high': 35}
        expected = "ID=aaa_srna1;Name=sRNA_sRNA_00001;sRNA_type=3utr;with_TSS=TSS_100+;end_cleavage=Cleavage_150+;best_avg_coverage=23;best_high_coverage=35;best_low_coverage=20"
        attributes = si.get_attribute_string(
            coverage, "TSS_100+;Cleavage_150+", 1, "sRNA_00001", "3utr",
            "aaa")
        self.assertEqual(attributes, expected)

    def test_check_pro(self):
        """check_pro should return the position, coverage data and label."""
        # NOTE(review): si.replicate_comparison stays patched after this
        # test; nothing here restores it, so later tests see the mock.
        si.replicate_comparison = self.mock.mock_replicate_comparison
        options = self.mock_args.mock()
        settings = {
            "tex_notex": "tex_notex",
            "min_len": 30,
            "max_len": 300,
            "decrease_inter": 50,
            "fuzzy_inter": 5,
            "tolerance": 5,
            "replicates": "rep",
            "texs": {"track_1@AND@track_2"},
            "pros": copy.deepcopy(self.example.pros),
        }
        for name, value in settings.items():
            setattr(options, name, value)
        transcripts = copy.deepcopy(self.example.tas)

        pro_pos, new_srna_datas, detect_pro = si.check_pro(
            transcripts[0], 20, 70, {"pos": 50}, "within", 5,
            self.example.wigs, 20, options)

        expected_datas = {
            'best': 40,
            'high': 50,
            'low': 10,
            "pos": 5,
            "conds": {"cond1": "test1"},
            "detail": None,
        }
        self.assertEqual(pro_pos, 190)
        self.assertDictEqual(new_srna_datas, expected_datas)
        self.assertEqual(detect_pro, "Cleavage:190_+")

    def test_exchange_to_pro(self):
        """exchange_to_pro should report a detection with updated data."""
        options = self.mock_args.mock()
        settings = {
            "max_len": 300,
            "min_len": 30,
            "table_best": True,
            "replicates": "rep",
            "tex_notex": "tex_notex",
            "texs": "texs",
            "decrease_inter": 50,
            "fuzzy_inter": 5,
            "pros": copy.deepcopy(self.example.pros),
            "tolerance": 5,
        }
        for name, value in settings.items():
            setattr(options, name, value)
        transcripts = copy.deepcopy(self.example.tas)
        # NOTE(review): si.replicate_comparison stays patched after this
        # test; nothing here restores it, so later tests see the mock.
        si.replicate_comparison = self.mock.mock_replicate_comparison

        initial_datas = {"pos": 50, "best": 10, "high": 12}
        detect, srna_datas, pro = si.exchange_to_pro(
            options, initial_datas, transcripts[0], 20, 70, 10,
            self.example.wigs, 20)

        expected_datas = {
            'best': 40,
            'high': 50,
            'low': 10,
            'pos': 190,
            "conds": {"cond1": "test1"},
            "detail": None,
        }
        self.assertTrue(detect)
        self.assertDictEqual(srna_datas, expected_datas)
        self.assertEqual(pro, "Cleavage:190_+")

    def test_get_tss_type(self):
        """get_tss_type should return 10 for the first example TSS."""
        # NOTE(review): si.check_break_tran stays patched after this test;
        # nothing here restores it, so later tests see the mock.
        si.check_break_tran = self.mock.mock_check_break_tran
        cutoffs = {
            "primary": 0,
            "secondary": 0,
            "internal": 0,
            "antisense": 50,
            "orphan": 10,
        }
        first_tss = self.example.tsss[0]
        result = si.get_tss_type(first_tss, cutoffs, None, None, None, False)
        self.assertEqual(result, 10)

    def test_detect_wig_pos(self):
        """detect_wig_pos should write the sRNA GFF line and table row."""
        # NOTE(review): si.replicate_comparison stays patched after this
        # test; nothing here restores it, so later tests see the mock.
        si.replicate_comparison = self.mock.mock_replicate_comparison
        table_buffer = StringIO()
        gff_buffer = StringIO()
        options = self.mock_args.mock()
        settings = {
            "texs": "texs",
            "replicates": "rep",
            "max_len": 300,
            "min_len": 30,
            "decrease_inter": 50,
            "fuzzy_inter": 5,
            "tex_notex": "tex_notex",
            "pros": copy.deepcopy(self.example.pros),
            "table_best": True,
            "nums": {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3},
            "out_table": table_buffer,
            "output": gff_buffer,
            "tolerance": 5,
        }
        for name, value in settings.items():
            setattr(options, name, value)
        transcripts = copy.deepcopy(self.example.tas)

        si.detect_wig_pos(self.example.wigs, transcripts[0], 20, 70,
                          "TSS_160+", 10, 20, options)

        expected_gff = "aaa\tANNOgesic\tncRNA\t20\t190\t.\t+\t.\tID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;with_TSS=TSS_160+;end_cleavage=Cleavage:190_+;best_avg_coverage=40;best_high_coverage=50;best_low_coverage=10\n"
        expected_row = "aaa\t00000\t20\t190\t+\tcond1\ttest1\t40\t50\t10\t\n"
        self.assertEqual(gff_buffer.getvalue(), expected_gff)
        self.assertEqual(table_buffer.getvalue(), expected_row)

    def test_detect_longer(self):
        """detect_longer should write the sRNA GFF line and table row."""
        # NOTE(review): the three si functions patched in this test
        # (replicate_comparison, coverage_comparison, get_tss_type) are not
        # restored afterwards, so later tests see the mocks.
        si.replicate_comparison = self.mock.mock_replicate_comparison
        si.coverage_comparison = self.mock.mock_coverage_comparison
        table_buffer = StringIO()
        gff_buffer = StringIO()
        cutoffs = {
            "primary": 0,
            "secondary": 0,
            "internal": 0,
            "antisense": 50,
            "orphan": 10,
        }
        options = self.mock_args.mock()
        settings = {
            "tex_notex": "tex_notex",
            "min_len": 30,
            "max_len": 300,
            "decrease_inter": 50,
            "fuzzy_inter": 5,
            "tolerance": 5,
            "tsss": copy.deepcopy(self.example.tsss),
            "pros": copy.deepcopy(self.example.pros),
            "nums": {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3},
            "fuzzy": 20,
            "file_type": "frag",
            "break_tran": False,
            "detects": {"overlap": False, "uni_with_tss": False},
            "cutoff_coverage": cutoffs,
            "texs": "texs",
            "replicates": "rep",
            "table_best": True,
            "wigs_f": "",
            "wigs_r": "",
            "notex": 20,
            "output": gff_buffer,
            "out_table": table_buffer,
        }
        for name, value in settings.items():
            setattr(options, name, value)
        transcripts = copy.deepcopy(self.example.tas)
        si.get_tss_type = self.mock.mock_get_tss_type

        si.detect_longer(transcripts[0], options, None, options.wigs_f,
                         options.wigs_r)

        expected_gff = "aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\tID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;with_TSS=TSS:170_+\n"
        expected_row = "aaa\t00000\t170\t230\t+\tNA\tNA\tNA\tNA\tNA\tTSS:170_+\n"
        self.assertEqual(gff_buffer.getvalue(), expected_gff)
        self.assertEqual(table_buffer.getvalue(), expected_row)

    def test_get_proper_tss(self):
        """The first TSS returned by get_proper_tss should start at 140."""
        cutoffs = dict(primary=0, secondary=0, internal=0,
                       antisense=50, orphan=10)
        tss_path = os.path.join(self.test_folder, "tss.gff")
        gen_file(tss_path, self.example.gff_file)
        # The second element of the returned pair is not checked here.
        found_tsss, _ = si.get_proper_tss(tss_path, cutoffs)
        first_tss = found_tsss[0]
        self.assertEqual(first_tss.start, 140)

    def test_check_srna_condition(self):
        """check_srna_condition should write the expected GFF line and row.

        ``si.replicate_comparison`` and ``si.coverage_comparison`` are
        stubbed only while the function under test runs and are restored
        afterwards, so the stubs do not leak into other tests.
        """
        # Local import keeps this test self-contained.
        from unittest.mock import patch

        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        detects = {"overlap": False, "uni_with_tss": False}
        notex = {
            "primary": 0,
            "secondary": 0,
            "internal": 0,
            "antisense": 30,
            "orphan": 10,
        }
        coverage = {
            "primary": 0,
            "secondary": 0,
            "internal": 0,
            "antisense": 50,
            "orphan": 10,
        }
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        # Deep copies keep the shared example fixtures untouched.
        args.tsss = copy.deepcopy(self.example.tsss)
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.nums = nums
        args.fuzzy = 20
        args.detects = detects
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.file_type = "frag"
        args.break_tran = False
        args.notex = notex
        args.output = output
        args.cutoff_coverage = coverage
        args.out_table = out_table
        # create=True mirrors the original plain assignments.
        with patch.multiple(
                si, create=True,
                replicate_comparison=self.mock.mock_replicate_comparison,
                coverage_comparison=self.mock.mock_coverage_comparison):
            si.check_srna_condition(tas[0], args, None, args.wigs_f,
                                    args.wigs_r)
        self.assertEqual(
            output.getvalue(),
            "aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\tID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;with_TSS=TSS:170_+\n"
        )
        self.assertEqual(
            out_table.getvalue(),
            "aaa\t00000\t170\t230\t+\tNA\tNA\tNA\tNA\tNA\tTSS:170_+\n")

    def test_intergenic_srna(self):
        """intergenic_srna should create both the output file and the table.

        Four module-level functions of ``si`` are stubbed (``read_libs``,
        ``read_wig``, ``replicate_comparison``, ``coverage_comparison``).
        They are restored once the call returns so the stubs do not leak
        into other tests.
        """
        # Local import keeps this test self-contained.
        from unittest.mock import patch

        gff_file = os.path.join(self.test_folder, "aaa.gff")
        tss_file = os.path.join(self.test_folder, "aaa_TSS.gff")
        tran_file = os.path.join(self.test_folder, "aaa_tran.gff")
        pro_file = os.path.join(self.test_folder, "aaa_processing.gff")
        wig_f_file = os.path.join(self.wig_folder, "wig_f.wig")
        wig_r_file = os.path.join(self.wig_folder, "wig_r.wig")
        # All four annotation inputs are written from the same example GFF.
        for path in (gff_file, tss_file, tran_file, pro_file):
            gen_file(path, self.example.gff_file)
        output_file = os.path.join(self.test_folder, "output")
        output_table = os.path.join(self.test_folder, "table")
        # The same list object is used for both cutoff settings, as before.
        coverage = [0, 0, 0, 50, 10]
        args = self.mock_args.mock()
        args.gff_file = gff_file
        args.tran_file = tran_file
        args.pro_file = pro_file
        args.tss_file = tss_file
        args.table_best = True
        args.cutoffs = coverage
        args.out_folder = self.test_folder
        args.file_type = "frag"
        args.cut_notex = coverage
        args.input_libs = "input_libs"
        args.wig_folder = self.wig_folder
        args.wig_f_file = wig_f_file
        args.wig_r_file = wig_r_file
        args.tss_source = True
        args.output_file = output_file
        args.output_table = output_table
        args.in_cds = False
        args.wigs_f = None
        args.wigs_r = None
        # create=True mirrors the original plain assignments.
        with patch.multiple(
                si, create=True,
                read_libs=self.mock.mock_read_libs,
                read_wig=self.mock.mock_read_wig,
                replicate_comparison=self.mock.mock_replicate_comparison,
                coverage_comparison=self.mock.mock_coverage_comparison):
            si.intergenic_srna(args, args.input_libs, None, args.wigs_f,
                               args.wigs_r)
        self.assertTrue(os.path.exists(output_file))
        self.assertTrue(os.path.exists(output_table))
Пример #26
0
class TestRibos(unittest.TestCase):
    """Tests for ``Ribos`` run against a scratch ``test_folder`` tree."""

    def setUp(self):
        """Create a fresh scratch directory tree and a ``Ribos`` instance.

        A ``test_folder`` left over from an interrupted run is removed
        first. Previously an existing folder caused every sub-directory
        creation to be skipped, so a stale folder with a different layout
        made the tests fail on missing paths.
        """
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.gffs = os.path.join(self.test_folder, "gffs")
        self.fastas = os.path.join(self.test_folder, "fastas")
        self.out_folder = os.path.join(self.test_folder, "output")
        self.database = os.path.join(self.test_folder, "database")
        self.seq_path = os.path.join(self.test_folder, "seqs")
        self.tables = os.path.join(self.out_folder, "tables")
        self.stat = os.path.join(self.out_folder, "statistics")
        self.scan = os.path.join(self.test_folder, "scan")
        self.tsss = os.path.join(self.test_folder, "tsss")
        self.trans = os.path.join(self.test_folder, "trans")
        self.out_gff = os.path.join(self.out_folder, "gffs")
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        # Parents come before their children: os.mkdir does not create
        # intermediate directories.
        for folder in (
                self.test_folder,
                self.tsss,
                os.path.join(self.tsss, "tmp"),
                self.trans,
                os.path.join(self.trans, "tmp"),
                self.gffs,
                os.path.join(self.gffs, "tmp"),
                self.fastas,
                os.path.join(self.fastas, "tmp"),
                self.out_folder,
                self.database,
                self.seq_path,
                os.path.join(self.out_folder, "tmp_table"),
                os.path.join(self.out_folder, "tmp_scan"),
                os.path.join(self.out_folder, "tmp_fasta"),
                os.path.join(self.out_folder, "scan_Rfam"),
                self.tables,
                self.scan,
                self.stat,
                self.out_gff,
        ):
            os.mkdir(folder)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.ribos_out_folder = self.out_folder
        args.database = self.database
        args.tsss = self.tsss
        args.trans = self.trans
        # NOTE(review): 'riboswtich' looks like a misspelling of
        # 'riboswitch'; value left unchanged — confirm whether intentional.
        args.program = 'riboswtich'
        self.ribo = Ribos(args)

    def tearDown(self):
        """Delete the scratch directory tree if it exists."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _stub(self, owner, name, replacement):
        """Set ``owner.name`` to ``replacement`` until the test finishes.

        The previous state of the attribute is restored through
        ``addCleanup``, so module-level stubs do not leak between tests.
        """
        from unittest.mock import patch

        # create=True mirrors a plain assignment, which also works when the
        # attribute does not exist yet.
        patcher = patch.object(owner, name, replacement, create=True)
        patcher.start()
        self.addCleanup(patcher.stop)

    def test_scan_extract_rfam(self):
        """_scan_extract_rfam should record the prefix and leave the
        regenerated fasta file in place (external steps are stubbed)."""
        # The Ribos instance is rebuilt in setUp, so a direct assignment on
        # it needs no restoring.
        self.ribo._run_cmscan = self.mock.mock_run_cmscan
        self._stub(rb, "modify_table", self.mock.mock_modify_table)
        self._stub(rb, "regenerate_seq", self.mock.mock_regenerate_seq)
        self._stub(rb, "reextract_rbs", self.mock.mock_reextract_rbs)
        prefixs = []
        gen_file(os.path.join(self.gffs, "tmp/test.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.fastas, "tmp/test.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.seq_path, "test.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.tsss, "tmp/test_TSS.gff"),
                 self.example.tss_file)
        gen_file(os.path.join(self.trans, "tmp/test_transcript.gff"),
                 self.example.tran_file)
        gen_file(os.path.join(self.out_folder, "tmp_fasta", "test.fa"),
                 self.example.fasta_file)
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.fastas = self.fastas
        args.out_folder = self.out_folder
        args.gffs = self.gffs
        args.fuzzy = 5
        args.fuzzy_rbs = 2
        args.utr = True
        args.without_rbs = False
        args.rbs_seq = ["AGGAGG"]
        args.output_all = "test"
        args.cutoff = "e_0.01"
        tmp_files = {"fasta": os.path.join(self.out_folder, "tmp_fasta"),
                     "scan": "tmp_scan",
                     "table": os.path.join(self.out_folder, "tmp_table")}
        rfam = "Rfam_.cm"
        suffixs = {"csv": "test.csv",
                   "txt": "test_prescan.txt",
                   "re_txt": "test_scan.txt",
                   "re_csv": "test_scan.csv"}
        # The context manager closes the log even if the call raises, so
        # tearDown can always remove the folder.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.ribo._scan_extract_rfam(prefixs, args, tmp_files,
                                         suffixs, "test", rfam, log)
        self.assertListEqual(prefixs, ["test"])
        self.assertTrue(os.path.exists(os.path.join(
            self.out_folder, "tmp_fasta", "test_regenerate.fa")))

    def test_merge_results(self):
        """Smoke test: _merge_results should run without raising.

        No assertion is made on the produced files.
        """
        self._stub(rb, "stat_and_covert2gff",
                   self.mock.mock_stat_and_covert2gff)
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gen_file(
            os.path.join(self.out_folder, "tmp_table/test_riboswitch.csv"),
            self.example.table)
        gen_file(
            os.path.join(self.out_folder,
                         "tmp_scan/test_riboswitch_prescan.txt"),
            self.example.rescan_file)
        gen_file(
            os.path.join(self.out_folder,
                         "tmp_scan/test_riboswitch_scan.txt"),
            self.example.rescan_file)
        gen_file(os.path.join(self.test_folder, "ids"), self.example.ids)
        gen_file(os.path.join(self.tables, "test_riboswitch.csv"),
                 self.example.table)
        gen_file('test_folder/output/tmp_table/test_test_scan.csv', "test")
        gen_file(os.path.join("test_folder/output", "tmp_fasta",
                              "test_regenerate.fa"), "test")
        gen_file('test_folder/output/tmp_scan/test_test_prescan.txt', "test")
        gen_file('test_folder/output/tmp_scan/test_test_scan.txt', "test")
        if not os.path.exists('test_folder/output/tmp_table/test_test.csv'):
            gen_file('test_folder/output/tmp_table/test_test.csv', "test")
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.fastas = self.fastas
        args.out_folder = self.out_folder
        args.gffs = self.gffs
        args.ribos_id = os.path.join(self.test_folder, "ids")
        args.fuzzy = 3
        suffixs = {"csv": "test.csv",
                   "txt": "test_prescan.txt",
                   "re_txt": "test_scan.txt",
                   "re_csv": "test_scan.csv"}
        tmp_files = {"fasta": os.path.join(self.out_folder, "tmp_fasta"),
                     "scan": os.path.join(self.out_folder, "tmp_scan"),
                     "table": os.path.join(self.out_folder, "tmp_table")}
        rfam = "Rfam_.cm"
        # The context manager closes the log even if the call raises.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.ribo._merge_results(
                args, os.path.join(self.out_folder, "tmp_scan"), suffixs,
                tmp_files, os.path.join(self.out_folder, "tmp_scan"),
                os.path.join(self.out_folder, "scan_Rfam"),
                os.path.join(self.out_folder, "scan_Rfam"),
                os.path.join(self.out_folder, "gffs"), "riboswitch", log)
Пример #27
0
class TestsORFDetection(unittest.TestCase):
    """Tests for ``sORFDetection`` run against a scratch ``test_folder``."""

    def setUp(self):
        """Create a fresh scratch directory tree and the object under test.

        A ``test_folder`` left over from an interrupted run is removed
        first. Previously an existing folder caused every sub-directory
        creation to be skipped.
        """
        self.mock = Mock_func()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.tsss = "test_folder/tsss"
        self.srnas = "test_folder/sRNA"
        self.out = "test_folder/output"
        self.trans = "test_folder/trans"
        self.fastas = "test_folder/fastas"
        self.tex = "test_folder/tex"
        self.frag = "test_folder/frag"
        self.gffs = "test_folder/gffs"
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        # The root must be first: os.mkdir does not create parents.
        for folder in (self.test_folder, self.tsss, self.out, self.trans,
                       self.fastas, self.tex, self.frag, self.srnas,
                       self.gffs):
            os.mkdir(folder)
        args = self.mock_args.mock()
        args.tsss = self.tsss
        args.srnas = self.srnas
        args.out_folder = self.out
        args.trans = self.trans
        args.fastas = self.fastas
        self.sorf = sORFDetection(args)

    def tearDown(self):
        """Delete the scratch directory tree if it exists."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _stub(self, owner, name, replacement):
        """Set ``owner.name`` to ``replacement`` until the test finishes.

        The previous state of the attribute is restored through
        ``addCleanup``, so module-level stubs do not leak between tests.
        """
        from unittest.mock import patch

        # create=True mirrors a plain assignment, which also works when the
        # attribute does not exist yet.
        patcher = patch.object(owner, name, replacement, create=True)
        patcher.start()
        self.addCleanup(patcher.stop)

    def test_start_stop_codon(self):
        """_start_stop_codon should leave ``test_sORF`` gff/csv files in
        both the all_candidates and best_candidates folders."""
        gff_path = os.path.join(self.out, "gffs")
        table_path = os.path.join(self.out, "tables")
        os.mkdir(gff_path)
        os.mkdir(table_path)
        os.mkdir(os.path.join(gff_path, "all_candidates"))
        os.mkdir(os.path.join(table_path, "all_candidates"))
        os.mkdir(os.path.join(gff_path, "best_candidates"))
        os.mkdir(os.path.join(table_path, "best_candidates"))
        gen_file(os.path.join(gff_path, "all_candidates/test_sORF_all.gff"),
                 "test")
        gen_file(os.path.join(gff_path, "all_candidates/test_sORF_all.csv"),
                 "test")
        gen_file(os.path.join(gff_path, "all_candidates/test_sORF_best.gff"),
                 "test")
        gen_file(os.path.join(gff_path, "all_candidates/test_sORF_best.csv"),
                 "test")
        self._stub(so, "sorf_detection", self.mock.mock_sorf_detection)
        args = self.mock_args.mock()
        args.libs = "libs"
        args.tex_notex = "tex_notex"
        args.replicates = "replicates"
        args.start_codon = ["ATG"]
        args.stop_codon = ["TTA"]
        args.background = "background"
        args.wig_path = "wig_path"
        args.merge_wigs = "merge_wigs"
        # The context manager closes the log even when the call or an
        # assertion fails; before, close() was only reached on success.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.sorf._start_stop_codon(["test"], args, log)
        self.assertTrue(os.path.exists(os.path.join(
            gff_path, "best_candidates/test_sORF.gff")))
        self.assertTrue(os.path.exists(os.path.join(
            gff_path, "all_candidates/test_sORF.gff")))
        self.assertTrue(os.path.exists(os.path.join(
            table_path, "best_candidates/test_sORF.csv")))
        self.assertTrue(os.path.exists(os.path.join(
            table_path, "all_candidates/test_sORF.csv")))

    def test_compare_tran_cds(self):
        """_compare_tran_cds should return the prefix of the one gff file."""
        self._stub(so, "get_intergenic", self.mock.mock_get_intergenic)
        gen_file(os.path.join(self.test_folder, "test.gff"), "test")
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.gffs = self.test_folder
        args.hypo = False
        args.utr_detect = True
        args.extend_5 = 5
        args.extend_3 = 75
        # The context manager closes the log even if the call raises.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            prefixs = self.sorf._compare_tran_cds(args, log)
        self.assertListEqual(prefixs, ["test"])

    def test_run_sorf_detection(self):
        """Smoke test: run_sorf_detection should run without raising.

        Collaborators are stubbed; no assertion is made on the output.
        """
        gff_path = os.path.join(self.out, "gffs")
        table_path = os.path.join(self.out, "tables")
        os.mkdir(gff_path)
        os.mkdir(table_path)
        os.mkdir(os.path.join(gff_path, "all_candidates"))
        os.mkdir(os.path.join(table_path, "all_candidates"))
        os.mkdir(os.path.join(gff_path, "best"))
        os.mkdir(os.path.join(table_path, "best"))
        self._stub(so, "get_intergenic", self.mock.mock_get_intergenic)
        self._stub(so, "sorf_detection", self.mock.mock_sorf_detection)
        # self.sorf is rebuilt in setUp, so direct assignments on the
        # instance need no restoring.
        self.sorf._remove_tmp = self.mock.mock_remove_tmp
        self.sorf._check_gff = self.mock.mock_check_gff
        self.sorf._check_necessary_files = self.mock.mock_check_necessary_files
        self.sorf.multiparser = Mock_Multiparser()
        args = self.mock_args.mock()
        args.trans = self.trans
        args.gffs = self.gffs
        args.tsss = self.tsss
        args.out_folder = self.out
        args.libs = "libs"
        args.tex_notex = "tex_notex"
        args.replicates = "replicates"
        args.start_codon = ["ATG"]
        args.stop_codon = ["TTA"]
        args.background = "background"
        args.wig_path = "wig_path"
        args.merge_wigs = "merge_wigs"
        args.fuzzy_rbs = 2
        # The context manager closes the log even if the call raises.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.sorf.run_sorf_detection(args, log)
class TestTranscriptAssembly(unittest.TestCase):
    """Tests for the functions of the ``ta`` module."""

    @staticmethod
    def _lib(name, lib_type, cond):
        """Return a forward-strand, replicate-``a`` library description."""
        return {"name": name, "type": lib_type,
                "cond": cond, "strand": "+", "rep": "a"}

    @staticmethod
    def _tran_points():
        """Return the four transcript points shared by two of the tests."""
        return [{'pos': position, 'cond': 1, 'strand': '+',
                 'coverage': coverage}
                for position, coverage in ((3, 41.0), (4, 47.0),
                                           (6, 47.0), (8, 47.0))]

    def setUp(self):
        """Prepare the example data and make sure the scratch folder exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder when it is present."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_read_wig(self):
        """read_wig should parse the forward example wig file."""
        wig_path = os.path.join(self.test_folder, "test_f.wig")
        gen_file(wig_path, self.example.wig_f)
        lib_list = [self._lib("test1", "frag", "1")]
        parsed = ta.read_wig(wig_path, lib_list, "+")
        self.assertDictEqual(parsed, self.example.wigs_f)

    def test_detect_hight_toler(self):
        """detect_hight_toler should raise 'best' to the new coverage."""
        running = {"best": 10, "toler": 2}
        seen_tracks = []
        ta.detect_hight_toler({"coverage": 100, "track": "test_1"}, 5,
                              running, seen_tracks)
        self.assertDictEqual(running, {'best': 100, 'toler': 2})

    def test_check_tex_conds(self):
        """check_tex_conds should count one hit per condition/type pair."""
        lib_list = [self._lib("test1", "frag", "1"),
                    self._lib("test2", "tex", "2")]
        tex_counts = {"test1": 2, "test2": 2}
        checked = []
        cond_counts = {}
        ta.check_tex_conds(["test1", "test2"], lib_list, tex_counts,
                           checked, cond_counts, 1)
        self.assertDictEqual(cond_counts, {'1_frag': 1, '2_tex': 1})

    def test_elongation(self):
        """elongation should report the best coverage, tracks and position."""
        cover_list = [
            {"coverage": 10, "pos": 10, "track": "test1"},
            {"coverage": 1, "pos": 10, "track": "test2"},
            {"coverage": 100, "pos": 11, "track": "test1"},
            {"coverage": 20, "pos": 11, "track": "test2"},
        ]
        lib_list = [self._lib("test1", "tex", "1"),
                    self._lib("test2", "notex", "1")]
        pending_texs = {"test1_test2": 2}
        toler_list = []
        transcripts = {"aaa": []}
        args = self.mock_args.mock()
        args.replicates = {"tex": 1, "frag": 1}
        args.height = 5
        args.tex = 2
        best, _conds, found_tracks, found_texs, last_pos = ta.elongation(
            cover_list, pending_texs, lib_list, "+", transcripts, args,
            "aaa", toler_list)
        self.assertEqual(best, 100)
        self.assertListEqual(found_tracks, ['test1', 'test2'])
        self.assertDictEqual(found_texs, {'test1_test2': 2})
        self.assertEqual(last_pos, 11)
        self.assertDictEqual(
            transcripts,
            {'aaa': [{'coverage': 10, 'cond': 1, 'strand': '+', 'pos': 10}]})

    def test_transfer_to_tran(self):
        """transfer_to_tran should return the expected tolerances and points."""
        args = self.mock_args.mock()
        args.height = 10
        args.tex = 1
        args.replicates = {"tex": 1, "frag": 1}
        lib_list = [self._lib("test1", "frag", "1")]
        toler_map, tran_map = ta.transfer_to_tran(
            self.example.wigs_f, lib_list, {"test1": 2}, "+", args)
        self.assertDictEqual(
            toler_map, {'aaa': [0.0, 2.0, 20, 20, 4.0, 20, 7.0]})
        self.assertDictEqual(tran_map, {'aaa': self._tran_points()})

    def test_fill_gap_and_print(self):
        """fill_gap_and_print should write the example transcript output."""
        transcripts = {'aaa': self._tran_points()}
        toler_map = {'aaa': [0.0, 2.0, 20, 20, 4.0, 20, 20]}
        sink = StringIO()
        args = self.mock_args.mock()
        args.tolerance = 3
        args.low_cutoff = 5
        args.width = 1
        ta.fill_gap_and_print(transcripts, "+", sink, toler_map, "TEX", args)
        self.assertEqual(sink.getvalue(), self.example.out_tran + "\n")

    def test_print_transctipt(self):
        """print_transctipt should write a single Transcript GFF line."""
        sink = StringIO()
        ta.print_transctipt(100, 200, 20, 1, 40, "TEX",
                            20, sink, "aaa", "+")
        expected = "aaa\tANNOgesic\tTranscript\t100\t200\t.\t+\t.\tID=tran_1;Name=Transcript_00001;high_coverage=40;low_coverage=20;detect_lib=TEX\n"
        self.assertEqual(sink.getvalue(), expected)

    def test_assembly(self):
        """assembly should write the GFF header followed by the example."""
        wig_sources = (
            ("aaa_forward.wig", self.example.wig_f),
            ("aaa_reverse.wig", self.example.wig_r),
            ("aaa2_forward.wig", self.example.wig_f),
            ("aaa2_reverse.wig", self.example.wig_r),
        )
        wig_paths = []
        for wig_name, wig_content in wig_sources:
            wig_path = os.path.join(self.test_folder, wig_name)
            gen_file(wig_path, wig_content)
            wig_paths.append(wig_path)
        # Only the first forward/reverse pair is handed over directly.
        forward_wig, reverse_wig = wig_paths[0], wig_paths[1]
        result_path = os.path.join(self.test_folder, "out")
        lib_specs = ["aaa_forward.wig:frag:1:a:+",
                     "aaa_reverse.wig:frag:1:a:-",
                     "aaa2_forward.wig:tex:1:a:+",
                     "aaa2_reverse.wig:tex:1:a:-"]
        args = self.mock_args.mock()
        args.replicates = {"tex": 1, "frag": 1}
        args.height = 10
        args.width = 1
        args.tolerance = 3
        args.tex = 2
        args.low_cutoff = 5
        ta.assembly(forward_wig, reverse_wig, self.test_folder, lib_specs,
                    result_path, "TEX", args)
        written = import_data(result_path)
        self.assertEqual("\n".join(written),
                         "##gff-version 3\n" + self.example.out_tran)
class TestMergeRNAplexRNAup(unittest.TestCase):
    """Tests for the functions of the ``mrr`` module."""

    @staticmethod
    def _target_args(mock_args):
        """Return mocked target arguments shared by the merge/rank tests."""
        args_tar = mock_args.mock()
        args_tar.top = 2
        args_tar.tar_start = 20
        args_tar.tar_end = 15
        return args_tar

    @staticmethod
    def _hit(gene_id, detail, target_id, target_locus, energy, rank):
        """Return one interaction record with the fixed test positions."""
        return {
            "gene_id": gene_id,
            "detail": detail,
            "target_id": target_id,
            "target_locus": target_locus,
            "energy": energy,
            "rank": rank,
            "srna_pos": "2,10",
            "tar_pos": "10,15",
        }

    @staticmethod
    def _expected_rows():
        """Return fresh copies of the three rows the merge tests look for."""
        return [
            ['sRNA_0', 'aaa', '6-15', '7-15', '7-15', '+', 'gene0', 'cds0',
             'AAA_00001', '100-150', '89-50', '89-50', '+', '-6.5', '1',
             '-6.5', '1'],
            ['sRNA_1', 'aaa', '1258-2234', '1259-1267', '1259-1267', '+',
             'gene2', 'cds2', 'AAA_00003', '2348-2934', '2337-50',
             '2337-50', '+', '-10.5', '1', '-10.5', '1'],
            ['sRNA_2', 'aaa', '3544-6517', '6508-6516', '6508-6516', '-',
             'gene0', 'cds0', 'AAA_00001', '100-150', '89-50', '89-50',
             '+', '-23.5', '1', '-23.5', '1'],
        ]

    @staticmethod
    def _count_matches(expected, produced):
        """Count the (expected, produced) pairs that compare equal."""
        return sum(1 for want in expected for got in produced if want == got)

    def setUp(self):
        """Start every test from an empty ``test_project`` folder."""
        self.test_folder = "test_project"
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        os.mkdir(self.test_folder)
        self.example = Example()
        self.mock_args = MockClass()

    def tearDown(self):
        """Remove the scratch folder when it is present."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_detect_energy(self):
        """detect_energy should yield -5.3 from -2 and -8.0 from -8."""
        for start_energy, expected_energy in ((-2, -5.3), (-8, -8.0)):
            srna = {"energy": start_energy}
            mrr.detect_energy(self.example.out_rna_txt, srna)
            self.assertDictEqual(srna, {'energy': expected_energy})

    def test_print_rank_one(self):
        """print_rank_one output should match the example, ignoring
        entries whose key is empty."""
        sink = StringIO()
        args_tar = self._target_args(self.mock_args)
        mrr.print_rank_one(self.example.srnas, sink, "RNAplex",
                           self.example.gffs, self.example.srna_gffs,
                           args_tar, 50)
        parsed = convert_dict(sink.getvalue().split("\n"))
        non_empty = {key: value for key, value in parsed.items()
                     if len(key) != 0}
        reference = convert_dict(self.example.out_print.split("\n"))
        self.assertDictEqual(non_empty, reference)

    def test_read_table(self):
        """read_table should parse both the RNAplex and the RNAup table."""
        rnaplex = os.path.join(self.test_folder, "rnaplex")
        rnaup = os.path.join(self.test_folder, "rnaup")
        gen_file(rnaplex, self.example.rnaplex)
        gen_file(rnaup, self.example.rnaup)
        expected_rnaup = {
            'srna0': [
                {
                    'srna_pos': '20,25',
                    'energy': -4.87,
                    'tar_pos': '571,576',
                    'gene_id': 'gene0',
                    'target_id': 'cds0',
                    'target_locus': 'AAA_00001',
                    'detail': '100-150_+',
                },
                {
                    'srna_pos': '11,26',
                    'energy': -5.91,
                    'tar_pos': '14,30',
                    'gene_id': 'NA',
                    'target_id': 'cds1',
                    'target_locus': 'AAA_00003',
                    'detail': '2348-2934_+',
                },
            ],
        }
        expected_rnaplex = {
            'srna0': [
                {
                    'srna_pos': '20,25',
                    'energy': -5.3,
                    'tar_pos': '571,576',
                    'gene_id': 'gene0',
                    'target_id': 'cds0',
                    'target_locus': 'AAA_00001',
                    'detail': '100-150_+',
                },
            ],
            'srna1': [
                {
                    'srna_pos': '24,31',
                    'energy': -1.91,
                    'tar_pos': '163,170',
                    'gene_id': 'gene0',
                    'target_id': 'cds0',
                    'target_locus': 'AAA_00001',
                    'detail': '100-150_+',
                },
            ],
        }
        srnas = mrr.read_table(self.example.srna_gffs, rnaplex, rnaup,
                               self.example.genes, self.example.gffs,
                               ["CDS"])
        self.assertDictEqual(
            srnas, {'RNAup': expected_rnaup, 'RNAplex': expected_rnaplex})

    def test_get_srna_name(self):
        """get_srna_name should return the name and the matching entry."""
        result = mrr.get_srna_name(self.example.srna_gffs, "srna0")
        self.assertEqual(result[0], 'sRNA_0')
        self.assertEqual(result[1].start, 6)

    def test_get_target_info(self):
        """get_target_info should return the entry starting at 100."""
        target = {
            "gene_id": "gene0",
            "detail": "100-150_+",
            "target_id": "cds0",
            "target_locus": "AAA_00001",
            "energy": -6.5,
        }
        found = mrr.get_target_info(self.example.gffs, target)
        self.assertEqual(found.start, 100)

    def test_merge_base_rnaplex(self):
        """merge_base_rnaplex should put the three expected rows into both
        its return value and the ``merges`` list."""
        args_tar = self._target_args(self.mock_args)
        merges = []
        overlap = mrr.merge_base_rnaplex(self.example.srnas,
                                         self.example.srna_gffs, args_tar,
                                         self.example.gffs, merges, 50)
        expected = self._expected_rows()
        self.assertEqual(self._count_matches(expected, overlap), 3)
        self.assertEqual(self._count_matches(expected, merges), 3)

    def test_merge_base_rnaup(self):
        """merge_base_rnaup should add the three expected rows to
        ``merges``."""
        args_tar = self._target_args(self.mock_args)
        gene0 = ("gene0", "100-150_+", "cds0", "AAA_00001")
        gene2 = ("gene2", "2348-2934_+", "cds2", "AAA_00003")
        # Every record is built separately so no dict is shared between
        # the two programs.
        srnas = {
            "RNAplex": {
                "srna0": [self._hit(*gene0, -6.5, 1)],
                "srna1": [self._hit(*gene2, -10.5, 1)],
                "srna2": [self._hit(*gene0, -23.5, 1),
                          self._hit(*gene2, -6.5, 2)],
            },
            "RNAup": {
                "srna0": [self._hit(*gene0, -6.5, 1)],
                "srna1": [self._hit(*gene2, -10.5, 1)],
                "srna2": [self._hit(*gene0, -23.5, 1)],
            },
        }
        merges = []
        mrr.merge_base_rnaup(srnas, self.example.srna_gffs, args_tar,
                             self.example.gffs, merges, 50)
        self.assertEqual(
            self._count_matches(self._expected_rows(), merges), 3)
Пример #30
0
class TestSubLocal(unittest.TestCase):
    """Tests for the helper methods of ``SubLocal``.

    Every test works inside a scratch ``test_folder`` tree that is created
    in ``setUp`` and removed again in ``tearDown``.
    """

    def setUp(self):
        """Create the scratch directory tree and the ``SubLocal`` under test."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.out = "test_folder/output"
        self.fastas = "test_folder/fastas"
        self.gffs = "test_folder/gffs"
        self.stat = "test_folder/stat"
        self.trans = "test_folder/tran"
        # Create every directory on its own instead of only when the root is
        # missing: a root left over from an aborted run must not leave the
        # subfolders uncreated.
        for folder in (self.test_folder,
                       self.out,
                       self.fastas,
                       os.path.join(self.fastas, "tmp"),
                       self.gffs,
                       os.path.join(self.gffs, "tmp"),
                       self.stat,
                       self.trans):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.out_folder = self.out
        args.trans = self.trans
        self.sub = SubLocal(args)

    def tearDown(self):
        """Remove the scratch directory tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_get_protein_seq(self):
        """``_get_protein_seq`` returns the prefix of the GFF file name."""
        gff = "aaa.gff"
        gen_file(os.path.join(self.fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.gffs, "tmp", gff), self.example.gff_file)
        gen_file(os.path.join(self.trans, "aaa_transcript.gff"),
                 self.example.tran_file)
        prefix = self.sub._get_protein_seq(gff, self.test_folder, self.trans)
        self.assertEqual(prefix, "aaa")

    def test_run_psortb(self):
        """``_run_psortb`` leaves a log and a raw result file behind."""
        # Patched on the instance only, so nothing has to be restored.
        self.sub._psortb = self.mock.mock_psortb
        tmp_result = os.path.join(self.out, "tmp_results")
        os.mkdir(tmp_result)
        args = self.mock_args.mock()
        args.psortb_path = "psortb_path"
        args.gram = "positive"
        self.sub._run_psortb(args, "aaa", self.out, self.test_folder,
                             tmp_result)
        self.assertTrue(os.path.exists(os.path.join(self.out, "tmp_log")))
        self.assertTrue(
            os.path.exists(os.path.join(tmp_result, "aaa_raw.txt")))

    def test_merge_and_stat(self):
        """``_merge_and_stat`` creates per-genome entries in both folders."""
        # Register the restore before patching so the module-level
        # replacement cannot leak into later tests, even on failure.
        self.addCleanup(setattr, su, "stat_sublocal", su.stat_sublocal)
        su.stat_sublocal = self.mock.mock_stat_sublocal
        os.mkdir(os.path.join(self.gffs, "aaa.gff_folder"))
        gen_file(os.path.join(self.gffs, "aaa.gff_folder/aaa.gff"), "test")
        os.mkdir(os.path.join(self.out, "psortb_results"))
        gen_file(os.path.join(self.out, "aaa_raw.txt"), "test")
        gen_file(os.path.join(self.out, "aaa_table.csv"), "test")
        self.sub._merge_and_stat(self.gffs, self.out, self.test_folder,
                                 self.stat)
        self.assertTrue(os.path.exists(os.path.join(self.stat, "aaa")))
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "aaa")))

    def test_compare_cds_tran(self):
        """``_compare_cds_tran`` writes the expected CDS line to tmp_cds.gff."""
        gff_file = os.path.join(self.test_folder, "aaa.gff")
        tran_file = os.path.join(self.test_folder, "aaa_transcript.gff")
        gen_file(gff_file, self.example.gff_file)
        gen_file(tran_file, self.example.tran_file)
        self.sub._compare_cds_tran(gff_file, tran_file)
        # Only the first element returned by extract_info is checked.
        datas, _ = extract_info(
            os.path.join(self.out, "all_CDS", "tmp_cds.gff"), "file")
        self.assertEqual("\n".join(datas),
                         'aaa\tRefSeq\tCDS\t3\t17\t.\t+\t.')
Пример #31
0
class TestCircRNA(unittest.TestCase):
    """Tests for the circRNA detection helpers exposed through ``circ``."""

    def setUp(self):
        """Create the fixtures and the scratch folder."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        os.makedirs(self.test_folder, exist_ok=True)

    def tearDown(self):
        """Remove the scratch folder."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _ratio_args(self, ratio):
        """Return mock args with both ratios set to *ratio* and support 5."""
        args = self.mock_args.mock()
        args.start_ratio = ratio
        args.end_ratio = ratio
        args.support = 5
        return args

    def test_get_feature(self):
        """``get_feature`` result for three different attribute sets."""
        cases = [
            ({"ID": "cds0", "Name": "CDS_0", "locus_tag": "AAA_00001",
              "protein_id": "YP_918384.3"}, "AAA_00001"),
            ({"ID": "cds0", "Name": "CDS_0", "protein_id": "YP_918384.3"},
             "YP_918384.3"),
            ({"ID": "cds0", "Name": "CDS_0"}, "cds0:122-267_f"),
        ]
        for attributes_cds, expected in cases:
            feature = circ.get_feature(
                Create_generator(self.example.cds_dict, attributes_cds,
                                 "gff"))
            self.assertEqual(feature, expected)

    def test_detect_conflict(self):
        """``detect_conflict`` writes one tab-separated line to ``out``."""
        circ_dict = {"seq_id": "aaa", "source": "Refseq",
                     "feature": "circRNA", "start": 100, "end": 467,
                     "phase": ".", "strand": "+", "score": ".",
                     "support": 30, "start_site": 30, "end_site": 35,
                     "situation": "P", "splice_type": "C"}
        attributes_circ = {"ID": "circrna0", "Name": "circRNA_0"}
        circrna = Create_generator(circ_dict, attributes_circ, "circ")
        gffs = [Create_generator(self.example.cds_dict,
                                 self.example.attributes_cds, "gff")]
        args = self._ratio_args(0.3)
        out = StringIO()
        out_best = StringIO()
        # Close both buffers even when the assertion below fails.
        self.addCleanup(out.close)
        self.addCleanup(out_best.close)
        circ.detect_conflict(gffs, circrna, 0, out, out_best, args)
        # Field separators are written as explicit escapes so an editor
        # cannot silently turn them into spaces.
        self.assertEqual(
            out.getvalue(),
            "circRNA_0\taaa\t+\t100\t467\tAAA_00001\t30\t1.0\t"
            "0.8571428571428571\n")

    def test_get_circrna(self):
        """``get_circrna`` returns the expected per-genome counters."""
        circs = [Create_generator(self.example.circ_dict[index],
                                  self.example.attributes_circ[index],
                                  "circ")
                 for index in range(5)]
        gffs = [Create_generator(self.example.gffs_dict[index],
                                 self.example.attributes_gffs[index], "gff")
                for index in range(3)]
        out = StringIO()
        out_best = StringIO()
        args = self._ratio_args(0.3)
        nums = circ.get_circrna(circs, gffs, 50, out, out_best, args)
        self.assertDictEqual(
            nums["support"],
            {'aaa': {0: 2, 20: 1, 5: 2, 25: 1, 10: 2, 30: 1, 15: 1},
             'all': {0: 3, 20: 1, 5: 3, 25: 1, 10: 2, 30: 1, 15: 1},
             'bbb': {0: 1, 5: 1}})
        self.assertDictEqual(nums["circular"],
                             {'bbb': 1, 'aaa': 2, 'all': 3})
        self.assertDictEqual(nums["conflict"],
                             {'bbb': {0: 1, 5: 1},
                              'aaa': {},
                              'all': {0: 1, 5: 1}})

    def test_detect_circrna(self):
        """``detect_circrna`` writes the expected result and stat files."""
        out_file = os.path.join(self.test_folder, "out_all.csv")
        stat_file = os.path.join(self.test_folder, "stat.csv")
        # Restore the real reader afterwards so the module-level patch
        # does not leak into other tests.
        self.addCleanup(setattr, circ, "read_file", circ.read_file)
        circ.read_file = Mock_read_file().read_file
        args = self._ratio_args(0.5)
        args.hypo = True
        circ.detect_circrna("test.circ", "test.gff", out_file, args,
                            stat_file)
        circs = import_data(out_file)
        stats = import_data(stat_file)
        # Line order is not checked, only the set of lines.
        self.assertEqual(set(circs), set(self.example.out_file.split("\n")))
        self.assertEqual(set(stats),
                         set(self.example.stat_file.split("\n")))
Пример #32
0
class TestTranscriptAssembly(unittest.TestCase):
    """Tests for the transcript detection helpers exposed through ``ta``."""

    def setUp(self):
        """Create the fixtures and make sure the scratch folder exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch folder when it is still present."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_read_wig(self):
        """``read_wig`` values match ``example.wigs_nf`` entry by entry."""
        track = "test1|+|frag"
        libs = [{"name": "test1", "type": "frag", "cond": "frag_1",
                 "strand": "+", "rep": "a"}]
        wig_path = os.path.join(self.test_folder, "test_f.wig")
        gen_file(wig_path, self.example.wig_f)
        wigs = ta.read_wig(wig_path, "+", libs)
        observed = wigs["aaa"]['frag_1'][track]
        expected = self.example.wigs_nf["aaa"]['frag_1'][track]
        for index in range(len(observed)):
            self.assertEqual(observed[index], expected[index])

    def test_detect_hight_toler(self):
        """``detect_hight_toler`` updates 'best' and keeps 'toler'."""
        tmp_covers = {"best": 10, "toler": 2}
        ta.detect_hight_toler(100, 5, tmp_covers, [], "test_1|+|frag")
        self.assertDictEqual(tmp_covers, {'best': 100, 'toler': 2})

    def test_check_tex_conds(self):
        """``check_tex_conds`` fills ``conds`` with one hit per condition."""
        libs = [
            {"name": "test1", "type": "frag", "cond": "1", "strand": "+",
             "rep": "a"},
            {"name": "test2", "type": "tex", "cond": "2", "strand": "+",
             "rep": "a"},
        ]
        conds = {}
        ta.check_tex_conds(["test1", "test2"], libs,
                           {"test1": 2, "test2": 2}, [], conds, 1)
        self.assertDictEqual(conds, {'1': 1, '2': 1})

    def test_elongation(self):
        """``elongation`` fills ``trans`` for genome 'aaa'."""
        covers = {
            "texnotex_1": {
                "test1|+|texnotex_1": [0] * 9 + [1, 20],
                "test2|+|texnotex_1": [0] * 9 + [10, 100],
            }
        }
        libs = [
            {"name": "test1", "type": "tex", "cond": "texnotex_1",
             "strand": "+", "rep": "a"},
            {"name": "test2", "type": "notex", "cond": "texnotex_1",
             "strand": "+", "rep": "a"},
        ]
        trans = {"aaa": []}
        args = self.mock_args.mock()
        args.replicates = {"tex": ["all_1"], "frag": ["all_1"]}
        args.height = 5
        args.tex = 2
        ta.elongation(covers, {"test1_test2": 2}, libs, "+", trans, args,
                      "aaa", [])
        self.assertDictEqual(trans, {'aaa': [-1] * 9 + [10, 100]})

    def test_transfer_to_tran(self):
        """``transfer_to_tran`` returns the expected tolers and trans."""
        libs = [{"name": "test1", "type": "frag", "cond": "frag_1",
                 "strand": "+", "rep": "a"}]
        args = self.mock_args.mock()
        args.height = 10
        args.tex = 1
        args.replicates = {"tex": ["all_1"], "frag": ["all_1"]}
        tolers, trans = ta.transfer_to_tran(self.example.wigs_f, libs,
                                            {"test1": 2}, "+", args)
        self.assertDictEqual(
            tolers, {'aaa': [0.0, 2.0, 20, 20, 4.0, 20, 7.0, 20]})
        self.assertDictEqual(
            trans, {'aaa': [-1, -1, 41.0, 47.0, -1, 47.0, -1, 47.0]})

    def test_fill_gap_and_print(self):
        """``fill_gap_and_print`` produces two transcripts for 'aaa'."""
        trans = {'aaa': [-1, -1, 41.0, 47.0, -1, 47.0, -1, 47.0]}
        tolers = {
            'aaa': [0.0, 2.0, 20, 20, 4.0, 20, 7, 7, 7, 7, 7, 7, 7, 20]
        }
        args = self.mock_args.mock()
        args.tolerance = 3
        args.low_cutoff = 5
        args.width = 1
        finals = {}
        ta.fill_gap_and_print(trans, "+", finals, tolers, "TEX", args)
        first = {'strand': '+', 'high': 47.0, 'end': 4, 'wig': 'TEX',
                 'low': 41.0, 'start': 3}
        second = {'strand': '+', 'high': 47.0, 'end': 8, 'wig': 'TEX',
                  'low': 47.0, 'start': 6}
        self.assertDictEqual(finals, {'aaa': [first, second]})

    def test_print_transctipt(self):
        """``print_transcript`` writes one GFF line for the transcript."""
        buffer = StringIO()
        tas = {"aaa": [{"start": 100, "end": 200, "strand": "+",
                        "high": 40, "low": 20, "wig": "TEX"}]}
        ta.print_transcript(tas, buffer)
        expected_line = (
            "aaa\tANNOgesic\ttranscript\t100\t200\t.\t+\t.\t"
            "ID=aaa_transcript0;Name=transcript_00000;"
            "high_coverage=40;low_coverage=20;detect_lib=TEX\n")
        self.assertEqual(buffer.getvalue(), expected_line)

    def test_assembly(self):
        """``detect_transcript`` output equals header plus ``out_tran``."""
        folder = self.test_folder
        wig_f_file = os.path.join(folder, "aaa_forward.wig")
        wig_r_file = os.path.join(folder, "aaa_reverse.wig")
        wig_f2_file = os.path.join(folder, "aaa2_forward.wig")
        wig_r2_file = os.path.join(folder, "aaa2_reverse.wig")
        # Same creation order as before: forward, reverse, then the copies.
        gen_file(wig_f_file, self.example.wig_f)
        gen_file(wig_r_file, self.example.wig_r)
        gen_file(wig_f2_file, self.example.wig_f)
        gen_file(wig_r2_file, self.example.wig_r)
        out_file = os.path.join(folder, "out")
        input_lib = ["aaa_forward.wig:frag:1:a:+",
                     "aaa_reverse.wig:frag:1:a:-",
                     "aaa2_forward.wig:tex:1:a:+",
                     "aaa2_reverse.wig:tex:1:a:-"]
        args = self.mock_args.mock()
        args.replicates = {"tex": "all_1", "frag": "all_1"}
        args.height = 10
        args.width = 1
        args.tolerance = 3
        args.tex = 2
        args.low_cutoff = 5
        ta.detect_transcript(wig_f_file, wig_r_file, folder, input_lib,
                             out_file, "TEX", args)
        written = "\n".join(import_data(out_file))
        self.assertEqual(written,
                         "##gff-version 3\n" + self.example.out_tran)
Пример #33
0
class TestsRNAUTR(unittest.TestCase):
    def setUp(self):
        """Create the fixtures and make sure the scratch folder exists."""
        self.test_folder = "test_folder"
        self.example = Example()
        self.mock = Mock_func()
        self.mock_args = MockClass()
        folder_missing = not os.path.exists(self.test_folder)
        if folder_missing:
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch folder when it is still present."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_import_data(self):
        """``sud.import_data`` packs its arguments into the expected dict."""
        position = {"start": 4, "end": 40, "ori_start": 2, "ori_end": 3}
        expected = {
            'strain': 'aaa', 'strand': '+', 'utr': '3UTR',
            'start': 4, 'end': 40,
            'start_tss': 'cds',
            'start_cleavage': 'NA', 'end_cleavage': 'test',
            'datas': 'srna_cover',
        }
        result = sud.import_data("+", "aaa", position, "3UTR", "TSS", "cds",
                                 "srna_cover", "test")
        self.assertDictEqual(result, expected)

    def test_read_data(self):
        """``sud.read_data`` loads annotations and sequence from the files."""
        # One GFF file serves as CDS, transcript, TSS and processing input.
        gff_path = os.path.join(self.test_folder, "test.gff")
        fasta_path = os.path.join(self.test_folder, "test.fa")
        args = self.mock_args.mock()
        args.gff_file = gff_path
        args.ta_file = gff_path
        args.tss_file = gff_path
        args.pro_file = gff_path
        args.seq_file = fasta_path
        gen_file(args.gff_file, self.example.gff_file)
        gen_file(args.seq_file, self.example.seq_file)
        args.hypo = False
        cdss, tas, tsss, pros, seq = sud.read_data(args)
        self.assertEqual(cdss[0].start, 4)
        self.assertEqual(tas[0].start, 4)
        self.assertEqual(tsss[0].start, 4)
        self.assertEqual(pros[0].start, 4)
        expected_seq = (
            'ATATGACGATACGTAAACCGACCGAATATATCTTTTCACAACCAGATTACGATCGTCAT')
        self.assertDictEqual(seq, {'aaa': expected_seq})

    def test_get_terminal(self):
        """``sud.get_terminal`` appends the expected region for "start"."""
        genome_seq = {
            "aaa":
            "ATATGACGATACGTAAACCGACCGAATATATCTTTTCACAACCAGATTACGATCGTCAT"
        }
        regions = []
        sud.get_terminal(self.example.gffs, regions, genome_seq, "start")
        expected = [{'strain': 'aaa', 'strand': '+', 'start': 1, 'end': 4,
                     'len_CDS': 0}]
        self.assertListEqual(regions, expected)

    def test_get_inter(self):
        """``sud.get_inter`` appends the expected region for the gffs."""
        regions = []
        sud.get_inter(self.example.gffs, regions)
        expected = [{'strain': 'aaa', 'strand': '+', 'start': 14, 'end': 20,
                     'len_CDS': 10}]
        self.assertListEqual(regions, expected)

    def test_set_cover_and_point(self):
        """``sud.set_cover_and_point`` fills covers and check points."""
        coverage = [2, 3, 4, 1, 6, 2, 8, 3, 5, 6, 7, 5, 2, 1]
        results = {"covers": None, "check_point": None}
        position = {"start": 2, "end": 6, "ori_start": 2, "ori_end": 3}
        sud.set_cover_and_point(results, self.example.inters[0], coverage,
                                position, 5)
        self.assertListEqual(results["covers"], [2, 3, 4, 1, 6, 2, 8, 3, 5])
        expected_points = {'srna_start': 0, 'srna_end': 12,
                           'utr_start': 2, 'utr_end': 3}
        self.assertDictEqual(results["check_point"], expected_points)

    def test_check_import_srna_covers(self):
        """``sud.check_import_srna_covers`` with and without a decrease.

        With ``detect_decrease`` set, one entry is stored for "cond_1" in
        both cover lists; with it cleared, the sRNA list stays empty.
        """
        inter = self.example.inters[0]
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        cover = {"type": "5utr"}
        pos = {"start": 1, "end": 25, "ori_start": 1, "ori_end": 25}
        datas = {
            "num": 0,
            "cover_tmp": {"total": 100, "ori_total": 200},
            "checks": {"detect_decrease": True},
            "final_poss": {"start": 3, "end": 23},
        }
        cover_results = {
            "cover_sets": {"high": 50, "low": 10},
            "srna_covers": {"cond_1": []},
            "utr_covers": {"cond_1": []},
            "type": "5utr",
            "intercds": "TSS",
        }

        # First call: a decrease was detected.
        sud.check_import_srna_covers(datas, cover_results, inter, "cond_1",
                                     "track", cover, pos, args, "5utr")
        self.assertDictEqual(datas["final_poss"], {'end': 23, 'start': 3})
        stored = {'track': 'track', 'type': '5utr', 'high': 50, 'low': 10,
                  'avg': 4, 'ori_avg': 8.0, 'final_start': 3,
                  'final_end': 23}
        self.assertDictEqual(cover_results["srna_covers"],
                             {'cond_1': [stored]})
        self.assertDictEqual(cover_results["utr_covers"],
                             cover_results["srna_covers"])

        # Second call: no decrease, with both cover lists reset first.
        datas["checks"] = {"detect_decrease": False}
        cover_results["srna_covers"] = {"cond_1": []}
        cover_results["utr_covers"] = {"cond_1": []}
        sud.check_import_srna_covers(datas, cover_results, inter, "cond_1",
                                     "track", cover, pos, args, "5utr")
        self.assertDictEqual(cover_results["srna_covers"], {'cond_1': []})

    def test_check_pos(self):
        """``sud.check_pos`` sets both flags for the given position."""
        flags = {"srna": False, "utr": False}
        points = {"utr_start": 1, "utr_end": 29,
                  "srna_start": 3, "srna_end": 11}
        sud.check_pos({"pos": 4}, points, flags, 4)
        self.assertDictEqual(flags, {'srna': True, 'utr': True})

    def test_get_cover_5utr(self):
        """``sud.get_cover_5utr`` for two ``decrease_utr`` settings."""

        def fresh_datas(previous_cover):
            # Both scenarios start from the same state except for the
            # coverage remembered under "5utr".
            return {
                "num": 0,
                "cover_tmp": {"5utr": previous_cover},
                "checks": {"detect_decrease": True},
                "final_poss": {"start": 1, "end": 26},
            }

        inter = self.example.inters[0]
        args = self.mock_args.mock()

        # Scenario 1: decrease_utr = 50, remembered coverage 0.
        datas = fresh_datas(0)
        cover_sets = {"high": 50, "low": 10}
        args.decrease_utr = 50
        args.fuzzy_utr = 2
        go_out = sud.get_cover_5utr(datas, cover_sets, 20, inter, args, 10)
        self.assertDictEqual(datas["final_poss"], {'start': 1, 'end': 10})
        self.assertEqual(datas["num"], 0)
        self.assertTrue(go_out)
        self.assertDictEqual(datas["cover_tmp"], {'5utr': 0})
        self.assertDictEqual(cover_sets, {'high': 50, 'low': 10})

        # Scenario 2: decrease_utr = 0.5, remembered coverage 30.
        datas = fresh_datas(30)
        cover_sets = {"low": 10, "high": 50}
        args.decrease_utr = 0.5
        go_out = sud.get_cover_5utr(datas, cover_sets, 20, inter, args, 10)
        self.assertEqual(datas["num"], 1)
        self.assertFalse(go_out)
        self.assertDictEqual(datas["final_poss"], {'start': 1, 'end': 26})
        self.assertDictEqual(datas["cover_tmp"], {'5utr': 20})
        self.assertDictEqual(cover_sets, {'low': 20, 'high': 50})

    def test_detect_cover_utr_srna(self):
        """``sud.detect_cover_utr_srna`` with a mocked coverage comparison.

        Checks the entry stored in both cover lists and the updated
        ``cover_sets``.
        """
        # Register the restore before patching so the module-level
        # replacement cannot leak into other tests, even on failure.
        self.addCleanup(setattr, sud, "coverage_comparison",
                        sud.coverage_comparison)
        sud.coverage_comparison = self.mock.mock_coverage_comparison
        cover_results = {
            "cover_sets": {"low": 10, "high": 50},
            "pos": {"low": 10, "high": 50},
            "covers": [20],
            "type": "5utr",
            "srna_covers": {"frag_1": []},
            "utr_covers": {"frag_1": []},
            "intercds": "TSS",
            "check_point": {"utr_start": 1, "utr_end": 29,
                            "srna_start": 2, "srna_end": 25},
        }
        # Not passed to the call below; kept from the original test.
        datas = {
            "num": 0,
            "cover_tmp": {"total": 100, "ori_total": 200},
            "checks": {"detect_decrease": True},
            "final_poss": {"start": 3, "end": 23},
        }
        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 23}
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        sud.detect_cover_utr_srna(cover_results, pos, self.example.inters[0],
                                  "frag_1", "track_1", args, "frag", 2, 20,
                                  "+")
        expected_entry = {
            'track': 'track_1', 'type': 'frag',
            'low': 20, 'high': 50,
            'final_start': 2, 'final_end': 20,
            'avg': 1.0526315789473684,
            'ori_avg': 0.8695652173913043,
        }
        self.assertDictEqual(cover_results["srna_covers"],
                             {'frag_1': [expected_entry]})
        self.assertDictEqual(cover_results["utr_covers"],
                             cover_results["srna_covers"])
        self.assertDictEqual(cover_results["cover_sets"],
                             {'best': 20, 'low': 20, 'high': 50})

    def test_get_coverage(self):
        """``sud.get_coverage`` with both of its helpers mocked.

        Checks that the sRNA and UTR cover dicts hold the same single
        "frag_1" entry.
        """
        # Restore both helpers after the test; without this the mocked
        # ``detect_cover_utr_srna`` would stay installed in ``sud`` and be
        # picked up by any test that runs later.
        self.addCleanup(setattr, sud, "coverage_comparison",
                        sud.coverage_comparison)
        self.addCleanup(setattr, sud, "detect_cover_utr_srna",
                        sud.detect_cover_utr_srna)
        sud.coverage_comparison = self.mock.mock_coverage_comparison
        sud.detect_cover_utr_srna = self.mock.mock_detect_cover_utr_srna
        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25}
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        srna_covers, utr_covers = sud.get_coverage(self.example.wigs,
                                                   self.example.inters[0],
                                                   pos, "3utr", "TSS", args)
        expected_entry = {
            'track': 'track_1', 'type': 'frag',
            'low': 10, 'high': 50,
            'final_start': 2, 'final_end': 3,
            'avg': 8.052631578947368,
            'ori_avg': 2.12,
        }
        self.assertDictEqual(srna_covers, {'frag_1': [expected_entry]})
        self.assertDictEqual(utr_covers, srna_covers)

    def test_get_utr_cutoff(self):
        """``sud.get_utr_cutoff`` stores mean and median in the nested dict."""
        medians = {"aaa": {"5utr": {"bbb": {}}}}
        averages = [30, 60, 550, 302, 44]
        sud.get_utr_cutoff("p_0.5", medians, averages, "aaa", "5utr", "bbb")
        expected = {
            'aaa': {'5utr': {'bbb': {'mean': 197.2, 'median': 60}}}
        }
        self.assertDictEqual(medians, expected)

    def test_detect_normal(self):
        """``sud.detect_normal`` with ``sud.get_coverage`` mocked.

        The first call checks the stored sRNA and UTR candidates; the second
        call adds ``args.pros`` and different length limits and checks the
        sRNA candidate again.
        """
        # The restore used to be the last statement of the test and was
        # skipped whenever an assertion failed; a cleanup always runs.
        self.addCleanup(setattr, sud, "get_coverage", get_coverage)
        sud.get_coverage = self.mock.mock_get_coverage
        diff = 50
        inter = self.example.inters[0]
        # Coverage entry expected inside every candidate below.
        cover_datas = {
            'frag_1': [{
                'track': 'track_1', 'type': 'frag',
                'low': 10, 'high': 50,
                'final_start': 2, 'final_end': 20,
                'avg': 41.36842105263158,
                'ori_avg': 27.52,
            }]
        }

        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25}
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        args.utrs = []
        args.srnas = []
        sud.detect_normal(diff, self.example.wigs, inter, pos, "3utr",
                          self.example.tsss[0], args)
        candidate = {
            'strain': 'aaa', 'strand': '+', 'utr': '3utr',
            'start': 2, 'end': 20,
            'start_cleavage': 'NA', 'end_cleavage': 'NA',
            'datas': cover_datas,
        }
        self.assertListEqual(args.srnas,
                             [dict(candidate, start_tss='TSS:1_+')])
        self.assertListEqual(args.utrs, [dict(candidate, start_tss='NA')])

        args.utrs = []
        args.srnas = []
        args.pros = self.example.pros
        args.min_len = 3
        args.max_len = 20
        pos = {"start": 2, "end": 24, "ori_start": 1, "ori_end": 25}
        sud.detect_normal(diff, self.example.wigs, inter, pos, "3utr",
                          self.example.tsss[0], args)
        self.assertListEqual(args.srnas, [{
            'strain': 'aaa', 'strand': '+', 'utr': '3utr',
            'start': 1, 'end': 18,
            'start_tss': 'TSS:1_+',
            'start_cleavage': 'NA',
            'end_cleavage': 'Cleavage:18_+',
            'datas': cover_datas,
        }])

    def test_detect_3utr_pro(self):
        """``sud.detect_3utr_pro`` with ``sud.get_coverage`` mocked.

        Checks the sRNA and UTR candidates stored on ``args``.
        """
        # Restore through a cleanup so a failing assertion cannot leave the
        # mock installed in ``sud``.
        self.addCleanup(setattr, sud, "get_coverage", get_coverage)
        sud.get_coverage = self.mock.mock_get_coverage
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 1
        args.fuzzy_tsss = {"3utr": 3}
        args.pros = self.example.pros
        args.utrs = []
        args.srnas = []
        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25}
        sud.detect_3utr_pro(self.example.inters[0], pos, self.example.wigs,
                            "3utr", args)
        # The two candidates differ only in 'start_cleavage'.
        candidate = {
            'strain': 'aaa', 'strand': '+', 'utr': '3utr',
            'start': 18, 'end': 20,
            'start_tss': 'NA',
            'end_cleavage': 'NA',
            'datas': {
                'frag_1': [{
                    'track': 'track_1', 'type': 'frag',
                    'low': 10, 'high': 50,
                    'final_start': 2, 'final_end': 20,
                    'avg': 41.36842105263158,
                    'ori_avg': 27.52,
                }]
            },
        }
        self.assertListEqual(
            args.srnas, [dict(candidate, start_cleavage='Cleavage:18_+')])
        self.assertListEqual(args.utrs,
                             [dict(candidate, start_cleavage='NA')])

    def test_detect_twopro(self):
        """``sud.detect_twopro`` with two processing sites at 18 and 38.

        ``sud.get_coverage`` is mocked; the test checks the sRNA and UTR
        candidates stored on ``args``.
        """
        # Restore through a cleanup so a failing assertion cannot leave the
        # mock installed in ``sud``.
        self.addCleanup(setattr, sud, "get_coverage", get_coverage)
        sud.get_coverage = self.mock.mock_get_coverage
        pro_dict = [
            {"seq_id": "aaa", "source": "tsspredator",
             "feature": "processing", "start": 18, "end": 18,
             "phase": ".", "strand": "+", "score": "."},
            {"seq_id": "aaa", "source": "tsspredator",
             "feature": "processing", "start": 38, "end": 38,
             "phase": ".", "strand": "+", "score": "."},
        ]
        attributes_pro = [
            {"ID": "processing0", "Name": "Processing_0"},
            {"ID": "processing1", "Name": "Processing_1"},
        ]
        pros = [Create_generator(entry, attributes, "gff")
                for entry, attributes in zip(pro_dict, attributes_pro)]
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 3
        args.fuzzy_tsss = {"3utr": 3}
        args.pros = pros
        args.utrs = []
        args.srnas = []
        pos = {"start": 2, "end": 50, "ori_start": 1, "ori_end": 25}
        sud.detect_twopro(self.example.inters[0], pos, self.example.wigs,
                          "interCDS", "interCDS", args)
        # The two candidates differ only in 'start_cleavage'.
        candidate = {
            'strain': 'aaa', 'strand': '+', 'utr': 'interCDS',
            'start': 18, 'end': 38,
            'start_tss': 'NA',
            'end_cleavage': 'Cleavage:38_+',
            'datas': {
                'frag_1': [{
                    'track': 'track_1', 'type': 'frag',
                    'low': 10, 'high': 50,
                    'final_start': 2, 'final_end': 20,
                    'avg': 41.36842105263158,
                    'ori_avg': 27.52,
                }]
            },
        }
        self.assertListEqual(
            args.srnas, [dict(candidate, start_cleavage='Cleavage:18_+')])
        self.assertListEqual(args.utrs,
                             [dict(candidate, start_cleavage='NA')])

    def test_run_utr_detection(self):
        """Run ``sud.run_utr_detection`` for a "5utr" region and check output.

        ``sud.get_coverage`` is replaced by ``self.mock.mock_get_coverage``
        only while the function under test runs.  Afterwards ``args.srnas``
        and ``args.utrs`` are compared with the expected records, which
        differ only in their ``start_tss`` value.
        """
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        args.fuzzy_tsss = {"5utr": "n_3"}
        args.utrs = []
        args.srnas = []
        args.tsss = self.example.tsss
        args.pros = self.example.pros
        sud.get_coverage = self.mock.mock_get_coverage
        try:
            sud.run_utr_detection(self.example.wigs, self.example.inters[0],
                                  2, 50, "5utr", args)
        finally:
            # Put the real function back even if the call raises; otherwise
            # the mock would stay installed for every later test.
            sud.get_coverage = get_coverage

        def expected_record(start_tss):
            # Build one expected entry; everything but start_tss is shared.
            return {
                'start': 1,
                'end': 50,
                'start_cleavage': 'NA',
                'datas': {
                    'frag_1': [{
                        'high': 50,
                        'final_end': 20,
                        'avg': 41.36842105263158,
                        'low': 10,
                        'ori_avg': 27.52,
                        'final_start': 2,
                        'type': 'frag',
                        'track': 'track_1',
                    }]
                },
                'start_tss': start_tss,
                'strain': 'aaa',
                'strand': '+',
                'utr': '5utr',
                'end_cleavage': 'NA',
            }

        self.assertListEqual(args.srnas, [expected_record('TSS:1_+')])
        self.assertListEqual(args.utrs, [expected_record('NA')])

    def test_class_utr(self):
        """Run ``sud.class_utr`` on the first example region and transcript.

        ``sud.get_coverage`` is mocked only for the duration of the call.
        Two records are expected in both ``args.srnas`` and ``args.utrs``
        (starting at 1 and at 18); the sRNA records additionally carry the
        TSS / cleavage annotation, the UTR records do not.
        """
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        args.fuzzy_tsss = {"3utr": "p_3"}
        args.utrs = []
        args.srnas = []
        args.tsss = self.example.tsss
        args.pros = self.example.pros
        args.wig_fs = self.example.wigs
        sud.get_coverage = self.mock.mock_get_coverage
        try:
            sud.class_utr(self.example.inters[0], self.example.tas[0], args,
                          args.wig_fs, args.wig_fs)
        finally:
            # Restore unconditionally so a failure here cannot leave the
            # mock installed for the tests that run afterwards.
            sud.get_coverage = get_coverage

        def expected_record(start, start_tss='NA', start_cleavage='NA'):
            # All expected entries share everything except these three keys.
            return {
                'end_cleavage': 'NA',
                'start_tss': start_tss,
                'utr': '3utr',
                'start_cleavage': start_cleavage,
                'end': 20,
                'start': start,
                'datas': {
                    'frag_1': [{
                        'ori_avg': 27.52,
                        'final_start': 2,
                        'avg': 41.36842105263158,
                        'track': 'track_1',
                        'type': 'frag',
                        'final_end': 20,
                        'low': 10,
                        'high': 50,
                    }]
                },
                'strain': 'aaa',
                'strand': '+',
            }

        self.assertListEqual(args.srnas, [
            expected_record(1, start_tss='TSS:1_+'),
            expected_record(18, start_cleavage='Cleavage:18_+'),
        ])
        self.assertListEqual(args.utrs, [
            expected_record(1),
            expected_record(18),
        ])

    def test_get_utr_coverage(self):
        """Feed one 3'UTR record to ``sud.get_utr_coverage``.

        The expected result is keyed by strain, then UTR type, then track;
        the single listed value (27.52) equals the record's ``ori_avg``.
        """
        frag_coverage = {
            'final_end': 20,
            'track': 'track_1',
            'final_start': 2,
            'ori_avg': 27.52,
            'avg': 41.36842105263158,
            'type': 'frag',
            'low': 10,
            'high': 50
        }
        utr_record = {
            'strand': '+',
            'utr': '3utr',
            'end': 20,
            'start': 18,
            'start_tss': 'NA',
            'datas': {'frag_1': [frag_coverage]},
            'end_cleavage': 'NA',
            'strain': 'aaa',
            'start_cleavage': 'NA'
        }
        expected = {
            'aaa': {
                'interCDS': {},
                '3utr': {'track_1': [27.52]},
                '5utr': {}
            }
        }
        result = sud.get_utr_coverage([utr_record])
        self.assertDictEqual(result, expected)

    def test_set_cutoff(self):
        """Call ``sud.set_cutoff`` twice on the same coverage table.

        The first call has ``args.cover_notex`` set to the same thresholds
        as ``args.coverages``; the second call has it set to ``None``.
        """
        covers = {
            'aaa': {
                '5utr': {'track_4': [52, 11, 23]},
                'inter': {'track_3': [111]},
                'total': {'track_1': [27.52, 111]},
                '3utr': {'track_1': [27.52, 111]},
                'interCDS': {'track_2': [12, 0]}
            }
        }
        thresholds = {"5utr": "p_0.3", "3utr": "n_10", "interCDS": "p_0.5"}
        # The same statistics are expected for track_4 in both calls.
        track_4_stats = {'median': 11, 'mean': 28.666666666666668}

        args = self.mock_args.mock()
        args.texs = {"track_4@AND@track_6": 0}
        args.coverages = dict(thresholds)
        args.cover_notex = dict(thresholds)

        with_notex = sud.set_cutoff(covers, args)
        self.assertDictEqual(with_notex, {
            'aaa': {
                '5utr': {'track_4': track_4_stats},
                'interCDS': {'track_2': {}},
                '3utr': {'track_1': {}}
            }
        })

        args.cover_notex = None
        without_notex = sud.set_cutoff(covers, args)
        self.assertDictEqual(without_notex, {
            'aaa': {
                '3utr': {'track_1': {'mean': 69.26, 'median': 10.0}},
                '5utr': {'track_4': track_4_stats},
                'interCDS': {'track_2': {'mean': 6.0, 'median': 0}}
            }
        })

    def test_mean_score(self):
        """``sud.mean_score`` of [1, 3, 5, 6, 7, 8] is expected to be 5.0."""
        scores = [1, 3, 5, 6, 7, 8]
        self.assertEqual(sud.mean_score(scores), 5.0)

    def test_median_score(self):
        """``sud.median_score`` with 0.5 on [1, 3, 5, 6, 7, 8] should give 5."""
        scores = [1, 3, 5, 6, 7, 8]
        self.assertEqual(sud.median_score(scores, 0.5), 5)

    def test_detect_srna(self):
        """Run ``sud.detect_srna`` on one 3'UTR candidate and check output.

        ``sud.replicate_comparison`` is replaced by the mock for this test
        only; the GFF text written to ``args.out`` and the table text written
        to ``args.out_t`` are compared with the expected lines.
        """
        # Remember the real function so the mock does not leak into tests
        # that run after this one.
        real_comparison = sud.replicate_comparison
        sud.replicate_comparison = self.mock.mock_replicate_comparison
        self.addCleanup(setattr, sud, "replicate_comparison", real_comparison)
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        args.coverages = "cover"
        args.texs = "template_texs"
        args.tex_notex = "tex_notex"
        args.replicates = "rep"
        args.table_best = True
        args.out = StringIO()
        args.out_t = StringIO()
        median = {"aaa": {"3utr": 555}}
        args.srnas = [{
            'strand': '+',
            'utr': '3utr',
            'end': 20,
            'start': 18,
            'start_tss': 'NA',
            'datas': {
                'frag_1': [{
                    'final_end': 20,
                    'track': 'track_1',
                    'final_start': 2,
                    'ori_avg': 27.52,
                    'avg': 41.36842105263158,
                    'type': 'frag',
                    'low': 10,
                    'high': 50,
                    "conds": ["frag"]
                }]
            },
            'end_cleavage': 'NA',
            'strain': 'aaa',
            'start_cleavage': 'Cleavage:18_+'
        }]
        sud.detect_srna(median, args)
        # Adjacent literals are concatenated by the parser, so these are the
        # same strings as before, just wrapped to a readable width.
        expected_gff = (
            "aaa\tANNOgesic\tncRNA\t18\t20\t.\t+\t.\t"
            "ID=srna_utr0;Name=UTR_sRNA_00000;sRNA_type=3utr;"
            "best_avg_coverage=500;best_high_coverage=700;"
            "best_low_coverage=400;with_TSS=NA;"
            "start_cleavage=Cleavage:18_+;end_cleavage=NA\n")
        expected_table = (
            "aaa\t00000\t18\t20\t+\tfrag_1\ttrack_1\t500\t700\t400\t"
            "frag(avg=500;high=700;low=400)\n")
        self.assertEqual(args.out.getvalue(), expected_gff)
        self.assertEqual(args.out_t.getvalue(), expected_table)

    def test_print_file(self):
        """Write one candidate with ``sud.print_file`` and check both outputs.

        The call passes 2 and 50 as the positions to print; the expected
        lines carry those values rather than the record's own start/end.
        """
        frag_coverage = {
            'final_end': 20,
            'track': 'track_1',
            'final_start': 2,
            'ori_avg': 27.52,
            'avg': 41.36842105263158,
            'type': 'frag',
            'low': 10,
            'high': 50,
            "conds": ["frag"]
        }
        candidate = {
            'strand': '+',
            'utr': '3utr',
            'end': 20,
            'start': 18,
            'start_tss': 'NA',
            'datas': {'frag_1': [frag_coverage]},
            'end_cleavage': 'NA',
            'strain': 'aaa',
            'start_cleavage': 'Cleavage:18_+'
        }
        best_coverage = {
            "best": 500,
            "track": "frag",
            "high": 700,
            "low": 400,
            "start": 100,
            "end": 202,
            "conds": {"frag_1": "track_1"}
        }
        gff_buffer = StringIO()
        table_buffer = StringIO()

        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        args.coverages = "cover"
        args.texs = "template_texs"
        args.tex_notex = "tex_notex"
        args.replicates = "rep"
        args.table_best = True
        args.out = gff_buffer
        args.out_t = table_buffer

        sud.print_file(0, candidate, 2, 50, best_coverage, args)

        expected_gff = "aaa\tANNOgesic\tncRNA\t2\t50\t.\t+\t.\tID=srna_utr0;Name=UTR_sRNA_00000;sRNA_type=3utr;best_avg_coverage=500;best_high_coverage=700;best_low_coverage=400;with_TSS=NA;start_cleavage=Cleavage:18_+;end_cleavage=NA\n"
        expected_table = "aaa\t00000\t2\t50\t+\tfrag_1\ttrack_1\t500\t700\t400\tfrag(avg=500;high=700;low=400)\n"
        self.assertEqual(args.out.getvalue(), expected_gff)
        self.assertEqual(args.out_t.getvalue(), expected_table)
Пример #34
0
class TestCircRNA(unittest.TestCase):
    """Tests for the circRNA helpers reached through the ``circ`` module."""

    def setUp(self):
        """Create the example data, the args factory and a scratch folder."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder if it is still present."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _detection_args(self, ratio):
        """Return mock args with both ratios set to *ratio* and support 5."""
        args = self.mock_args.mock()
        args.start_ratio = ratio
        args.end_ratio = ratio
        args.support = 5
        return args

    def test_get_feature(self):
        """``circ.get_feature`` result for three attribute sets of one CDS.

        The expected values show a preference order: ``locus_tag`` when
        present, else ``protein_id``, else a string built from the ID and
        the position.
        """
        cases = (
            ({
                "ID": "cds0",
                "Name": "CDS_0",
                "locus_tag": "AAA_00001",
                "protein_id": "YP_918384.3"
            }, "AAA_00001"),
            ({
                "ID": "cds0",
                "Name": "CDS_0",
                "protein_id": "YP_918384.3"
            }, "YP_918384.3"),
            ({"ID": "cds0", "Name": "CDS_0"}, "cds0:122-267_f"),
        )
        for attributes_cds, expected in cases:
            feature = circ.get_feature(
                Create_generator(self.example.cds_dict, attributes_cds,
                                 "gff"))
            self.assertEqual(feature, expected)

    def test_detect_conflict(self):
        """Check the line ``circ.detect_conflict`` writes for one circRNA."""
        circ_dict = {
            "seq_id": "aaa",
            "source": "Refseq",
            "feature": "circRNA",
            "start": 100,
            "end": 467,
            "phase": ".",
            "strand": "+",
            "score": ".",
            "support": 30,
            "start_site": 30,
            "end_site": 35,
            "situation": "P",
            "splice_type": "C"
        }
        attributes_circ = {"ID": "circrna0", "Name": "circRNA_0"}
        circrna = Create_generator(circ_dict, attributes_circ, "circ")
        gffs = [
            Create_generator(self.example.cds_dict,
                             self.example.attributes_cds, "gff")
        ]
        args = self._detection_args(0.3)
        out = StringIO()
        out_best = StringIO()
        # Close both buffers however the test ends.
        self.addCleanup(out.close)
        self.addCleanup(out_best.close)
        circ.detect_conflict(gffs, circrna, 0, out, out_best, args)
        # Tabs are spelled as escapes so editors cannot silently turn them
        # into spaces.
        self.assertEqual(
            out.getvalue(),
            "circRNA_0\taaa\t+\t100\t467\tAAA_00001\t30\t1.0\t"
            "0.8571428571428571\n")

    def test_get_circrna(self):
        """Check the counters returned by ``circ.get_circrna``."""
        circs = [
            Create_generator(self.example.circ_dict[index],
                             self.example.attributes_circ[index], "circ")
            for index in range(5)
        ]
        gffs = [
            Create_generator(self.example.gffs_dict[index],
                             self.example.attributes_gffs[index], "gff")
            for index in range(3)
        ]
        out = StringIO()
        out_best = StringIO()
        args = self._detection_args(0.3)
        nums = circ.get_circrna(circs, gffs, 50, out, out_best, args)
        self.assertDictEqual(
            nums["support"], {
                'aaa': {0: 2, 20: 1, 5: 2, 25: 1, 10: 2, 30: 1, 15: 1},
                'all': {0: 3, 20: 1, 5: 3, 25: 1, 10: 2, 30: 1, 15: 1},
                'bbb': {0: 1, 5: 1}
            })
        self.assertDictEqual(nums["circular"], {'bbb': 1, 'aaa': 2, 'all': 3})
        self.assertDictEqual(nums["conflict"], {
            'bbb': {0: 1, 5: 1},
            'aaa': {},
            'all': {0: 1, 5: 1}
        })

    def test_detect_circrna(self):
        """Run ``circ.detect_circrna`` with file reading mocked out."""
        out_file = os.path.join(self.test_folder, "out_all.csv")
        stat_file = os.path.join(self.test_folder, "stat.csv")
        # Swap in the fake reader for this test only and put the real one
        # back afterwards so other tests see the genuine function.
        real_read_file = circ.read_file
        circ.read_file = Mock_read_file().read_file
        self.addCleanup(setattr, circ, "read_file", real_read_file)
        args = self._detection_args(0.5)
        args.hypo = True
        circ.detect_circrna("test.circ", "test.gff", out_file, args, stat_file)
        circs = import_data(out_file)
        stats = import_data(stat_file)
        # Only the set of lines is compared, not their order.
        self.assertEqual(set(circs), set(self.example.out_file.split("\n")))
        self.assertEqual(set(stats), set(self.example.stat_file.split("\n")))
Пример #35
0
class TestsTranscriptAssembly(unittest.TestCase):
    """Tests for ``TranscriptDetection`` with the ``tr`` helpers mocked out.

    Every attribute replaced on ``tr`` goes through ``_patch_tr`` so it is
    put back when the test finishes, whether the test passes or fails.
    """

    def setUp(self):
        """Create the scratch folder tree and a ``TranscriptDetection``."""
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.mock_parser = Mock_Multiparser()
        self.example = Example()
        self.test_folder = "test_folder"
        self.trans = "test_folder/trans"
        self.out = "test_folder/output"
        self.tex = "test_folder/tex"
        self.frag = "test_folder/frag"
        self.gffs = "test_folder/gffs"
        self.tsss = "test_folder/tsss"
        self.terms = "test_folder/terms"
        self.stat = "test_folder/output/statistics"
        self.out_gff = "test_folder/output/gffs"
        self.out_table = "test_folder/output/tables"
        # exist_ok makes this work even when a stale test_folder was left
        # behind; previously the sub-folders were then silently not created.
        for folder in (self.test_folder, self.trans, self.out, self.tex,
                       self.frag, os.path.join(self.frag, "tmp"),
                       self.gffs, os.path.join(self.gffs, "tmp"),
                       self.tsss, self.terms, self.stat, self.out_gff,
                       self.out_table):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.out_folder = self.out
        self.tran = TranscriptDetection(args)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _patch_tr(self, name, replacement):
        """Replace ``tr.<name>`` for this test and undo it afterwards."""
        missing = object()
        previous = getattr(tr, name, missing)
        setattr(tr, name, replacement)
        if previous is missing:
            # The attribute did not exist before; remove it again.
            self.addCleanup(delattr, tr, name)
        else:
            self.addCleanup(setattr, tr, name, previous)

    def _mock_assembly(self):
        """Install ``mock_assembly`` under both names these tests have used.

        NOTE(review): the tests previously patched ``tr.detect_transcript``
        in one place and ``tr.assembly`` in others, and restored the wrong
        one.  Which name the code under test resolves is not visible here,
        so both are patched -- confirm and drop the unused name.
        """
        for name in ("detect_transcript", "assembly"):
            self._patch_tr(name, self.mock.mock_assembly)

    def _log_path(self):
        """Return the path of the throw-away log file."""
        return os.path.join(self.test_folder, "test.log")

    def test_compute(self):
        """``_compute`` returns the strain name derived from the wig file."""
        self._mock_assembly()
        gen_file(os.path.join(self.frag, "tmp/test_forward.wig"), "test")
        args = self.mock_args.mock()
        args.replicates = "rep"
        # Was misspelled "out_foler", which set an attribute nobody reads.
        args.out_folder = self.out
        strains = self.tran._compute("frag", self.frag, "libs", args)
        self.assertListEqual(strains, ['test'])

    def test_for_one_wig(self):
        """``_for_one_wig`` writes the per-library transcript GFF file."""
        self._mock_assembly()
        self.tran.multiparser = self.mock_parser
        gen_file(os.path.join(self.frag, "tmp/test_forward.wig"), "test")
        gen_file(os.path.join(self.out, "test_frag"), self.example.tran_file)
        args = self.mock_args.mock()
        args.replicates = "rep"
        args.libs = "libs"
        args.gffs = self.gffs
        args.out_folder = self.out
        args.frag_wigs = self.frag
        args.flibs = "flibs"
        strains = self.tran._for_one_wig("frag", args)
        self.assertListEqual(strains, ['test'])
        datas = import_data(os.path.join(
            self.out_gff, "test_transcript_frag.gff"))
        self.assertEqual("\n".join(datas),
                         "##gff-version 3\n" + self.example.tran_file)

    def test_for_two_wigs(self):
        """``_for_two_wigs`` produces the combined transcript GFF file."""
        self._patch_tr("combine", self.mock.mock_combine)
        gen_file(os.path.join(
            self.out_gff, "test_transcript_fragment.gff"), "test")
        gen_file(os.path.join(
            self.out_gff, "test_transcript_tex_notex.gff"), "test")
        args = self.mock_args.mock()
        args.frag_wigs = self.frag
        args.tex_wigs = self.tex
        args.gffs = self.gffs
        args.tolerance = 5
        # Close the log before tearDown deletes the folder.
        with open(self._log_path(), "w") as log:
            self.tran._for_two_wigs(["test"], args, log)
        self.assertTrue(os.path.exists(
            os.path.join(self.out_gff, "test_transcript.gff")))

    def test_post_modify(self):
        """``_post_modify`` leaves a final transcript GFF in the gff folder."""
        self._patch_tr("longer_ta", self.mock.mock_longer_ta)
        self._patch_tr("fill_gap", self.mock.mock_fill_gap)
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gff_out = os.path.join(self.out, "gffs")
        os.mkdir(os.path.join(self.out, "tmp_tran"))
        gen_file(os.path.join(gff_out, "tmp_uni"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_overlap"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "final_test"), self.example.tran_file)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.out_folder = self.out
        args.length = 20
        args.modify = "merge_overlap"
        self.tran._post_modify(["test"], args)
        self.assertTrue(os.path.exists(os.path.join(
            gff_out, "test_transcript.gff")))

    def test_compare_cds(self):
        """``_compare_cds`` rewrites the annotation and transcript files."""
        self._patch_tr("stat_ta_gff", self.mock.mock_stat_ta_gff)
        self.tran.multiparser = self.mock_parser
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gen_file(os.path.join(self.gffs, "tmp/test.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.out_gff, "test_transcript.gff"),
                 self.example.tran_file)
        gff_out = os.path.join(self.out, "gffs")
        gen_file(os.path.join(gff_out, "tmp_ta_gff"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_gff_ta"), self.example.gff_file)
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.trans = self.trans
        args.gffs = self.gffs
        args.c_feature = ["CDS"]
        with open(self._log_path(), "w") as log:
            self.tran._compare_cds(["test"], args, log)
        datas = import_data(os.path.join(self.gffs, "test.gff"))
        self.assertEqual("\n".join(datas),
                         "##gff-version 3\n" + self.example.gff_file)
        datas = import_data(os.path.join(self.out_gff, "test_transcript.gff"))
        self.assertEqual("\n".join(datas),
                         "##gff-version 3\n" + self.example.tran_file)

    def test_compare_tss(self):
        """``_compare_tss`` rewrites the TSS and transcript files."""
        self._patch_tr("stat_ta_tss", self.mock.mock_stat_ta_tss)
        self.tran.multiparser = self.mock_parser
        gen_file(os.path.join(self.gffs, "test_TSS.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.gffs, "tmp/test_TSS.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.out_gff, "test_transcript.gff"),
                 self.example.tran_file)
        gff_out = os.path.join(self.out, "gffs")
        gen_file(os.path.join(gff_out, "tmp_ta_tss"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_tss_ta"), self.example.gff_file)
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.trans = self.trans
        args.compare_tss = self.gffs
        args.fuzzy = 2
        with open(self._log_path(), "w") as log:
            self.tran._compare_tss(["test"], args, log)
        datas = import_data(os.path.join(self.gffs, "test_TSS.gff"))
        self.assertEqual("\n".join(datas),
                         "##gff-version 3\n" + self.example.gff_file)
        datas = import_data(os.path.join(self.out_gff, "test_transcript.gff"))
        self.assertEqual("\n".join(datas),
                         "##gff-version 3\n" + self.example.tran_file)

    def test_run_transcript_assembly(self):
        """Smoke-test ``run_transcript`` with every ``tr`` helper mocked."""
        self._patch_tr("stat_ta_tss", self.mock.mock_stat_ta_tss)
        self._patch_tr("stat_ta_gff", self.mock.mock_stat_ta_gff)
        self._patch_tr("longer_ta", self.mock.mock_longer_ta)
        self._patch_tr("fill_gap", self.mock.mock_fill_gap)
        self._patch_tr("combine", self.mock.mock_combine)
        self._mock_assembly()
        self._patch_tr("gen_table_transcript", self.mock.mock_gen_table_tran)
        gen_file(os.path.join(self.frag, "tmp/test1_forward.wig"),
                 self.example.wig_f)
        gen_file(os.path.join(self.frag, "tmp/test1_reverse.wig"),
                 self.example.wig_r)
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gen_file(os.path.join(self.tsss, "test_TSS.gff"),
                 self.example.tss_file)
        gen_file(os.path.join(self.terms, "test_term.gff"),
                 self.example.term_file)
        gen_file("test_folder/output/test1_fragment", self.example.tran_file)
        gff_out = os.path.join(self.out, "gffs")
        gen_file(os.path.join(
            gff_out, "test_transcript_assembly_fragment.gff"),
            self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_uni"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_overlap"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "final_test"), self.example.tran_file)
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.frag_wigs = self.frag
        args.tex_wigs = None
        args.flibs = "flibs"
        args.tlibs = "tlibs"
        args.gffs = self.gffs
        args.terms = None
        args.compare_tss = None
        args.c_feature = None
        args.fuzzy_term = 1
        args.max_dist = 2000
        args.modify = "merge_overlap"
        with open(self._log_path(), "w") as log:
            self.tran.run_transcript(args, log)
Пример #36
0
class TestCoverageDetection(unittest.TestCase):
    """Tests for the coverage helpers reached through ``cover_detect``."""

    def setUp(self):
        """Create the example data and the args factory."""
        self.example = Example()
        self.mock_args = MockClass()

    @staticmethod
    def _median_table():
        """Return a fresh per-track median/mean table used by two tests."""
        return {
            "track1_tex": {"median": 100, "mean": 200},
            "track1_notex": {"median": 30, "mean": 80},
            "track2_tex": {"median": 150, "mean": 200},
            "track2_notex": {"median": 10, "mean": 20},
            "frag": {"median": 80, "mean": 100},
        }

    def test_coverage_comparison_first(self):
        """With ``first`` true, high and low both take the given value."""
        first = True
        cover_sets = {"high": -1, "low": -1}
        poss = {"high": -1, "low": -1}
        cover = 100
        cover_detect.coverage_comparison(cover, cover_sets, poss, first, "+",
                                         50)
        self.assertDictEqual(cover_sets, {"high": 100, "low": 100})
        self.assertDictEqual(poss, {"high": 50, "low": 50})

    def test_coverage_comparison_forward(self):
        """Two successive updates on the "+" strand."""
        first = False
        cover_sets = {"high": 50, "low": 20}
        poss = {"high": 10, "low": 30}
        cover = 100
        cover_detect.coverage_comparison(cover, cover_sets, poss, first, "+",
                                         50)
        self.assertDictEqual(cover_sets, {"high": 100, "low": 100})
        self.assertDictEqual(poss, {"high": 50, "low": 50})
        cover = 30
        cover_detect.coverage_comparison(cover, cover_sets, poss, first, "+",
                                         51)
        self.assertDictEqual(cover_sets, {"high": 100, "low": 30})
        self.assertDictEqual(poss, {"high": 50, "low": 51})

    def test_coverage_comparison_reverse(self):
        """Two successive updates on the "-" strand."""
        first = False
        cover_sets = {"high": 50, "low": 20}
        poss = {"high": 30, "low": 10}
        cover = 100
        cover_detect.coverage_comparison(cover, cover_sets, poss, first, "-",
                                         50)
        self.assertDictEqual(cover_sets, {"high": 100, "low": 100})
        self.assertDictEqual(poss, {"high": 50, "low": 50})
        cover = 30
        cover_detect.coverage_comparison(cover, cover_sets, poss, first, "-",
                                         49)
        self.assertDictEqual(cover_sets, {"high": 100, "low": 30})
        self.assertDictEqual(poss, {"high": 50, "low": 49})

    def test_define_cutoff_median(self):
        """``define_cutoff`` picks the median or mean column per UTR type."""
        coverages = {"3utr": "mean", "5utr": "median"}
        median = {
            "track_a": {"median": 100, "mean": 200},
            "track_b": {"median": 30, "mean": 80}
        }
        cutoff = cover_detect.define_cutoff(coverages, median, "5utr")
        self.assertDictEqual(cutoff, {'track_a': 100, 'track_b': 30})
        cutoff = cover_detect.define_cutoff(coverages, median, "3utr")
        self.assertDictEqual(cutoff, {'track_a': 200, 'track_b': 80})

    def test_check_tex(self):
        """Call ``check_tex`` twice, accumulating into ``target_datas``."""
        template_texs = self.example.texs
        covers = self.example.cover_datas
        coverages = {"3utr": 100, "5utr": 600}
        poss = {"high": 30, "low": 10}
        median = self._median_table()
        target_datas = []
        detect_num_lib = cover_detect.check_tex(template_texs, covers,
                                                target_datas, 20, None, poss,
                                                median, coverages, "3utr", 200,
                                                2)
        self.assertEqual(detect_num_lib, 2)
        # Every entry that is not of type "frag" is counted as tex.
        num_frag = sum(1 for target in target_datas
                       if target["type"] == "frag")
        num_tex = len(target_datas) - num_frag
        self.assertEqual(num_frag, 1)
        self.assertEqual(num_tex, 2)
        detect_num_lib = cover_detect.check_tex(template_texs, covers,
                                                target_datas, 20,
                                                "sRNA_utr_derived", poss,
                                                median, coverages, "5utr", 200,
                                                2)
        self.assertEqual(detect_num_lib, 2)
        self.assertDictEqual(poss, {
            'start': 100,
            'high': 30,
            'end': 202,
            'low': 10
        })

    def test_replicate_comparison(self):
        """Check ``replicate_comparison`` output with ``check_tex`` mocked."""
        # Swap in the mock for this test only; without the restore it would
        # replace the real check_tex for every test that runs afterwards.
        real_check_tex = cover_detect.check_tex
        cover_detect.check_tex = Mock_func().mock_check_tex
        self.addCleanup(setattr, cover_detect, "check_tex", real_check_tex)
        template_texs = self.example.texs
        srna_covers = {"texnotex": self.example.cover_datas}
        coverages = {"3utr": 100, "5utr": 600}
        median = self._median_table()
        args = self.mock_args.mock()
        args.replicates = {"tex": ["all_2"], "frag": ["all_1"]}
        args.tex_notex = 2
        srna_datas = cover_detect.replicate_comparison(args, srna_covers, "+",
                                                       "sRNA_utr_derived",
                                                       median, coverages,
                                                       "3utr", 100, 200,
                                                       template_texs)
        self.assertEqual(srna_datas["best"], 500)
        self.assertEqual(srna_datas["track"], "frag")
        self.assertEqual(srna_datas["high"], 700)
        self.assertEqual(srna_datas["low"], 400)
        self.assertEqual(srna_datas["start"], 100)
        self.assertEqual(srna_datas["end"], 202)
class TestGensRNAOutput(unittest.TestCase):

    def setUp(self):
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_merge_info(self):
        blasts = [{"strain": "aaa", "strand": "+", "start": 20, "end": 70, "hits": "111"},
                  {"strain": "aaa", "strand": "+", "start": 20, "end": 70, "hits": "222"},
                  {"strain": "aaa", "strand": "+", "start": 20, "end": 70, "hits": "333"},
                  {"strain": "aaa", "strand": "+", "start": 20, "end": 70, "hits": "444"},
                  {"strain": "bbb", "strand": "+", "start": 20, "end": 70, "hits": "555"}]
        merge = gso.merge_info(blasts)
        self.assertDictEqual(merge[0], {'hits': '111;222;333', 'start': 20, 'strand': '+', 'strain': 'aaa', 'end': 70})
        self.assertDictEqual(merge[1], {'hits': '555', 'start': 20, 'strand': '+', 'strain': 'bbb', 'end': 70})

    def test_compare_srna_table(self):
        final = {"energy": -23, "utr": "3UTR"}
        srna_dict = {"seq_id": "aaa", "source": "Refseq", "feature": "sRNA", "start": 300,
                     "end": 367, "phase": ".", "strand": "+", "score": "."}
        attributes_srna = {"ID": "srna0", "Name": "sRNA_0"}
        srna = Create_generator(srna_dict, attributes_srna, "gff")
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        new_final = gso.compare_srna_table(self.example.srna_tables, srna, final, args)
        self.assertDictEqual(new_final, {'end_pro': 'NA', 'strand': '+', 'strain': 'aaa',
                                         'avg': 100, 'type': 'TEX+/-;Fragmented',
                                         'conds': 'tex_frag', 'candidates': '300-367',
                                         'tss_pro': 'TSS:300_+', 'start': 300, 'utr': '3UTR',
                                         'energy': -23, 'end': 367})

    def test_compare(self):
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        finals = gso.compare(self.example.srnas, self.example.srna_tables,
                             self.example.nr_blasts, self.example.srna_blasts, args)
        for index in range(len(finals)):
            self.assertDictEqual(finals[index], self.example.finals[index])

    def test_gen_best_srna(self):
        gso.read_gff = Mock_func().mock_read_gff
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        args.nr_hits_num = 0
        args.energy = 0
        args.import_info = ["tss", "sec_str"]
        args.all_hit = True
        args.best_sorf = True
        args.best_promoter = True
        args.best_term = True
        out_file = os.path.join(self.test_folder, "test.out")
        gso.gen_best_srna("test.srna", out_file, args)
        with open(out_file) as fh:
            for line in fh:
                if not (line.startswith("#")):
                    data = "\t".join(line.split("\t")[:-1])
        self.assertEqual(data, "aaa\tUTR_derived\tsRNA\t300\t367\t.\t+\t.")
Пример #38
0
class TestsORFDetection(unittest.TestCase):
    def setUp(self):
        self.mock = Mock_func()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.tsss = "test_folder/tsss"
        self.srnas = "test_folder/sRNA"
        self.out = "test_folder/output"
        self.trans = "test_folder/trans"
        self.fastas = "test_folder/fastas"
        self.tex = "test_folder/tex"
        self.frag = "test_folder/frag"
        self.gffs = "test_folder/gffs"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
            os.mkdir(self.tsss)
            os.mkdir(self.out)
            os.mkdir(self.trans)
            os.mkdir(self.fastas)
            os.mkdir(self.tex)
            os.mkdir(self.frag)
            os.mkdir(self.srnas)
            os.mkdir(self.gffs)
        args = self.mock_args.mock()
        args.tsss = self.tsss
        args.srnas = self.srnas
        args.out_folder = self.out
        args.trans = self.trans
        args.fastas = self.fastas
        self.sorf = sORFDetection(args)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_start_stop_codon(self):
        gff_path = os.path.join(self.out, "gffs")
        table_path = os.path.join(self.out, "tables")
        os.mkdir(gff_path)
        os.mkdir(table_path)
        os.mkdir(os.path.join(gff_path, "all_candidates"))
        os.mkdir(os.path.join(table_path, "all_candidates"))
        os.mkdir(os.path.join(gff_path, "best_candidates"))
        os.mkdir(os.path.join(table_path, "best_candidates"))
        gen_file(os.path.join(gff_path, "all_candidates/test_sORF_all.gff"),
                 "test")
        gen_file(os.path.join(gff_path, "all_candidates/test_sORF_all.csv"),
                 "test")
        gen_file(os.path.join(gff_path, "all_candidates/test_sORF_best.gff"),
                 "test")
        gen_file(os.path.join(gff_path, "all_candidates/test_sORF_best.csv"),
                 "test")
        so.sorf_detection = self.mock.mock_sorf_detection
        args = self.mock_args.mock()
        args.libs = "libs"
        args.tex_notex = "tex_notex"
        args.replicates = "replicates"
        args.start_codon = ["ATG"]
        args.stop_codon = ["TTA"]
        args.background = "background"
        args.wig_path = "wig_path"
        args.merge_wigs = "merge_wigs"
        self.sorf._start_stop_codon(["test"], args)
        self.assertTrue(
            os.path.exists(
                os.path.join(gff_path, "best_candidates/test_sORF.gff")))
        self.assertTrue(
            os.path.exists(
                os.path.join(gff_path, "all_candidates/test_sORF.gff")))
        self.assertTrue(
            os.path.exists(
                os.path.join(table_path, "best_candidates/test_sORF.csv")))
        self.assertTrue(
            os.path.exists(
                os.path.join(table_path, "all_candidates/test_sORF.csv")))

    def test_compare_tran_cds(self):
        so.get_intergenic = self.mock.mock_get_intergenic
        gen_file(os.path.join(self.test_folder, "test.gff"), "test")
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.gffs = self.test_folder
        args.hypo = False
        args.utr_detect = True
        prefixs = self.sorf._compare_tran_cds(args)
        self.assertListEqual(prefixs, ["test"])

    def test_run_sorf_detection(self):
        gff_path = os.path.join(self.out, "gffs")
        table_path = os.path.join(self.out, "tables")
        os.mkdir(gff_path)
        os.mkdir(table_path)
        os.mkdir(os.path.join(gff_path, "all_candidates"))
        os.mkdir(os.path.join(table_path, "all_candidates"))
        os.mkdir(os.path.join(gff_path, "best"))
        os.mkdir(os.path.join(table_path, "best"))
        so.get_intergenic = self.mock.mock_get_intergenic
        so.sorf_detection = self.mock.mock_sorf_detection
        self.sorf._remove_tmp = self.mock.mock_remove_tmp
        self.sorf._check_gff = self.mock.mock_check_gff
        self.sorf._check_necessary_files = self.mock.mock_check_necessary_files
        self.sorf.multiparser = Mock_Multiparser()
        args = self.mock_args.mock()
        args.trans = self.trans
        args.gffs = self.gffs
        args.tsss = self.tsss
        args.out_folder = self.out
        args.libs = "libs"
        args.tex_notex = "tex_notex"
        args.replicates = "replicates"
        args.start_codon = ["ATG"]
        args.stop_codon = ["TTA"]
        args.background = "background"
        args.wig_path = "wig_path"
        args.merge_wigs = "merge_wigs"
        args.fuzzy_rbs = 2
        self.sorf.run_sorf_detection(args)
Пример #39
0
class TestRibos(unittest.TestCase):

    def setUp(self):
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.gffs = os.path.join(self.test_folder, "gffs")
        self.fastas = os.path.join(self.test_folder, "fastas")
        self.out_folder = os.path.join(self.test_folder, "output")
        self.database = os.path.join(self.test_folder, "database")
        self.seq_path = os.path.join(self.test_folder, "seqs")
        self.tables = os.path.join(self.out_folder, "tables")
        self.stat = os.path.join(self.out_folder, "statistics")
        self.scan = os.path.join(self.test_folder, "scan")
        self.tsss = os.path.join(self.test_folder, "tsss")
        self.trans = os.path.join(self.test_folder, "trans")
        self.out_gff = os.path.join(self.out_folder, "gffs")
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
            os.mkdir(self.tsss)
            os.mkdir(os.path.join(self.tsss, "tmp"))
            os.mkdir(self.trans)
            os.mkdir(os.path.join(self.trans, "tmp"))
            os.mkdir(self.gffs)
            os.mkdir(os.path.join(self.gffs, "tmp"))
            os.mkdir(self.fastas)
            os.mkdir(os.path.join(self.fastas, "tmp"))
            os.mkdir(self.out_folder)
            os.mkdir(self.database)
            os.mkdir(self.seq_path)
            os.mkdir(os.path.join(self.out_folder, "tmp_table"))
            os.mkdir(os.path.join(self.out_folder, "tmp_scan"))
            os.mkdir(os.path.join(self.out_folder, "tmp_fasta"))
            os.mkdir(os.path.join(self.out_folder, "scan_Rfam"))
            os.mkdir(self.tables)
            os.mkdir(self.scan)
            os.mkdir(self.stat)
            os.mkdir(self.out_gff)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.out_folder = self.out_folder
        args.database = self.database
        args.tsss = self.tsss
        args.trans = self.trans
        self.ribo = Ribos(args)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_scan_extract_rfam(self):
        self.ribo._run_infernal = self.mock.mock_run_infernal
        rb.modify_table = self.mock.mock_modify_table
        prefixs = []
        gen_file(os.path.join(self.gffs, "tmp/test.gff"), self.example.gff_file)
        gen_file(os.path.join(self.fastas, "tmp/test.fa"), self.example.fasta_file)
        gen_file(os.path.join(self.seq_path, "test.fa"), self.example.fasta_file)
        gen_file(os.path.join(self.tsss, "tmp/test_TSS.gff"), self.example.tss_file)
        gen_file(os.path.join(self.trans, "tmp/test_transcript.gff"), self.example.tran_file)
        gen_file(os.path.join(self.out_folder, "tmp_fasta", "test.fa"), self.example.fasta_file)
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.fastas = self.fastas
        args.out_folder = self.out_folder
        args.gffs = self.gffs
        args.fuzzy = 5
        args.fuzzy_rbs = 2
        args.utr = True
        args.output_all = "test"
        self.ribo._scan_extract_rfam(prefixs, args)
        self.assertListEqual(prefixs, ["test"])
        self.assertTrue(os.path.exists(os.path.join(self.out_folder, "tmp_fasta", "test_regenerate.fa")))

    def test_merge_results(self):
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file) 
        gen_file(os.path.join(self.out_folder, "tmp_table/test_riboswitch.csv"), self.example.table)
        gen_file(os.path.join(self.out_folder, "tmp_scan/test_riboswitch_prescan.txt"), self.example.rescan_file)
        gen_file(os.path.join(self.out_folder, "tmp_scan/test_riboswitch_scan.txt"), self.example.rescan_file)
        gen_file(os.path.join(self.test_folder, "ids"), self.example.ids)
        gen_file(os.path.join(self.tables, "test_riboswitch.csv"), self.example.table)
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.fastas = self.fastas
        args.out_folder = self.out_folder
        args.gffs = self.gffs
        args.ribos_id = os.path.join(self.test_folder, "ids")
        args.fuzzy = 3
        self.ribo._merge_results(args)
Пример #40
0
class TestsORFDetection(unittest.TestCase):

    def setUp(self):
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.fasta = "test_folder/fasta"
        self.wigs = "test_folder/wig"
        self.gff = "test_folder/gff"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
            os.mkdir(self.fasta)
            os.mkdir(self.wigs)
            os.mkdir(self.gff)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_get_coverage(self):
        coverages = {"3utr": "median", "5utr": "median",
                     "inter": 5, "interCDS": "median"}
        medianlist = {"aaa": {"3utr": {"track_1": {"median": 3}},
                              "5utr": {"track_1": {"median": 6}},
                              "interCDS": {"track_1": {"median": 2}},
                              "inter": {"track_1": {"median": 5}}}}
        cutoffs = {"track_1": 0}
        sorf = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "starts": [str(2)], "ends": [str(10)], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": [1]}
        covers = sd.get_coverage(sorf, self.example.wigs, "+", coverages,
                                 medianlist, cutoffs)
        self.assertDictEqual(covers, {'frag_1': [
            {'low': 2, 'avg': 33.4, 'high': 100, 'pos': 2,
             'track': 'track_1', 'type': 'frag'}]})

    def test_detect_rbs_site(self):
        args = self.mock_args.mock()
        args.max_len = 20
        args.min_len = 3
        args.fuzzy_rbs = 2
        detect = sd.detect_rbs_site("AGGAGGCCGCTATGCCACACGT", 2,
                                    self.example.tas[0], args)
        self.assertListEqual(detect, [1])

    def test_detect_start_stop(self):
        seq = {"aaa": "TAGGAGGCCGCTATGCCATTA"}
        args = self.mock_args.mock()
        args.start_codon = ["ATG"]
        args.stop_codon = ["TTA"]
        args.max_len = 20
        args.min_len = 3
        args.fuzzy_rbs = 2
        sorf = sd.detect_start_stop(self.example.tas, seq, args)
        self.assertListEqual(sorf, [
            {'strand': '+', 'type': 'intergenic', 'starts': ['13'],
            'print': False, 'seq': 'ATGCCATTA', 'ends': ['21'],
            'end': 21, 'start': 13, 'rbs': [2], 'strain': 'aaa'}])
        seq = {"aaa": "TTAAAGGCATTATCCTCCTA"}
        self.example.tas[0].strand = "-"
        sorf = sd.detect_start_stop(self.example.tas, seq, args)
        self.assertListEqual(sorf, [
            {'end': 10, 'starts': ['2'], 'strain': 'aaa', 'ends': ['10'],
            'type': 'intergenic', 'print': False, 'seq': 'TAAAGGCAT',
            'rbs': [19], 'strand': '-', 'start': 2}])
        self.example.tas[0].strand = "+"

    def test_read_data(self):
        inter = os.path.join(self.test_folder, "inter")
        fasta = os.path.join(self.test_folder, "fa")
        gen_file(inter, self.example.inter)
        gen_file(fasta, ">aaa\nATATACCGATC")
        inters, tsss, srnas, seq = sd.read_data(inter, None, None, fasta, True)
        self.assertEqual(inters[0].start, 2)
        self.assertDictEqual(seq, {'aaa': 'ATATACCGATC'})

    def test_check_tss(self):
        sorf = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "starts": [str(2)], "ends": [str(10)], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": [1], "with_TSS": []}
        checks = {"start": False, "rbs": False, "import": False}
        sd.check_tss(sorf, self.example.tsss[0], 300, checks)
        self.assertDictEqual(checks, {'start': True, 'rbs': [1],
                                      'import': True})

    def test_compare_sorf_tss(self):
        sorfs = [{"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                 "starts": [str(2)], "ends": [str(10)], "seq": "ATGTA",
                 "type": "3utr", "print": False, "rbs": [1]}]
        args = self.mock_args.mock()
        args.utr_length = 300
        args.noafter_tss = False
        args.no_tss = False
        sorfs_all, sorfs_best = sd.compare_sorf_tss(
            sorfs, self.example.tsss, "tss", args)
        self.assertListEqual(sorfs_all, [
            {'print': False, 'ends': ['10'], 'strand': '+',
             'end': 6, 'type': '3utr', 'starts': ['2'], 'seq': 'ATGTA',
             'strain': 'aaa', 'start': 2, 'rbs': [1],
             'start_TSS': '1_+', 'with_TSS': ['TSS:1_+']}])
        self.assertListEqual(sorfs_best, [
            {'print': False, 'ends': ['10'], 'strand': '+',
             'end': 6, 'type': '3utr', 'starts': ['2'], 'seq': 'ATGTA',
             'strain': 'aaa', 'start': 2, 'rbs': [1],
             'with_TSS': ['TSS:1_+'], 'start_TSS': '1_+'}])

    def test_compare_sorf_srna(self):
        sorfs = [{"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                 "starts": [str(2)], "ends": [str(10)], "seq": "ATGTA",
                 "type": "3utr", "print": False, "rbs": [1]}]
        sd.compare_sorf_srna(sorfs, self.example.srnas, "test")
        self.assertListEqual(sorfs, [
            {'print': False, 'starts': ['2'], 'seq': 'ATGTA', 'strand': '+',
             'srna': ['sRNA:5-8_+'], 'end': 6, 'rbs': [1], 'ends': ['10'],
             'start': 2, 'strain': 'aaa', 'type': '3utr'}])

    def test_import_overlap(self):
        sorf1 = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                 "starts": [str(2)], "ends": [str(10)], "seq": "ATGTA",
                 "type": "3utr", "print": False, "rbs": [1], "start_TSS": "1"}
        sorf2 = {"strain": "aaa", "strand": "+", "start": 5, "end": 15,
                 "starts": [str(5)], "ends": [str(15)], "seq": "ATGTA",
                 "type": "3utr", "print": False, "rbs": [2], "start_TSS": "2"}
        final = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                 "starts": [str(2)], "ends": [str(10)], "seq": "ATGTA",
                 "type": "3utr", "print": False, "rbs": [1], "start_TSS": "1"}
        sd.import_overlap(sorf2, final, sorf1, True)
        self.assertDictEqual(final, {
            'end': 15, 'candidate': ['2-6_TSS:1_RBS:1', '5-15_TSS:2_RBS:2'],
            'start': 2, 'rbs': [1, 2], 'strand': '+', 'strain': 'aaa',
            'print': False, 'seq': 'ATGTA', 'ends': ['10', '15'],
            'start_TSS': '1', 'type': '3utr', 'starts': ['2', '5']})

    def test_merge(self):
        seq = {"aaa": "TAGGAGGCCGCTATGCCATTA"}
        sorfs = [{"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                  "starts": [str(2)], "ends": [str(10)], "seq": "ATGTA",
                  "type": "3utr", "print": False, "rbs": [1],
                  "start_TSS": "1"},
                 {"strain": "aaa", "strand": "+", "start": 5, "end": 15,
                  "starts": [str(5)], "ends": [str(15)], "seq": "ATGTA",
                  "type": "3utr", "print": False, "rbs": [2],
                  "start_TSS": "2"}]
        finals = sd.merge(sorfs, seq)
        self.assertDictEqual(finals[0], {
            'start_TSS': '1', 'rbs': [1, 2], 'strand': '+', 'strain': 'aaa',
            'start': 2, 'candidate': ['2-6_TSS:1_RBS:1', '5-15_TSS:2_RBS:2'],
            'ends': ['10', '6', '15'], 'starts': ['2', '5'], 'type': '3utr',
            'end': 15, 'seq': 'AGGAGGCCGCTATG'})

    def test_assign_utr_cutoff(self):
        coverages = {"3utr": "median", "5utr": 20,
                     "interCDS": 11, "intergenic": 59}
        medians = {"median": 50, "mean": 20}
        cutoff =sd.assign_utr_cutoff(coverages, "3utr", medians)
        self.assertEqual(cutoff, 50)

    def test_get_cutoff(self):
        sorf = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "starts": [str(2)], "ends": [str(10)], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": [1], "start_TSS": "1"}
        coverages = {"3utr": "median", "5utr": 20,
                     "interCDS": 11, "intergenic": 59}
        medians = {"aaa": {"3utr": {"track_1": {"median": 50, "mean": 20}}}}
        cutoff = sd.get_cutoff(sorf, "track_1", coverages, medians)
        self.assertEqual(cutoff, 50)

    def test_get_attribute(self):
        sorf = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "starts": [str(2)], "ends": [str(10)], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": ["1"], "start_TSS": "1",
                "with_TSS": "NA", "srna": "NA", "shift": 1}
        string = sd.get_attribute(1, "sORF_1", "4", sorf, "utr")
        self.assertEqual(
            string,
            "ID=aaa_sorf1;Name=sORF_sORF_1;start_TSS=4;with_TSS=N,A;sORF_type=3utr;sRNA=N,A;rbs=1;frame_shift=1")

    def test_print_file(self):
        out_g = StringIO()
        out_t = StringIO()
        sorf = {"strain": "aaa", "strand": "+", "start": 10, "end": 15,
                "starts": [str(10)], "ends": [str(15)], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": ["3"], "start_TSS": "1",
                "with_TSS": ["NA"], "srna": ["NA"], "candidate": ["AAA"],
                "shift": 1}
        sorf_datas = {"best": 20, "high": 50, "low": 10, "start": 1,
                      "end": 10, "track": "track_1", "detail": [],
                      "conds": {"frag": "track_1"}}
        args = self.mock_args.mock()
        args.table_best = True
        args.print_all = True
        sd.print_file(sorf, sorf_datas, 1, out_g, out_t, "best", args)
        self.assertEqual(
            out_g.getvalue(),
            "aaa\tANNOgesic\tsORF\t10\t15\t.\t+\t.\tID=aaa_sorf1;Name=sORF_00001;start_TSS=1;with_TSS=NA;sORF_type=3utr;sRNA=NA;rbs=RBS_3;frame_shift=1\n")
        self.assertEqual(
            out_t.getvalue(),
            "aaa\tsORF_00001\t10\t15\t+\t3'UTR_derived\tNA\tRBS_3\t10\t15\tNA\t1\tFragmented\t20\t50\t10\ttrack_1(avg=20;high=50;low=10)\tATGTA\tAAA\n")

    def test_print_table(self):
        out_t = StringIO()
        sorf = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "starts": [str(2)], "ends": [str(10)], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": ["1"], "start_TSS": "1",
                "with_TSS": ["NA"], "srna": ["NA"], "candidate": ["AAA"],
                "shift": 1}
        sorf_datas = {"best": 20, "high": 50, "low": 10, "start": 1,
                      "end": 10, "track": "track_1", "detail": [],
                      "conds": {"frag": "track_1"}}
        args = self.mock_args.mock()
        args.table_best = True
        args.print_all = True
        sd.print_table(out_t, sorf, "test", "3utr", "frag", sorf_datas, args)
        self.assertEqual(
            out_t.getvalue(),
            "aaa\tsORF_test\t2\t6\t+\t3utr\tNA\t1\t2\t10\tNA\t1\tfrag\t20\t50\t10\ttrack_1(avg=20;high=50;low=10)\tATGTA\tAAA\n")

    def test_get_inter_coverage(self):
        inter_covers = {}
        inters = [{"frag": [{"track": "track_1", "avg": 22}]}]
        sd.get_inter_coverage(inters, inter_covers)
        self.assertDictEqual(inter_covers, {'track_1': [22]})

    def test_detect_utr_type(self):
        ta_dict = [{"seq_id": "aaa", "source": "intergenic",
                    "feature": "Transcript", "start": 1,
                    "end": 23, "phase": ".", "strand": "+", "score": "."}]
        attributes_tas = [{"ID": "tran0", "Name": "Transcript_0",
                           "UTR_type": "intergenic"}]
        tas = []
        tas.append(Create_generator(ta_dict[0], attributes_tas[0], "gff"))
        sd.get_coverage = self.mock.mock_get_coverage
        med_inters = {"aaa": {"intergenic": []}}
        sd.detect_utr_type(tas[0], "intergenic", med_inters,
                           "wigs", "+", "test")
        sd.get_coverage = get_coverage
        self.assertDictEqual(med_inters, {'aaa': {'intergenic': ["2"]}})

    def test_median_score(self):
        num = sd.median_score([1, 3, 11, 42, 2, 32, 111], "p_0.5")
        self.assertEqual(num, 11)

    def test_mean_score(self):
        num = sd.mean_score([1, 3, 11, 42, 2, 32, 111])
        self.assertEqual(num, 28.857142857142858)

    def test_validate_tss(self):
        sorf = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "starts": [str(2)], "ends": [str(10)], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": ["1"], "start_TSS": "3",
                "with_TSS": ["TSS:3_+"], "srna": ["NA"], "candidate": ["AAA"]}
        datas = sd.validate_tss([2], [6], sorf, 300)
        self.assertEqual(datas, (['TSS:3_+'], 'NA'))

    def test_validate_srna(self):
        sorf = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "starts": [str(2)], "ends": [str(10)], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": ["1"], "start_TSS": "1",
                "with_TSS": ["TSS:3_+"], "srna": ["sRNA:2-5_+"],
                "candidate": ["AAA"]}
        srnas = sd.validate_srna([2], [6], sorf)
        self.assertListEqual(srnas, ['sRNA:2-5_+'])

    def test_get_best(self):
        sorfs = [{"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                 "starts": [str(2)], "ends": [str(10)], "seq": "ATGTA",
                 "type": "3utr", "print": False, "rbs": ["1"], "start_TSS": "1",
                 "with_TSS": ["TSS:3_+"], "srna": ["sRNA:2-5_+"],
                 "candidate": ["2-6_TSS:3_RBS:1"]}]
        args = self.mock_args.mock()
        args.table_best = True
        args.no_srna = True
        args.utr_length = 300
        data = sd.get_best(sorfs, "tss", "srna", args)
        self.assertListEqual(data, [
            {'type': '3utr', 'strand': '+', 'print': False,
             'with_TSS': ['TSS:3_+'], 'starts': ['2'], 'start': 2,
             'srna': ['sRNA:2-5_+'], 'rbs': ['1'], 'end': 6, 'seq': 'ATGTA',
             'start_TSS': '1', 'strain': 'aaa', 'ends': ['10'],
             'candidate': ['2-6_TSS:3_RBS:1']}])

    def test_coverage_and_output(self):
        out_t = StringIO()
        out_g = StringIO()
        sd.get_coverage = self.mock.mock_get_coverage
        sd.replicate_comparison = self.mock.mock_replicate_comparison
        sorfs = [{"strain": "aaa", "strand": "+", "start": 10, "end": 15,
                 "starts": [str(10)], "ends": [str(15)], "seq": "ATGTA",
                 "type": "3utr", "print": False, "rbs": [1], "start_TSS": "1",
                 "with_TSS": ["TSS:3_+"], "srna": ["sRNA:2-5_+"],
                 "candidate": ["2-6_TSS:3_RBS:1"]}]
        seq = {"aaa": "TAGGAGGCCGCTATGCCATTA"}
        wigs = {"forward": "wigs_f", "reverse": "wigs_r"}
        args = self.mock_args.mock()
        args.print_all = True
        args.min_rbs = 0
        args.max_rbs = 20
        args.min_len = 0
        args.max_len = 300
        args.table_best = True
        sd.coverage_and_output(sorfs, "median", wigs, out_g, out_t,
                               "best", seq, "cover", args, "texs", "final")
        sd.get_coverage = copy.deepcopy(get_coverage)
        self.assertEqual(out_g.getvalue(), (
            "##gff-version 3\naaa\tANNOgesic\tsORF\t10\t15\t."
            "\t+\t.\tID=aaa_sorf0;Name=sORF_00000;start_TSS=1;"
            "with_TSS=TSS:3_+;sORF_type=3utr;sRNA=NA;rbs=RBS_1;frame_shift=1\n"))
        self.assertEqual(out_t.getvalue().split("\n")[1],
                         ("aaa\tsORF_00000\t10\t15\t+\t"
                          "3'UTR_derived\tTSS:3_+\tRBS_1\t10\t15\tNA\t1"
                          "\tFragmented\t20\t50\t10\ttrack_1(avg=20;"
                          "high=50;low=10)\tGCTATG\t10-15_TSS:3_+_RBS:1"))

    def test_detect_inter_type(self):
        inter_dict = [{"seq_id": "aaa", "source": "UTR_derived",
                       "feature": "Transcript", "start": 1,
                       "end": 23, "phase": ".", "strand": "+", "score": "."}]
        attributes_inter = [{"ID": "tran0", "Name": "Transcript_0",
                             "UTR_type": "3utr"}]
        inters = []
        inters.append(Create_generator(
            inter_dict[0], attributes_inter[0], "gff"))
        sd.get_coverage = self.mock.mock_get_coverage
        wigs = {"forward": "wigs_f", "reverse": "wigs_r"}
        data = sd.detect_inter_type(inters, wigs, "test")
        self.assertDictEqual(data, {'aaa': {
            'interCDS': [], '5utr': [], '3utr': ['2']}})
        sd.get_coverage = copy.deepcopy(get_coverage)

    def test_set_median(self):
        mediandict = {}
        covers = {"aaa": {"3utr": {"track_1": [1, 3, 4, 2, 55]}}}
        coverages = {"3utr": "p_0.5", "5utr": "p_0.5", "interCDS": "n_100"}
        sd.set_median(covers, mediandict, coverages)
        self.assertDictEqual(mediandict, {'aaa': {
            '5utr': {}, 'interCDS': {}, '3utr': {'track_1': {'median': 3}}}})

    def test_compute_candidate_best(self):
        sorfs = [{"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                 "starts": [str(2)], "ends": [str(10)], "seq": "ATGTA",
                 "type": "3utr", "print": False, "rbs": ["1"],
                 "start_TSS": "1",
                 "with_TSS": ["TSS:3_+"], "srna": ["sRNA:2-5_+"]}]
        sd.compute_candidate_best(sorfs)
        self.assertListEqual(sorfs, [
            {'starts': ['2'], 'seq': 'ATGTA', 'strain': 'aaa',
            'ends': ['10'], 'print': False, 'rbs': ['1'], 'type': '3utr',
            'end': 6, 'start': 2, 'srna': ['sRNA:2-5_+'],
            'candidate': ['2-6_TSS:1_RBS:1'], 'start_TSS': '1',
            'strand': '+', 'with_TSS': ['TSS:3_+']}])

    def test_sorf_detection(self):
        fasta = os.path.join(self.fasta, "fasta")
        gen_file(fasta, ">aaa\nTAGGAGGCCGCTATGCCATTA")
        srna_gff = os.path.join(self.gff, "srna.gff")
        inter_gff = os.path.join(self.gff, "inter.gff")
        tss_file = os.path.join(self.gff, "tss.gff")
        sd.get_coverage = self.mock.mock_get_coverage
        sd.read_libs = self.mock.mock_read_libs
        sd.read_wig = self.mock.mock_read_wig
        sd.get_inter_coverage = self.mock.mock_get_inter_coverage
        gen_file(srna_gff, self.example.srna)
        gen_file(inter_gff, self.example.inter)
        gen_file(tss_file, self.example.tss)
        args = self.mock_args.mock()
        args.start_codon = ["ATG"]
        args.stop_codon = ["TTA"]
        args.cutoff_5utr = "p_0.5"
        args.cutoff_intercds = "n_20"
        args.cutoff_3utr = "n_11"
        args.cutoff_inter = 50
        args.cutoff_anti = 50
        args.libs = ["frag:frag:1:a:+"]
        args.merge_wigs = "wig_folder"
        args.utr_detect = True
        args.background = 10
        args.print_all = True
        sd.sorf_detection(fasta, srna_gff, inter_gff, tss_file, "wig_f_file",
                          "wig_r_file", "test_folder/test", args)
        sd.get_coverage = copy.deepcopy(get_coverage)
        sd.replicate_comparison = self.mock.mock_replicate_comparison
        self.assertTrue(os.path.exists("test_folder/test_all.csv"))
        self.assertTrue(os.path.exists("test_folder/test_all.gff"))
        self.assertTrue(os.path.exists("test_folder/test_best.csv"))
        self.assertTrue(os.path.exists("test_folder/test_best.gff"))
Пример #41
0
class TestSNPCalling(unittest.TestCase):

    def setUp(self):
        """Create the scratch tree and the ``SNPCalling`` object under test.

        Every directory is created with ``os.makedirs(..., exist_ok=True)``
        so that a ``test_folder`` left behind by an interrupted run still
        receives all of its sub-folders. Previously the sub-folders were
        skipped whenever the top-level folder already existed.
        """
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.fasta = os.path.join(self.test_folder, "fasta")
        self.snp_folder = os.path.join(self.test_folder, "snp")
        self.table = os.path.join(self.test_folder, "table")
        folders = (
            self.test_folder,
            self.fasta,
            self.snp_folder,
            self.table,
            os.path.join(
                self.test_folder,
                "compare_related_and_reference_genomes"),
            os.path.join(
                self.test_folder,
                "compare_related_and_reference_genomes/seqs"),
            os.path.join(
                self.test_folder,
                "compare_related_and_reference_genomes/seqs/with_BAQ"),
            os.path.join(
                self.test_folder,
                "compare_related_and_reference_genomes/statistics"),
            os.path.join(
                self.test_folder,
                "compare_related_and_reference_genomes/SNP_raw_outputs"),
        )
        for folder in folders:
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.types = "related_genome"
        args.out_folder = self.test_folder
        args.fastas = self.fasta
        self.snp = SNPCalling(args)
        self.mock = Mock_func()

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_transcript_snp(self):
        """Feed one VCF through ``_transcript_snp`` and compare its outputs.

        The "best" statistics CSV is compared with ``self.example.out_stat``
        and the generated sequence FASTA with a fixed record. Afterwards four
        files are removed by bare name from the current working directory
        (presumably written there by the call).
        """
        genome = os.path.join(self.test_folder, "NC_007795.1.fa")
        gen_file(genome, self.example.fasta)
        raw_dir = os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/SNP_raw_outputs/test")
        os.mkdir(raw_dir)
        vcf = os.path.join(raw_dir, "test_with_BAQ_NC_007795.1.vcf")
        gen_file(vcf, self.example.snp)
        args = self.mock_args.mock()
        thresholds = {
            "depth": 5,
            "fraction": 0.3,
            "quality": 2,
            "depth_s": "n_10",
            "depth_b": "a_2",
            "dp4_sum": "n_10",
            "dp4_frac": 0.5,
            "idv": "n_10",
            "imf": 0.5,
            "filters": ["VDB_s0.1"],
            "min_sample": 2,
        }
        for name, value in thresholds.items():
            setattr(args, name, value)
        os.mkdir(os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/seqs/with_BAQ/test"))
        depth_path = os.path.join(self.test_folder, "tmp_depthNC_007795.1")
        gen_file(depth_path, self.example.depth_file)
        bam_datas = [{"sample": "NC_007795.1", "bam_number": 1,
                      "bams": "test", "rep": 1}]
        self.snp._transcript_snp(genome, "test", "with",
                                 "test", bam_datas, self.table, args)
        stat_text = "\n".join(import_data(os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/statistics/stat_test_with_BAQ_NC_007795.1_SNP_best.csv")))
        print(stat_text)
        self.assertEqual(stat_text, self.example.out_stat)
        seq_text = "\n".join(import_data(os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/seqs/with_BAQ/test/test_NC_007795.1_NC_007795.1_1_1.fa")))
        self.assertEqual(seq_text,
                         ">NC_007795.1\nAaTTGaaTCCCGAACGACAGTTAT")
        for leftover in ("test_NC_007795.1_seq_reference.csv",
                         "test_NC_007795.1_best.vcf",
                         "test_NC_007795.1_NC_007795.1_SNP_QUAL_best.png",
                         "test_NC_007795.1_NC_007795.1_SNP_QUAL_raw.png"):
            os.remove(leftover)

    def test_run_program(self):
        """``_run_program`` with a mocked ``_run_sub`` creates ``test_folder/test``.

        The log file is opened in a ``with`` block so its handle is closed
        when the call returns (it was previously left open).
        """
        self.snp._run_sub = self.mock.mock_run_sub
        args = self.mock_args.mock()
        bam_datas = [{"sample": "NC_007795.1", "bam_number": 1,
                      "bams": "test", "rep": 1}]
        args.program = ["with_BAQ"]
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.snp._run_program("fasta", bam_datas, args, log)
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "test")))

    def test_merge_bams(self):
        """``_merge_bams`` updates ``bam_number`` of the sample from 0 to 1.

        ``_run_bam`` is mocked, and placeholder BAM files are written to the
        two input folders. The log file is opened in a ``with`` block so the
        handle is closed after the call (it was previously leaked).
        """
        args = self.mock_args.mock()
        args.frag_bams = os.path.join(self.test_folder, "frag_bams")
        args.normal_bams = os.path.join(self.test_folder, "tex_bams")
        os.mkdir(args.normal_bams)
        os.mkdir(args.frag_bams)
        bam_datas = [{"sample": "NC_007795.1", "bam_number": 0,
                      "bams": "test", "rep": 1}]
        self.snp._run_bam = self.mock.mock_run_bam
        gen_file(os.path.join(args.normal_bams, "tex.bam"), "test")
        gen_file(os.path.join(args.normal_bams, "notex.bam"), "test")
        gen_file(os.path.join(args.frag_bams, "farg.bam"), "test")
        args.bams = [args.frag_bams, args.normal_bams]
        args.samtools_path = "test"
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.snp._merge_bams(args, bam_datas, log)
        self.assertEqual(bam_datas[0]["bam_number"], 1)

    def test_modify_header(self):
        """``_modify_header`` rewrites a ``|``-separated FASTA header to ``>DDD``."""
        fasta_path = os.path.join(self.fasta, "test.fa")
        gen_file(fasta_path, ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        self.snp._modify_header(self.fasta)
        rewritten = "\n".join(
            import_data(os.path.join(self.fasta, "test.fa")))
        self.assertEqual(rewritten, ">DDD\nAATTAATTGGCC")

    def test_get_genome_name(self):
        """Call ``_get_genome_name`` with a mocked ``_get_header``.

        The return value is not checked; the test passes as long as the
        call does not raise.
        """
        self.snp._get_header = self.mock.mock_get_header
        header_path = os.path.join(self.test_folder, "header")
        gen_file(header_path, self.example.bam)
        args = self.mock_args.mock()
        args.samtools_path = "test"
        sample = {"sample": "NC_007795.1", "bam_number": 0,
                  "bams": "test", "rep": 1}
        self.snp._get_genome_name(args, [sample])

    def test_run_snp_calling(self):
        """Smoke-test ``run_snp_calling`` with every external step mocked.

        No assertion is made on the result; the test passes if the call
        completes. The log file is opened in a ``with`` block so that its
        handle is closed afterwards (it was previously leaked).
        """
        self.snp._get_header = self.mock.mock_get_header
        self.snp._run_bam = self.mock.mock_run_bam
        self.snp._run_sub = self.mock.mock_run_sub
        self.snp._run_tools = self.mock.mock_run_tools
        self.snp._transcript_snp = self.mock.mock_transcript_snp
        gen_file(os.path.join(self.fasta, "test.fa"),
                 ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        gen_file(os.path.join(self.test_folder, "header"), self.example.bam)
        gen_file(os.path.join(self.test_folder, "whole_reads.bam"), "test")
        gen_file(os.path.join(self.test_folder, "whole_reads_sorted.bam"),
                 "test")
        gen_file(os.path.join(self.test_folder, "tmp_bcf"), "test")
        gen_file(os.path.join(self.fasta, "all.fa.fai"), "test")
        args = self.mock_args.mock()
        args.types = "reference"
        args.program = ["with_BAQ"]
        args.bams = ["a1:" + os.path.join(self.test_folder, "frag_bams"),
                     "a2:" + os.path.join(self.test_folder, "tex_bams")]
        args.frag_bams = os.path.join(self.test_folder, "frag_bams")
        args.normal_bams = os.path.join(self.test_folder, "tex_bams")
        os.mkdir(args.normal_bams)
        os.mkdir(args.frag_bams)
        gen_file(os.path.join(args.normal_bams, "tex.bam"), "test")
        gen_file(os.path.join(args.normal_bams, "notex.bam"), "test")
        gen_file(os.path.join(args.frag_bams, "farg.bam"), "test")
        args.samtools_path = "test"
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.snp.run_snp_calling(args, log)
Пример #42
0
class TestsRNADetection(unittest.TestCase):

    def setUp(self):
        """Create the scratch tree and the ``sRNADetection`` object under test.

        Directories are created with ``os.makedirs(..., exist_ok=True)`` so
        that a ``test_folder`` surviving from an interrupted run still gets
        every sub-folder. Previously all of them were skipped when the
        top-level folder already existed.
        """
        self.mock_args = MockClass()
        self.example = Example()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.gffs = "test_folder/gffs"
        self.tsss = "test_folder/tsss"
        self.sorf = "test_folder/sORF"
        self.out = "test_folder/output"
        self.trans = "test_folder/trans"
        self.fastas = "test_folder/fastas"
        self.tex = "test_folder/tex"
        self.frag = "test_folder/frag"
        self.pros = "test_folder/pros"
        self.terms = "test_folder/terms"
        folders = (
            self.test_folder,
            self.gffs,
            self.tsss,
            os.path.join(self.tsss, "tmp"),
            self.out,
            self.trans,
            os.path.join(self.trans, "tmp"),
            self.fastas,
            os.path.join(self.fastas, "tmp"),
            self.tex,
            self.frag,
            self.pros,
            os.path.join(self.pros, "tmp"),
            self.sorf,
            os.path.join(self.sorf, "tmp"),
            self.terms,
        )
        for folder in folders:
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.tss_folder = self.tsss
        args.pro_folder = self.pros
        args.out_folder = self.out
        args.sorf_file = self.sorf
        args.fastas = self.fastas
        args.trans = self.trans
        args.terms = self.terms
        self.srna = sRNADetection(args)

    def tearDown(self):
        """Remove the scratch tree, return to ``current_path`` and clean up.

        After changing directory, the ``tmp`` folder and three temporary
        files are deleted from the working directory when they exist.
        """
        scratch = self.test_folder
        if os.path.exists(scratch):
            shutil.rmtree(scratch)
        os.chdir(current_path)
        if os.path.exists("tmp"):
            shutil.rmtree("tmp")
        for leftover in ("tmp_srna.csv", "tmp_srna.gff", "tmp_blast.txt"):
            if os.path.exists(leftover):
                os.remove(leftover)

    def test_check_folder_exist(self):
        """``_check_folder_exist`` on the sORF folder returns its ``tmp`` path."""
        expected = "test_folder/sORF/tmp"
        self.assertEqual(self.srna._check_folder_exist(self.sorf), expected)

    def test_formatdb(self):
        """``_formatdb`` with mocked helpers leaves ``log.txt`` in the output folder.

        The log file passed to the call is opened in a ``with`` block so
        its handle is closed afterwards (it was previously leaked).
        """
        database = "test_folder/test.fa"
        gen_file(database, "test")
        sr.change_format = self.mock.mock_change_format
        self.srna._run_format = self.mock.mock_run_format
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.srna._formatdb(database, "type_", self.out, "blast_path",
                                "sRNA", log)
        self.assertTrue(os.path.exists(os.path.join(self.out, "log.txt")))

    def test_check_necessary_file(self):
        """Smoke-test ``_check_necessary_file`` with parser and checks mocked.

        No assertion is made; the test passes if the call completes. The
        log file is opened in a ``with`` block so the handle is closed
        afterwards (it was previously leaked).
        """
        self.srna.multiparser = Mock_multiparser
        self.srna._check_gff = self.mock.mock_check_gff
        self.srna._check_database = self.mock.mock_check_database
        args = self.mock_args.mock()
        args.trans = self.trans
        args.tsss = self.tsss
        args.pros = self.pros
        args.import_info = ["tss", "blast_nr", "blast_srna", "sec_str", "sorf"]
        args.fastas = self.fastas
        args.terms = self.terms
        args.sorf_file = self.sorf
        args.gffs = self.gffs
        args.tex_wigs = self.tex
        args.frag_wigs = self.frag
        args.utr_srna = True
        args.nr_format = True
        args.srna_format = True
        args.nr_database = "test"
        args.srna_database = "test"
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.srna._check_necessary_file(args, log)

    def test_run_program(self):
        """``_run_program`` with mocked stages returns the prefix list ``['test']``.

        The log file is opened in a ``with`` block so its handle is closed
        once the call returns (it was previously leaked).
        """
        self.srna.multiparser = Mock_multiparser
        self.srna._check_gff = self.mock.mock_check_gff
        self.srna._run_normal = self.mock.mock_run_normal
        self.srna._run_utrsrna = self.mock.mock_run_utrsrna
        self.srna._merge_tex_frag_datas = self.mock.mock_merge_tex_frag_datas
        sr.filter_frag = self.mock.mock_run_filter_frag
        sr.merge_srna_gff = self.mock.mock_merge_srna_gff
        sr.merge_srna_table = self.mock.mock_merge_srna_table
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.sorf_file)
        gen_file(os.path.join(self.trans, "test_transcript.gff"),
                 self.example.sorf_file)
        gen_file(os.path.join(self.tsss, "test_TSS.gff"),
                 self.example.sorf_file)
        gen_file(os.path.join(self.tsss, "test_processing.gff"),
                 self.example.sorf_file)
        fuzzy_tsss = {"inter": 3}
        args = self.mock_args.mock()
        args.import_info = ["tss", "blast_nr", "blast_srna", "sec_str", "sorf"]
        args.trans = self.trans
        args.tsss = self.tsss
        args.pros = self.pros
        args.max_len = 300
        args.min_len = 30
        args.tex_notex = "tex_notex"
        args.fuzzy_tsss = fuzzy_tsss
        args.out_folder = self.out
        args.table_best = True
        args.wig_path = "wig_path"
        args.merge_wigs = "merge"
        args.libs = "libs"
        args.gffs = self.gffs
        args.in_cds = False
        args.utr_srna = True
        args.ex_srna = False
        args.cutoff_overlap = 0.5
        args.source = True
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            prefixs = self.srna._run_program(args, log)
        self.assertListEqual(prefixs, ['test'])

    def test_get_seq_sec(self):
        """``_get_seq_sec`` returns the expected ``sec``/``dot``/``main``/``tmp`` paths.

        Energy extraction, sequence retrieval and RNAfold are mocked. The
        log file is opened in a ``with`` block so its handle is closed
        after the call (it was previously leaked).
        """
        sr.extract_energy = self.mock.mock_extract_energy
        self.srna.helper.get_seq = self.mock.mock_get_seq
        self.srna._run_RNAfold = self.mock.mock_run_RNAfold
        os.mkdir(os.path.join(self.out, "tmp_srna"))
        gen_file(os.path.join(self.fastas, "test.fa"), ">test\nAAATTTGGGCCC")
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            datas = self.srna._get_seq_sec(
                self.fastas, self.out, "test", self.test_folder,
                self.test_folder, "vienna_path", log)
        self.assertEqual(datas["sec"].split("/")[-1], "test_folder")
        self.assertEqual(datas["dot"].split("/")[-1], "test_folder")
        self.assertEqual(datas["main"].split("/")[-1],
                         datas["tmp"].split("/")[-4])
        self.assertEqual(datas["tmp"].split("/")[-1], "tmp_srna")

    def test_replot_sec(self):
        """``_replot_sec`` places the dot and structure plots under their folders.

        Replotting and PDF conversion are mocked. The log file is opened in
        a ``with`` block so its handle is closed after the call (it was
        previously leaked).
        """
        self.srna._run_replot = self.mock.mock_run_replot
        self.srna._convert_pdf = self.mock.mock_convert_pdf
        gen_file(os.path.join(self.tsss, "test.rss.ps"), "test")
        gen_file(os.path.join(self.tsss, "test.dp.ps"), "test")
        tmp_paths = {"dot": self.out, "sec": self.fastas, "tmp": self.tsss}
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.srna._replot_sec("vienna_util", tmp_paths, "test", log)
        self.assertTrue(os.path.exists(os.path.join(
            tmp_paths["dot"], "test/test.dp.ps")))
        self.assertTrue(os.path.exists(os.path.join(
            tmp_paths["sec"], "test/test.rss.ps")))

    def test_plot_mountain(self):
        """``_plot_mountain`` should leave the mountain PDF under ``fastas/test``.

        Two fixes over the earlier version:

        * the final assertion now checks that the file exists on disk; it
          used to pass a non-empty string literal to ``assertTrue`` and so
          could never fail;
        * the log file is opened in a ``with`` block so its handle is
          closed after the call.
        """
        self.srna._run_mountain = self.mock.mock_run_mountain
        tmp_paths = {"main": self.test_folder, "tmp": self.tsss,
                     "dot": self.sorf}
        moun_path = "fastas"
        os.mkdir(os.path.join(tmp_paths["dot"], "test"))
        gen_file(os.path.join(tmp_paths["dot"], "test/test.dp.ps"), "test")
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.srna._plot_mountain(True, moun_path,
                                     tmp_paths, "test", "vienna_util", log)
        self.assertTrue(os.path.exists(
            "test_folder/fastas/test/test.mountain.pdf"))

    def test_compute_2d_and_energy(self):
        """``_compute_2d_and_energy`` leaves ``tmp_basic_test`` containing ``test``.

        All external plotting and energy steps are mocked. The log file is
        opened in a ``with`` block so its handle is closed after the call
        (it was previously leaked).
        """
        sr.extract_energy = self.mock.mock_extract_energy
        sr.change_format = self.mock.mock_change_format
        self.srna._run_replot = self.mock.mock_run_replot
        self.srna._convert_pdf = self.mock.mock_convert_pdf
        self.srna._run_mountain = self.mock.mock_run_mountain
        sec_path = os.path.join(self.out, "figs")
        os.mkdir(sec_path)
        os.mkdir(os.path.join(sec_path, "sec_plots"))
        os.mkdir(os.path.join(sec_path, "dot_plots"))
        os.mkdir(os.path.join(sec_path, "mountain_plots"))
        tmp_paths = {"dot": self.out, "sec": self.fastas,
                     "tmp": self.tsss, "main": self.test_folder}
        gen_file(os.path.join(self.fastas, "tmp/test.fa"),
                 ">test\nAAATTTGGGCCC")
        gen_file(os.path.join(self.out, "tmp_basic_test"),
                 self.example.srna_file)
        gen_file(os.path.join(self.out, "tmp_energy_test"), "test")
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.fastas = self.fastas
        args.rnafold = "test"
        args.relplot_pl = "test"
        args.mountain_pl = "test"
        args.mountain = True
        args.ps2pdf14_path = "test"
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.srna._compute_2d_and_energy(args, ["test"], log)
        datas = import_data(os.path.join(self.out, "tmp_basic_test"))
        self.assertEqual("\n".join(datas), "test")

    def test_blast(self):
        """``_blast`` with mocked BLAST steps leaves ``tmp_basic_test`` as ``test``.

        The log file is opened in a ``with`` block so its handle is closed
        after the call (it was previously leaked).
        """
        self.srna.helper.merge_blast_out = self.mock.mock_merge_blast_out
        sr.extract_blast = self.mock.mock_extract_blast
        self.srna._run_blast = self.mock.mock_run_blast
        self.srna._run_format = self.mock.mock_run_format
        gen_file(os.path.join(self.out, "tmp_basic_test"),
                 self.example.srna_file)
        gen_file(os.path.join(self.out, "tmp_nr_test"), "test")
        gen_file(os.path.join(self.fastas, "tmp/test.fa"),
                 ">test\nAAATTTGGGCCC")
        args = self.mock_args.mock()
        args.blast_path = "test"
        args.para_blast = 1
        args.fastas = self.fastas
        args.out_folder = self.out
        args.blast_score_s = 0
        args.blast_score_n = 0
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.srna._blast("database", False, "dna", args,
                             ["test"], "blast_all", "nr", 0.0001, "tss", log)
        datas = import_data(os.path.join(self.out, "tmp_basic_test"))
        self.assertEqual("\n".join(datas), "test")

    def test_class_srna(self):
        """``_class_srna`` creates a ``for_classes/test`` entry for GFFs and tables.

        Classification and table generation are mocked. The log file is
        opened in a ``with`` block so its handle is closed after the call
        (it was previously leaked).
        """
        sr.classify_srna = self.mock.mock_classify_srna
        sr.gen_srna_table = self.mock.mock_gen_srna_table
        gff_out = os.path.join(self.out, "gffs")
        table_out = os.path.join(self.out, "tables")
        stat_out = os.path.join(self.out, "stat")
        os.mkdir(gff_out)
        os.mkdir(table_out)
        os.mkdir(stat_out)
        os.mkdir(os.path.join(table_out, "for_classes"))
        os.mkdir(os.path.join(gff_out, "for_classes"))
        args = self.mock_args.mock()
        args.max_len = 300
        args.min_len = 30
        args.import_info = ["tss", "blast_nr", "blast_srna", "sec_str", "sorf"]
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.srna._class_srna(["test"], args, log)
        self.assertTrue(os.path.exists(os.path.join(
            gff_out, "for_classes/test")))
        self.assertTrue(os.path.exists(os.path.join(
            table_out, "for_classes/test")))

    def test_filter_srna(self):
        """``_filter_srna`` with every stage mocked leaves ``tmp_basic_test`` as ``test``.

        Changes over the earlier version: the log file is opened in a
        ``with`` block so its handle is closed after the call, the
        duplicated ``sr.extract_energy`` patch is applied once, and an
        unused ``stat_out`` local was dropped.
        """
        # Replace every external tool / helper with its test double.
        self.srna.helper.merge_blast_out = self.mock.mock_merge_blast_out
        sr.classify_srna = self.mock.mock_classify_srna
        sr.gen_srna_table = self.mock.mock_gen_srna_table
        sr.extract_blast = self.mock.mock_extract_blast
        self.srna._run_blast = self.mock.mock_run_blast
        self.srna._run_format = self.mock.mock_run_format
        sr.extract_energy = self.mock.mock_extract_energy
        sr.change_format = self.mock.mock_change_format
        self.srna._run_replot = self.mock.mock_run_replot
        self.srna._convert_pdf = self.mock.mock_convert_pdf
        self.srna._run_mountain = self.mock.mock_run_mountain
        self.srna.multiparser = Mock_multiparser
        self.srna._check_gff = self.mock.mock_check_gff
        self.srna._run_normal = self.mock.mock_run_normal
        self.srna._run_utrsrna = self.mock.mock_run_utrsrna
        sr.merge_srna_gff = self.mock.mock_merge_srna_gff
        sr.merge_srna_table = self.mock.mock_merge_srna_table
        self.srna.helper.get_seq = self.mock.mock_get_seq
        self.srna._run_RNAfold = self.mock.mock_run_RNAfold
        if "mountain_plot" not in os.listdir(self.out):
            os.mkdir(os.path.join(self.out, "mountain_plot"))
        sec_path = os.path.join(self.out, "sec_structure")
        if "sec_structure" not in os.listdir(self.out):
            os.mkdir(sec_path)
            os.mkdir(os.path.join(sec_path, "sec_plot"))
            os.mkdir(os.path.join(sec_path, "dot_plot"))
        gen_file(os.path.join(self.fastas, "tmp/test.fa"),
                 ">test\nAAATTTGGGCCC")
        gen_file(os.path.join(self.out, "sRNA_seq_test"),
                 ">test\nAAATTTGGGCCC")
        gen_file(os.path.join(self.out, "sRNA_index_test"),
                 ">test\nAAATTTGGGCCC")
        gen_file(os.path.join(self.out, "tmp_basic_test"),
                 self.example.srna_file)
        gen_file(os.path.join(self.out, "tmp_energy_test"), "test")
        gen_file(os.path.join(self.out, "tmp_nr_test"), "test")
        gen_file(os.path.join(self.out, "tmp_sRNA_test"), "test")
        gen_file(os.path.join(self.out, "tmp_sRNA_test.csv"), "test")
        gen_file(os.path.join(self.test_folder, "srna"), "test")
        gen_file(os.path.join(self.test_folder, "nr"), "test")
        sr.blast_class = self.mock.mock_blast_class
        sr.srna_sorf_comparison = self.mock.mock_srna_sorf_comparison
        args = self.mock_args.mock()
        args.import_info = ["tss", "blast_nr", "blast_srna", "sec_str", "sorf"]
        args.out_folder = self.out
        args.fastas = self.fastas
        args.rnafold = "test"
        args.relplot_pl = "test"
        args.mountain_pl = "test"
        args.table_best = True
        args.in_cds = False
        args.ps2pdf14_path = "test"
        args.sorf_file = self.sorf
        args.mountain = True
        args.nr_database = os.path.join(self.test_folder, "nr")
        args.srna_database = os.path.join(self.test_folder, "srna")
        args.blastx = "blast_path"
        args.blastn = "blast_path"
        args.nr_format = False
        args.srna_format = False
        args.compute_sec_str = False
        args.e_nr = 0
        args.e_srna = 0
        args.para_blast = 1
        args.blast_score_s = 0
        args.blast_score_n = 0
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.srna._filter_srna(args, ["test"], log)
        datas = import_data(os.path.join(self.out, "tmp_basic_test"))
        self.assertEqual("\n".join(datas), "test")
Пример #43
0
class TestCircRNADetection(unittest.TestCase):
    def setUp(self):
        """Create the scratch folders and the ``CircRNADetection`` object under test.

        Each folder is created with ``os.makedirs(..., exist_ok=True)``.
        Previously ``fasta/tmp`` was created only when ``fasta`` itself was
        missing, so a pre-existing ``fasta`` folder left it absent.
        ``self.alignment_path`` is only recorded here, not created.
        """
        self.segemehl = Mock_segemehl()
        self.samtools = Mock_samtools()
        self.mock_args = MockClass()
        self.example = Example()
        self.test_folder = "test_folder"
        self.fasta_folder = os.path.join(self.test_folder, "fasta")
        self.gff_folder = os.path.join(self.test_folder, "gff")
        self.out_folder = os.path.join(self.test_folder, "output")
        self.read_folder = os.path.join(self.test_folder, "read")
        self.splice_folder = os.path.join(self.test_folder, "splice")
        self.alignment_path = os.path.join(self.out_folder, "segemehl_align")
        folders = (
            self.test_folder,
            self.fasta_folder,
            os.path.join(self.fasta_folder, "tmp"),
            self.gff_folder,
            self.out_folder,
            self.read_folder,
            self.splice_folder,
        )
        for folder in folders:
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.output_folder = self.out_folder
        args.gffs = self.gff_folder
        args.align = True
        args.fastas = self.fasta_folder
        self.circ = CircRNADetection(args)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        if os.path.exists("test1"):
            if os.path.isfile("test1"):
                os.remove("test1")
            if os.path.isdir("test1"):
                shutil.rmtree("test1")
        if os.path.exists("test2"):
            if os.path.isfile("test2"):
                os.remove("test2")
            if os.path.isdir("test2"):
                shutil.rmtree("test2")

    def test_deal_zip_file(self):
        """``_deal_zip_file`` decompresses ``.gz``/``.bz2`` reads and records them.

        Two FASTA files are compressed with the ``gzip`` and ``bzip2``
        command-line tools, then the returned read description and the
        presence of the decompressed files are checked. The log file is
        opened in a ``with`` block so its handle is closed after the call
        (it was previously leaked).
        """
        out1 = os.path.join(self.test_folder, "test1.fa")
        out2 = os.path.join(self.test_folder, "test2")
        gen_file(out1, self.example.fasta_file)
        gen_file(out2, self.example.fasta_file)
        os.system("gzip " + out1)
        os.system("bzip2 -z " + out2)
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            reads = self.circ._deal_zip_file(
                [{
                    "sample": "all",
                    "files": [out1 + ".gz", out2 + ".bz2"]
                }], log)
        self.assertEqual(reads, [{
            'files': [
                'test_folder/test1.fa.gz', 'test_folder/test2.bz2',
                'test_folder/test1.fa', 'test_folder/test2.fa'
            ],
            'zips': ['test_folder/test1.fa', 'test_folder/test2.fa'],
            'sample':
            'all'
        }])
        self.assertTrue(os.path.exists(out1))
        self.assertTrue(os.path.exists(out2 + ".fa"))

    def test_align(self):
        """``_align`` with segemehl mocked pairs every read file with every genome.

        The log file is opened in a ``with`` block so its handle is closed
        after the call (it was previously leaked), and the redundant nested
        ``os.path.join`` calls were flattened.
        """
        self.circ._run_segemehl_fasta_index = self.segemehl.mock_fasta_index
        self.circ._run_segemehl_align = self.segemehl.mock_align
        self.circ._wait_process = self.segemehl.mock_wait_processes
        fasta1 = os.path.join(self.fasta_folder, "tmp/test1.fa")
        fasta2 = os.path.join(self.fasta_folder, "tmp/test2.fa")
        read1 = os.path.join(self.read_folder, "read1.fa")
        read2 = os.path.join(self.read_folder, "read2.fa")
        gen_file(fasta1, self.example.fasta_file)
        gen_file(fasta2, self.example.fasta_file)
        gen_file(read1, self.example.fasta_file)
        gen_file(read2, self.example.fasta_file)
        os.mkdir(os.path.join(self.out_folder, "segemehl_alignment_files"))
        args = self.mock_args.mock()
        args.output_folder = self.out_folder
        args.gffs = self.gff_folder
        args.align = True
        args.fastas = self.fasta_folder
        args.segemehl_path = None
        args.read_files = [read1, read2]
        args.cores = 2
        read_datas = [{"sample": "test", "files": [read1, read2]}]
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            align_results, prefixs = self.circ._align(args, read_datas, log)
        self.assertEqual(
            set(align_results),
            {'read1_test1', 'read2_test1', 'read1_test2', 'read2_test2'})
        self.assertEqual(set(prefixs), {'test1', 'test2'})

    def test_convert_sam2bam(self):
        """``_convert_sam2bam`` reports converted and removable files correctly.

        The method is called twice with different ``align_files``. Both
        calls share one log file, now opened in a ``with`` block so the
        handle is closed even if an assertion fails (it was previously
        leaked).
        """
        self.circ._run_samtools_convert_bam = self.samtools.mock_covert_bam
        sam1 = os.path.join(self.test_folder, "test1.sam")
        sam2 = os.path.join(self.test_folder, "test2.sam")
        bam = os.path.join(self.test_folder, "test3.bam")
        gen_file(sam1, self.example.align_file)
        gen_file(sam2, self.example.align_file)
        gen_file(bam, self.example.align_file)
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            align_files = ["test1"]
            bam_files, convert_ones, remove_ones = self.circ._convert_sam2bam(
                self.test_folder, None, align_files, log)
            self.assertEqual(
                set(bam_files),
                set([bam,
                     sam1.replace("sam", "bam"),
                     sam2.replace("sam", "bam")]))
            self.assertEqual(set(convert_ones),
                             set([sam2.replace("sam", "bam")]))
            self.assertEqual(set(remove_ones), set([sam1]))
            align_files = ["test3"]
            bam_files, convert_ones, remove_ones = self.circ._convert_sam2bam(
                self.test_folder, None, align_files, log)
            self.assertEqual(
                set(convert_ones),
                set([sam2.replace("sam", "bam"),
                     sam1.replace("sam", "bam")]))
            self.assertEqual(set(remove_ones), set())

    def test_merge_bed(self):
        """``_merge_bed`` regroups per-header BED files by genome FASTA prefix.

        Splice-site and trans-realigned BED files are written for three
        headers. The test then checks the returned prefixes and that the
        merged and temporary BED files exist under the output folder.
        """
        headers = ("Staphylococcus_aureus_HG003", "aaa", "bbb")
        fasta1 = os.path.join(self.fasta_folder, "test1.fa")
        fasta2 = os.path.join(self.fasta_folder, "test2.fa")
        for header in headers:
            os.mkdir(os.path.join(self.splice_folder, header))
        gen_file(fasta1, self.example.fasta_file)
        gen_file(fasta2, self.example.multi_fasta_file)
        # All splice-site files first, then all trans-realigned files.
        for kind, content in (("splicesites", self.example.splice_file),
                              ("transrealigned", self.example.tran_file)):
            for header in headers:
                bed_name = "{0}_a1_{1}.bed".format(header, kind)
                gen_file(os.path.join(self.splice_folder, header, bed_name),
                         content)
        prefixs = self.circ._merge_bed(self.fasta_folder, self.splice_folder,
                                       self.out_folder)
        self.assertEqual(set(prefixs[1]), set(["test1", "test2"]))
        self.assertEqual(prefixs[0][0], "_a1_")
        expected = (
            ("test1", "test1_a1_splicesites.bed"),
            ("test1", "test1_a1_transrealigned.bed"),
            ("test2", "test2_a1_splicesites.bed"),
            ("test2", "test2_a1_transrealigned.bed"),
            ("test2", "tmp_bbb_a1_splicesites.bed"),
            ("test2", "tmp_aaa_a1_splicesites.bed"),
            ("test2", "tmp_aaa_a1_transrealigned.bed"),
            ("test2", "tmp_bbb_a1_transrealigned.bed"),
        )
        for folder, bed_name in expected:
            self.assertTrue(
                os.path.exists(
                    os.path.join(self.out_folder, folder, bed_name)))

    def test_combine_read_bam(self):
        """``_combine_read_bam`` appends the missing ``aaa3.bam`` to sample ``aaa``.

        ``aaa1.bam`` is already listed for the sample and must not be
        duplicated.
        """
        align_dir = os.path.join(self.out_folder, "segemehl_alignment_files")
        aaa1_bam = os.path.join(align_dir, "aaa1.bam")
        aaa3_bam = os.path.join(align_dir, "aaa3.bam")
        bam_datas = [
            {"sample": "aaa", "files": [aaa1_bam, "aaa2.bam"]},
            {"sample": "bbb", "files": ["bbb1.bam", "bbb2.bam"]},
        ]
        read_datas = [
            {"sample": "aaa", "files": ["aaa1.fa", "aaa3.fa", "aaa4.fa"]},
        ]
        bam_files = [aaa1_bam, aaa3_bam]
        self.circ._combine_read_bam(bam_files, bam_datas, read_datas)
        expected = {
            'files': [
                'test_folder/output/segemehl_alignment_files/aaa1.bam',
                'aaa2.bam',
                'test_folder/output/segemehl_alignment_files/aaa3.bam'
            ],
            'sample': 'aaa'
        }
        self.assertDictEqual(bam_datas[0], expected)
Пример #44
0
class TestRATT(unittest.TestCase):
    def setUp(self):
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.ref_embls = "test_folder/embls"
        self.output_path = "test_folder/output"
        self.tar_fastas = "test_folder/tar_fasta"
        self.ref_fastas = "test_folder/ref_fasta"
        self.gff_outfolder = "test_folder/gffs"
        self.ref_gbk = "test_folder/gbk"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
            os.mkdir(self.ref_embls)
            os.mkdir(self.ref_gbk)
            os.mkdir(self.output_path)
            os.mkdir(self.tar_fastas)
            os.mkdir(self.ref_fastas)
            os.mkdir(self.gff_outfolder)
        args = self.mock_args.mock()
        args.output_path = self.output_path
        args.ref_embls = self.ref_embls
        args.ref_gbk = self.ref_gbk
        args.tar_fastas = self.tar_fastas
        args.ref_fastas = self.ref_fastas
        args.gff_outfolder = self.gff_outfolder
        self.ratt = RATT(args)
        self.example = Example()

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_convert_to_pttrnt(self):
        files = ["aaa.gff"]
        gen_file(os.path.join(self.test_folder, "aaa.gff"),
                 self.example.gff_file)
        os.mkdir(os.path.join(self.tar_fastas, "tmp"))
        gen_file(os.path.join(self.tar_fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        self.ratt._convert_to_pttrnt(self.test_folder, files)
        data = import_data(os.path.join(self.test_folder, "aaa.rnt"))
        self.assertEqual("\n".join(data), self.example.rnt_file)
        data = import_data(os.path.join(self.test_folder, "aaa.ptt"))
        self.assertEqual("\n".join(data), self.example.ptt_file)

    def test_convert_to_gff(self):
        files = ["aaa.gff"]
        ratt_result = "chromosome.aaa.final.embl"
        gen_file(os.path.join(self.output_path, ratt_result),
                 self.example.embl_file)
        args = self.mock_args.mock()
        args.output_path = self.output_path
        args.gff_outfolder = self.gff_outfolder
        self.ratt._convert_to_gff(ratt_result, args, files)
        #        self.ratt._convert_to_gff(ratt_result, self.output_path, self.gff_outfolder, files)
        data = import_data(os.path.join(self.output_path, "aaa.gff"))
        self.assertEqual("\n".join(data), self.example.embl_gff)
        data = import_data(os.path.join(self.gff_outfolder, "aaa.gff"))
        self.assertEqual("\n".join(data), self.example.embl_gff)

    def test_parser_embl_gbk(self):
        files = [os.path.join(self.test_folder, "aaa.gbk")]
        gen_file(os.path.join(self.test_folder, "aaa.gbk"),
                 self.example.gbk_file)
        self.ratt._parser_embl_gbk(files)
        data = import_data(
            os.path.join(self.ref_gbk, "gbk_tmp/NC_007795.1.gbk"))
        self.assertEqual("\n".join(data),
                         self.example.gbk_file.split("//")[0] + "//")
        data = import_data(
            os.path.join(self.ref_gbk, "gbk_tmp/NC_007799.1.gbk"))
        self.assertEqual("\n".join(data),
                         self.example.gbk_file.split("//")[1].strip() + "\n//")

    def test_convert_embl(self):
        gen_file(os.path.join(self.test_folder, "aaa.gbk"),
                 self.example.gbk_file.split("//")[0])
        out = self.ratt._convert_embl(self.test_folder)
        self.assertEqual(out, "test_folder/gbk/gbk_tmp")
        self.assertTrue(os.path.exists("test_folder/gbk/gbk_tmp"))

    def test_format_and_run(self):
        self.ratt._run_ratt = Mock_func().mock_run_ratt
        args = self.mock_args.mock()
        args.output_path
        args.pairs = ["NC_007795.1:Staphylococcus_aureus_HG003"]
        args.element = "chromosome"
        self.ratt._format_and_run(args)

    def test_annotation_transfer(self):
        gen_file(os.path.join(self.ref_fastas, "aaa.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.tar_fastas, "bbb.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.ref_embls, "aaa.gbk"),
                 self.example.gbk_file.split("//")[0])
        self.ratt._run_ratt = Mock_func().mock_run_ratt
        args = self.mock_args.mock()
        args.element = "element"
        args.ref_embls = self.ref_embls
        args.tar_fastas = self.tar_fastas
        args.ref_fastas = self.ref_fastas
        args.output_path = self.output_path
        args.gff_outfolder = self.gff_outfolder
        args.pairs = ["aaa:bbb"]
        args.convert = True
        self.ratt.annotation_transfer(args)
        #        self.ratt.annotation_transfer("test", "element", "test_type",
        #                                      self.ref_embls, self.tar_fastas,
        #                                      self.ref_fastas, self.output_path,
        #                                      True, self.gff_outfolder, pairs)
        self.assertTrue(
            os.path.exists(os.path.join(self.gff_outfolder, "bbb.gff")))
        self.assertTrue(
            os.path.exists(os.path.join(self.gff_outfolder, "bbb.rnt")))
        self.assertTrue(
            os.path.exists(os.path.join(self.gff_outfolder, "bbb.ptt")))
Пример #45
0
class TestRATT(unittest.TestCase):

    def setUp(self):
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.ref_embls = "test_folder/embls"
        self.output_path = "test_folder/output"
        self.tar_fastas = "test_folder/tar_fasta"
        self.ref_fastas = "test_folder/ref_fasta"
        self.gff_outfolder = "test_folder/gffs"
        self.ref_gbk = "test_folder/gbk"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
            os.mkdir(self.ref_embls)
            os.mkdir(self.ref_gbk)
            os.mkdir(self.output_path)
            os.mkdir(self.tar_fastas)
            os.mkdir(self.ref_fastas)
            os.mkdir(self.gff_outfolder)
        args = self.mock_args.mock()
        args.output_path = self.output_path
        args.ref_embls = self.ref_embls
        args.ref_gbk = self.ref_gbk
        args.tar_fastas = self.tar_fastas
        args.ref_fastas = self.ref_fastas
        args.gff_outfolder = self.gff_outfolder
        self.ratt = RATT(args)
        self.example = Example()

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_convert_to_pttrnt(self):
        files = ["aaa.gff"]
        log = open(os.path.join(self.test_folder, "test.log"), "w")
        gen_file(os.path.join(self.test_folder, "aaa.gff"),
                 self.example.gff_file)
        os.mkdir(os.path.join(self.tar_fastas, "tmp"))
        gen_file(os.path.join(self.tar_fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        self.ratt._convert_to_pttrnt(self.test_folder, files, log)
        data = import_data(os.path.join(self.test_folder, "aaa.rnt"))
        self.assertEqual("\n".join(data), self.example.rnt_file)
        data = import_data(os.path.join(self.test_folder, "aaa.ptt"))
        self.assertEqual("\n".join(data), self.example.ptt_file)

    def test_convert_to_gff(self):
        files = ["aaa.gff"]
        log = open(os.path.join(self.test_folder, "test.log"), "w")
        ratt_result = "chromosome.aaa.final.embl"
        gen_file(os.path.join(self.output_path, ratt_result),
                 self.example.embl_file)
        args = self.mock_args.mock()
        args.output_path = self.output_path
        args.gff_outfolder = self.gff_outfolder
        self.ratt._convert_to_gff(ratt_result, args, files, log)
        data = import_data(os.path.join(self.output_path, "aaa.gff"))
        self.assertEqual("\n".join(data), self.example.embl_gff)
        data = import_data(os.path.join(self.gff_outfolder, "aaa.gff"))
        self.assertEqual("\n".join(data), self.example.embl_gff)

    def test_parser_embl_gbk(self):
        files = [os.path.join(self.test_folder, "aaa.gbk")]
        gen_file(os.path.join(self.test_folder, "aaa.gbk"),
                 self.example.gbk_file)
        self.ratt._parser_embl_gbk(files)
        data = import_data(os.path.join(self.ref_gbk,
                           "gbk_tmp/NC_007795.1.gbk"))
        self.assertEqual(
            "\n".join(data),
            self.example.gbk_file.split("//")[0] + "//")
        data = import_data(os.path.join(
            self.ref_gbk, "gbk_tmp/NC_007799.1.gbk"))
        self.assertEqual(
            "\n".join(data),
            self.example.gbk_file.split("//")[1].strip() + "\n//")

    def test_convert_embl(self):
        gen_file(os.path.join(self.test_folder, "aaa.gbk"),
                 self.example.gbk_file.split("//")[0])
        log = open(os.path.join(self.test_folder, "test.log"), "w")
        out = self.ratt._convert_embl(self.test_folder, log)
        self.assertEqual(out, "test_folder/gbk/gbk_tmp")
        self.assertTrue(os.path.exists("test_folder/gbk/gbk_tmp"))

    def test_format_and_run(self):
        self.ratt._run_ratt = Mock_func().mock_run_ratt
        args = self.mock_args.mock()
        args.output_path
        args.pairs = ["NC_007795.1:Staphylococcus_aureus_HG003"]
        args.element = "chromosome"
        log = open(os.path.join(self.test_folder, "test.log"), "w")
        self.ratt._format_and_run(args, log)

    def test_annotation_transfer(self):    
        gen_file(os.path.join(self.ref_fastas, "aaa.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.tar_fastas, "bbb.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.ref_embls, "aaa.gbk"),
                 self.example.gbk_file.split("//")[0])
        log = open(os.path.join(self.test_folder, "test.log"), "w")
        self.ratt._run_ratt = Mock_func().mock_run_ratt
        args = self.mock_args.mock()
        args.element = "element"
        args.ref_embls = self.ref_embls
        args.tar_fastas = self.tar_fastas
        args.ref_fastas = self.ref_fastas
        args.output_path = self.output_path
        args.gff_outfolder = self.gff_outfolder
        args.pairs = ["aaa:bbb"]
        args.convert = True
        self.ratt.annotation_transfer(args, log)
        self.assertTrue(os.path.exists(
            os.path.join(self.gff_outfolder, "bbb.gff")))
        self.assertTrue(os.path.exists(
            os.path.join(self.gff_outfolder, "bbb.rnt")))
        self.assertTrue(os.path.exists(
            os.path.join(self.gff_outfolder, "bbb.ptt")))
Пример #46
0
class TestTranscripSNP(unittest.TestCase):
    """Tests for the functions of the module imported as ``ts``.

    Every test works inside a scratch ``test_folder`` that ``setUp`` creates
    and ``tearDown`` removes.  Several fixtures are literal SNP record dicts;
    their ``all_info`` strings are tab-separated and must stay byte-exact.
    """

    def setUp(self):
        """Create the mock-args factory, the example data and the scratch folder."""
        self.mock_args = MockClass()
        self.example = Example()
        self.test_folder = "test_folder"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder if it is still present."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_import_data(self):
        """Check ``max_quals`` and ``snps`` returned by ``ts.import_data``."""
        snp_file = os.path.join(self.test_folder, "snp")
        gen_file(snp_file, self.example.snp_file)
        depth_file = os.path.join(self.test_folder, "depth")
        gen_file(depth_file, self.example.depth_file)
        args = self.mock_args.mock()
        args.depth_s = "n_10"
        args.depth_b = "a_2"
        args.dp4_sum = "n_10"
        args.dp4_frac = 0.5
        args.idv = "n_10"
        args.imf = 0.5
        args.filters = ["VDB_s0.1"]
        args.min_sample = 2
        # dess and raw_snps are returned as well but are not checked here.
        max_quals, snps, dess, raw_snps = ts.import_data(snp_file, args, 2, depth_file)
        self.assertDictEqual(max_quals, {'NC_007795.1': 98.0, 'All_strain': 98.0})
        self.assertListEqual(snps, [{'dp4_frac': 1.0, 'strain': 'NC_007795.1', 'filter': '.',
        'indel': -1, 'pos': 1, 'id': '.',
        'all_info': 'NC_007795.1\t1\t.\tC\tA\t98\t.\tDP=89;DP4=0,0,60,9;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87',
        'qual': 98.0, 'info': ['DP=89', 'DP4=0,0,60,9', 'VDB=8.46526e-15'],
        'alt': 'A', 'ref': 'C', 'frac': -1, 'depth': 89, 'dp4_sum': 69},
        {'dp4_frac': 1.0, 'strain': 'NC_007795.1', 'filter': '.', 'indel': 22,
        'pos': 6, 'id': '.', 'all_info': 'NC_007795.1\t6\t.\tA\tAA\t26.9515\t.\tINDEL;IDV=22;IMF=0.536585;DP=41;VDB=9.36323e-14;DP4=0,0,40,0\tGT:PL:DP\t0/1:60,0,55:40',
        'qual': 26.9515, 'info': ['INDEL', 'IDV=22', 'IMF=0.536585', 'DP=41', 'VDB=9.36323e-14', 'DP4=0,0,40,0'],
        'alt': 'AA', 'ref': 'A', 'frac': 0.536585, 'depth': 41, 'dp4_sum': 40}])

    def test_check_overlap(self):
        """Check how ``ts.check_overlap`` changes both of its arguments.

        After the call the overlap entry carries ``'print': True`` and the
        same entry is listed under ``snps["test"]``.
        """
        snps = {"test": []}
        overlaps = [{"test": []}]
        ts.check_overlap(snps, overlaps)
        self.assertListEqual(overlaps, [{'test': [], 'print': True}])
        self.assertDictEqual(snps, {'test': [{'test': [], 'print': True}]})

    def test_overlap_position(self):
        """Check ``conflicts`` and ``nooverlap`` from ``ts.overlap_position``.

        Input is three records at positions 22181, 22182 and 30000.  The
        expected ``conflicts`` holds one group with the first two records;
        the expected ``nooverlap`` has keys 1 and 2, each pairing one of
        those two records with the record at 30000.
        """
        qual_snps = [{'filter': '.', 'pos': 22181, 'alt': 'A',
                      'frac': -1, 'depth': 89, 'indel': -1,
                      'info': 'MQ=20', 'id': '.', 'qual': 98.0,
                      'ref': 'CA', 'strain': 'NC_007795.1',
                      'all_info': 'NC_007795.1\t22181\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87'},
                     {'filter': '.', 'pos': 22182, 'alt': 'C',
                      'frac': -1, 'depth': 89, 'indel': -1,
                      'info': 'MQ=20', 'id': '.', 'qual': 98.0,
                      'ref': 'A', 'strain': 'NC_007795.1', 
                      'all_info': 'NC_007795.1\t22182\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87'},
                     {'filter': '.', 'pos': 30000, 'alt': 'A',
                      'frac': -1, 'depth': 89, 'indel': -1,
                      'info': 'MQ=20', 'id': '.', 'qual': 98.0,
                      'ref': 'C', 'strain': 'NC_007795.1', 
                      'all_info': 'NC_007795.1\t30000\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87'}]
        conflicts, nooverlap = ts.overlap_position(qual_snps)
        self.assertListEqual(conflicts, [[{'strain': 'NC_007795.1', 'info': 'MQ=20',
                                           'indel': -1, 'qual': 98.0, 'ref': 'CA', 'frac': -1,
                                           'alt': 'A', 'depth': 89, 'print': True, 'pos': 22181,
                                           'filter': '.', 'id': '.',
                                           'all_info': 'NC_007795.1\t22181\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87'},
                                          {'strain': 'NC_007795.1', 'info': 'MQ=20', 'indel': -1,
                                           'qual': 98.0, 'ref': 'A', 'frac': -1, 'alt': 'C',
                                           'depth': 89, 'print': True, 'pos': 22182, 'filter': '.',
                                           'id': '.',
                                           'all_info': 'NC_007795.1\t22182\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87'}]])
        self.assertDictEqual(nooverlap, {1: [{'strain': 'NC_007795.1', 'info': 'MQ=20', 'indel': -1,
                                              'qual': 98.0, 'ref': 'CA', 'frac': -1, 'alt': 'A',
                                              'depth': 89, 'print': True, 'pos': 22181, 'filter': '.', 'id': '.',
                                              'all_info': 'NC_007795.1\t22181\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87'},
                                             {'strain': 'NC_007795.1', 'info': 'MQ=20', 'indel': -1,
                                              'qual': 98.0, 'ref': 'C', 'frac': -1, 'alt': 'A',
                                              'depth': 89, 'print': True, 'pos': 30000, 'filter': '.', 'id': '.',
                                              'all_info': 'NC_007795.1\t30000\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87'}],
                                         2: [{'strain': 'NC_007795.1', 'info': 'MQ=20', 'indel': -1, 'qual': 98.0,
                                              'ref': 'A', 'frac': -1, 'alt': 'C', 'depth': 89, 'print': True,
                                              'pos': 22182, 'filter': '.', 'id': '.',
                                              'all_info': 'NC_007795.1\t22182\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87'},
                                             {'strain': 'NC_007795.1', 'info': 'MQ=20', 'indel': -1,
                                              'qual': 98.0, 'ref': 'C', 'frac': -1, 'alt': 'A', 'depth': 89,
                                              'print': True, 'pos': 30000, 'filter': '.', 'id': '.',
                                              'all_info': 'NC_007795.1\t30000\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87'}]})

    def test_stat(self):
        """Compare the ``stat_best.csv`` file written via ``ts.stat`` with the example."""
        stat_file = os.path.join(self.test_folder, "stat")
        max_quals = {'NC_007795.1': 98.0, 'All_strain': 98.0}
        trans_snps = [{'filter': '.', 'pos': 22181, 'alt': 'A',
                       'frac': -1, 'depth': 89, 'indel': -1,
                       'info': 'MQ=20', 'id': '.', 'qual': 98.0,
                       'ref': 'C', 'strain': 'NC_007795.1',
                       'all_info': 'NC_007795.1\t22181\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87'}]
        args = self.mock_args.mock()
        args.depth = 50
        args.fraction = 0.3
        args.quality = 20
        ts.stat(max_quals, trans_snps, 2, stat_file, self.test_folder + "/test", args, "best.csv")
        # The file that is read back is stat_file with "_best.csv" appended.
        datas = import_data(stat_file + "_best.csv")
        self.assertEqual("\n".join(datas), self.example.stat)

    def test_plot_bar(self):
        """Check that ``ts.plot_bar`` leaves the expected PNG in the scratch folder."""
        ts.plot_bar([3, 10, 30, 45, 50], "NC_007795.1", self.test_folder + "/test", "best.png")
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "test_NC_007795.1_SNP_QUAL_best.png")))

    def test_read_fasta(self):
        """Check the list of ``{name: sequence}`` dicts from ``ts.read_fasta``."""
        fasta_file = os.path.join(self.test_folder, "NC_007795.1.fa")
        gen_file(fasta_file, self.example.fasta_file)
        seqs = ts.read_fasta(fasta_file)
        self.assertListEqual(seqs, [{'NC_007795.1': 'AAATATATCAGCACCGTAGACGATAGAGTAGTAC'}])

    def test_gen_ref(self):
        """Call ``ts.gen_ref`` twice, feeding the first result into the second call."""
        refs = []
        snps = [{'filter': '.', 'pos': 22181, 'alt': 'A',
                 'frac': -1, 'depth': 89, 'indel': -1,
                 'info': 'MQ=20', 'id': '.', 'qual': 98.0, 
                 'ref': 'C', 'strain': 'NC_007795.1',
                 'all_info': 'NC_007795.1\t22181\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87'},
                {'filter': '.', 'pos': 22500, 'alt': 'A',
                 'frac': -1, 'depth': 89, 'indel': -1,
                 'info': 'MQ=20', 'id': '.', 'qual': 98.0, 
                 'ref': 'C', 'strain': 'NC_007795.1',
                 'all_info': 'NC_007795.1\t22500\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87'}]
        # First call: empty refs, last argument 1.
        refs = ts.gen_ref(snps, 1, refs, 1)
        self.assertListEqual(refs, ['1:A', '1:A'])
        # The records are rebuilt from literals so the second call does not
        # reuse the dict objects that were handed to the first call.
        snps = [{'filter': '.', 'pos': 22181, 'alt': 'A',
                 'frac': -1, 'depth': 89, 'indel': -1,
                 'info': 'MQ=20', 'id': '.', 'qual': 98.0,
                 'ref': 'C', 'strain': 'NC_007795.1',
                 'all_info': 'NC_007795.1\t22181\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87'},
                {'filter': '.', 'pos': 22500, 'alt': 'A',
                 'frac': -1, 'depth': 89, 'indel': -1,
                 'info': 'MQ=20', 'id': '.', 'qual': 98.0,
                 'ref': 'C', 'strain': 'NC_007795.1',
                 'all_info': 'NC_007795.1\t22500\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87'}]
        # Second call: refs from the first call, last argument 2.
        refs = ts.gen_ref(snps, 1, refs, 2)
        self.assertListEqual(refs, ['1:A_1:A', '1:A_1:A', '1:A_1:A', '1:A_1:A'])

    def test_change(self):
        """Check the ``seq`` dict after ``ts.change(snp, seq)``.

        The return value is ignored; the assertion is on ``seq`` itself.  In
        the expected value the fourth base is ``a`` and ``num_mod`` is still 3.
        """
        snp = {'filter': '.', 'pos': 1, 'alt': 'A',
                'frac': -1, 'depth': 89, 'indel': -1,
                'info': 'MQ=20', 'id': '.', 'qual': 98.0,
                'ref': 'C', 'strain': 'NC_007795.1',
                'all_info': 'NC_007795.1\t1\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87'}
        seq = {"num_mod": 3, "seq": "CCCCATATCAGCACCGTAGACGATAGAGTAGTAC"}
        ts.change(snp, seq)
        self.assertDictEqual(seq, {'num_mod': 3, 'seq': 'CCCaATATCAGCACCGTAGACGATAGAGTAGTAC'})

    def test_print_file(self):
        """Check the reference table text and FASTA files from ``ts.print_file``."""
        refs = {'NC_007795.1': ['1:A', '1:GT']}
        conflicts = [[{'all_info': 'NC_007795.1\t1\t.\tCA\tA,GT\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87',
                       'filter': '.', 'id': '.', 'frac': -1, 'indel': -1,
                       'alt': 'A,GT', 'info': 'VDB=8.46526e-15', 'qual': 98.0,
                       'ref': 'CA', 'strain': 'NC_007795.1', 'depth': 89, 'pos': 1, 'print': True},
                      {'all_info': 'NC_007795.1\t2\t.\tA\tAA\t26.9515\t.\tINDEL;IDV=22;IMF=0.536585;DP=41;VDB=9.36323e-14 GT:PL:DP\t0/1:60,0,55:40',
                       'filter': '.', 'id': '.', 'frac': 0.536585, 'indel': 22,
                       'alt': 'AA', 'info': 'VDB=9.36323e-14 GT:PL:DP', 'qual': 26.9515,
                       'ref': 'A', 'strain': 'NC_007795.1', 'depth': 41, 'pos': 2, 'print': True}]]
        values = [{'all_info': 'NC_007795.1\t1\t.\tCA\tA,GT\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87',
                   'filter': '.', 'id': '.', 'frac': -1, 'indel': -1, 'alt': 'A,GT',
                   'info': 'VDB=8.46526e-15', 'qual': 98.0, 'ref': 'CA',
                   'strain': 'NC_007795.1', 'depth': 89, 'pos': 1, 'print': True},
                  {'all_info': 'NC_007795.1\t7\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87',
                   'filter': '.', 'id': '.', 'frac': -1, 'indel': -1, 'alt': 'A',
                   'info': 'VDB=8.46526e-15', 'qual': 98.0, 'ref': 'C',
                   'strain': 'NC_007795.1', 'depth': 89, 'pos': 7, 'print': True}]
        mod_seq_init = {'genome': 'NC_007795.1', 'num_mod': 0, 'seq': 'CAGTACCCTCAGCACCGTAGACGATAGAGTAGTAC'}
        mod_seqs = [{'genome': 'NC_007795.1', 'num_mod': -1, 'seq': 'aGTACaCTCAGCACCGTAGACGATAGAGTAGTAC'}, {'genome': 'NC_007795.1', 'num_mod': 0, 'seq': 'gtGTACaCTCAGCACCGTAGACGATAGAGTAGTAC'}]
        # An in-memory buffer stands in for the reference output file.
        out_ref = StringIO()
        out_seq = os.path.join(self.test_folder, "seq")
        ts.print_file(refs, out_ref, conflicts, 1, values, mod_seq_init,
                      mod_seqs, out_seq)
        self.assertEqual(out_ref.getvalue(), "1\t1\t1\t1:A\tNC_007795.1\n1\t1\t2\t1:GT\tNC_007795.1\n")
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "seq_NC_007795.1_1_1.fa")))
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "seq_NC_007795.1_1_2.fa")))

    def test_gen_new_fasta(self):
        """Check the reference table text and FASTA files from ``ts.gen_new_fasta``."""
        # An in-memory buffer stands in for the reference output file.
        out_ref = StringIO()
        out_seq = os.path.join(self.test_folder, "seq")
        nooverlap = {1: [{'strain': 'NC_007795.1', 'print': True, 'id': '.', 'alt': 'A,GT',
                          'filter': '.', 'frac': -1, 'ref': 'CA', 'depth': 89, 'info': 'VDB=8.46526e-15',
                          'indel': -1, 'qual': 98.0, 'pos': 1,
                          'all_info': 'NC_007795.1\t1\t.\tCA\tA,GT\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87'},
                         {'strain': 'NC_007795.1', 'print': True, 'id': '.', 'alt': 'A',
                          'filter': '.', 'frac': -1, 'ref': 'C', 'depth': 89, 'info': 'VDB=8.46526e-15',
                          'indel': -1, 'qual': 98.0, 'pos': 7, 'all_info':
                          'NC_007795.1\t7\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87'}],
                     2: [{'strain': 'NC_007795.1', 'print': True, 'id': '.', 'alt': 'AA', 'filter': '.',
                          'frac': 0.536585, 'ref': 'A', 'depth': 41, 'info': 'VDB=9.36323e-14 GT:PL:DP',
                          'indel': 22, 'qual': 26.9515, 'pos': 2,
                          'all_info': 'NC_007795.1\t2\t.\tA\tAA\t26.9515\t.\tINDEL;IDV=22;IMF=0.536585;DP=41;VDB=9.36323e-14 GT:PL:DP\t0/1:60,0,55:40'},
                         {'strain': 'NC_007795.1', 'print': True, 'id': '.', 'alt': 'A',
                          'filter': '.', 'frac': -1, 'ref': 'C', 'depth': 89, 'info': 'VDB=8.46526e-15',
                          'indel': -1, 'qual': 98.0, 'pos': 7,
                          'all_info': 'NC_007795.1\t7\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87'}]}
        seqs = [{'NC_007795.1': 'CAGTACCCTCAGCACCGTAGACGATAGAGTAGTAC'}]
        conflicts = [[{'strain': 'NC_007795.1', 'print': True, 'id': '.',
                       'alt': 'A,GT', 'filter': '.', 'frac': -1, 'ref': 'CA',
                       'depth': 89, 'info': 'VDB=8.46526e-15', 'indel': -1,
                       'qual': 98.0, 'pos': 1,
                       'all_info': 'NC_007795.1\t1\t.\tCA\tA,GT\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87'},
                      {'strain': 'NC_007795.1', 'print': True, 'id': '.',
                       'alt': 'AA', 'filter': '.', 'frac': 0.536585, 'ref': 'A',
                       'depth': 41, 'info': 'VDB=9.36323e-14 GT:PL:DP', 'indel': 22,
                       'qual': 26.9515, 'pos': 2,
                       'all_info': 'NC_007795.1\t2\t.\tA\tAA\t26.9515\t.\tINDEL;IDV=22;IMF=0.536585;DP=41;VDB=9.36323e-14 GT:PL:DP\t0/1:60,0,55:40'}]]
        ts.gen_new_fasta(nooverlap, seqs, out_ref, conflicts, out_seq)
        self.assertEqual(out_ref.getvalue(), "1\t1\t1\t1:A\tNC_007795.1\n1\t1\t2\t1:GT\tNC_007795.1\n")
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "seq_NC_007795.1_1_1.fa")))
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "seq_NC_007795.1_1_2.fa")))
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "seq_NC_007795.1_2_1.fa")))

    def test_snp_detect(self):
        """Run ``ts.snp_detect`` on the example files and check its output files exist.

        Only the existence of the FASTA, CSV, VCF and PNG outputs is
        asserted; their contents are not inspected.
        """
        depth_file = os.path.join(self.test_folder, "depth")
        gen_file(depth_file, self.example.depth_file)
        fasta_file = os.path.join(self.test_folder, "NC_007795.1.fa")
        gen_file(fasta_file, self.example.fasta_final)
        snp_file = os.path.join(self.test_folder, "NC_007795.1.snp")
        gen_file(snp_file, self.example.snp_final)
        out_seq = os.path.join(self.test_folder, "seq")
        out_snp = os.path.join(self.test_folder, "snp")
        stat_file = os.path.join(self.test_folder, "stat")
        args = self.mock_args.mock()
        args.depth = 5
        args.fraction = 0.3
        args.quality = 5
        args.depth_s = "n_10"
        args.depth_b = "a_2"
        args.dp4_sum = "n_10"
        args.dp4_frac = 0.5
        args.idv = "n_10"
        args.imf = 0.5
        args.filters = ["VDB_s0.1"]
        args.min_sample = 2
        ts.snp_detect(fasta_file, snp_file, depth_file, out_snp, out_seq,
                      2, stat_file, args)
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "seq_NC_007795.1_1_1.fa")))
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "seq_NC_007795.1_1_2.fa")))
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "seq_NC_007795.1_2_1.fa")))
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "snp_seq_reference.csv")))
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "snp_best.vcf")))
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "snp_NC_007795.1_SNP_QUAL_best.png")))
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "snp_NC_007795.1_SNP_QUAL_raw.png")))
class TestOptimizeTSSpredator(unittest.TestCase):
    """Tests for the TSSpredator parameter-optimization functions in ``ot``.

    Each test works inside a scratch ``test_folder`` directory that
    ``setUp`` creates and ``tearDown`` removes.
    """

    # Wiggle fixture files written by the tests that need a library folder.
    _WIG_NAMES = (
        "GSM1649587_Hp26695_ML_B1_HS1_-TEX_forward_STRAIN_aaa.wig",
        "GSM1649587_Hp26695_ML_B1_HS1_-TEX_reverse_STRAIN_aaa.wig",
        "GSM1649588_Hp26695_ML_B1_HS1_-TEX_forward_STRAIN_aaa.wig",
        "GSM1649588_Hp26695_ML_B1_HS1_-TEX_reverse_STRAIN_aaa.wig",
    )

    def setUp(self):
        """Create the fixture objects and the scratch folder."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder if it is still present."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _patch_attr(self, module, name, replacement):
        """Set ``module.<name>`` to ``replacement`` for the current test only.

        A cleanup restores the previous value (or deletes the attribute when
        it did not exist before), so a stub cannot leak into later tests.
        """
        missing = object()
        previous = getattr(module, name, missing)
        if previous is missing:
            self.addCleanup(delattr, module, name)
        else:
            self.addCleanup(setattr, module, name, previous)
        setattr(module, name, replacement)

    def _mock_tsspredator(self):
        """Replace the TSSpredator run and GFF conversion in ``ot`` with mocks."""
        self._patch_attr(ot, "run_TSSpredator_paralle",
                         Mock_func().mock_run_TSSpredator_paralle)
        self._patch_attr(ot, "convert2gff", Mock_func().mock_convert2gff)

    def _make_wig_folder(self):
        """Create ``test_folder/wigs/tmp`` if needed and return its path."""
        wig_folder = os.path.join(self.test_folder, "wigs", "tmp")
        os.makedirs(wig_folder, exist_ok=True)
        return wig_folder

    def _gen_tex_wigs(self, wig_folder):
        """Write the four placeholder wiggle files into ``wig_folder``."""
        for wig_name in self._WIG_NAMES:
            gen_file(os.path.join(wig_folder, wig_name), "test")

    def test_initiate(self):
        """Check the four structures returned by ``ot.initiate``."""
        args = self.mock_args.mock()
        args.height = 0.9
        args.height_reduction = 0.8
        args.factor = 0.9
        args.factor_reduction = 0.8
        args.base_height = 0.01
        args.enrichment = 0.5
        args.processing = 0.5
        max_num, best_para, current_para, indexs = ot.initiate(args)
        self.assertDictEqual(max_num, {'re_factor': 0.8, 'processing': 0.5, 'enrichment': 0.5,
                                       'height': 0.9, 'base_height': 0.01, 're_height': 0.8,
                                       'factor': 0.9})
        self.assertDictEqual(best_para, {'re_factor': 0, 'processing': 0, 'enrichment': 0,
                                         'height': 0, 'base_height': 0, 're_height': 0, 'factor': 0})
        self.assertDictEqual(current_para, {'re_factor': 0, 'processing': 0, 'enrichment': 0,
                                            'height': 0, 'base_height': 0, 're_height': 0,
                                            'factor': 0})
        self.assertDictEqual(indexs, {'step': 0, 'change': False, 'num': 0, 'first': True,
                                      'length': 0, 'exist': False, 'switch': 0, 'extend': False,
                                      'count': 0})

    def test_get_gene_length(self):
        """Check the length ``ot.get_gene_length`` reports for the example fasta."""
        fasta = os.path.join(self.test_folder, "test.fa")
        gen_file(fasta, self.example.fasta)
        seq_len = ot.get_gene_length(fasta, "aaa")
        self.assertEqual(seq_len, 102)

    def test_read_predict_manual_gff(self):
        """Check the count and first entry read from the example GFF."""
        gff = os.path.join(self.test_folder, "test.gff")
        gen_file(gff, self.example.gff_file)
        args = self.mock_args.mock()
        args.gene_length = 1000
        num, gffs = ot.read_predict_manual_gff(gff, args)
        self.assertEqual(num, 1)
        self.assertEqual(gffs[0].start, 633)

    def test_scoring_function(self):
        """Check the ``change`` flag after scoring against two different bests."""
        stat_value = {"tp_rate": 0.8, "fp_rate": 0.0003, "tp": 100, "fp": 3}
        best = {"tp_rate": 0.8, "fp_rate": 0.0005, "tp": 100, "fp": 31, "fn": 45, "missing_ratio": 0.004}
        ot.scoring_function(best, stat_value, self.example.indexs, 1000)
        self.assertTrue(self.example.indexs["change"])
        # Reset the flag so the second call is judged on its own.
        self.example.indexs["change"] = False
        stat_value = {"tp_rate": 0.8, "fp_rate": 0.0004, "tp": 100, "fp": 13}
        best = {"tp_rate": 0.8, "fp_rate": 0.0003, "tp": 100, "fp": 3}
        ot.scoring_function(best, stat_value, self.example.indexs, 1000)
        self.assertFalse(self.example.indexs["change"])

    def test_load_stat_csv(self):
        """Check the values ``ot.load_stat_csv`` loads from ``stat.csv``."""
        gen_file(os.path.join(self.test_folder, "stat.csv"), self.example.stat)
        list_num = []
        best_para = {}
        datas = ot.load_stat_csv(self.test_folder, list_num, self.example.best, best_para, self.example.indexs, 1000)
        self.assertEqual(datas[0], 2)
        self.assertDictEqual(datas[1], {'fp': 230.0, 'tp': 789.0, 'missing_ratio': 0.29991126885536823,
                                        'fp_rate': 8.15542105020548e-05, 'tp_rate': 0.7000887311446318,
                                        'fn': 338.0})
        self.assertDictEqual(datas[2], {'processing': 5.2, 'base_height': 0.086, 'factor': 7.6,
                                        're_height': 2.3, 're_factor': 5.5, 'enrichment': 3.1,
                                        'height': 2.4})

    def test_reload_data(self):
        """Check the parameters and statistics returned by ``ot.reload_data``."""
        gen_file(os.path.join(self.test_folder, "stat.csv"), self.example.stat)
        list_num = []
        best_para = {}
        datas = ot.reload_data(self.test_folder, list_num, self.example.best, best_para, self.example.indexs, 1000)
        self.assertDictEqual(datas[0], {'base_height': 0.086, 'processing': 5.2,
                                        'height': 2.4, 'enrichment': 3.1, 're_factor': 5.5,
                                        're_height': 2.3, 'factor': 7.6})
        self.assertDictEqual(datas[1], {'tp_rate': 0.7000887311446318, 'tp': 789.0,
                                        'fn': 338.0, 'fp': 230.0, 'fp_rate': 8.15542105020548e-05,
                                        'missing_ratio': 0.29991126885536823})

    def test_extend_data(self):
        """``ot.extend_data`` should return parameters equal to ``best_para``."""
        best_para = copy.deepcopy(self.example.best_para)
        current_para = ot.extend_data(self.test_folder, self.example.best, best_para, 100)
        self.assertDictEqual(current_para, best_para)

    def test_run_random_part(self):
        """The random step should return parameters that differ from the reference."""
        list_num = []
        current_para = copy.deepcopy(self.example.ref_para)
        para = ot.run_random_part(current_para, list_num, self.example.max_nums, 1000, self.example.indexs)
        self.assertTrue(para != self.example.ref_para)

    def test_run_large_change_part(self):
        """The large-change step should differ from both reference and best."""
        list_num = []
        seeds = {"seed": 0, "pre_seed": []}
        features = {"feature": "r", "pre_feature": ""}
        current_para = copy.deepcopy(self.example.ref_para)
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.run_large_change_part(seeds, features, self.example.indexs, current_para,
                                        self.example.max_nums, best_para, list_num)
        self.assertTrue(para != self.example.ref_para)
        self.assertTrue(para != best_para)

    def test_gen_large_random(self):
        """Check the parameter set produced by ``ot.gen_large_random``."""
        list_num = []
        index_large = {0: "height", 1: "re_height", 2: "factor", 3: "re_factor",
                       4: "base_height", 5: "enrichment", 6: "processing"}
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.gen_large_random(self.example.max_nums, "height", 0.2, list_num, 0.3,
                                   best_para, index_large, self.example.indexs)
        self.assertTrue(para != best_para)
        self.assertTrue(para["height"] > para["re_height"])

    def test_run_small_change_part(self):
        """The small-change step should return parameters unlike ``best_para``."""
        seeds = {"seed": 0, "pre_seed": []}
        features = {"feature": "l", "pre_feature": ""}
        current_para = copy.deepcopy(self.example.ref_para)
        list_num = []
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.run_small_change_part(seeds, features, self.example.indexs, current_para,
                                        best_para, list_num, self.example.max_nums)
        self.assertTrue(para != best_para)

    def test_small_change(self):
        """Check the value returned by ``ot.small_change``."""
        list_num = []
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.small_change(0.9, "height", 0.2, list_num, 0.5, best_para)
        self.assertTrue(para != 0.5)
        self.assertTrue(para > 0.2)

    def test_plus_process(self):
        """Check the value returned by ``ot.plus_process``."""
        list_num = []
        actions = {"plus": False, "minus": False}
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.plus_process("height", best_para, 0.9, 0.5, actions, list_num, 0.2)
        self.assertEqual(para, 0.4)

    def test_minus_process(self):
        """Check the value returned by ``ot.minus_process``."""
        list_num = []
        actions = {"plus": False, "minus": False}
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.minus_process("height", best_para, 0.9, 0.5, actions, list_num, 0.1)
        self.assertEqual(para, 0.2)

    def test_compare_manual_predict(self):
        """Check the statistics line written by ``ot.compare_manual_predict``."""
        out = StringIO()
        manual = os.path.join(self.test_folder, "manual.gff")
        predict = os.path.join(self.test_folder, "predict.gff")
        gen_file(manual, self.example.manual_file)
        gen_file(predict, self.example.gff_file)
        para_list = [copy.deepcopy(self.example.best_para)]
        args = self.mock_args.mock()
        args.manual = manual
        args.cores = 1
        args.gene_length = 2000
        args.cluster = 3
        ot.compare_manual_predict(1000, para_list, [predict], self.test_folder,
                                  out, args)
        self.assertEqual(
            out.getvalue(),
            "1000\the_0.3_rh_0.2_fa_0.7_rf_0.3_bh_0.0_ef_2.5_pf_3.3\t"
            "TP\t1\tTP_rate\t0.5\tFP\t1\tFP_rate\t0.0005005005005005005\t"
            "FN\t1\tmissing_ratio\t0.5\n")

    def test_compute_stat(self):
        """Check the parameters and statistics returned by ``ot.compute_stat``."""
        list_num = [self.example.best_para]
        best_para = {'re_factor': 0.3, 'processing': 3.3, 'enrichment': 2.5,
                     'height': 0.5, 'base_height': 0.0, 're_height': 0.2, 'factor': 0.7}
        self.example.indexs["change"] = True
        best = {"tp_rate": 0.6, "fp_rate": 0.0025, "tp": 40, "fp": 32, "fn": 45, "missing_ratio": 0.004}
        datas = ot.compute_stat(self.example.best, best, best_para, 1, list_num, self.test_folder, self.example.indexs)
        self.assertDictEqual(datas[0], self.example.best_para)
        self.assertDictEqual(datas[1], self.example.best)

    def test_run_tss_and_stat(self):
        """Run one TSS-and-statistics step with the external calls mocked."""
        list_num = [self.example.best_para]
        seeds = {"seed": 0, "pre_seed": []}
        best_para = {'re_factor': 0.3, 'processing': 3.3, 'enrichment': 2.5,
                     'height': 0.5, 'base_height': 0.0, 're_height': 0.2, 'factor': 0.7}
        current_para = {'re_factor': 0.3, 'processing': 2.3, 'enrichment': 2.5,
                        'height': 0.5, 'base_height': 0.2, 're_height': 0.2, 'factor': 0.7}
        stat_out = StringIO()
        wig = os.path.join(self.test_folder, "wig")
        fasta = os.path.join(self.test_folder, "aaa.fa")
        gff = os.path.join(self.test_folder, "aaa.gff")
        if not os.path.exists(wig):
            os.mkdir(wig)
        gen_file(fasta, self.example.fasta)
        gen_file(gff, self.example.gff_file)
        self._mock_tsspredator()
        args = self.mock_args.mock()
        args.steps = 2000
        args.cores = 1
        args.tsspredator_path = "test"
        args.libs = self.example.libs
        args.program = "TSS"
        args.cluster = 3
        args.utr = 200
        args.replicate = 2
        args.replicate_name = "rep"
        args.project_strain = "aaa"
        args.manual = os.path.join(self.test_folder, "manual.gff")
        args.gene_length = 2000
        gen_file(args.manual, self.example.manual_file)
        datas = ot.run_tss_and_stat(self.example.indexs, list_num, seeds, 0.4, 0.3,
                                    self.test_folder, stat_out, best_para, current_para,
                                    wig, fasta, gff, self.example.best, 3, args)
        self.assertFalse(datas[0])

    def test_gen_config(self):
        """Check the path and content of the config written by ``ot.gen_config``."""
        wig = os.path.join(self.test_folder, "wig")
        if not os.path.exists(wig):
            os.mkdir(wig)
        fasta = os.path.join(self.test_folder, "aaa.fa")
        gff = os.path.join(self.test_folder, "aaa.gff")
        gen_file(fasta, self.example.fasta)
        gen_file(gff, self.example.gff_file)
        args = self.mock_args.mock()
        args.libs = self.example.libs
        args.cores = 1
        args.cluster = 3
        args.program = "TSS"
        args.project_strain = "aaa"
        args.replicate = 1
        args.utr = 200
        args.replicate_name = "test"
        filename = ot.gen_config(self.example.best_para, self.test_folder, 1, wig, fasta, gff, args)
        self.assertEqual(filename, "test_folder/config_1.ini")
        data = import_data("test_folder/config_1.ini")
        self.assertEqual("\n".join(data), self.example.config)

    def test_comparison(self):
        """Check the counters filled in by ``ot.comparison``."""
        nums = {"overlap": 0, "predict": 0, "manual": 0}
        for index in range(0, 3):
            self.example.mans[index].attributes["print"] = False
            self.example.gffs[index].attributes["print"] = False
        args = self.mock_args.mock()
        args.cluster = 3
        args.gene_length = 2000
        ot.comparison(self.example.mans, self.example.gffs, nums, args)
        self.assertDictEqual(nums, {'manual': 1, 'predict': 2, 'overlap': 1})

    def test_check_overlap(self):
        """Check the two values returned by ``ot.check_overlap``."""
        nums = {"overlap": 0, "predict": 0, "manual": 0}
        datas = ot.check_overlap(True, None, nums, 2000, self.example.mans[0], self.example.gffs[0], 100)
        self.assertFalse(datas[0])
        self.assertEqual(datas[1], 140)

    def test_print_lib(self):
        """Check the library lines written by ``ot.print_lib``."""
        libs = [{"condition": 1, "replicate": "a", "wig": "test_1.wig"},
                {"condition": 2, "replicate": "a", "wig": "test_2.wig"}]
        out = StringIO()
        ot.print_lib(2, libs, out, self.test_folder, "aaa")
        self.assertEqual(out.getvalue(), "aaa_1a = test_folder/test_1.wig\naaa_2a = test_folder/test_2.wig\n")

    def test_import_lib(self):
        """Check the library count returned by ``ot.import_lib``."""
        out = StringIO()
        wig_folder = self._make_wig_folder()
        lib_dict = {"fp": [], "fm": [], "np": [], "nm": []}
        self._gen_tex_wigs(wig_folder)
        args = self.mock_args.mock()
        args.project_strain = "aaa"
        args.program = "TSS"
        args.libs = self.example.libs
        lib_num = ot.import_lib(wig_folder, set(), lib_dict, out, "aaa.gff",
                                [], "aaa.fa", args)
        self.assertEqual(lib_num, 1)

    def test_optimization_process(self):
        """Run ``ot.optimization_process`` with the external calls mocked."""
        current_para = copy.deepcopy(self.example.ref_para)
        best_ref_para = copy.deepcopy(self.example.best_para)
        list_num = [best_ref_para]
        indexs = copy.deepcopy(self.example.indexs)
        best_para = {'re_factor': 0.3, 'processing': 3.3, 'enrichment': 2.5,
                     'height': 0.6, 'base_height': 0.0, 're_height': 0.2, 'factor': 0.7}
        stat_out = StringIO()
        gen_file(os.path.join(self.test_folder, "manual.gff"), self.example.manual_file)
        wig_folder = self._make_wig_folder()
        self._gen_tex_wigs(wig_folder)
        self._mock_tsspredator()
        args = self.mock_args.mock()
        args.libs = self.example.libs
        args.cores = 1
        args.cluster = 3
        args.program = "TSS"
        args.project_strain = "aaa"
        args.replicate = 1
        args.utr = 200
        args.replicate_name = "test"
        args.steps = 2
        args.tsspredator_path = "test"
        args.gene_length = 2000
        args.manual = os.path.join(self.test_folder, "manual.gff")
        ot.optimization_process(indexs, current_para, list_num, self.example.max_nums, best_para,
                                self.test_folder, stat_out, self.example.best, wig_folder, "aaa.fa", "aaa.gff",
                                2000, True, args)
        self.assertDictEqual(best_para, {'re_height': 0.2, 'factor': 0.7, 'processing': 3.3,
                                         'height': 0.6, 'base_height': 0.0, 're_factor': 0.3,
                                         'enrichment': 2.5})
        self.assertDictEqual(self.example.best, {'missing_ratio': 0.29991126885536823, 'tp': 789.0,
                                                 'tp_rate': 0.7000887311446318, 'fp': 230.0,
                                                 'fn': 338.0, 'fp_rate': 8.15542105020548e-05})

    def test_optimization(self):
        """Run ``ot.optimization`` end to end and check that ``stat.csv`` exists."""
        self._mock_tsspredator()
        wig_folder = self._make_wig_folder()
        fasta = os.path.join(self.test_folder, "aaa.fa")
        gff = os.path.join(self.test_folder, "aaa.gff")
        gen_file(fasta, self.example.fasta)
        gen_file(gff, self.example.gff_file)
        args = self.mock_args.mock()
        args.libs = self.example.libs
        args.cores = 1
        args.cluster = 3
        args.program = "TSS"
        args.project_strain = "aaa"
        args.replicate = 1
        args.utr = 200
        args.steps = 2
        args.gene_length = 2000
        args.height = 0.9
        args.height_reduction = 0.8
        args.factor = 0.9
        args.factor_reduction = 0.8
        args.base_height = 0.01
        args.enrichment = 0.5
        args.processing = 0.5
        args.length = None
        args.replicate_name = "test"
        args.tsspredator_path = "test"
        args.manual = os.path.join(self.test_folder, "manual.gff")
        gen_file(args.manual, self.example.manual_file)
        args.output_folder = self.test_folder
        ot.optimization(wig_folder, fasta, gff, args)
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "optimized_TSSpredator", "stat.csv")))
Пример #48
0
class TestsTSSpredator(unittest.TestCase):
    """Tests for the ``UTRDetection`` object that ``setUp`` builds.

    All files live under a scratch ``test_folder`` tree that is rebuilt
    for every test and removed again in ``tearDown``.
    """

    def setUp(self):
        """Create the fixture objects, a fresh folder tree and the detector."""
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.mock_parser = Mock_Multiparser()
        self.example = Example()
        self.test_folder = "test_folder"
        self.trans = "test_folder/trans"
        self.out = "test_folder/output"
        self.gffs = "test_folder/gffs"
        self.tsss = "test_folder/tsss"
        self.terms = "test_folder/terms"
        # A folder left over from an interrupted run would otherwise skip the
        # creation of every sub-folder below, so always start from scratch.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        for folder in (self.test_folder,
                       self.trans,
                       os.path.join(self.trans, "tmp"),
                       self.out,
                       self.gffs,
                       self.tsss,
                       os.path.join(self.tsss, "tmp"),
                       self.terms):
            os.mkdir(folder)
        args = self.mock_args.mock()
        args.tsss = self.tsss
        args.trans = self.trans
        args.out_folder = self.out
        self.utr = UTRDetection(args)

    def tearDown(self):
        """Remove the scratch folder tree if it is still present."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _patch_attr(self, module, name, replacement):
        """Set ``module.<name>`` to ``replacement`` for the current test only.

        A cleanup restores the previous value (or deletes the attribute when
        it did not exist before), so a stub cannot leak into later tests.
        """
        missing = object()
        previous = getattr(module, name, missing)
        if previous is missing:
            self.addCleanup(delattr, module, name)
        else:
            self.addCleanup(setattr, module, name, previous)
        setattr(module, name, replacement)

    def _mock_utr_detectors(self):
        """Replace ``ut.detect_5utr`` and ``ut.detect_3utr`` with the mocks."""
        self._patch_attr(ut, "detect_5utr", self.mock.mock_detect_5utr)
        self._patch_attr(ut, "detect_3utr", self.mock.mock_detect_3utr)

    def _make_stat_folders(self):
        """Create ``<out>/5UTR|3UTR/statistics`` and return both paths."""
        stat_paths = []
        for kind in ("5UTR", "3UTR"):
            stat_path = os.path.join(self.out, kind, "statistics")
            # Also creates the intermediate 5UTR / 3UTR folder.
            os.makedirs(stat_path)
            stat_paths.append(stat_path)
        return stat_paths

    def _gen_annotation_files(self, term_folder):
        """Write the example GFF inputs; the terminator file goes to ``term_folder``."""
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gen_file(os.path.join(self.trans, "test_transcript.gff"), self.example.tran_file)
        gen_file(os.path.join(self.tsss, "test_TSS.gff"), self.example.tss_file)
        gen_file(os.path.join(term_folder, "test_term.gff"), self.example.term_file)

    def _assert_length_plots(self, utr5_stat_path, utr3_stat_path):
        """Assert that both UTR length plots were written."""
        self.assertTrue(os.path.exists(os.path.join(utr5_stat_path, "test_5utr_length.png")))
        self.assertTrue(os.path.exists(os.path.join(utr3_stat_path, "test_3utr_length.png")))

    def test_compute_utr(self):
        """``_compute_utr`` should leave both length plots in the statistics folders."""
        self._mock_utr_detectors()
        term_path = os.path.join(self.terms, "tmp")
        os.mkdir(term_path)
        utr5_stat_path, utr3_stat_path = self._make_stat_folders()
        self._gen_annotation_files(term_path)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.tsss = self.tsss
        args.trans = self.trans
        args.terms = self.terms
        self.utr._compute_utr(args)
        self._assert_length_plots(utr5_stat_path, utr3_stat_path)

    def test_run_utr_detection(self):
        """``run_utr_detection`` should leave both length plots in the statistics folders."""
        # Instance-level override; it disappears with self.utr after the test.
        self.utr._check_gff = self.mock.mock_check_gff
        self._mock_utr_detectors()
        utr5_stat_path, utr3_stat_path = self._make_stat_folders()
        self._gen_annotation_files(self.terms)
        args = self.mock_args.mock()
        args.tsss = self.tsss
        args.gffs = self.gffs
        args.trans = self.trans
        args.terms = self.terms
        args.out_folder = self.out
        self.utr.run_utr_detection(args)
        self._assert_length_plots(utr5_stat_path, utr3_stat_path)
Пример #49
0
class TestOptimizeTSSpredator(unittest.TestCase):
    def setUp(self):
        """Build the fixture objects and make sure the scratch folder exists."""
        self.test_folder = "test_folder"
        self.example = Example()
        self.mock_args = MockClass()
        if os.path.exists(self.test_folder):
            return
        os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch folder when the test left one behind."""
        scratch = self.test_folder
        if not os.path.exists(scratch):
            return
        shutil.rmtree(scratch)

    def test_initiate(self):
        """Check the four structures returned by ``ot.initiate``."""
        args = self.mock_args.mock()
        args.height, args.height_reduction = 0.9, 0.8
        args.factor, args.factor_reduction = 0.9, 0.8
        args.base_height = 0.01
        args.enrichment = args.processing = 0.5

        max_num, best_para, current_para, indexs = ot.initiate(args)

        expected_max = dict(
            re_factor=0.8,
            processing=0.5,
            enrichment=0.5,
            height=0.9,
            base_height=0.01,
            re_height=0.8,
            factor=0.9,
        )
        # Same keys as the maxima, every value zero.
        expected_zero = dict.fromkeys(expected_max, 0)
        expected_indexs = dict(
            step=0,
            change=False,
            num=0,
            first=True,
            length=0,
            exist=False,
            switch=0,
            extend=False,
            count=0,
        )
        self.assertDictEqual(max_num, expected_max)
        self.assertDictEqual(best_para, expected_zero)
        self.assertDictEqual(current_para, expected_zero)
        self.assertDictEqual(indexs, expected_indexs)

    def test_read_predict_manual_gff(self):
        """Check the count and first entry read from the example GFF.

        ``ot.read_predict_manual_gff`` is called with the gene length itself,
        so no mock ``args`` object is needed here.
        """
        gene_length = 1000
        gff = os.path.join(self.test_folder, "test.gff")
        gen_file(gff, self.example.gff_file)
        num, gffs = ot.read_predict_manual_gff(gff, gene_length)
        self.assertEqual(num, 1)
        self.assertEqual(gffs[0].start, 633)

    def test_scoring_function(self):
        """Check the ``change`` flag after scoring against two different bests."""
        indexs = self.example.indexs

        candidate = dict(tp_rate=0.8, fp_rate=0.0003, tp=100, fp=3)
        reference = dict(tp_rate=0.8, fp_rate=0.0005, tp=100, fp=31,
                         fn=45, missing_ratio=0.004)
        ot.scoring_function(reference, candidate, indexs, 1000)
        self.assertTrue(indexs["change"])

        # Clear the flag so the second call is judged on its own.
        indexs["change"] = False
        candidate = dict(tp_rate=0.8, fp_rate=0.0004, tp=100, fp=13)
        reference = dict(tp_rate=0.8, fp_rate=0.0003, tp=100, fp=3)
        ot.scoring_function(reference, candidate, indexs, 1000)
        self.assertFalse(indexs["change"])

    def test_load_stat_csv(self):
        """Check the three values ``ot.load_stat_csv`` returns for ``stat.csv``."""
        stat_file = os.path.join(self.test_folder, "stat.csv")
        gen_file(stat_file, self.example.stat)

        expected_stat = dict(
            fp=230.0,
            tp=789.0,
            missing_ratio=0.29991126885536823,
            fp_rate=8.15542105020548e-05,
            tp_rate=0.7000887311446318,
            fn=338.0,
        )
        expected_para = dict(
            processing=5.2,
            base_height=0.086,
            factor=7.6,
            re_height=2.3,
            re_factor=5.5,
            enrichment=3.1,
            height=2.4,
        )

        datas = ot.load_stat_csv(self.test_folder, [], self.example.best,
                                 {}, self.example.indexs, 1000,
                                 stat_file)
        self.assertEqual(datas[0], 2)
        self.assertDictEqual(datas[1], expected_stat)
        self.assertDictEqual(datas[2], expected_para)

    def test_reload_data(self):
        """Check the parameters and statistics returned by ``ot.reload_data``."""
        stat_file = os.path.join(self.test_folder, "stat.csv")
        gen_file(stat_file, self.example.stat)
        list_num = []
        best_para = {}
        log_path = os.path.join(self.test_folder, "test.log")
        # Close the log before tearDown removes the folder; an open handle
        # leaks a file descriptor and can block the removal on some platforms.
        with open(log_path, "w") as log:
            datas = ot.reload_data(self.test_folder, list_num,
                                   self.example.best, best_para,
                                   self.example.indexs, 1000, stat_file,
                                   log)
        self.assertDictEqual(
            datas[0], {
                'base_height': 0.086,
                'processing': 5.2,
                'height': 2.4,
                'enrichment': 3.1,
                're_factor': 5.5,
                're_height': 2.3,
                'factor': 7.6
            })
        self.assertDictEqual(
            datas[1], {
                'tp_rate': 0.7000887311446318,
                'tp': 789.0,
                'fn': 338.0,
                'fp': 230.0,
                'fp_rate': 8.15542105020548e-05,
                'missing_ratio': 0.29991126885536823
            })

    def test_extend_data(self):
        """``ot.extend_data`` should return parameters equal to the best set."""
        reference = copy.deepcopy(self.example.best_para)
        extended = ot.extend_data(
            self.test_folder, self.example.best, reference, 100, "aaa")
        self.assertDictEqual(extended, reference)

    def test_run_random_part(self):
        """The random step should return parameters that differ from the reference."""
        start = copy.deepcopy(self.example.ref_para)
        para = ot.run_random_part(
            start, [], self.example.max_nums, 1000, self.example.indexs)
        differs = para != self.example.ref_para
        self.assertTrue(differs)

    def test_run_large_change_part(self):
        """The large-change step should differ from both reference and best."""
        best_para = copy.deepcopy(self.example.best_para)
        current_para = copy.deepcopy(self.example.ref_para)
        seeds = dict(seed=0, pre_seed=[])
        features = dict(feature="r", pre_feature="")
        para = ot.run_large_change_part(
            seeds, features, self.example.indexs, current_para,
            self.example.max_nums, best_para, [])
        for untouched in (self.example.ref_para, best_para):
            self.assertTrue(para != untouched)

    def test_gen_large_random(self):
        """Check the parameter set produced by ``ot.gen_large_random``."""
        # Position -> parameter name, numbered from 0.
        index_large = dict(enumerate((
            "height",
            "re_height",
            "factor",
            "re_factor",
            "base_height",
            "enrichment",
            "processing",
        )))
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.gen_large_random(
            self.example.max_nums, "height", 0.2, [], 0.3,
            best_para, index_large, self.example.indexs)
        self.assertTrue(para != best_para)
        self.assertTrue(para["height"] > para["re_height"])

    def test_run_small_change_part(self):
        """The small-change step should return parameters unlike the best set."""
        best_para = copy.deepcopy(self.example.best_para)
        current_para = copy.deepcopy(self.example.ref_para)
        seeds = dict(seed=0, pre_seed=[])
        features = dict(feature="l", pre_feature="")
        para = ot.run_small_change_part(
            seeds, features, self.example.indexs, current_para,
            best_para, [], self.example.max_nums)
        differs = para != best_para
        self.assertTrue(differs)

    def test_small_change(self):
        """Check the value returned by ``ot.small_change``."""
        reference = copy.deepcopy(self.example.best_para)
        para = ot.small_change(0.9, "height", 0.2, [], 0.5, reference)
        for holds in (para != 0.5, para > 0.2):
            self.assertTrue(holds)

    def test_plus_process(self):
        """Check the value returned by ``ot.plus_process``."""
        reference = copy.deepcopy(self.example.best_para)
        actions = dict(plus=False, minus=False)
        result = ot.plus_process(
            "height", reference, 0.9, 0.5, actions, [], 0.2)
        self.assertEqual(result, 0.4)

    def test_minus_process(self):
        """Check the value returned by ``ot.minus_process``."""
        reference = copy.deepcopy(self.example.best_para)
        actions = dict(plus=False, minus=False)
        result = ot.minus_process(
            "height", reference, 0.9, 0.5, actions, [], 0.1)
        self.assertEqual(result, 0.2)

    def test_compare_manual_predict(self):
        """Check the statistics line written by ``ot.compare_manual_predict``."""
        manual = os.path.join(self.test_folder, "manual.gff")
        predict = os.path.join(self.test_folder, "predict.gff")
        gen_file(manual, self.example.manual_file)
        gen_file(predict, self.example.gff_file)

        args = self.mock_args.mock()
        args.manual = manual
        args.cores = 1
        args.gene_length = 2000
        args.cluster = 3

        report = StringIO()
        parameters = [copy.deepcopy(self.example.best_para)]
        ot.compare_manual_predict(1000, parameters, [predict], self.test_folder,
                                  report, args, self.example.mans, 3, 2000)

        expected_line = "1000\the_0.3_rh_0.2_fa_0.7_rf_0.3_bh_0.0_ef_2.5_pf_3.3\tTP=0\tTP_rate=0.0\tFP=2\tFP_rate=0.00100150225338007\tFN=2\tmissing_ratio=0.6666666666666666\n"
        self.assertEqual(report.getvalue(), expected_line)

    def test_compute_stat(self):
        """Check the parameters and statistics returned by ``ot.compute_stat``."""
        example = self.example
        example.indexs["change"] = True
        best_para = dict(
            re_factor=0.3,
            processing=3.3,
            enrichment=2.5,
            height=0.5,
            base_height=0.0,
            re_height=0.2,
            factor=0.7,
        )
        best = dict(
            tp_rate=0.6,
            fp_rate=0.0025,
            tp=40,
            fp=32,
            fn=45,
            missing_ratio=0.004,
        )
        datas = ot.compute_stat(example.best, best, best_para, 1,
                                [example.best_para], self.test_folder,
                                example.indexs, "aaa")
        self.assertDictEqual(datas[0], example.best_para)
        self.assertDictEqual(datas[1], example.best)

    def test_run_tss_and_stat(self):
        """Run ot.run_tss_and_stat with the TSSpredator helpers mocked out.

        The call must return three values; the first item of the first
        returned value is expected to be falsy.
        """
        list_num = [self.example.best_para]
        seeds = {"seed": 0, "pre_seed": []}
        best_para = {
            're_factor': 0.3,
            'processing': 3.3,
            'enrichment': 2.5,
            'height': 0.5,
            'base_height': 0.0,
            're_height': 0.2,
            'factor': 0.7
        }
        current_para = {
            're_factor': 0.3,
            'processing': 2.3,
            'enrichment': 2.5,
            'height': 0.5,
            'base_height': 0.2,
            're_height': 0.2,
            'factor': 0.7
        }
        stat_out = StringIO()
        wig = os.path.join(self.test_folder, "wig")
        fasta = os.path.join(self.test_folder, "aaa.fa")
        gff = os.path.join(self.test_folder, "aaa.gff")
        if not os.path.exists(wig):
            os.mkdir(wig)
        gen_file(fasta, self.example.fasta)
        gen_file(gff, self.example.gff_file)
        # NOTE(review): these module attributes are replaced here and are not
        # put back afterwards, so the mocks stay installed for later tests.
        ot.run_TSSpredator_paralle = Mock_func().mock_run_TSSpredator_paralle
        ot.convert2gff = Mock_func().mock_convert2gff
        args = self.mock_args.mock()
        args.steps = 2000
        args.cores = 1
        args.tsspredator_path = "test"
        args.libs = self.example.libs
        args.program = "TSS"
        args.cluster = 3
        args.utr = 200
        args.replicate = "all_2"
        args.replicate_name = "rep"
        args.project_strain = "aaa"
        args.manual = os.path.join(self.test_folder, "manual.gff")
        args.gene_length = 2000
        gen_file(args.manual, self.example.manual_file)
        # Open the log in a with-block so the handle is closed even when the
        # call raises; an open handle can block removal of the test folder
        # on platforms that lock open files.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            datas, _set_config, _run_tss = ot.run_tss_and_stat(
                self.example.indexs, list_num, seeds, 0.4, 0.3,
                self.test_folder, stat_out, best_para, current_para, wig,
                fasta, gff, self.example.best, 3, args, "aaa",
                self.example.mans, 2000, log, True, True)
        self.assertFalse(datas[0])

    def test_gen_config(self):
        """Generate a config file with ot.gen_config and compare it.

        Both the returned file name and the content that import_data reads
        back from that file are checked against the expected values.
        """
        config_path = "test_folder/config_1.ini"
        wig_dir = os.path.join(self.test_folder, "wig")
        fasta_path = os.path.join(self.test_folder, "aaa.fa")
        gff_path = os.path.join(self.test_folder, "aaa.gff")
        if not os.path.exists(wig_dir):
            os.mkdir(wig_dir)
        gen_file(fasta_path, self.example.fasta)
        gen_file(gff_path, self.example.gff_file)

        args = self.mock_args.mock()
        args.libs = self.example.libs
        args.cores = 1
        args.cluster = 3
        args.program = "TSS"
        args.project_strain = "aaa"
        args.replicate = "all_1"
        args.utr = 200
        args.replicate_name = "test"

        returned_name = ot.gen_config(self.example.best_para,
                                      self.test_folder, 1, wig_dir,
                                      fasta_path, gff_path, args, "aaa")
        self.assertEqual(returned_name, config_path)
        config_lines = import_data(config_path)
        self.assertEqual("\n".join(config_lines), self.example.config)

    def test_comparison(self):
        """Check the counters ot.comparison fills in for the example entries."""
        counters = {"overlap": 0, "predict": 0, "manual": 0}
        # Set the "print" attribute to False on the first three manual and
        # predicted entries before comparing them.
        for idx in range(3):
            self.example.mans[idx].attributes["print"] = False
            self.example.gffs[idx].attributes["print"] = False
        args = self.mock_args.mock()
        args.cluster = 3
        args.gene_length = 2000
        ot.comparison(self.example.mans, self.example.gffs, counters, args,
                      2000)
        expected = {'manual': 1, 'predict': 2, 'overlap': 1}
        self.assertDictEqual(counters, expected)

    def test_check_overlap(self):
        """Check the first two values returned by ot.check_overlap."""
        counters = {"overlap": 0, "predict": 0, "manual": 0}
        manual_entry = self.example.mans[0]
        predicted_entry = self.example.gffs[0]
        result = ot.check_overlap(True, None, counters, 2000, manual_entry,
                                  predicted_entry, 100)
        self.assertFalse(result[0])
        self.assertEqual(result[1], 140)

    def test_print_lib(self):
        """Check the two library lines ot.print_lib writes to its handle."""
        first_lib = dict(condition=1, replicate="a", wig="test_1.wig")
        second_lib = dict(condition=2, replicate="a", wig="test_2.wig")
        buffer = StringIO()
        ot.print_lib(2, [first_lib, second_lib], buffer, self.test_folder,
                     "aaa", ["a"])
        expected_text = ("aaa_1a = test_folder/test_1.wig\n"
                         "aaa_2a = test_folder/test_2.wig\n")
        self.assertEqual(buffer.getvalue(), expected_text)

    def test_import_lib(self):
        """Check the number ot.import_lib returns for four example wig files."""
        config_out = StringIO()
        wigs_root = os.path.join(self.test_folder, "wigs")
        wig_folder = os.path.join(wigs_root, "tmp")
        # Create the parent first, then the nested folder, skipping either
        # one that already exists.
        for folder in (wigs_root, wig_folder):
            if not os.path.exists(folder):
                os.mkdir(folder)
        lib_dict = {"fp": [], "fm": [], "np": [], "nm": []}
        wig_names = (
            "GSM1649587_Hp26695_ML_B1_HS1_-TEX_forward_STRAIN_aaa.wig",
            "GSM1649587_Hp26695_ML_B1_HS1_-TEX_reverse_STRAIN_aaa.wig",
            "GSM1649588_Hp26695_ML_B1_HS1_-TEX_forward_STRAIN_aaa.wig",
            "GSM1649588_Hp26695_ML_B1_HS1_-TEX_reverse_STRAIN_aaa.wig",
        )
        for wig_name in wig_names:
            gen_file(os.path.join(wig_folder, wig_name), "test")
        args = self.mock_args.mock()
        args.project_strain = "aaa"
        args.program = "TSS"
        args.libs = self.example.libs
        lib_count = ot.import_lib(wig_folder, set(), lib_dict, config_out,
                                  "aaa.gff", [], "aaa.fa", args, "aaa")
        self.assertEqual(lib_count, 1)

    def test_optimization_process(self):
        """Run ot.optimization_process with the TSSpredator helpers mocked.

        With ``args.steps = 2`` the ``best_para`` dictionary handed to the
        function is expected to hold its initial values afterwards.
        """
        current_para = copy.deepcopy(self.example.ref_para)
        best_ref_para = copy.deepcopy(self.example.best_para)
        list_num = [best_ref_para]
        indexs = copy.deepcopy(self.example.indexs)
        best_para = {
            're_factor': 0.3,
            'processing': 3.3,
            'enrichment': 2.5,
            'height': 0.6,
            'base_height': 0.0,
            're_height': 0.2,
            'factor': 0.7
        }
        stat_out = StringIO()
        gen_file(os.path.join(self.test_folder, "manual.gff"),
                 self.example.manual_file)
        if not os.path.exists(os.path.join(self.test_folder, "wigs")):
            os.mkdir(os.path.join(self.test_folder, "wigs"))
        wig_folder = os.path.join(self.test_folder, "wigs", "tmp")
        if not os.path.exists(wig_folder):
            os.mkdir(wig_folder)
        # Four placeholder wig files, created in the same order as before.
        for wig_name in (
                "GSM1649587_Hp26695_ML_B1_HS1_-TEX_forward_STRAIN_aaa.wig",
                "GSM1649587_Hp26695_ML_B1_HS1_-TEX_reverse_STRAIN_aaa.wig",
                "GSM1649588_Hp26695_ML_B1_HS1_-TEX_forward_STRAIN_aaa.wig",
                "GSM1649588_Hp26695_ML_B1_HS1_-TEX_reverse_STRAIN_aaa.wig"):
            gen_file(os.path.join(wig_folder, wig_name), "test")
        # NOTE(review): these module attributes are replaced here and are not
        # put back afterwards, so the mocks stay installed for later tests.
        ot.run_TSSpredator_paralle = Mock_func().mock_run_TSSpredator_paralle
        ot.convert2gff = Mock_func().mock_convert2gff
        args = self.mock_args.mock()
        args.libs = self.example.libs
        args.cores = 1
        args.cluster = 3
        args.program = "TSS"
        args.project_strain = "aaa"
        args.replicate = "all_1"
        args.utr = 200
        args.replicate_name = "test"
        args.steps = 2
        args.tsspredator_path = "test"
        args.gene_length = 2000
        args.manual = os.path.join(self.test_folder, "manual.gff")
        # The with-block closes the log handle even if the call raises.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            ot.optimization_process(indexs, current_para, list_num,
                                    self.example.max_nums, best_para,
                                    self.test_folder, stat_out,
                                    self.example.best, wig_folder, "aaa.fa",
                                    "aaa.gff", 2, True, args, "aaa",
                                    self.example.mans, 2000, log)
        self.assertDictEqual(
            best_para, {
                're_height': 0.2,
                'factor': 0.7,
                'processing': 3.3,
                'height': 0.6,
                'base_height': 0.0,
                're_factor': 0.3,
                'enrichment': 2.5
            })

    def test_optimization(self):
        """Run ot.optimization with the TSSpredator helpers mocked out.

        The test passes when ``stat_aaa.csv`` exists in the
        ``optimized_TSSpredator`` folder after the call.
        """
        # NOTE(review): these module attributes are replaced here and are not
        # put back afterwards, so the mocks stay installed for later tests.
        ot.run_TSSpredator_paralle = Mock_func().mock_run_TSSpredator_paralle
        ot.convert2gff = Mock_func().mock_convert2gff
        if not os.path.exists(os.path.join(self.test_folder, "wigs")):
            os.mkdir(os.path.join(self.test_folder, "wigs"))
        wig_folder = os.path.join(self.test_folder, "wigs", "tmp")
        if not os.path.exists(wig_folder):
            os.mkdir(wig_folder)
        fasta = os.path.join(self.test_folder, "aaa.fa")
        gff = os.path.join(self.test_folder, "aaa.gff")
        gen_file(fasta, self.example.fasta)
        gen_file(gff, self.example.gff_file)
        args = self.mock_args.mock()
        args.libs = self.example.libs
        args.cores = 1
        args.cluster = 3
        args.program = "TSS"
        args.project_strain = "aaa"
        args.replicate = "all_1"
        args.utr = 200
        args.steps = 2
        args.gene_length = 2000
        args.height = 0.9
        args.height_reduction = 0.8
        args.factor = 0.9
        args.factor_reduction = 0.8
        args.base_height = 0.01
        args.enrichment = 0.5
        args.processing = 0.5
        args.length = None
        args.replicate_name = "test"
        args.tsspredator_path = "test"
        args.manual = os.path.join(self.test_folder, "manual.gff")
        gen_file(args.manual, self.example.manual_file)
        args.output_folder = self.test_folder
        os.mkdir(os.path.join(self.test_folder, "optimized_TSSpredator"))
        # The with-block closes the log handle even if the call raises.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            ot.optimization(wig_folder, fasta, gff, args, args.manual, 2000,
                            "aaa", log)
        self.assertTrue(
            os.path.exists(
                os.path.join(self.test_folder, "optimized_TSSpredator",
                             "stat_aaa.csv")))
Пример #50
0
class TestPPI(unittest.TestCase):
    """Tests for PPINetwork, run inside a scratch ``test_folder`` directory.

    Tests that would reach ``_run_wget`` replace it on the PPINetwork
    instance with ``Mock_func().mock_run_wget``. Every log file is opened in
    a ``with`` block so its handle is closed before tearDown removes the
    folder.
    """

    def setUp(self):
        """Create the scratch folder tree and the objects the tests share."""
        self.test_folder = "test_folder"
        self.mock_args = MockClass()
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)
            # "tmp_nospecific" is not created as a folder here; presumably
            # because test_get_pubmed writes a plain file with that name.
            for sub_folder in ("tmp_id_list",
                               "with_strain",
                               "with_strain/test_ptt",
                               "without_strain",
                               "without_strain/test_ptt",
                               "all_results",
                               "best_results",
                               "figures"):
                os.mkdir(os.path.join(self.test_folder, sub_folder))
        self.ppi = PPINetwork(self.test_folder)
        self.mock = Mock_func()
        self.example = Example()

    def tearDown(self):
        """Remove the scratch folder if it exists."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_wget_id(self):
        """_wget_id is expected to return a truthy value with wget mocked."""
        self.ppi._run_wget = self.mock.mock_run_wget
        strain = "test_strain"
        locus = "test_locus"
        strain_id = {"ptt": "test_strain", "string": "string_test",
                     "file": "file_test"}
        files = {"id_list": "test", "id_log": "test"}
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            detect = self.ppi._wget_id(strain, locus, strain_id, files, log)
        self.assertTrue(detect)

    def test_retrieve_id(self):
        """Smoke test: _retrieve_id must run without raising (no asserts)."""
        self.ppi._run_wget = self.mock.mock_run_wget
        strain_id = {"ptt": "test_strain", "string": "string_test",
                     "file": "file_test"}
        files = {"id_list": "test", "id_log": "test"}
        genes = [{"strain": "test_strain", "locus_tag": "test_locus",
                  "gene": "dnaA"}]
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.ppi._retrieve_id(strain_id, genes, files, log)

    def test_get_prefer_name(self):
        """_get_prefer_name is expected to return "test_aaa" for "999.aaa"."""
        row_a = "999.aaa"
        files = {"id_list": self.test_folder}
        gen_file(os.path.join(self.test_folder, "aaa"),
                 "999.aaa\t222\t333\ttest_aaa")
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            name = self.ppi._get_prefer_name(row_a, "test", files, "test",
                                             log)
        self.assertEqual(name, "test_aaa")

    def test_get_pubmed(self):
        """Run _get_pubmed and compare the two CSV files it should produce."""
        out_all = StringIO()
        out_best = StringIO()
        out_noall = StringIO()
        out_nobest = StringIO()
        self.ppi._run_wget = self.mock.mock_run_wget
        files = {"id_list": self.test_folder, "id_log": "test",
                 "pubmed_log": "test",
                 "all_specific": out_all, "best_specific": out_best,
                 "all_nospecific": out_noall, "best_nospecific": out_nobest}
        row = self.example.ppi_line.split("\t")
        strain_id = {"file": "test_file", "ptt": "test_ptt",
                     "string": "test_string", "pie": "test_pie"}
        mode = "interaction"
        actor = "test_A"
        id_file = "SAOUHSC_01684"
        ptt = "test_ptt"
        gen_file(os.path.join(self.test_folder, "SAOUHSC_01684"),
                 "93061.SAOUHSC_01684\t93061.SAOUHSC_01683\t333\ttest_aaa")
        gen_file(os.path.join(self.test_folder, "SAOUHSC_01683"),
                 "93061.SAOUHSC_01683\t93061.SAOUHSC_01684\t333\ttest_bbb")
        gen_file(os.path.join(self.test_folder, "tmp_specific"),
                 "")
        gen_file(os.path.join(self.test_folder, "tmp_nospecific"),
                 "12949105\t0.151711\n1404401\t-0.210303")
        paths = {"all": self.test_folder, "fig": self.test_folder,
                 "best": self.test_folder}
        first_output = {"specific_all": True, "specific_best": True,
                        "nospecific_all": True, "nospecific_best": True}
        args = self.mock_args.mock()
        args.out_folder = self.test_folder
        args.querys = "all"
        args.no_specific = True
        args.score = 0
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.ppi._get_pubmed(row, strain_id, mode, actor, id_file,
                                 first_output, ptt, files, paths, args, log)
        data = import_data(
            "test_folder/without_strain/test_ptt/test_aaa_test_bbb.csv")
        self.assertEqual("\n".join(data), self.example.without_out)
        data = import_data(
            "test_folder/with_strain/test_ptt/test_aaa_test_bbb.csv")
        self.assertEqual("\n".join(data), self.example.with_out)

    def test_merge_information(self):
        """Both output buffers are expected to receive the example line."""
        first_output = {"specific_all": True, "specific_best": True,
                        "nospecific_all": True, "nospecific_best": True}
        out_all = StringIO()
        out_best = StringIO()
        row_a = self.example.ppi_line.split("\t")
        score = 111
        id_file = "SAOUHSC_01684"
        id_folder = self.test_folder
        file_type = "specific"
        all_folder = os.path.join(self.test_folder, "with_strain")
        best_folder = os.path.join(self.test_folder, "without_strain")
        ptt = "test_ptt"
        filename = os.path.join(self.test_folder, "SAOUHSC_01684")
        gen_file(filename, "93061.SAOUHSC_01684\t1000\t333\ttest_aaa")
        self.ppi._merge_information(first_output, filename, out_all,
                                    out_best, row_a, score, id_file,
                                    id_folder, file_type, all_folder,
                                    best_folder, ptt)
        self.assertEqual(out_all.getvalue(), self.example.merge_out + "\n")
        self.assertEqual(out_best.getvalue(), self.example.merge_out + "\n")

    def test_detect_protein(self):
        """Check the gene list _detect_protein returns for the example ptt."""
        gen_file(os.path.join(self.test_folder, "test"), self.example.ptt_file)
        strain_id = {"file": "test", "ptt": "test_ptt",
                     "string": "test_string", "pie": "test_pie"}
        args = self.mock_args.mock()
        args.ptts = self.test_folder
        args.querys = "all"
        genes = self.ppi._detect_protein(strain_id, args)
        self.assertListEqual(genes, [
            {'gene': 'SAOUHSC_00001', 'locus_tag': 'dnaA',
             'strain': 'Staphylococcus_aureus_HG003'},
            {'gene': 'SAOUHSC_00002', 'locus_tag': '-',
             'strain': 'Staphylococcus_aureus_HG003'},
            {'gene': 'SAOUHSC_00003', 'locus_tag': '-',
             'strain': 'Staphylococcus_aureus_HG003'}])

    def test_setup_nospecific(self):
        """_setup_nospecific should create the three without_strain folders."""
        out_all = StringIO()
        out_best = StringIO()
        out_noall = StringIO()
        out_nobest = StringIO()
        paths = {"all": os.path.join(self.test_folder, "all_results"),
                 "fig": os.path.join(self.test_folder, "figures"),
                 "best": os.path.join(self.test_folder, "best_results")}
        strain_id = {"file": "test", "ptt": "test_ptt",
                     "string": "test_string", "pie": "test_pie"}
        files = {"id_list": self.test_folder, "id_log": "test",
                 "pubmed_log": "test",
                 "all_specific": out_all, "best_specific": out_best,
                 "all_nospecific": out_noall,
                 "best_nospecific": out_nobest}
        self.ppi._setup_nospecific(paths, strain_id, files)
        # Close whatever handles are stored under these keys after the call.
        files["all_nospecific"].close()
        files["best_nospecific"].close()
        self.assertTrue(os.path.exists(
            "test_folder/all_results/without_strain/test_ptt"))
        self.assertTrue(os.path.exists(
            "test_folder/best_results/without_strain/test_ptt"))
        self.assertTrue(os.path.exists(
            "test_folder/figures/without_strain/test_ptt"))

    def test_setup_folder_and_read_file(self):
        """Check the folders created and the genes read from the ptt file."""
        paths = {"all": os.path.join(self.test_folder, "all_results"),
                 "fig": os.path.join(self.test_folder, "figures"),
                 "best": os.path.join(self.test_folder, "best_results")}
        strain_id = {"file": "test.ptt", "ptt": "test_ptt",
                     "string": "test_string", "pie": "test_pie"}
        files = {"id_list": self.test_folder, "id_log": "", "pubmed_log": "",
                 "all_specific": "", "best_specific": "",
                 "all_nospecific": "", "best_nospecific": "", "action_log": ""}
        gen_file(os.path.join(self.test_folder, "test.ptt"),
                 self.example.ptt_file)
        args = self.mock_args.mock()
        args.querys = "all"
        args.no_specific = True
        args.out_folder = self.test_folder
        args.ptts = self.test_folder
        genes = self.ppi._setup_folder_and_read_file(strain_id, "",
                                                     files, paths, args)
        # The entries start as empty strings above; after the call each one
        # must be an object with close(), which is called before asserting.
        for index in ("all_specific", "all_nospecific",
                      "best_specific", "best_nospecific",
                      "id_log", "action_log", "pubmed_log"):
            files[index].close()
        self.assertTrue(os.path.exists("test_folder/best_results/test"))
        self.assertTrue(os.path.exists("test_folder/all_results/test"))
        self.assertListEqual(genes, [
            {'strain': 'Staphylococcus_aureus_HG003', 'locus_tag': 'dnaA',
             'gene': 'SAOUHSC_00001'},
            {'strain': 'Staphylococcus_aureus_HG003', 'locus_tag': '-',
             'gene': 'SAOUHSC_00002'},
            {'strain': 'Staphylococcus_aureus_HG003', 'locus_tag': '-',
             'gene': 'SAOUHSC_00003'}])

    def test_wget_actions(self):
        """Smoke test: _wget_actions must run without raising (no asserts)."""
        gen_file(os.path.join(self.test_folder, "test.txt"), "93061\ttest")
        self.ppi._run_wget = self.mock.mock_run_wget
        files = {"id_list": self.test_folder, "id_log": "", "pubmed_log": "",
                 "all_specific": "", "best_specific": "",
                 "all_nospecific": "", "best_nospecific": "", "action_log": ""}
        strain_id = {"file": "test.ptt", "ptt": "test_ptt",
                     "string": "test_string", "pie": "test_pie"}
        id_file = "test.txt"
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.ppi._wget_actions(files, id_file, strain_id,
                                   self.test_folder, log)

    def test_retrieve_actions(self):
        """Smoke test: _retrieve_actions must run without raising."""
        self.ppi._run_wget = self.mock.mock_run_wget
        files = {"id_list": os.path.join(self.test_folder, "tmp_id_list"),
                 "id_log": "", "pubmed_log": "",
                 "all_specific": "", "best_specific": "",
                 "all_nospecific": "", "best_nospecific": "", "action_log": ""}
        strain_id = {"file": "test.ptt", "ptt": "test_ptt",
                     "string": "test_string", "pie": "test_pie"}
        paths = {"all": os.path.join(self.test_folder, "all_results"),
                 "fig": os.path.join(self.test_folder, "figures"),
                 "best": os.path.join(self.test_folder, "best_results")}
        gen_file(os.path.join(self.test_folder, "tmp_id_list/test.txt"),
                 "93061\ttest")
        gen_file(os.path.join(self.test_folder, "tmp_action"),
                 self.example.ppi_line)
        args = self.mock_args.mock()
        args.no_specific = True
        args.querys = "all"
        args.out_folder = self.test_folder
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.ppi._retrieve_actions(files, strain_id, paths, args, log)
Пример #51
0
class TestsRNAUTR(unittest.TestCase):

    def setUp(self):
        """Build the shared helper objects and make sure test_folder exists."""
        self.example = Example()
        self.mock = Mock_func()
        self.mock_args = MockClass()
        scratch = "test_folder"
        self.test_folder = scratch
        folder_missing = not os.path.exists(scratch)
        if folder_missing:
            os.mkdir(scratch)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_import_data(self):
        """Check the dictionary sud.import_data builds from its arguments."""
        position = {"start": 4, "end": 40, "ori_start": 2, "ori_end": 3}
        expected = {
            'start_cleavage': 'NA',
            'strand': '+',
            'end_cleavage': 'test',
            'start_tss': 'cds',
            'end': 40,
            'start': 4,
            'utr': '3UTR',
            'strain': 'aaa',
            'datas': 'srna_cover',
        }
        record = sud.import_data("+", "aaa", position, "3UTR", "TSS",
                                 "cds", "srna_cover", "test")
        self.assertDictEqual(record, expected)

    def test_read_data(self):
        """Read the example GFF and FASTA through sud.read_data.

        The same GFF path is used for the gff, ta, tss and pro inputs, so
        the first entry of each returned list is expected to start at 4.
        """
        gff_path = os.path.join(self.test_folder, "test.gff")
        args = self.mock_args.mock()
        args.gff_file = gff_path
        args.ta_file = gff_path
        args.tss_file = gff_path
        args.pro_file = gff_path
        args.seq_file = os.path.join(self.test_folder, "test.fa")
        gen_file(args.gff_file, self.example.gff_file)
        gen_file(args.seq_file, self.example.seq_file)
        args.hypo = False

        cdss, tas, tsss, pros, seq = sud.read_data(args)
        for entries in (cdss, tas, tsss, pros):
            self.assertEqual(entries[0].start, 4)
        expected_seq = {
            'aaa': 'ATATGACGATACGTAAACCGACCGAATATATCTTTTCACAACCAGATTACGATCGTCAT'}
        self.assertDictEqual(seq, expected_seq)

    def test_get_terminal(self):
        """Check the entry sud.get_terminal appends for the "start" side."""
        sequences = {
            "aaa": "ATATGACGATACGTAAACCGACCGAATATATCTTTTCACAACCAGATTACGATCGTCAT"}
        collected = []
        sud.get_terminal(self.example.gffs, collected, sequences, "start")
        expected = [{'end': 4, 'len_CDS': 0, 'strand': '+',
                     'strain': 'aaa', 'start': 1}]
        self.assertListEqual(collected, expected)

    def test_get_inter(self):
        """Check the entry sud.get_inter appends for the example GFFs."""
        collected = []
        sud.get_inter(self.example.gffs, collected)
        expected = [{'start': 14, 'strand': '+', 'end': 20,
                     'strain': 'aaa', 'len_CDS': 10}]
        self.assertListEqual(collected, expected)

    def test_set_cover_and_point(self):
        """Check the "covers" and "check_point" entries that get filled in."""
        coverage = [2, 3, 4, 1, 6, 2, 8, 3, 5, 6, 7, 5, 2, 1]
        results = {"covers": None, "check_point": None}
        position = {"start": 2, "end": 6, "ori_start": 2, "ori_end": 3}
        sud.set_cover_and_point(results, self.example.inters[0],
                                coverage, position, 5)
        expected_covers = [2, 3, 4, 1, 6, 2, 8, 3, 5]
        expected_point = {'srna_start': 0, 'utr_start': 2,
                          'utr_end': 3, 'srna_end': 12}
        self.assertListEqual(results["covers"], expected_covers)
        self.assertDictEqual(results["check_point"], expected_point)

    def test_check_import_srna_covers(self):
        """Exercise sud.check_import_srna_covers with and without a decrease.

        With ``detect_decrease`` True one coverage record is expected in both
        the sRNA and the UTR lists; with it False the sRNA list is expected
        to stay empty.
        """
        args = self.mock_args.mock()
        cover = {"type": "5utr"}
        state = {"num": 0, "cover_tmp": {"total": 100, "ori_total": 200},
                 "checks": {"detect_decrease": True},
                 "final_poss": {"start": 3, "end": 23}}
        results = {"cover_sets": {"high": 50, "low": 10},
                   "srna_covers": {"cond_1": []},
                   "utr_covers": {"cond_1": []},
                   "type": "5utr", "intercds": "TSS"}
        args.min_len = 30
        args.max_len = 500
        position = {"start": 1, "end": 25, "ori_start": 1, "ori_end": 25}

        # First pass: a decrease has been detected.
        sud.check_import_srna_covers(state, results, self.example.inters[0],
                                     "cond_1", "track", cover, position,
                                     args, "5utr")
        self.assertDictEqual(state["final_poss"], {'end': 23, 'start': 3})
        expected_record = {'final_start': 3, 'high': 50, 'ori_avg': 8.0,
                           'final_end': 23, 'low': 10, 'type': '5utr',
                           'avg': 4, 'track': 'track'}
        self.assertDictEqual(results["srna_covers"],
                             {'cond_1': [expected_record]})
        self.assertDictEqual(results["utr_covers"], results["srna_covers"])

        # Second pass: no decrease, starting again from empty lists.
        state["checks"] = {"detect_decrease": False}
        results["srna_covers"] = {"cond_1": []}
        results["utr_covers"] = {"cond_1": []}
        sud.check_import_srna_covers(state, results, self.example.inters[0],
                                     "cond_1", "track", cover, position,
                                     args, "5utr")
        self.assertDictEqual(results["srna_covers"], {'cond_1': []})

    def test_check_pos(self):
        """Both flags are expected to be True after sud.check_pos runs."""
        flags = {"srna": False, "utr": False}
        boundaries = {"utr_start": 1, "utr_end": 29,
                      "srna_start": 3, "srna_end": 11}
        current = {"pos": 4}
        sud.check_pos(current, boundaries, flags, 4)
        self.assertDictEqual(flags, {'srna': True, 'utr': True})

    def test_get_cover_5utr(self):
        """Exercise sud.get_cover_5utr in a stopping and a continuing case."""
        args = self.mock_args.mock()
        coverage = 20

        # Case 1: decrease_utr = 50 and a stored "5utr" coverage of 0. The
        # call is expected to return a truthy value and set the end to 10.
        state = {"num": 0, "cover_tmp": {"5utr": 0},
                 "checks": {"detect_decrease": True},
                 "final_poss": {"start": 1, "end": 26}}
        limits = {"high": 50, "low": 10}
        args.decrease_utr = 50
        args.fuzzy_utr = 2
        stop = sud.get_cover_5utr(state, limits, coverage,
                                  self.example.inters[0], args, 10)
        self.assertDictEqual(state["final_poss"], {'start': 1, 'end': 10})
        self.assertEqual(state["num"], 0)
        self.assertTrue(stop)
        self.assertDictEqual(state["cover_tmp"], {'5utr': 0})
        self.assertDictEqual(limits, {'high': 50, 'low': 10})

        # Case 2: decrease_utr = 0.5 and a stored "5utr" coverage of 30. The
        # call is expected to return a falsy value and update the counters.
        state = {"num": 0, "cover_tmp": {"5utr": 30},
                 "checks": {"detect_decrease": True},
                 "final_poss": {"start": 1, "end": 26}}
        limits = {"low": 10, "high": 50}
        args.decrease_utr = 0.5
        stop = sud.get_cover_5utr(state, limits, coverage,
                                  self.example.inters[0], args, 10)
        self.assertEqual(state["num"], 1)
        self.assertFalse(stop)
        self.assertDictEqual(state["final_poss"], {'start': 1, 'end': 26})
        self.assertDictEqual(state["cover_tmp"], {'5utr': 20})
        self.assertDictEqual(limits, {'low': 20, 'high': 50})

    def test_detect_cover_utr_srna(self):
        """Check the coverage records sud.detect_cover_utr_srna produces.

        sud.coverage_comparison is replaced with the mock version before
        the call.
        """
        # NOTE(review): the replacement is left in place after the test.
        sud.coverage_comparison = self.mock.mock_coverage_comparison
        results = {"cover_sets": {"low": 10, "high": 50},
                   "pos": {"low": 10, "high": 50},
                   "covers": [20], "type": "5utr",
                   "srna_covers": {"frag_1": []},
                   "utr_covers": {"frag_1": []}, "intercds": "TSS",
                   "check_point": {"utr_start": 1, "utr_end": 29,
                                   "srna_start": 2, "srna_end": 25}}
        # Created as in the original test; it is not passed to the call.
        datas = {"num": 0, "cover_tmp": {"total": 100, "ori_total": 200},
                 "checks": {"detect_decrease": True},
                 "final_poss": {"start": 3, "end": 23}}
        position = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 23}
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        sud.detect_cover_utr_srna(results, position, self.example.inters[0],
                                  "frag_1", "track_1", args, "frag",
                                  2, 20, "+")
        expected_record = {'low': 20, 'high': 50, 'track': 'track_1',
                           'final_start': 2, 'ori_avg': 0.8695652173913043,
                           'type': 'frag', 'final_end': 20,
                           'avg': 1.0526315789473684}
        self.assertDictEqual(results["srna_covers"],
                             {'frag_1': [expected_record]})
        self.assertDictEqual(results["utr_covers"], results["srna_covers"])
        self.assertDictEqual(results["cover_sets"],
                             {'best': 20, 'low': 20, 'high': 50})

    def test_get_coverage(self):
        """Check the sRNA and UTR coverage returned by sud.get_coverage.

        sud.coverage_comparison and sud.detect_cover_utr_srna are replaced
        with mocks for the duration of the test.
        """
        # Register the restore before patching, so the attributes return to
        # their pre-test values even when an assertion below fails. Without
        # this the mocked detect_cover_utr_srna would stay installed for
        # every test that runs later.
        for patched in ("coverage_comparison", "detect_cover_utr_srna"):
            self.addCleanup(setattr, sud, patched, getattr(sud, patched))
        sud.coverage_comparison = self.mock.mock_coverage_comparison
        sud.detect_cover_utr_srna = self.mock.mock_detect_cover_utr_srna
        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25}
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        srna_covers, utr_covers = sud.get_coverage(
            self.example.wigs, self.example.inters[0],
            pos, "3utr", "TSS", args)
        self.assertDictEqual(
            srna_covers,
            {'frag_1': [{'track': 'track_1', 'high': 50,
                         'final_start': 2, 'type': 'frag',
                         'avg': 8.052631578947368, 'low': 10,
                         'final_end': 3, 'ori_avg': 2.12}]})
        self.assertDictEqual(utr_covers, srna_covers)

    def test_get_utr_cutoff(self):
        """Check the mean and median sud.get_utr_cutoff stores for "p_0.5"."""
        averages = [30, 60, 550, 302, 44]
        medians = {"aaa": {"5utr": {"bbb": {}}}}
        sud.get_utr_cutoff("p_0.5", medians, averages, "aaa", "5utr", "bbb")
        expected = {'aaa': {'5utr': {'bbb': {'mean': 197.2, 'median': 60}}}}
        self.assertDictEqual(medians, expected)

    def test_detect_normal(self):
        """Check the sRNA and UTR records sud.detect_normal appends.

        sud.get_coverage is replaced with the mock version. The first call
        uses a length window of 30-500; the second uses 3-20 and also
        supplies ``args.pros``.
        """
        # Register the restore before patching so that sud.get_coverage is
        # put back even when one of the assertions below fails.
        self.addCleanup(setattr, sud, "get_coverage", get_coverage)
        sud.get_coverage = self.mock.mock_get_coverage
        # Coverage block expected inside every record checked below.
        expected_cover = {'frag_1': [{
            'track': 'track_1',
            'final_start': 2, 'avg': 41.36842105263158,
            'high': 50, 'type': 'frag', 'final_end': 20,
            'ori_avg': 27.52, 'low': 10}]}
        diff = 50
        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25}
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        args.utrs = []
        args.srnas = []
        sud.detect_normal(diff, self.example.wigs, self.example.inters[0],
                          pos, "3utr", self.example.tsss[0], args)
        self.assertListEqual(
            args.srnas, [{'end': 20, 'strand': '+',
                          'datas': expected_cover,
                          'end_cleavage': 'NA',
                          'utr': '3utr', 'start_cleavage': 'NA',
                          'strain': 'aaa',
                          'start': 2, 'start_tss': 'TSS:1_+'}])
        self.assertListEqual(
            args.utrs, [{'end': 20, 'strand': '+',
                         'datas': expected_cover,
                         'end_cleavage': 'NA',
                         'utr': '3utr', 'start_cleavage': 'NA',
                         'strain': 'aaa',
                         'start': 2, 'start_tss': 'NA'}])
        args.utrs = []
        args.srnas = []
        args.pros = self.example.pros
        args.min_len = 3
        args.max_len = 20
        pos = {"start": 2, "end": 24, "ori_start": 1, "ori_end": 25}
        sud.detect_normal(diff, self.example.wigs, self.example.inters[0],
                          pos, "3utr", self.example.tsss[0], args)
        self.assertListEqual(
            args.srnas, [
                {'start': 1, 'end': 18, 'start_tss': 'TSS:1_+',
                 'datas': expected_cover,
                 'start_cleavage': 'NA', 'end_cleavage': 'Cleavage:18_+',
                 'utr': '3utr', 'strand': '+', 'strain': 'aaa'}])

    def test_detect_3utr_pro(self):
        """Check sud.detect_3utr_pro with sud.get_coverage mocked.

        The real get_coverage is restored by a cleanup, so a failing
        assertion no longer leaves the mock installed for later tests.
        """
        # Register the restore first; cleanups run even if the test fails.
        self.addCleanup(setattr, sud, "get_coverage", get_coverage)
        sud.get_coverage = self.mock.mock_get_coverage
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 1
        args.fuzzy_tsss = {"3utr": 3}
        args.pros = self.example.pros
        args.utrs = []
        args.srnas = []
        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25}
        # Results are collected in args.srnas and args.utrs.
        sud.detect_3utr_pro(self.example.inters[0], pos,
                            self.example.wigs, "3utr", args)
        self.assertListEqual(
            args.srnas, [{'end_cleavage': 'NA', 'end': 20,
                          'start_cleavage': 'Cleavage:18_+', 'utr': '3utr',
                          'datas': {'frag_1': [{
                              'low': 10, 'final_start': 2,
                              'track': 'track_1', 'type': 'frag',
                              'final_end': 20,
                              'avg': 41.36842105263158,
                              'ori_avg': 27.52, 'high': 50}]},
                          'strand': '+', 'start_tss': 'NA', 'start': 18,
                          'strain': 'aaa'}])
        self.assertListEqual(
            args.utrs, [{'end_cleavage': 'NA', 'end': 20,
                         'start_cleavage': 'NA',
                         'utr': '3utr', 'datas': {'frag_1': [{
                             'low': 10, 'final_start': 2,
                             'track': 'track_1', 'type': 'frag',
                             'final_end': 20, 'avg': 41.36842105263158,
                             'ori_avg': 27.52, 'high': 50}]},
                         'strand': '+', 'start_tss': 'NA',
                         'start': 18, 'strain': 'aaa'}])

    def test_detect_twopro(self):
        """Check sud.detect_twopro with two processing sites (18 and 38).

        sud.get_coverage is mocked; the real function is restored by a
        cleanup, so a failing assertion no longer leaves the mock installed
        for later tests.
        """
        # Register the restore first; cleanups run even if the test fails.
        self.addCleanup(setattr, sud, "get_coverage", get_coverage)
        sud.get_coverage = self.mock.mock_get_coverage
        pro_dict = [{"seq_id": "aaa", "source": "tsspredator",
                     "feature": "processing", "start": 18,
                     "end": 18, "phase": ".", "strand": "+", "score": "."},
                    {"seq_id": "aaa", "source": "tsspredator",
                     "feature": "processing", "start": 38,
                     "end": 38, "phase": ".", "strand": "+", "score": "."}]
        attributes_pro = [{"ID": "processing0", "Name": "Processing_0"},
                          {"ID": "processing1", "Name": "Processing_1"}]
        # Pair each entry with its attributes to build the processing sites.
        pros = [Create_generator(entry, attributes, "gff")
                for entry, attributes in zip(pro_dict, attributes_pro)]
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 3
        args.fuzzy_tsss = {"3utr": 3}
        args.pros = pros
        args.utrs = []
        args.srnas = []
        pos = {"start": 2, "end": 50, "ori_start": 1, "ori_end": 25}
        # Results are collected in args.srnas and args.utrs.
        sud.detect_twopro(self.example.inters[0], pos, self.example.wigs,
                          "interCDS", "interCDS", args)
        self.assertListEqual(
            args.srnas, [{'start_cleavage': 'Cleavage:18_+', 'utr': 'interCDS',
                          'datas': {'frag_1': [{
                              'type': 'frag', 'low': 10,
                              'final_start': 2, 'high': 50,
                              'avg': 41.36842105263158,
                              'final_end': 20, 'track': 'track_1',
                              'ori_avg': 27.52}]},
                          'start_tss': 'NA', 'end_cleavage': 'Cleavage:38_+',
                          'strand': '+', 'end': 38, 'strain': 'aaa',
                          'start': 18}])
        self.assertListEqual(
            args.utrs, [{'start_cleavage': 'NA', 'utr': 'interCDS',
                         'datas': {'frag_1': [{
                             'type': 'frag', 'low': 10,
                             'final_start': 2, 'high': 50,
                             'avg': 41.36842105263158,
                             'final_end': 20, 'track': 'track_1',
                             'ori_avg': 27.52}]},
                         'start_tss': 'NA', 'end_cleavage': 'Cleavage:38_+',
                         'strand': '+', 'end': 38, 'strain': 'aaa',
                         'start': 18}])

    def test_run_utr_detection(self):
        """Check sud.run_utr_detection for a 5'UTR with get_coverage mocked.

        The mock is installed only around the call under test and removed
        in a ``finally`` clause, so it is restored even if the call raises.
        """
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        args.fuzzy_tsss = {"5utr": "n_3"}
        args.utrs = []
        args.srnas = []
        args.tsss = self.example.tsss
        args.pros = self.example.pros
        sud.get_coverage = self.mock.mock_get_coverage
        try:
            sud.run_utr_detection(self.example.wigs, self.example.inters[0],
                                  2, 50, "5utr", args)
        finally:
            # Always put the real implementation back for later tests.
            sud.get_coverage = get_coverage
        self.assertListEqual(
            args.srnas, [{'start': 1, 'end': 50, 'start_cleavage': 'NA',
                          'datas': {'frag_1': [{
                              'high': 50, 'final_end': 20,
                              'avg': 41.36842105263158, 'low': 10,
                              'ori_avg': 27.52, 'final_start': 2,
                              'type': 'frag', 'track': 'track_1'}]},
                          'start_tss': 'TSS:1_+', 'strain': 'aaa',
                          'strand': '+',
                          'utr': '5utr', 'end_cleavage': 'NA'}])
        self.assertListEqual(
            args.utrs, [{'start': 1, 'end': 50, 'start_cleavage': 'NA',
                         'datas': {'frag_1': [{
                             'high': 50, 'final_end': 20,
                             'avg': 41.36842105263158, 'low': 10,
                             'ori_avg': 27.52, 'final_start': 2,
                             'type': 'frag', 'track': 'track_1'}]},
                         'start_tss': 'NA', 'strain': 'aaa', 'strand': '+',
                         'utr': '5utr', 'end_cleavage': 'NA'}])

    def test_class_utr(self):
        """Check sud.class_utr with get_coverage mocked.

        Two entries are expected in both args.srnas and args.utrs. The mock
        is removed in a ``finally`` clause, so it is restored even if the
        call under test raises.
        """
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        args.fuzzy_tsss = {"3utr": "p_3"}
        args.utrs = []
        args.srnas = []
        args.tsss = self.example.tsss
        args.pros = self.example.pros
        args.wig_fs = self.example.wigs
        sud.get_coverage = self.mock.mock_get_coverage
        try:
            # The same wig data is passed for both wig arguments.
            sud.class_utr(self.example.inters[0], self.example.tas[0],
                          args, args.wig_fs, args.wig_fs)
        finally:
            # Always put the real implementation back for later tests.
            sud.get_coverage = get_coverage
        self.assertListEqual(
            args.srnas, [{'end_cleavage': 'NA', 'start_tss': 'TSS:1_+',
                          'utr': '3utr', 'start_cleavage': 'NA', 'end': 20,
                          'start': 1, 'datas': {'frag_1': [{
                              'ori_avg': 27.52,
                              'final_start': 2, 'avg': 41.36842105263158,
                              'track': 'track_1', 'type': 'frag',
                              'final_end': 20, 'low': 10, 'high': 50}]},
                          'strain': 'aaa', 'strand': '+'},
                         {'end_cleavage': 'NA', 'start_tss': 'NA',
                          'utr': '3utr', 'start_cleavage': 'Cleavage:18_+',
                          'end': 20, 'start': 18, 'datas': {'frag_1': [{
                              'ori_avg': 27.52, 'final_start': 2,
                              'avg': 41.36842105263158, 'track': 'track_1',
                              'type': 'frag', 'final_end': 20, 'low': 10,
                              'high': 50}]},
                          'strain': 'aaa', 'strand': '+'}])
        self.assertListEqual(
            args.utrs, [{'end_cleavage': 'NA', 'start_tss': 'NA',
                         'utr': '3utr', 'start_cleavage': 'NA', 'end': 20,
                         'start': 1, 'datas': {'frag_1': [{
                             'ori_avg': 27.52, 'final_start': 2,
                             'avg': 41.36842105263158, 'track': 'track_1',
                             'type': 'frag', 'final_end': 20, 'low': 10,
                             'high': 50}]},
                         'strain': 'aaa', 'strand': '+'},
                        {'end_cleavage': 'NA', 'start_tss': 'NA',
                         'utr': '3utr', 'start_cleavage': 'NA', 'end': 20,
                         'start': 18, 'datas': {'frag_1': [{
                             'ori_avg': 27.52, 'final_start': 2,
                             'avg': 41.36842105263158, 'track': 'track_1',
                             'type': 'frag', 'final_end': 20, 'low': 10,
                             'high': 50}]},
                         'strain': 'aaa', 'strand': '+'}])

    def test_get_utr_coverage(self):
        """get_utr_coverage on a single 3'UTR entry of strain 'aaa'.

        The expected result lists the entry's 'ori_avg' under its track for
        '3utr' and leaves 'interCDS' and '5utr' empty.
        """
        track_data = {'final_end': 20, 'track': 'track_1', 'final_start': 2,
                      'ori_avg': 27.52, 'avg': 41.36842105263158,
                      'type': 'frag', 'low': 10, 'high': 50}
        utr = {'strand': '+', 'utr': '3utr', 'end': 20, 'start': 18,
               'start_tss': 'NA', 'datas': {'frag_1': [track_data]},
               'end_cleavage': 'NA', 'strain': 'aaa',
               'start_cleavage': 'NA'}
        expected = {'aaa': {'interCDS': {},
                            '3utr': {'track_1': [27.52]},
                            '5utr': {}}}
        self.assertDictEqual(sud.get_utr_coverage([utr]), expected)

    def test_set_cutoff(self):
        """set_cutoff is checked with and without args.cover_notex.

        With cover_notex set, only the '5utr' track gets mean/median values
        in the expected result; with cover_notex set to None, all three UTR
        types do.
        """
        args = self.mock_args.mock()
        args.texs = {"track_4@AND@track_6": 0}
        args.coverages = {"5utr": "p_0.3", "3utr": "n_10", "interCDS": "p_0.5"}
        args.cover_notex = {"5utr": "p_0.3", "3utr": "n_10",
                            "interCDS": "p_0.5"}
        covers = {
            'aaa': {
                '5utr': {'track_4': [52, 11, 23]},
                'inter': {'track_3': [111]},
                'total': {'track_1': [27.52, 111]},
                '3utr': {'track_1': [27.52, 111]},
                'interCDS': {'track_2': [12, 0]},
            },
        }

        # First pass: cover_notex is provided.
        expected_with_notex = {
            'aaa': {
                '5utr': {'track_4': {'median': 11,
                                     'mean': 28.666666666666668}},
                'interCDS': {'track_2': {}},
                '3utr': {'track_1': {}},
            },
        }
        self.assertDictEqual(sud.set_cutoff(covers, args),
                             expected_with_notex)

        # Second pass: same coverage data, cover_notex disabled.
        args.cover_notex = None
        expected_without_notex = {
            'aaa': {
                '3utr': {'track_1': {'mean': 69.26, 'median': 10.0}},
                '5utr': {'track_4': {'mean': 28.666666666666668,
                                     'median': 11}},
                'interCDS': {'track_2': {'mean': 6.0, 'median': 0}},
            },
        }
        self.assertDictEqual(sud.set_cutoff(covers, args),
                             expected_without_notex)

    def test_mean_score(self):
        """mean_score of [1, 3, 5, 6, 7, 8] is expected to be 5.0."""
        values = [1, 3, 5, 6, 7, 8]
        self.assertEqual(sud.mean_score(values), 5.0)

    def test_median_score(self):
        """median_score of [1, 3, 5, 6, 7, 8] with 0.5 is expected to be 5."""
        values = [1, 3, 5, 6, 7, 8]
        self.assertEqual(sud.median_score(values, 0.5), 5)

    def test_detect_srna(self):
        """Check the GFF and table text that sud.detect_srna writes.

        sud.replicate_comparison is replaced by a mock for the duration of
        the test and restored by a cleanup, so the patch cannot leak into
        other tests. Output is captured through the StringIO objects
        assigned to args.out and args.out_t.
        """
        # Register the restore before patching; cleanups run even if an
        # assertion below fails.
        self.addCleanup(setattr, sud, "replicate_comparison",
                        sud.replicate_comparison)
        sud.replicate_comparison = self.mock.mock_replicate_comparison
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        args.coverages = "cover"
        args.texs = "template_texs"
        args.tex_notex = "tex_notex"
        args.replicates = "rep"
        args.table_best = True
        args.out = StringIO()
        args.out_t = StringIO()
        median = {"aaa": {"3utr": 555}}
        args.srnas = [{'strand': '+', 'utr': '3utr', 'end': 20,
                       'start': 18, 'start_tss': 'NA',
                       'datas': {'frag_1': [{
                           'final_end': 20, 'track': 'track_1',
                           'final_start': 2, 'ori_avg': 27.52,
                           'avg': 41.36842105263158,
                           'type': 'frag', 'low': 10, 'high': 50,
                           "conds": ["frag"]}]},
                       'end_cleavage': 'NA',
                       'strain': 'aaa', 'start_cleavage': 'Cleavage:18_+',
                       'detail': {'avg': 41.36842105263158,
                                  'type': 'frag', 'low': 10, 'high': 50,
                                  "conds": ["frag"]}}]
        sud.detect_srna(median, args)
        self.assertEqual(args.out.getvalue(),
                         ("aaa\tANNOgesic\tncRNA\t18\t20\t.\t+\t.\t"
                          "ID=aaa_srna_utr0;Name=UTR_sRNA_00000;"
                          "sRNA_type=3utr;best_avg_coverage=500;"
                          "best_high_coverage=700;best_low_coverage=400;"
                          "with_TSS=NA;start_cleavage=Cleavage:18_+;"
                          "end_cleavage=NA\n"))
        self.assertEqual(args.out_t.getvalue(),
                         ("aaa\t00000\t18\t20\t+\tfrag_1\ttrack_1\t500\t700\t400\tfrag(avg=200;high=700;low=400)\n"))

    def test_print_file(self):
        """print_file writes one GFF line and one table line for a candidate.

        Output is captured through the StringIO objects assigned to
        args.out (GFF text) and args.out_t (table text).
        """
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        args.coverages = "cover"
        args.texs = "template_texs"
        args.tex_notex = "tex_notex"
        args.replicates = "rep"
        args.table_best = True
        args.out = StringIO()
        args.out_t = StringIO()

        candidate = {
            'strand': '+', 'utr': '3utr', 'end': 20,
            'start': 18, 'start_tss': 'NA',
            'datas': {'frag_1': [{
                'final_end': 20, 'track': 'track_1',
                'final_start': 2, 'ori_avg': 27.52,
                'avg': 41.36842105263158, 'type': 'frag', 'low': 10,
                'high': 50, "conds": ["frag"]}]},
            'end_cleavage': 'NA', 'strain': 'aaa',
            'start_cleavage': 'Cleavage:18_+',
        }
        best_cover = {
            "detail": [{"best": 500, "track": "frag", "high": 700,
                        "low": 400, "start": 100, "end": 202,
                        "conds": {"frag_1": "track_1"}, "avg": 200}],
            "conds": {"frag_1": "track_1"},
            "best": 500, "track": "frag", "high": 700, "low": 400,
            "start": 100, "end": 202,
        }
        expected_gff = ("aaa\tANNOgesic\tncRNA\t2\t50\t.\t+\t.\t"
                        "ID=aaa_srna_utr0;Name=UTR_sRNA_00000;"
                        "sRNA_type=3utr;best_avg_coverage=500;"
                        "best_high_coverage=700;best_low_coverage=400;"
                        "with_TSS=NA;start_cleavage=Cleavage:18_+;"
                        "end_cleavage=NA\n")
        expected_table = ("aaa\t00000\t2\t50\t+\tfrag_1\ttrack_1\t500\t700\t400\t"
                          "frag(avg=200;high=700;low=400)\n")

        # The expected text uses the start/end passed here (2 and 50),
        # not the candidate's own 'start'/'end'.
        sud.print_file(0, candidate, 2, 50, best_cover, args)

        self.assertEqual(args.out.getvalue(), expected_gff)
        self.assertEqual(args.out_t.getvalue(), expected_table)
Пример #52
0
class TestConverter(unittest.TestCase):
    """Tests for Converter, run with mocked parsers and example fixtures."""

    def setUp(self):
        """Create a Converter with mocked collaborators and a scratch folder."""
        self.converter = Converter()
        self.example = Example()
        # Mock_gff3_parser is assigned as a class, not an instance; tests
        # that call ``entries`` on it pass the receiver explicitly.
        self.converter.gff3parser = Mock_gff3_parser
        self.converter._print_rntptt_title = Mock_func().print_rntptt_title
        self.converter.tsspredator = Mock_TSSPredatorReader()
        self.converter._read_file = Mock_func().mock_read_file
        self.gff_file = self.example.gff_file
        self.ptt_out = self.example.ptt_out
        self.rnt_out = self.example.rnt_out
        self.srna_out = self.example.srna_out
        self.embl_file = self.example.embl_file
        self.embl_out = self.example.embl_out
        self.multi_embl = self.example.multi_embl
        self.gff_out = self.example.gff_out
        self.mastertable = self.example.mastertable
        self.tss_file = self.example.tss_file
        self.fasta_file = self.example.fasta_file
        self.transterm = self.example.transterm
        self.term_file = self.example.term_file
        self.circ_file = self.example.circrna_table
        self.circ_all = self.example.circrna_all
        self.circ_best = self.example.circrna_best
        self.test_folder = "test_folder"
        self.mock_args = MockClass()
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder and everything written into it."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_print_rntptt_file(self):
        """_print_rntptt_file output for CDS and tRNA entries matches examples."""
        cdss = []
        genes = []
        rnas = []
        gff_dict = Example().gff_dict
        # Sort the example entries into the three feature groups.
        for gff in gff_dict:
            if gff["feature"] == "gene":
                genes.append(self.converter.gff3parser.entries(self, gff))
            elif gff["feature"] == "CDS":
                cdss.append(self.converter.gff3parser.entries(self, gff))
            elif gff["feature"] == "tRNA":
                rnas.append(self.converter.gff3parser.entries(self, gff))
        out_p = StringIO()
        out_r = StringIO()
        self.converter._print_rntptt_file(out_p, cdss, genes)
        self.converter._print_rntptt_file(out_r, rnas, genes)
        # Drop the empty string that follows the final newline.
        self.assertEqual(out_p.getvalue().split("\n")[:-1],
                         self.example.ptt_out_list)
        self.assertEqual(out_r.getvalue().split("\n")[:-1],
                         self.example.rnt_out_list)
        out_p.close()
        out_r.close()

    def test_srna2pttrnt(self):
        """_srna2rntptt writes the expected lines (compared as sets)."""
        srna_input_file = os.path.join(self.test_folder, "srna.gff")
        srna_output_file = os.path.join(self.test_folder, "srna.out")
        with open(srna_input_file, "w") as fh:
            fh.write(self.gff_file)
        srnas = []
        self.converter._srna2rntptt(srna_input_file, srna_output_file, srnas,
                                    1234567)
        datas = import_data(srna_output_file)
        self.assertEqual(set(datas), set(self.srna_out.split("\n")))

    def test_multi_embl_pos(self):
        """_multi_embl_pos parses each EMBL line; "Wrong" results are skipped."""
        embls = []
        for line in self.embl_file.split("\n"):
            datas = self.converter._multi_embl_pos(line.strip())
            if datas != "Wrong":
                embls.append(datas)
        for index in range(0, 7):
            self.assertDictEqual(embls[index], self.embl_out[index])
        # The last parsed entry carries multiple positions.
        for index in range(0, 2):
            self.assertDictEqual(embls[-1]["pos"][index],
                                 self.multi_embl[index])

    def test_parser_embl_data(self):
        """_parser_embl_data writes GFF lines and returns id plus positions."""
        embl_file = os.path.join(self.test_folder, "test.embl")
        out = StringIO()
        with open(embl_file, "w") as eh:
            for line in self.embl_file.split("\n"):
                eh.write(line + "\n")
        info = self.converter._parser_embl_data(embl_file, out)
        datas = out.getvalue().split("\n")
        self.assertEqual(set(datas[:-1]), set(self.gff_out.split("\n")))
        self.assertEqual(info[0], "NC_007795.1")
        for index in range(0, 2):
            self.assertDictEqual(info[1]["pos"][index], self.multi_embl[index])
        out.close()

    def test_multi_tss_class(self):
        """_multi_tss_class updates the counters for every master-table TSS."""
        nums = {"tss": 0, "tss_uni": 0, "class": 1}
        utrs = {"total": [], "pri": [], "sec": []}
        tss_features = {"tss_types": [], "locus_tags": [], "utr_lengths": []}
        tss_index = defaultdict(lambda: 0)
        fh = StringIO(self.mastertable)
        for tss in self.converter.tsspredator.entries(fh):
            self.converter._multi_tss_class(tss, tss_index, tss_features, nums,
                                            utrs)
        fh.close()
        self.assertDictEqual(nums, {'tss_uni': 0, 'class': 5, 'tss': 2})

    def test_convert_mastertable2gff(self):
        """convert_mastertable2gff output matches the example TSS file."""
        master_file = os.path.join(self.test_folder, "test.tsv")
        with open(master_file, "w") as th:
            th.write(self.mastertable)
        out_gff = os.path.join(self.test_folder, "test.tsv_out")
        self.converter.convert_mastertable2gff(master_file, "ANNOgesic", "TSS",
                                               "aaa", out_gff)
        datas = import_data(out_gff)
        self.assertEqual(set(datas), set(self.tss_file.split("\n")))

    def test_convert_gff2rntptt(self):
        """convert_gff2rntptt is expected to create the sRNA, rnt and ptt files."""
        srna_input_file = os.path.join(self.test_folder, "srna.gff")
        srna_output_file = os.path.join(self.test_folder, "srna.out")
        gff_file = os.path.join(self.test_folder, "test.gff")
        rnt_file = os.path.join(self.test_folder, "test.rnt")
        ptt_file = os.path.join(self.test_folder, "test.ptt")
        fasta_file = os.path.join(self.test_folder, "test.fa")
        with open(srna_input_file, "w") as fh:
            fh.write(self.gff_file)
        with open(gff_file, "w") as fh:
            fh.write(self.gff_file)
        with open(fasta_file, "w") as fh:
            fh.write(self.fasta_file)
        self.converter.convert_gff2rntptt(gff_file, fasta_file, ptt_file,
                                          rnt_file, srna_input_file,
                                          srna_output_file)
        # Check the files on disk: asserting the path strings themselves
        # would always pass because a non-empty string is truthy.
        self.assertTrue(os.path.exists(srna_output_file))
        self.assertTrue(os.path.exists(rnt_file))
        self.assertTrue(os.path.exists(ptt_file))

    def test_convert_embl2gff(self):
        """convert_embl2gff output (minus first and last two lines) matches."""
        embl_file = os.path.join(self.test_folder, "test.embl")
        gff_file = os.path.join(self.test_folder, "test.embl_out")
        with open(embl_file, "w") as eh:
            for line in self.embl_file.split("\n"):
                eh.write(line + "\n")
        self.converter.convert_embl2gff(embl_file, gff_file)
        datas = import_data(gff_file)
        self.assertEqual(set(datas[1:-2]), set(self.gff_out.split("\n")))

    def test_convert_transtermhp2gff(self):
        """convert_transtermhp2gff output matches the example terminator file."""
        transterm_file = os.path.join(self.test_folder,
                                      "test_best_terminator_after_gene.bag")
        gff_file = os.path.join(self.test_folder, "transterm.gff")
        with open(transterm_file, "w") as th:
            th.write(self.transterm)
        self.converter.convert_transtermhp2gff(transterm_file, gff_file)
        datas = import_data(gff_file)
        self.assertEqual(set(datas), set(self.term_file.split("\n")))

    @staticmethod
    def get_info(datas):
        """Return the first eight tab-separated columns of each data line.

        Lines starting with "#" are skipped. Declared static because it
        takes no instance argument; it can be called on the class or on an
        instance.
        """
        return ["\t".join(data.split("\t")[:8])
                for data in datas if not data.startswith("#")]

    def test_convert_circ2gff(self):
        """convert_circ2gff writes the expected 'all' and 'best' GFF files.

        Only the first eight columns of non-comment lines are compared.
        """
        circ_file = os.path.join(self.test_folder, "circ.csv")
        out_all = os.path.join(self.test_folder, "all.gff")
        out_filter = os.path.join(self.test_folder, "best.gff")
        with open(circ_file, "w") as ch:
            ch.write(self.circ_file)
        args = self.mock_args.mock()
        args.start_ratio = 0.5
        args.end_ratio = 0.5
        args.support = 5
        self.converter.convert_circ2gff(circ_file, args, out_all, out_filter)
        self.assertListEqual(self.get_info(import_data(out_all)),
                             self.get_info(self.circ_all.split("\n")))
        self.assertListEqual(self.get_info(import_data(out_filter)),
                             self.get_info(self.circ_best.split("\n")))
Пример #53
0
class TestSubLocal(unittest.TestCase):
    """Tests for SubLocal using a scratch folder tree under "test_folder"."""

    def setUp(self):
        """Create the scratch folder tree and a SubLocal bound to it."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.out = "test_folder/output"
        self.fastas = "test_folder/fastas"
        self.gffs = "test_folder/gffs"
        self.stat = "test_folder/stat"
        self.trans = "test_folder/tran"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
            os.mkdir(self.out)
            os.mkdir(self.fastas)
            os.mkdir(os.path.join(self.fastas, "tmp"))
            os.mkdir(self.gffs)
            os.mkdir(os.path.join(self.gffs, "tmp"))
            os.mkdir(self.stat)
            os.mkdir(self.trans)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.out_folder = self.out
        args.trans = self.trans
        self.sub = SubLocal(args)

    def tearDown(self):
        """Remove the scratch folder and everything written into it."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_get_protein_seq(self):
        """_get_protein_seq returns the prefix "aaa" for "aaa.gff"."""
        gen_file(os.path.join(self.fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        gff = "aaa.gff"
        gen_file(os.path.join(self.gffs, "tmp", gff), self.example.gff_file)
        gen_file(os.path.join(self.trans, "aaa_transcript.gff"),
                 self.example.tran_file)
        args = self.mock_args.mock()
        args.out_folder = self.test_folder
        # Close the log before tearDown deletes the folder that holds it.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            prefix = self.sub._get_protein_seq(gff, self.test_folder,
                                               self.trans, args, log)
        self.assertEqual(prefix, "aaa")

    def test_run_psortb(self):
        """_run_psortb (with _psortb mocked) creates tmp_log and aaa_raw.txt."""
        self.sub._psortb = self.mock.mock_psortb
        tmp_result = os.path.join(self.out, "tmp_results")
        os.mkdir(tmp_result)
        args = self.mock_args.mock()
        args.psortb_path = "psortb_path"
        args.gram = "positive"
        # Close the log before tearDown deletes the folder that holds it.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.sub._run_psortb(args, "aaa", self.out, self.test_folder,
                                 tmp_result, log)
        self.assertTrue(os.path.exists(os.path.join(self.out, "tmp_log")))
        self.assertTrue(
            os.path.exists(
                os.path.join(tmp_result, "_".join(["aaa", "raw.txt"]))))

    def test_merge_and_stat(self):
        """_merge_and_stat (stat_sublocal mocked) creates the "aaa" folders."""
        su.stat_sublocal = self.mock.mock_stat_sublocal
        os.mkdir(os.path.join(self.gffs, "aaa.gff_folder"))
        gen_file(os.path.join(self.gffs, "aaa.gff_folder/aaa.gff"), "test")
        os.mkdir(os.path.join(self.out, "psortb_results"))
        gen_file(os.path.join(self.out, "aaa_raw.txt"), "test")
        gen_file(os.path.join(self.out, "aaa_table.csv"), "test")
        # Close the log before tearDown deletes the folder that holds it.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.sub._merge_and_stat(self.gffs, self.out, self.test_folder,
                                     self.stat, log)
        self.assertTrue(os.path.exists(os.path.join(self.stat, "aaa")))
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "aaa")))

    def test_compare_cds_tran(self):
        """_compare_cds_tran leaves one CDS line in all_CDSs/tmp_cds.gff."""
        gff_file = os.path.join(self.test_folder, "aaa.gff")
        tran_file = os.path.join(self.test_folder, "aaa_transcript.gff")
        gen_file(gff_file, self.example.gff_file)
        gen_file(tran_file, self.example.tran_file)
        # Close the log before tearDown deletes the folder that holds it.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.sub._compare_cds_tran(gff_file, tran_file, log)
        # Only the first value returned by extract_info is checked.
        datas, _ = extract_info("test_folder/output/all_CDSs/tmp_cds.gff",
                                "file")
        self.assertEqual("\n".join(datas), 'aaa\tRefSeq\tCDS\t3\t17\t.\t+\t.')