Exemplo n.º 1
0
class TestScreen(unittest.TestCase):
    """Exercise ``Screen`` against fixture files written under ``test_folder``."""

    def setUp(self):
        """Create the scratch directories, the FASTA fixture and the ``Screen``."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.output = os.path.join(self.test_folder, "output")
        self.tex_wig = os.path.join(self.test_folder, "tex")
        self.frag_wig = os.path.join(self.test_folder, "frag")
        # Ensure each directory on its own: a ``test_folder`` left behind by
        # an interrupted run must not leave the sub-directories missing.
        for folder in (self.test_folder, self.tex_wig,
                       self.frag_wig, self.output):
            os.makedirs(folder, exist_ok=True)
        self.fasta = os.path.join(self.test_folder, "aaa.fa")
        gen_file(self.fasta, self.example.fasta)
        args = self.mock_args.mock()
        args.output_folder = self.output
        args.fasta = self.fasta
        self.screen = Screen(args)

    def tearDown(self):
        """Delete the scratch tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_screenshot(self):
        """``screenshot`` writes forward/reverse outputs under ``screenshots/aaa``."""
        wig_fixtures = (
            (self.tex_wig, "tex_1_f.wig", self.example.wig_f),
            (self.tex_wig, "notex_1_f.wig", self.example.wig_f),
            (self.frag_wig, "frag_f.wig", self.example.wig_f),
            (self.tex_wig, "tex_1_r.wig", self.example.wig_r),
            (self.tex_wig, "notex_1_r.wig", self.example.wig_r),
            (self.frag_wig, "frag_r.wig", self.example.wig_r),
        )
        for folder, name, content in wig_fixtures:
            gen_file(os.path.join(folder, name), content)
        args = self.mock_args.mock()
        args.fasta = self.fasta
        args.main_gff = os.path.join(self.test_folder, "main.gff")
        gen_file(args.main_gff, self.example.main_gff)
        side_gff = os.path.join(self.test_folder, "side.gff")
        args.side_gffs = [side_gff]
        gen_file(side_gff, self.example.side_gff)
        args.frag_wigs = self.frag_wig
        args.tex_wigs = self.tex_wig
        args.height = 1000
        args.tlibs = ["tex_1_f.wig:tex:1:a:+", "tex_1_r.wig:tex:1:a:-",
                      "notex_1_f.wig:notex:1:a:+", "notex_1_r.wig:notex:1:a:-"]
        args.flibs = ["frag_f.wig:frag:1:a:+", "frag_r.wig:frag:1:a:-"]
        args.present = "expand"
        args.output_folder = self.output
        self.screen.screenshot(args)
        shots = os.path.join(self.output, "screenshots", "aaa")
        self.assertTrue(os.path.exists(os.path.join(shots, "forward")))
        self.assertTrue(os.path.exists(os.path.join(shots, "reverse")))
        # forward.txt is only loaded here; its contents are not compared.
        # NOTE(review): no expected-forward fixture is visible in this file;
        # add an assertEqual once one is available.
        import_data(os.path.join(shots, "forward.txt"))
        reverse_lines = import_data(os.path.join(shots, "reverse.txt"))
        self.assertEqual("\n".join(reverse_lines), self.example.out_r)

    def test_import_libs(self):
        """``_import_libs`` files "+" strand libraries under ``ft`` / ``fn``."""
        texs = [["tex_1.wig", "tex", "1", "a", "+"],
                ["notex_1.wig", "notex", "1", "a", "+"]]
        lib_dict = {"ft": [], "fn": [], "rt": [], "rn": [], "ff": [], "rf": []}
        self.screen._import_libs(texs, "+", self.tex_wig, lib_dict)
        self.assertDictEqual(lib_dict, {'fn': ['test_folder/tex/notex_1.wig'], 'rn': [],
                             'rt': [], 'ft': ['test_folder/tex/tex_1.wig'], 'rf': [], 'ff': []})
Exemplo n.º 2
0
 def setUp(self):
     """Create the scratch directory tree and the ``Terminator`` under test."""
     self.example = Example()
     self.mock_args = MockClass()
     self.mock = Mock_func()
     self.mock_parser = Mock_Multiparser()
     self.test_folder = "test_folder"
     self.out = "test_folder/output"
     self.fastas = "test_folder/fastas"
     self.gffs = "test_folder/gffs"
     self.srnas = "test_folder/srnas"
     self.trans = "test_folder/trans"
     # Ensure every directory individually: a stale ``test_folder`` from an
     # interrupted run must not leave the sub-directories missing.
     for folder in (self.test_folder, self.out, self.fastas, self.gffs,
                    self.srnas, self.trans,
                    os.path.join(self.out, "tables"),
                    os.path.join(self.out, "gffs"),
                    os.path.join(self.gffs, "tmp"),
                    os.path.join(self.fastas, "tmp")):
         os.makedirs(folder, exist_ok=True)
     args = self.mock_args.mock()
     args.gffs = self.gffs
     args.fastas = self.fastas
     args.trans = self.trans
     args.out_folder = self.out
     args.srnas = self.srnas
     self.term = Terminator(args)
Exemplo n.º 3
0
class TestGenScreenshots(unittest.TestCase):
    """Tests for the batch-writing helpers exposed by the ``gs`` module."""

    def setUp(self):
        """Load the fixtures and make sure ``test_folder`` exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_set_data_range(self):
        """``set_data_range`` writes a ``setDataRange`` line for each wig set."""
        gff_dict = {"seq_id": "aaa", "source": "Refseq",
                    "feature": "CDS", "start": 3,
                    "end": 6, "phase": ".", "strand": "+", "score": "."}
        attributes_gff = {"ID": "CDS0", "Name": "CDS_0",
                          "locus_tag": "AAA_00001"}
        gff = Create_generator(gff_dict, attributes_gff, "gff")
        out = StringIO()
        gs.set_data_range(out, gff, self.example.wigs_low, "+")
        self.assertEqual(out.getvalue(), "setDataRange 0,20\n")
        out.close()
        out = StringIO()
        gs.set_data_range(out, gff, self.example.wigs_high, "+")
        self.assertEqual(out.getvalue(), "setDataRange 0,510\n")

    def test_print_batch(self):
        """``print_batch`` output matches the ``Example.out`` fixture."""
        out = StringIO()
        lib_t = "wig1 wig2"
        lib_n = "wig3 wig4"
        lib_f = "wig5"
        args = self.mock_args.mock()
        args.fasta = "fasta"
        args.main_gff = "main_gff"
        # NOTE(review): "expend" may be a typo for "expand"; left unchanged
        # because the expected fixture was produced with this value.
        args.present = "expend"
        args.height = 1000
        args.side_gffs = ["test_folder/side1", "test_folder/side2"]
        gen_file("test_folder/side1", "test")
        gen_file("test_folder/side2", "test")
        args.output_folder = self.test_folder
        gs.print_batch(args, out, "+", lib_t, lib_n, lib_f, "test")
        self.assertEqual(out.getvalue(), self.example.out)

    def test_gen_batch(self):
        """``gen_batch`` output matches the ``Example.out_print_wig`` fixture."""
        # Function-scope import: only this test needs the patch helper.
        from unittest import mock
        # Install the wig-import stub for this test only; restoring the real
        # attribute afterwards keeps the patch from leaking into other tests.
        patcher = mock.patch.object(
            gs, "import_wig", Mock_func().mock_import_wig, create=True)
        patcher.start()
        self.addCleanup(patcher.stop)
        out = StringIO()
        lib_t = "wig1 wig2"
        lib_n = "wig3 wig4"
        lib_f = "wig5"
        gff_dict = {"seq_id": "aaa", "source": "Refseq",
                    "feature": "CDS", "start": 3,
                    "end": 6, "phase": ".", "strand": "+", "score": "."}
        attributes_gff = {"ID": "CDS0", "Name": "CDS_0",
                          "locus_tag": "AAA_00001"}
        gff = Create_generator(gff_dict, attributes_gff, "gff")
        seq = {"aaa": "ATATGGCCGACGAGTTCGACGATACAACCCGTGGGG"}
        gs.gen_batch(lib_t, lib_n, lib_f, "+", [gff], out, seq)
        self.assertEqual(out.getvalue(), self.example.out_print_wig)
Exemplo n.º 4
0
 def setUp(self):
     """Create the scratch tree and the ``TranscriptDetection`` under test."""
     self.mock_args = MockClass()
     self.mock = Mock_func()
     self.mock_parser = Mock_Multiparser()
     self.example = Example()
     self.test_folder = "test_folder"
     self.trans = "test_folder/trans"
     self.out = "test_folder/output"
     self.tex = "test_folder/tex"
     self.frag = "test_folder/frag"
     self.gffs = "test_folder/gffs"
     self.tsss = "test_folder/tsss"
     self.terms = "test_folder/terms"
     self.stat = "test_folder/output/statistics"
     self.out_gff = "test_folder/output/gffs"
     self.out_table = "test_folder/output/tables"
     # Ensure every directory individually: a stale ``test_folder`` from an
     # interrupted run must not leave the sub-directories missing.
     for folder in (self.test_folder, self.trans, self.out, self.tex,
                    self.frag, os.path.join(self.frag, "tmp"),
                    self.gffs, os.path.join(self.gffs, "tmp"),
                    self.tsss, self.terms, self.stat,
                    self.out_gff, self.out_table):
         os.makedirs(folder, exist_ok=True)
     args = self.mock_args.mock()
     args.out_folder = self.out
     self.tran = TranscriptDetection(args)
Exemplo n.º 5
0
class TestExtractRBS(unittest.TestCase):
    """Tests for ``detect_site`` and ``extract_seq`` of the ``er`` module."""

    def setUp(self):
        """Load the fixtures and make sure ``test_folder`` exists."""
        self.example = Example()
        self.test_folder = "test_folder"
        self.mock_args = MockClass()
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_detect_site(self):
        """Only the second candidate sequence is reported by ``detect_site``."""
        inters = [{"seq": "ATGGTGACCCAGGAGGTTGATCCCAGACGTAGGACCTGTTT"},
                  {"seq": "TTAGGACGTACTCCTCGAATGATCAACTGATACTTA"},
                  {"seq": "TTTTTTTTTAAAAAAAAAATATATATTTTTTTTTTT"}]
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.end_rbs = 14
        args.start_rbs = 5
        args.fuzzy_rbs = 2
        ribos = er.detect_site(inters, args)
        self.assertListEqual(ribos, [{'seq': 'TTAGGACGTACTCCTCGAATGATCAACTGATACTTA'}])

    def test_extract_seq(self):
        """``extract_seq`` returns the two expected intervals for the fixtures."""
        # Function-scope import: only this test needs the patch helper.
        from unittest import mock
        # Install the helper stub for this test only; restoring the original
        # afterwards keeps the patch from leaking into other tests.
        patcher = mock.patch.object(er, "helper", Mock_Helper, create=True)
        patcher.start()
        self.addCleanup(patcher.stop)
        inters = er.extract_seq(self.example.gffs, self.example.seq,
                                self.example.tsss, self.example.tas, 5, 300)
        self.assertDictEqual(inters[0], {'protein': 'AAA_00001', 'strain': 'aaa', 'start': 2, 'seq': 'AAAATTAT', 'end': 3, 'strand': '+'})
        self.assertDictEqual(inters[1], {'protein': 'AAA_00001', 'strain': 'aaa', 'start': 1, 'seq': 'AAAATTAT', 'end': 3, 'strand': '+'})
Exemplo n.º 6
0
 def setUp(self):
     """Create the scratch directory tree and the ``UTRDetection`` under test."""
     self.mock_args = MockClass()
     self.mock = Mock_func()
     self.mock_parser = Mock_Multiparser()
     self.example = Example()
     self.test_folder = "test_folder"
     self.trans = "test_folder/trans"
     self.out = "test_folder/output"
     self.gffs = "test_folder/gffs"
     self.tsss = "test_folder/tsss"
     self.terms = "test_folder/terms"
     # Ensure every directory individually: a stale ``test_folder`` from an
     # interrupted run must not leave the sub-directories missing.
     for folder in (self.test_folder, self.trans,
                    os.path.join(self.trans, "tmp"),
                    self.out, self.gffs, self.tsss,
                    os.path.join(self.tsss, "tmp"),
                    self.terms):
         os.makedirs(folder, exist_ok=True)
     args = self.mock_args.mock()
     args.tsss = self.tsss
     args.trans = self.trans
     args.out_folder = self.out
     self.utr = UTRDetection(args)
Exemplo n.º 7
0
class TestOptimizeTSS(unittest.TestCase):
    """Test ``opt.optimize_tss`` with its collaborators replaced by stubs."""

    def setUp(self):
        """Create the input folders, each with a ``tmp`` sub-directory."""
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.fastas = os.path.join(self.test_folder, "fasta")
        self.wigs = os.path.join(self.test_folder, "wigs")
        self.gffs = os.path.join(self.test_folder, "gffs")
        self.manuals = os.path.join(self.test_folder, "manuals")
        # makedirs also creates the parents, and ``exist_ok`` keeps a stale
        # ``test_folder`` from leaving the sub-directories missing.
        for folder in (self.fastas, self.wigs, self.gffs, self.manuals):
            os.makedirs(os.path.join(folder, "tmp"), exist_ok=True)

    def tearDown(self):
        """Delete the scratch tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_optimize_tss(self):
        """``optimize_tss`` leaves ``test.csv`` in the output folder."""
        # Function-scope import: only this test needs the patch helper.
        from unittest import mock
        # Install the stubs for this test only; restoring the originals
        # afterwards keeps the patches from leaking into other tests.
        for name, stub in (("Helper", Mock_helper),
                           ("Multiparser", Mock_multiparser),
                           ("optimization", Mock_func().mock_optimization)):
            patcher = mock.patch.object(opt, name, stub, create=True)
            patcher.start()
            self.addCleanup(patcher.stop)
        gen_file(os.path.join(self.gffs, "tmp", "test.gff"), "test")
        gen_file(os.path.join(self.fastas, "tmp", "test.fa"), "test")
        args = self.mock_args.mock()
        args.fastas = self.fastas
        args.gffs = self.gffs
        args.wigs = self.wigs
        args.tsspredator_path = "test"
        args.manuals = self.manuals
        gen_file(os.path.join(self.manuals, "tmp", "test.gff"), "test")
        args.output_folder = self.test_folder
        args.project_strain = "test"
        args.height = 9
        args.height_reduction = 9
        args.factor = 9
        args.factor_reduction = 9
        args.base_height = 9
        args.enrichment = 9
        args.processing = 9
        args.utr = 200
        args.libs = "test"
        args.replicate_name = "test"
        args.cluster = 2
        args.strain_lengths = {"test": 100}
        args.cores = 4
        args.program = "TSS"
        args.replicate = 2
        args.steps = 2000
        # The context manager closes the log even when the call or the
        # assertion fails, so tearDown never removes a still-open file.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            opt.optimize_tss(args, log)
            self.assertTrue(os.path.exists(os.path.join(
                self.test_folder, "test.csv")))
Exemplo n.º 8
0
 def setUp(self):
     """Create the scratch directory tree and the ``TSSpredator`` under test."""
     self.mock_args = MockClass()
     self.mock = Mock_func()
     self.mock_parser = Mock_Multiparser()
     self.example = Example()
     self.test_folder = "test_folder"
     self.trans = "test_folder/trans"
     self.out = "test_folder/output"
     self.wigs = "test_folder/wigs"
     self.gffs = "test_folder/gffs"
     self.tsss = "test_folder/tsss"
     self.fastas = "test_folder/fastas"
     self.manual = "test_folder/manuals"
     # Ensure every directory individually: a stale ``test_folder`` from an
     # interrupted run must not leave the sub-directories missing.
     for folder in (self.test_folder, self.trans, self.out, self.wigs,
                    self.gffs, self.tsss, self.fastas, self.manual):
         os.makedirs(folder, exist_ok=True)
     args = self.mock_args.mock()
     args.out_folder = self.out
     args.ta_files = self.trans
     args.gffs = self.gffs
     args.wig_folder = self.wigs
     args.fastas = self.fastas
     args.manual = self.manual
     self.tss = TSSpredator(args)
Exemplo n.º 9
0
 def setUp(self):
     """Create the scratch directory tree and the ``sORFDetection`` under test."""
     self.mock = Mock_func()
     self.mock_args = MockClass()
     self.test_folder = "test_folder"
     self.tsss = "test_folder/tsss"
     self.srnas = "test_folder/sRNA"
     self.out = "test_folder/output"
     self.trans = "test_folder/trans"
     self.fastas = "test_folder/fastas"
     self.tex = "test_folder/tex"
     self.frag = "test_folder/frag"
     self.gffs = "test_folder/gffs"
     # Ensure every directory individually: a stale ``test_folder`` from an
     # interrupted run must not leave the sub-directories missing.
     for folder in (self.test_folder, self.tsss, self.out, self.trans,
                    self.fastas, self.tex, self.frag, self.srnas,
                    self.gffs):
         os.makedirs(folder, exist_ok=True)
     args = self.mock_args.mock()
     args.tsss = self.tsss
     args.srnas = self.srnas
     args.out_folder = self.out
     args.trans = self.trans
     args.fastas = self.fastas
     self.sorf = sORFDetection(args)
Exemplo n.º 10
0
 def setUp(self):
     """Build a ``Converter`` wired to mocks and expose the example fixtures.

     The converter's ``gff3parser``, ``_print_rntptt_title``,
     ``tsspredator`` and ``_read_file`` attributes are replaced by mocks,
     the ``Example`` fixtures are copied onto the test case, and
     ``test_folder`` is created when it does not exist yet.
     """
     self.converter = Converter()
     self.example = Example()
     converter = self.converter
     converter.gff3parser = Mock_gff3_parser
     converter._print_rntptt_title = Mock_func().print_rntptt_title
     converter.tsspredator = Mock_TSSPredatorReader()
     converter._read_file = Mock_func().mock_read_file
     # Fixtures exposed on the test case under the same name as on Example.
     same_name = ("gff_file", "ptt_out", "rnt_out", "srna_out",
                  "embl_file", "embl_out", "multi_embl", "gff_out",
                  "mastertable", "tss_file", "fasta_file", "transterm",
                  "term_file")
     for name in same_name:
         setattr(self, name, getattr(self.example, name))
     # circRNA fixtures are exposed under shorter names.
     renamed = (("circ_file", "circrna_table"),
                ("circ_all", "circrna_all"),
                ("circ_best", "circrna_best"))
     for target, source in renamed:
         setattr(self, target, getattr(self.example, source))
     self.test_folder = "test_folder"
     self.mock_args = MockClass()
     if not os.path.exists(self.test_folder):
         os.mkdir(self.test_folder)
Exemplo n.º 11
0
 def setUp(self):
     """Create the scratch directory tree and the ``SNPCalling`` under test."""
     self.example = Example()
     self.mock_args = MockClass()
     self.test_folder = "test_folder"
     self.fasta = os.path.join(self.test_folder, "fasta")
     self.snp_folder = os.path.join(self.test_folder, "snp")
     self.table = os.path.join(self.test_folder, "table")
     compare = os.path.join(
         self.test_folder, "compare_related_and_reference_genomes")
     # makedirs creates the missing parents too, and ``exist_ok`` keeps a
     # stale ``test_folder`` from leaving the sub-directories missing.
     for folder in (self.fasta, self.snp_folder, self.table,
                    os.path.join(compare, "seqs", "with_BAQ"),
                    os.path.join(compare, "statistics"),
                    os.path.join(compare, "SNP_raw_outputs")):
         os.makedirs(folder, exist_ok=True)
     args = self.mock_args.mock()
     args.types = "related_genome"
     args.out_folder = self.test_folder
     args.fastas = self.fasta
     self.snp = SNPCalling(args)
     self.mock = Mock_func()
Exemplo n.º 12
0
 def setUp(self):
     """Create the scratch tree and the ``OperonDetection`` under test."""
     self.test_folder = "test_folder"
     self.mock_args = MockClass()
     self.mock = Mock_func()
     self.tsss = os.path.join(self.test_folder, "tsss")
     self.trans = os.path.join(self.test_folder, "trans")
     self.utr5s = os.path.join(self.test_folder, "utr5s")
     self.utr3s = os.path.join(self.test_folder, "utr3s")
     self.output = os.path.join(self.test_folder, "output")
     self.gffs = os.path.join(self.test_folder, "gffs")
     self.out_gff = os.path.join(self.output, "gffs")
     self.stat = os.path.join(self.test_folder, "stat")
     # makedirs creates the missing parents too, and ``exist_ok`` keeps a
     # stale ``test_folder`` from leaving the sub-directories missing.
     for folder in (self.gffs, self.stat,
                    os.path.join(self.tsss, "tmp"),
                    os.path.join(self.trans, "tmp"),
                    os.path.join(self.utr5s, "tmp"),
                    os.path.join(self.utr3s, "tmp"),
                    self.out_gff,
                    os.path.join(self.output, "tables")):
         os.makedirs(folder, exist_ok=True)
     args = self.mock_args.mock()
     args.tsss = self.tsss
     args.trans = self.trans
     args.utr5s = self.utr5s
     args.utr3s = self.utr3s
     args.output_folder = self.output
     args.terms = None
     self.operon = OperonDetection(args)
Exemplo n.º 13
0
 def setUp(self):
     """Create the scratch directory tree and the ``RATT`` under test."""
     self.mock_args = MockClass()
     self.test_folder = "test_folder"
     self.ref_embls = "test_folder/embls"
     self.output_path = "test_folder/output"
     self.tar_fastas = "test_folder/tar_fasta"
     self.ref_fastas = "test_folder/ref_fasta"
     self.gff_outfolder = "test_folder/gffs"
     self.ref_gbk = "test_folder/gbk"
     # Ensure every directory individually: a stale ``test_folder`` from an
     # interrupted run must not leave the sub-directories missing.
     for folder in (self.test_folder, self.ref_embls, self.ref_gbk,
                    self.output_path, self.tar_fastas, self.ref_fastas,
                    self.gff_outfolder):
         os.makedirs(folder, exist_ok=True)
     args = self.mock_args.mock()
     args.output_path = self.output_path
     args.ref_embls = self.ref_embls
     args.ref_gbk = self.ref_gbk
     args.tar_fastas = self.tar_fastas
     args.ref_fastas = self.ref_fastas
     args.gff_outfolder = self.gff_outfolder
     self.ratt = RATT(args)
     self.example = Example()
 def setUp(self):
     """Start from an empty ``test_project`` directory and load the fixtures."""
     project_dir = "test_project"
     self.test_folder = project_dir
     # Remove whatever is already at that path so the directory starts empty.
     if os.path.exists(project_dir):
         shutil.rmtree(project_dir)
     os.mkdir(project_dir)
     self.example = Example()
     self.mock_args = MockClass()
Exemplo n.º 15
0
 def setUp(self):
     """Load the fixtures and mocks, creating ``test_folder`` when absent."""
     self.example = Example()
     self.mock = Mock_func()
     self.mock_args = MockClass()
     scratch = "test_folder"
     self.test_folder = scratch
     if not os.path.exists(scratch):
         os.mkdir(scratch)
Exemplo n.º 16
0
 def setUp(self):
     """Create the scratch directory tree and the ``SubLocal`` under test."""
     self.example = Example()
     self.mock_args = MockClass()
     self.mock = Mock_func()
     self.test_folder = "test_folder"
     self.out = "test_folder/output"
     self.fastas = "test_folder/fastas"
     self.gffs = "test_folder/gffs"
     self.stat = "test_folder/stat"
     self.trans = "test_folder/tran"
     # Ensure every directory individually: a stale ``test_folder`` from an
     # interrupted run must not leave the sub-directories missing.
     for folder in (self.test_folder, self.out, self.fastas,
                    os.path.join(self.fastas, "tmp"),
                    self.gffs, os.path.join(self.gffs, "tmp"),
                    self.stat, self.trans):
         os.makedirs(folder, exist_ok=True)
     args = self.mock_args.mock()
     args.gffs = self.gffs
     args.fastas = self.fastas
     args.out_folder = self.out
     args.trans = self.trans
     self.sub = SubLocal(args)
Exemplo n.º 17
0
 def setUp(self):
     """Create the scratch tree and the ``sRNATargetPrediction`` under test."""
     self.mock_args = MockClass()
     self.example = Example()
     self.mock = Mock_func()
     self.test_folder = "test_folder"
     self.gffs = "test_folder/gffs"
     self.srnas = "test_folder/srnas"
     self.out = "test_folder/output"
     self.fastas = "test_folder/fastas"
     self.seq = "test_folder/output/sRNA_seqs"
     self.rnaup = "test_folder/output/RNAup"
     self.rnaplex = "test_folder/output/RNAplex"
     self.merge = "test_folder/output/merge"
     # Ensure every directory individually: a stale ``test_folder`` from an
     # interrupted run must not leave the sub-directories missing.
     for folder in (self.test_folder, self.gffs, self.out, self.srnas,
                    self.fastas, self.rnaup, self.rnaplex, self.seq,
                    self.merge, os.path.join(self.rnaup, "test")):
         os.makedirs(folder, exist_ok=True)
     args = self.mock_args.mock()
     args.out_folder = self.out
     args.srnas = self.srnas
     args.fastas = self.fastas
     args.gffs = self.gffs
     self.star = sRNATargetPrediction(args)
Exemplo n.º 18
0
 def setUp(self):
     """Create the scratch tree and the ``CircRNADetection`` under test."""
     self.segemehl = Mock_segemehl()
     self.samtools = Mock_samtools()
     self.mock_args = MockClass()
     self.example = Example()
     self.test_folder = "test_folder"
     self.fasta_folder = os.path.join(self.test_folder, "fasta")
     self.gff_folder = os.path.join(self.test_folder, "gff")
     self.out_folder = os.path.join(self.test_folder, "output")
     self.read_folder = os.path.join(self.test_folder, "read")
     # Ensure every directory individually, so ``fasta/tmp`` is created even
     # when ``fasta`` itself is already present.
     for folder in (self.test_folder, self.fasta_folder,
                    os.path.join(self.fasta_folder, "tmp"),
                    self.gff_folder, self.out_folder, self.read_folder):
         os.makedirs(folder, exist_ok=True)
     args = self.mock_args.mock()
     args.output_folder = self.out_folder
     args.gffs = self.gff_folder
     args.align = True
     args.fastas = self.fasta_folder
     self.circ = CircRNADetection(args)
Exemplo n.º 19
0
class TestMEME(unittest.TestCase):
    """Tests for the FASTA merge/split helpers of ``MEME``."""

    def setUp(self):
        """Create the scratch directory tree and the ``MEME`` under test."""
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.out_folder = "test_folder/output"
        self.tss_folder = os.path.join(self.test_folder, "tss_folder")
        self.gff_folder = os.path.join(self.test_folder, "gff_folder")
        self.fa_folder = os.path.join(self.test_folder, "fa_folder")
        # Ensure every directory individually: a stale ``test_folder`` from
        # an interrupted run must not leave the output folders missing.
        for folder in (self.test_folder, self.out_folder,
                       os.path.join(self.out_folder, "fasta_output"),
                       self.tss_folder, self.gff_folder, self.fa_folder):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.tsss = self.tss_folder
        args.fastas = self.fa_folder
        args.gffs = self.gff_folder
        args.output_folder = self.out_folder
        self.meme = MEME(args)

    def tearDown(self):
        """Delete the scratch tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_move_and_merge_fasta(self):
        """``_move_and_merge_fasta`` writes one merged FASTA per TSS type."""
        # Function-scope import: only this test needs the patch helper.
        from unittest import mock
        # Install the stub for this test only; restoring the original
        # afterwards keeps the patch from leaking into other tests.
        patcher = mock.patch.object(
            me, "del_repeat_fasta", Mock_func().mock_del_repeat_fasta,
            create=True)
        patcher.start()
        self.addCleanup(patcher.stop)
        if (not os.path.exists("tmp")):
            os.mkdir("tmp")
        gen_file("tmp/primary.fa", "primary")
        gen_file("tmp/secondary.fa", "secondary")
        gen_file("tmp/internal.fa", "internal")
        gen_file("tmp/antisense.fa", "antisense")
        gen_file("tmp/orphan.fa", "orphan")
        self.meme._move_and_merge_fasta(self.test_folder, "test")
        for suffix in ("all_types", "primary", "secondary", "internal",
                       "antisense", "orphan", "without_orphan"):
            merged = os.path.join(
                self.test_folder, "test_allstrain_" + suffix + ".fa")
            self.assertTrue(os.path.exists(merged))

    def test_split_fasta_by_strain(self):
        """``_split_fasta_by_strain`` writes ``aaa.fa`` and ``bbb.fa``."""
        with open(os.path.join(self.fa_folder, "allstrain.fa"), "w") as fh:
            fh.write(""">aaa_aaa_aaa
ATTATATATA
>bbb_bbb_bbb
AATTAATTAA""")
        self.meme._split_fasta_by_strain(self.fa_folder)
        # Check that the files exist: a bare os.path.join() result is a
        # non-empty string and would make assertTrue pass unconditionally.
        self.assertTrue(os.path.exists(os.path.join(self.fa_folder, "aaa.fa")))
        self.assertTrue(os.path.exists(os.path.join(self.fa_folder, "bbb.fa")))
Exemplo n.º 20
0
class TestPotentialTarget(unittest.TestCase):
    """Tests for the target-sequence helpers of the ``pt`` module."""

    def setUp(self):
        """Load the fixtures and make sure ``test_folder`` exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_read_file(self):
        """``read_file`` returns the sequence, both CDS lists and the genes."""
        seq_file = os.path.join(self.test_folder, "seq")
        gff_file = os.path.join(self.test_folder, "gff")
        gen_file(seq_file, self.example.seq_file)
        gen_file(gff_file, self.example.gff_file)
        fasta, cdss_f, cdss_r, genes = pt.read_file(
            seq_file, gff_file, "test", ["CDS"])
        self.assertEqual(
            fasta,
            "AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC")
        self.assertEqual(cdss_f[0].start, 1)
        self.assertEqual(cdss_f[0].feature, "CDS")
        self.assertEqual(cdss_r[0].start, 14)
        self.assertEqual(cdss_r[0].feature, "CDS")
        self.assertEqual(len(genes), 2)
        self.assertEqual(genes[0].start, 1)
        self.assertEqual(genes[1].start, 14)

    def test_deal_cds_forward(self):
        """``deal_cds_forward`` writes the expected ``aaa_target.fa``."""
        pt.deal_cds_forward(self.example.cdss_f, self.test_folder,
                            self.example.fasta, self.example.genes, 2, 10)
        data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
        # assertEqual, not assertTrue: assertTrue treats its second argument
        # as the failure message, so the comparison would never happen.
        self.assertEqual("\n".join(data), self.example.cdsf_result)

    def test_deal_cds_reverse(self):
        """``deal_cds_reverse`` writes the expected ``aaa_target.fa``."""
        pt.deal_cds_reverse(self.example.cdss_r, self.test_folder,
                            self.example.fasta, self.example.genes, 2, 10)
        data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
        # NOTE(review): this compares against the *forward* fixture
        # (``cdsf_result``), as the original did; confirm whether a
        # reverse-specific fixture should be used instead.
        self.assertEqual("\n".join(data), self.example.cdsf_result)

    def test_potential_target(self):
        """``potential_target`` writes the expected ``aaa_target.fa``."""
        seq_file = os.path.join(self.test_folder, "seq")
        gff_file = os.path.join(self.test_folder, "gff")
        gen_file(seq_file, self.example.seq_file)
        gen_file(gff_file, self.example.gff_file)
        args = self.mock_args.mock()
        args.tar_start = 2
        args.tar_end = 10
        args.features = ["CDS"]
        pt.potential_target(gff_file, seq_file, self.test_folder, args)
        data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
        self.assertEqual("\n".join(data), self.example.all_result)
Exemplo n.º 21
0
class TestGetPolyT(unittest.TestCase):
    """Test ``GoTermFinding._merge_files`` on a small scratch tree.

    NOTE(review): the class name mentions PolyT although the object under
    test is ``GoTermFinding``; the name is kept so test IDs stay stable.
    """

    def setUp(self):
        """Create the scratch directories and the ``GoTermFinding`` under test."""
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.gffs = os.path.join(self.test_folder, "gff_folder")
        self.go_folder = os.path.join(self.test_folder, "go_folder")
        self.all_strain = "all_genomes_uniprot.csv"
        self.trans = os.path.join(self.test_folder, "tran_folder")
        for folder in (self.test_folder, self.gffs,
                       self.go_folder, self.trans):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.out_folder = self.test_folder
        args.gffs = self.gffs
        args.trans = self.trans
        self.go = GoTermFinding(args)

    def tearDown(self):
        """Delete the scratch tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_merge_files(self):
        """``_merge_files`` concatenates the per-genome tables under a header."""
        gff_folder = os.path.join(self.gffs, "test.gff_folder")
        test1_folder = os.path.join(self.go_folder, "test1")
        test2_folder = os.path.join(self.go_folder, "test2")
        for folder in (gff_folder, test1_folder, test2_folder):
            os.makedirs(folder, exist_ok=True)
        fixtures = (
            (os.path.join(gff_folder, "test1.gff"), "test1"),
            (os.path.join(gff_folder, "test2.gff"), "test2"),
            (os.path.join(test1_folder, "test1_uniprot.csv"), "test1"),
            (os.path.join(test2_folder, "test2_uniprot.csv"), "test2"),
        )
        for path, content in fixtures:
            with open(path, "w") as fh:
                fh.write(content)
        # The context manager closes the log even when the call raises, so
        # tearDown never removes a still-open file.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.go._merge_files(
                self.gffs, self.go_folder, self.test_folder, log)
        out_file = os.path.join(self.go_folder, "test", self.all_strain)
        self.assertTrue(os.path.exists(out_file))
        with open(out_file) as fh:
            merged = fh.read()
        self.assertEqual(merged, "Genome	Strand	Start	End	Protein_id	Go_term\ntest1\ntest2\n")
Exemplo n.º 22
0
 def setUp(self):
     """Load the fixtures and create the scratch directory tree."""
     self.example = Example()
     self.mock_args = MockClass()
     self.mock = Mock_func()
     self.test_folder = "test_folder"
     self.fasta = "test_folder/fasta"
     self.wigs = "test_folder/wig"
     self.gff = "test_folder/gff"
     # Ensure every directory individually: a stale ``test_folder`` from an
     # interrupted run must not leave the sub-directories missing.
     for folder in (self.test_folder, self.fasta, self.wigs, self.gff):
         os.makedirs(folder, exist_ok=True)
Exemplo n.º 23
0
class TestGetPolyT(unittest.TestCase):
    """Tests for ``GoTermFinding._merge_files``.

    NOTE(review): the class name mentions poly-T, yet everything here drives
    ``GoTermFinding`` -- presumably a copy/paste leftover.  The name is kept
    so that selecting the test by name keeps working.
    """

    def setUp(self):
        """Create the scratch folders and a ``GoTermFinding`` under test."""
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.gffs = os.path.join(self.test_folder, "gff_folder")
        self.go_folder = os.path.join(self.test_folder, "go_folder")
        self.all_strain = "all_strains_uniprot.csv"
        self.trans = os.path.join(self.test_folder, "tran_folder")
        for folder in (self.test_folder, self.gffs,
                       self.go_folder, self.trans):
            if not os.path.exists(folder):
                os.mkdir(folder)
        args = self.mock_args.mock()
        args.out_folder = self.test_folder
        args.gffs = self.gffs
        args.trans = self.trans
        self.go = GoTermFinding(args)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_merge_files(self):
        """The merged table must be exactly the two inputs concatenated."""
        gff_folder = os.path.join(self.gffs, "test.gff_folder")
        test1_folder = os.path.join(self.go_folder, "test1")
        test2_folder = os.path.join(self.go_folder, "test2")
        for folder in (gff_folder, test1_folder, test2_folder):
            if not os.path.exists(folder):
                os.mkdir(folder)
        fixtures = (
            (os.path.join(gff_folder, "test1.gff"), "test1"),
            (os.path.join(gff_folder, "test2.gff"), "test2"),
            (os.path.join(test1_folder, "test1_uniprot.csv"), "test1"),
            (os.path.join(test2_folder, "test2_uniprot.csv"), "test2"),
        )
        for path, content in fixtures:
            with open(path, "w") as fh:
                fh.write(content)
        self.go._merge_files(self.gffs, self.go_folder, self.test_folder)
        out_file = os.path.join(self.go_folder, "test", self.all_strain)
        self.assertTrue(os.path.exists(out_file))
        # Compare the whole content at once.  Asserting inside a
        # ``for line in fh`` loop passes vacuously when the file is empty.
        with open(out_file) as fh:
            merged = fh.read()
        self.assertEqual(merged, "test1test2")
Exemplo n.º 24
0
 def setUp(self):
     """Create the fasta/wigs/gffs/manuals folders, each with a ``tmp``."""
     self.mock_args = MockClass()
     self.test_folder = "test_folder"
     self.fastas = os.path.join(self.test_folder, "fasta")
     self.wigs = os.path.join(self.test_folder, "wigs")
     self.gffs = os.path.join(self.test_folder, "gffs")
     self.manuals = os.path.join(self.test_folder, "manuals")
     # makedirs on "<parent>/tmp" also creates test_folder and the parent.
     # Every folder is ensured on its own so that a test_folder left behind
     # by an aborted run no longer causes the sub-folders to be skipped.
     for parent in (self.fastas, self.wigs, self.gffs, self.manuals):
         os.makedirs(os.path.join(parent, "tmp"), exist_ok=True)
Exemplo n.º 25
0
    def setUp(self):
        """Create the result folders and the ``PPINetwork`` under test."""
        self.test_folder = "test_folder"
        self.mock_args = MockClass()
        # NOTE: "tmp_nospecific" was disabled in the original fixture and is
        # still deliberately not created.
        sub_folders = (
            "tmp_id_list",
            "with_strain/test_ptt",
            "without_strain/test_ptt",
            "all_results",
            "best_results",
            "figures",
        )
        # Each folder is ensured individually (parents included) so that a
        # stale test_folder from an aborted run cannot leave any missing.
        for sub_folder in sub_folders:
            os.makedirs(os.path.join(self.test_folder, sub_folder),
                        exist_ok=True)
        self.ppi = PPINetwork(self.test_folder)
        self.mock = Mock_func()
        self.example = Example()
Exemplo n.º 26
0
 def setUp(self):
     """Create the scratch folder layout and the ``Ribos`` under test."""
     self.example = Example()
     self.mock_args = MockClass()
     self.mock = Mock_func()
     self.test_folder = "test_folder"
     self.gffs = os.path.join(self.test_folder, "gffs")
     self.fastas = os.path.join(self.test_folder, "fastas")
     self.out_folder = os.path.join(self.test_folder, "output")
     self.database = os.path.join(self.test_folder, "database")
     self.seq_path = os.path.join(self.test_folder, "seqs")
     self.tables = os.path.join(self.out_folder, "tables")
     self.stat = os.path.join(self.out_folder, "statistics")
     self.scan = os.path.join(self.test_folder, "scan")
     self.tsss = os.path.join(self.test_folder, "tsss")
     self.trans = os.path.join(self.test_folder, "trans")
     self.out_gff = os.path.join(self.out_folder, "gffs")
     folders = [
         os.path.join(self.tsss, "tmp"),
         os.path.join(self.trans, "tmp"),
         os.path.join(self.gffs, "tmp"),
         os.path.join(self.fastas, "tmp"),
         self.database,
         self.seq_path,
         os.path.join(self.out_folder, "tmp_table"),
         os.path.join(self.out_folder, "tmp_scan"),
         os.path.join(self.out_folder, "tmp_fasta"),
         os.path.join(self.out_folder, "scan_Rfam"),
         self.tables,
         self.scan,
         self.stat,
         self.out_gff,
     ]
     # makedirs creates missing parents (test_folder, output, ...) and
     # tolerates existing folders, so a stale test_folder left by an aborted
     # run no longer causes the whole layout to be skipped.
     for folder in folders:
         os.makedirs(folder, exist_ok=True)
     args = self.mock_args.mock()
     args.gffs = self.gffs
     args.fastas = self.fastas
     args.ribos_out_folder = self.out_folder
     args.database = self.database
     args.tsss = self.tsss
     args.trans = self.trans
     # NOTE(review): 'riboswtich' looks like a misspelling of 'riboswitch';
     # left untouched because Ribos is not visible here -- confirm.
     args.program = 'riboswtich'
     self.ribo = Ribos(args)
Exemplo n.º 27
0
 def setUp(self):
     """Create the wig/output folders, a fasta file and the ``Screen``."""
     self.example = Example()
     self.mock_args = MockClass()
     self.test_folder = "test_folder"
     self.output = os.path.join(self.test_folder, "output")
     self.tex_wig = os.path.join(self.test_folder, "tex")
     self.frag_wig = os.path.join(self.test_folder, "frag")
     # Ensure each folder on its own; a single guard on test_folder skipped
     # all of them when a stale test_folder was left by an aborted run.
     for folder in (self.tex_wig, self.frag_wig, self.output):
         os.makedirs(folder, exist_ok=True)
     self.fasta = os.path.join(self.test_folder, "aaa.fa")
     gen_file(self.fasta, self.example.fasta)
     args = self.mock_args.mock()
     args.output_folder = self.output
     args.fasta = self.fasta
     self.screen = Screen(args, self.output)
Exemplo n.º 28
0
 def setUp(self):
     """Create the scratch folders and the ``GoTermFinding`` under test."""
     self.mock_args = MockClass()
     self.test_folder = "test_folder"
     self.gffs = os.path.join(self.test_folder, "gff_folder")
     self.go_folder = os.path.join(self.test_folder, "go_folder")
     self.trans = os.path.join(self.test_folder, "tran_folder")
     self.all_strain = "all_genomes_uniprot.csv"
     # Parent first, then its children; each one only when it is missing.
     needed = (self.test_folder, self.gffs, self.go_folder, self.trans)
     for folder in needed:
         if not os.path.exists(folder):
             os.mkdir(folder)
     mocked_args = self.mock_args.mock()
     mocked_args.out_folder = self.test_folder
     mocked_args.gffs = self.gffs
     mocked_args.trans = self.trans
     self.go = GoTermFinding(mocked_args)
Exemplo n.º 29
0
 def setUp(self):
     """Create the scratch folder layout and the ``sRNADetection`` object."""
     self.mock_args = MockClass()
     self.example = Example()
     self.mock = Mock_func()
     self.test_folder = "test_folder"
     self.gffs = "test_folder/gffs"
     self.tsss = "test_folder/tsss"
     self.sorf = "test_folder/sORF"
     self.out = "test_folder/output"
     self.trans = "test_folder/trans"
     self.fastas = "test_folder/fastas"
     self.tex = "test_folder/tex"
     self.frag = "test_folder/frag"
     self.pros = "test_folder/pros"
     self.terms = "test_folder/terms"
     folders = [
         self.gffs,
         os.path.join(self.tsss, "tmp"),
         self.out,
         os.path.join(self.trans, "tmp"),
         os.path.join(self.fastas, "tmp"),
         self.tex,
         self.frag,
         os.path.join(self.pros, "tmp"),
         os.path.join(self.sorf, "tmp"),
         self.terms,
     ]
     # makedirs creates missing parents and accepts existing folders, so a
     # test_folder left over from an aborted run does not leave gaps.
     for folder in folders:
         os.makedirs(folder, exist_ok=True)
     args = self.mock_args.mock()
     args.tss_folder = self.tsss
     args.pro_folder = self.pros
     args.out_folder = self.out
     args.sorf_file = self.sorf
     args.fastas = self.fastas
     args.trans = self.trans
     args.terms = self.terms
     self.srna = sRNADetection(args)
Exemplo n.º 30
0
 def setUp(self):
     """Create the input/output folders and the ``MEME`` object under test."""
     self.mock_args = MockClass()
     self.test_folder = "test_folder"
     self.out_folder = "test_folder/output"
     self.tss_folder = os.path.join(self.test_folder, "tss_folder")
     self.gff_folder = os.path.join(self.test_folder, "gff_folder")
     self.fa_folder = os.path.join(self.test_folder, "fa_folder")
     folders = (
         os.path.join(self.out_folder, "fasta_output"),
         self.tss_folder,
         self.gff_folder,
         self.fa_folder,
     )
     # The output folders used to be created only when test_folder itself
     # was absent; ensure every folder individually (parents included).
     for folder in folders:
         os.makedirs(folder, exist_ok=True)
     args = self.mock_args.mock()
     args.tsss = self.tss_folder
     args.fastas = self.fa_folder
     args.gffs = self.gff_folder
     args.output_folder = self.out_folder
     self.meme = MEME(args)
Exemplo n.º 31
0
 def setUp(self):
     """Instantiate the example-data holder and the mock-args factory."""
     # Evaluated left to right, i.e. Example() is built before MockClass().
     self.example, self.mock_args = Example(), MockClass()
Exemplo n.º 32
0
class TestCoverageDetection(unittest.TestCase):
    """Unit tests for the functions of the ``cover_detect`` module."""

    @staticmethod
    def _library_medians():
        """Return a fresh per-track median/mean table used by several tests."""
        return {
            "track1_tex": {"median": 100, "mean": 200},
            "track1_notex": {"median": 30, "mean": 80},
            "track2_tex": {"median": 150, "mean": 200},
            "track2_notex": {"median": 10, "mean": 20},
            "frag": {"median": 80, "mean": 100},
        }

    def setUp(self):
        """Instantiate the example data and the mock-args factory."""
        self.example = Example()
        self.mock_args = MockClass()

    def test_coverage_comparison_first(self):
        """On the first position both extremes take the current value."""
        first = True
        cover_sets = {"high": -1, "low": -1}
        poss = {"high": -1, "low": -1}
        cover = 100
        cover_detect.coverage_comparison(cover, cover_sets, poss, first, "+",
                                         50)
        self.assertDictEqual(cover_sets, {"high": 100, "low": 100})
        self.assertDictEqual(poss, {"high": 50, "low": 50})

    def test_coverage_comparison_forward(self):
        """Forward strand: a new high resets low, a lower value updates low."""
        first = False
        cover_sets = {"high": 50, "low": 20}
        poss = {"high": 10, "low": 30}
        cover = 100
        cover_detect.coverage_comparison(cover, cover_sets, poss, first, "+",
                                         50)
        self.assertDictEqual(cover_sets, {"high": 100, "low": 100})
        self.assertDictEqual(poss, {"high": 50, "low": 50})
        cover = 30
        cover_detect.coverage_comparison(cover, cover_sets, poss, first, "+",
                                         51)
        self.assertDictEqual(cover_sets, {"high": 100, "low": 30})
        self.assertDictEqual(poss, {"high": 50, "low": 51})

    def test_coverage_comparison_reverse(self):
        """Reverse strand counterpart of the forward comparison test."""
        first = False
        cover_sets = {"high": 50, "low": 20}
        poss = {"high": 30, "low": 10}
        cover = 100
        cover_detect.coverage_comparison(cover, cover_sets, poss, first, "-",
                                         50)
        self.assertDictEqual(cover_sets, {"high": 100, "low": 100})
        self.assertDictEqual(poss, {"high": 50, "low": 50})
        cover = 30
        cover_detect.coverage_comparison(cover, cover_sets, poss, first, "-",
                                         49)
        self.assertDictEqual(cover_sets, {"high": 100, "low": 30})
        self.assertDictEqual(poss, {"high": 50, "low": 49})

    def test_define_cutoff_median(self):
        """The cutoff per track follows the statistic chosen per UTR type."""
        coverages = {"3utr": "mean", "5utr": "median"}
        median = {
            "track_a": {"median": 100, "mean": 200},
            "track_b": {"median": 30, "mean": 80},
        }
        cutoff = cover_detect.define_cutoff(coverages, median, "5utr")
        self.assertDictEqual(cutoff, {'track_a': 100, 'track_b': 30})
        cutoff = cover_detect.define_cutoff(coverages, median, "3utr")
        self.assertDictEqual(cutoff, {'track_a': 200, 'track_b': 80})

    def test_check_tex(self):
        """``check_tex`` reports the detected libraries and fills targets."""
        template_texs = self.example.texs
        covers = self.example.cover_datas
        coverages = {"3utr": 100, "5utr": 600}
        poss = {"high": 30, "low": 10}
        median = self._library_medians()
        target_datas = []
        detect_num_lib = cover_detect.check_tex(template_texs, covers,
                                                target_datas, 20, None, poss,
                                                median, coverages, "3utr", 200,
                                                2)
        self.assertEqual(detect_num_lib, 2)
        num_frag = 0
        num_tex = 0
        for target in target_datas:
            if target["type"] == "frag":
                num_frag += 1
            else:
                num_tex += 1
        self.assertEqual(num_frag, 1)
        self.assertEqual(num_tex, 2)
        detect_num_lib = cover_detect.check_tex(template_texs, covers,
                                                target_datas, 20,
                                                "sRNA_utr_derived", poss,
                                                median, coverages, "5utr", 200,
                                                2)
        self.assertEqual(detect_num_lib, 2)
        self.assertDictEqual(poss, {
            'start': 100,
            'high': 30,
            'end': 202,
            'low': 10
        })

    def test_replicate_comparison(self):
        """``replicate_comparison`` result when ``check_tex`` is stubbed."""
        from unittest.mock import patch

        # Stub check_tex for this test only.  A plain assignment left the
        # stub installed in cover_detect for every test that ran afterwards.
        patcher = patch.object(cover_detect, "check_tex",
                               Mock_func().mock_check_tex)
        patcher.start()
        self.addCleanup(patcher.stop)
        template_texs = self.example.texs
        srna_covers = {"texnotex": self.example.cover_datas}
        coverages = {"3utr": 100, "5utr": 600}
        median = self._library_medians()
        args = self.mock_args.mock()
        args.replicates = {"tex": "all_2", "frag": "all_1"}
        args.tex_notex = 2
        srna_datas = cover_detect.replicate_comparison(args, srna_covers, "+",
                                                       "sRNA_utr_derived",
                                                       median, coverages,
                                                       "3utr", 100, 200,
                                                       template_texs)
        self.assertEqual(srna_datas["best"], 500)
        self.assertEqual(srna_datas["track"], "frag")
        self.assertEqual(srna_datas["high"], 700)
        self.assertEqual(srna_datas["low"], 400)
        self.assertEqual(srna_datas["start"], 100)
        self.assertEqual(srna_datas["end"], 202)
Exemplo n.º 33
0
class TestScreen(unittest.TestCase):
    """Tests for the ``Screen`` screenshot-script generator."""

    def setUp(self):
        """Create the wig/output folders, a fasta file and the ``Screen``."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.output = os.path.join(self.test_folder, "output")
        self.tex_wig = os.path.join(self.test_folder, "tex")
        self.frag_wig = os.path.join(self.test_folder, "frag")
        # Ensure each folder on its own; a single guard on test_folder
        # skipped all of them when a stale test_folder was left behind.
        for folder in (self.tex_wig, self.frag_wig, self.output):
            os.makedirs(folder, exist_ok=True)
        self.fasta = os.path.join(self.test_folder, "aaa.fa")
        gen_file(self.fasta, self.example.fasta)
        args = self.mock_args.mock()
        args.output_folder = self.output
        args.fasta = self.fasta
        self.screen = Screen(args)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_screenshot(self):
        """``screenshot`` creates both strand folders and the reverse script."""
        gen_file(os.path.join(self.tex_wig, "tex_1_f.wig"), self.example.wig_f)
        gen_file(os.path.join(self.tex_wig, "notex_1_f.wig"),
                 self.example.wig_f)
        gen_file(os.path.join(self.frag_wig, "frag_f.wig"), self.example.wig_f)
        gen_file(os.path.join(self.tex_wig, "tex_1_r.wig"), self.example.wig_r)
        gen_file(os.path.join(self.tex_wig, "notex_1_r.wig"),
                 self.example.wig_r)
        gen_file(os.path.join(self.frag_wig, "frag_r.wig"), self.example.wig_r)
        args = self.mock_args.mock()
        args.fasta = self.fasta
        args.main_gff = os.path.join(self.test_folder, "main.gff")
        gen_file(args.main_gff, self.example.main_gff)
        side_gff = os.path.join(self.test_folder, "side.gff")
        args.side_gffs = [side_gff]
        gen_file(side_gff, self.example.side_gff)
        args.frag_wigs = self.frag_wig
        args.tex_wigs = self.tex_wig
        args.height = 1000
        args.tlibs = [
            "test_folder/tex/tex_1_f.wig:tex:1:a:+",
            "test_folder/tex/tex_1_r.wig:tex:1:a:-",
            "test_folder/tex/notex_1_f.wig:notex:1:a:+",
            "test_folder/tex/notex_1_r.wig:notex:1:a:-"
        ]
        args.flibs = [
            "test_folder/frag/frag_f.wig:frag:1:a:+",
            "test_folder/frag/frag_r.wig:frag:1:a:-"
        ]
        args.present = "expand"
        args.output_folder = self.output
        self.screen.screenshot(args)
        shot_folder = os.path.join(self.output, "screenshots", "aaa")
        self.assertTrue(os.path.exists(os.path.join(shot_folder, "forward")))
        self.assertTrue(os.path.exists(os.path.join(shot_folder, "reverse")))
        # NOTE(review): forward.txt is loaded but its content is never
        # compared; only the reverse script is checked below.  Add an
        # assertion here once the expected forward output is available.
        import_data(os.path.join(shot_folder, "forward.txt"))
        datas = import_data(os.path.join(shot_folder, "reverse.txt"))
        self.assertEqual("\n".join(datas), self.example.out_r)

    def test_import_libs(self):
        """Forward tex/notex libraries land in the ``ft``/``fn`` buckets."""
        tex_path = os.path.join(self.tex_wig, "tex_1.wig")
        notex_path = os.path.join(self.tex_wig, "notex_1.wig")
        texs = [[tex_path, "tex", "1", "a", "+"],
                [notex_path, "notex", "1", "a", "+"]]
        lib_dict = {"ft": [], "fn": [], "rt": [], "rn": [], "ff": [], "rf": []}
        self.screen._import_libs(texs, "+", lib_dict)
        # Expected values reuse the very paths passed in, so the comparison
        # does not depend on the platform's path separator.
        self.assertDictEqual(
            lib_dict, {
                'fn': [notex_path],
                'rn': [],
                'rt': [],
                'ft': [tex_path],
                'rf': [],
                'ff': []
            })
Exemplo n.º 34
0
class TestsTranscriptAssembly(unittest.TestCase):
    """Tests for ``TranscriptDetection`` with the ``tr`` helpers stubbed.

    Every replacement of a ``tr`` module attribute goes through
    ``_patch_tr`` so it is undone after the test, whether it passes or fails.
    """

    def _patch_tr(self, name, replacement):
        """Replace ``tr.<name>`` for the current test only.

        ``create=True`` keeps the old behaviour of a plain assignment when
        the attribute does not exist yet; it is removed again on cleanup.
        """
        from unittest.mock import patch

        patcher = patch.object(tr, name, replacement, create=True)
        patcher.start()
        self.addCleanup(patcher.stop)

    def setUp(self):
        """Create the scratch folder layout and the object under test."""
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.mock_parser = Mock_Multiparser()
        self.example = Example()
        self.test_folder = "test_folder"
        self.trans = "test_folder/trans"
        self.out = "test_folder/output"
        self.tex = "test_folder/tex"
        self.frag = "test_folder/frag"
        self.gffs = "test_folder/gffs"
        self.tsss = "test_folder/tsss"
        self.terms = "test_folder/terms"
        self.stat = "test_folder/output/statistics"
        self.out_gff = "test_folder/output/gffs"
        self.out_table = "test_folder/output/tables"
        folders = [
            self.trans,
            self.out,
            self.tex,
            os.path.join(self.frag, "tmp"),
            os.path.join(self.gffs, "tmp"),
            self.tsss,
            self.terms,
            self.stat,
            self.out_gff,
            self.out_table,
        ]
        # Ensure every folder on its own (parents included); guarding all of
        # them on test_folder alone skipped them after an aborted run.
        for folder in folders:
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.out_folder = self.out
        self.tran = TranscriptDetection(args)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_compute(self):
        """``_compute`` returns the strain name taken from the wig file."""
        # The stub used to be installed on tr.detect_transcript but the
        # "restore" wrote to tr.assembly, leaving both attributes wrong.
        self._patch_tr("detect_transcript", self.mock.mock_assembly)
        gen_file(os.path.join(self.frag, "tmp/test_forward.wig"), "test")
        args = self.mock_args.mock()
        args.replicates = "rep"
        # Was misspelled "out_foler", which set an attribute nobody reads.
        args.out_folder = self.out
        strains = self.tran._compute("frag", self.frag, "libs", args)
        self.assertListEqual(strains, ['test'])

    def test_for_one_wig(self):
        """``_for_one_wig`` writes the per-strain transcript gff file."""
        self._patch_tr("assembly", self.mock.mock_assembly)
        self.tran.multiparser = self.mock_parser
        gen_file(os.path.join(self.frag, "tmp/test_forward.wig"), "test")
        gen_file(os.path.join(self.out, "test_frag"), self.example.tran_file)
        args = self.mock_args.mock()
        args.replicates = "rep"
        args.libs = "libs"
        args.gffs = self.gffs
        args.out_folder = self.out
        args.frag_wigs = self.frag
        args.flibs = "flibs"
        strains = self.tran._for_one_wig("frag", args)
        self.assertListEqual(strains, ['test'])
        datas = import_data(
            os.path.join(self.out_gff, "test_transcript_frag.gff"))
        self.assertEqual("\n".join(datas),
                         "##gff-version 3\n" + self.example.tran_file)

    def test_for_two_wigs(self):
        """``_for_two_wigs`` produces the combined transcript gff file."""
        self._patch_tr("combine", self.mock.mock_combine)
        gen_file(os.path.join(self.out_gff, "test_transcript_fragment.gff"),
                 "test")
        gen_file(os.path.join(self.out_gff, "test_transcript_tex_notex.gff"),
                 "test")
        args = self.mock_args.mock()
        args.frag_wigs = self.frag
        args.tex_wigs = self.tex
        args.gffs = self.gffs
        args.tolerance = 5
        # Scoped log handle: it used to be left open for the whole run.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.tran._for_two_wigs(["test"], args, log)
        self.assertTrue(
            os.path.exists(os.path.join(self.out_gff, "test_transcript.gff")))

    def test_post_modify(self):
        """``_post_modify`` leaves a final transcript gff for the strain."""
        self._patch_tr("longer_ta", self.mock.mock_longer_ta)
        self._patch_tr("fill_gap", self.mock.mock_fill_gap)
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gff_out = os.path.join(self.out, "gffs")
        os.mkdir(os.path.join(self.out, "tmp_tran"))
        gen_file(os.path.join(gff_out, "tmp_uni"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_overlap"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "final_test"), self.example.tran_file)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.out_folder = self.out
        args.length = 20
        args.modify = "merge_overlap"
        self.tran._post_modify(["test"], args)
        self.assertTrue(
            os.path.exists(os.path.join(gff_out, "test_transcript.gff")))

    def test_compare_cds(self):
        """``_compare_cds`` rewrites the annotation and transcript gffs."""
        self._patch_tr("stat_ta_gff", self.mock.mock_stat_ta_gff)
        self.tran.multiparser = self.mock_parser
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gen_file(os.path.join(self.gffs, "tmp/test.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.out_gff, "test_transcript.gff"),
                 self.example.tran_file)
        gff_out = os.path.join(self.out, "gffs")
        gen_file(os.path.join(gff_out, "tmp_ta_gff"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_gff_ta"), self.example.gff_file)
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.trans = self.trans
        args.gffs = self.gffs
        args.c_feature = ["CDS"]
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.tran._compare_cds(["test"], args, log)
        datas = import_data(os.path.join(self.gffs, "test.gff"))
        self.assertEqual("\n".join(datas),
                         "##gff-version 3\n" + self.example.gff_file)
        datas = import_data(os.path.join(self.out_gff, "test_transcript.gff"))
        self.assertEqual("\n".join(datas),
                         "##gff-version 3\n" + self.example.tran_file)

    def test_compare_tss(self):
        """``_compare_tss`` rewrites the TSS and transcript gffs."""
        self._patch_tr("stat_ta_tss", self.mock.mock_stat_ta_tss)
        self.tran.multiparser = self.mock_parser
        gen_file(os.path.join(self.gffs, "test_TSS.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.gffs, "tmp/test_TSS.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.out_gff, "test_transcript.gff"),
                 self.example.tran_file)
        gff_out = os.path.join(self.out, "gffs")
        gen_file(os.path.join(gff_out, "tmp_ta_tss"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_tss_ta"), self.example.gff_file)
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.trans = self.trans
        args.compare_tss = self.gffs
        args.fuzzy = 2
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.tran._compare_tss(["test"], args, log)
        datas = import_data(os.path.join(self.gffs, "test_TSS.gff"))
        self.assertEqual("\n".join(datas),
                         "##gff-version 3\n" + self.example.gff_file)
        datas = import_data(os.path.join(self.out_gff, "test_transcript.gff"))
        self.assertEqual("\n".join(datas),
                         "##gff-version 3\n" + self.example.tran_file)

    def test_run_transcript_assembly(self):
        """Smoke test: ``run_transcript`` completes with the helpers stubbed."""
        self._patch_tr("stat_ta_tss", self.mock.mock_stat_ta_tss)
        self._patch_tr("stat_ta_gff", self.mock.mock_stat_ta_gff)
        self._patch_tr("longer_ta", self.mock.mock_longer_ta)
        self._patch_tr("fill_gap", self.mock.mock_fill_gap)
        self._patch_tr("combine", self.mock.mock_combine)
        # NOTE(review): the original saved tr.detect_transcript but stubbed
        # (and then overwrote) tr.assembly.  The attribute that was actually
        # stubbed during the test is kept; confirm which one is intended.
        self._patch_tr("assembly", self.mock.mock_assembly)
        self._patch_tr("gen_table_transcript", self.mock.mock_gen_table_tran)
        gen_file(os.path.join(self.frag, "tmp/test1_forward.wig"),
                 self.example.wig_f)
        gen_file(os.path.join(self.frag, "tmp/test1_reverse.wig"),
                 self.example.wig_r)
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gen_file(os.path.join(self.tsss, "test_TSS.gff"),
                 self.example.tss_file)
        gen_file(os.path.join(self.terms, "test_term.gff"),
                 self.example.term_file)
        gen_file("test_folder/output/test1_fragment", self.example.tran_file)
        gff_out = os.path.join(self.out, "gffs")
        gen_file(
            os.path.join(gff_out, "test_transcript_assembly_fragment.gff"),
            self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_uni"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_overlap"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "final_test"), self.example.tran_file)
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.frag_wigs = self.frag
        args.tex_wigs = None
        args.flibs = "flibs"
        args.tlibs = "tlibs"
        args.gffs = self.gffs
        args.terms = None
        args.compare_tss = None
        args.c_feature = None
        args.fuzzy_term = 1
        args.max_dist = 2000
        args.modify = "merge_overlap"
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.tran.run_transcript(args, log)
0
class TestSubLocal(unittest.TestCase):
    """Tests for the ``SubLocal`` subcellular-localization wrapper."""

    def setUp(self):
        """Create the scratch folder layout and the ``SubLocal`` under test."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.out = "test_folder/output"
        self.fastas = "test_folder/fastas"
        self.gffs = "test_folder/gffs"
        self.stat = "test_folder/stat"
        self.trans = "test_folder/tran"
        folders = [
            self.out,
            os.path.join(self.fastas, "tmp"),
            os.path.join(self.gffs, "tmp"),
            self.stat,
            self.trans,
        ]
        # Ensure every folder on its own (parents included); guarding all of
        # them on test_folder alone skipped them after an aborted run.
        for folder in folders:
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.out_folder = self.out
        args.trans = self.trans
        self.sub = SubLocal(args)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_get_protein_seq(self):
        """``_get_protein_seq`` returns the gff file name without extension."""
        gen_file(os.path.join(self.fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        gff = "aaa.gff"
        gen_file(os.path.join(self.gffs, "tmp", gff), self.example.gff_file)
        gen_file(os.path.join(self.trans, "aaa_transcript.gff"),
                 self.example.tran_file)
        prefix = self.sub._get_protein_seq(gff, self.test_folder, self.trans)
        self.assertEqual(prefix, "aaa")

    def test_run_psortb(self):
        """``_run_psortb`` leaves a log and a raw result file behind."""
        # Instance-level stub; it disappears with self.sub after the test.
        self.sub._psortb = self.mock.mock_psortb
        tmp_result = os.path.join(self.out, "tmp_results")
        os.mkdir(tmp_result)
        args = self.mock_args.mock()
        args.psortb_path = "psortb_path"
        args.gram = "positive"
        self.sub._run_psortb(args, "aaa", self.out, self.test_folder,
                             tmp_result)
        self.assertTrue(os.path.exists(os.path.join(self.out, "tmp_log")))
        self.assertTrue(
            os.path.exists(
                os.path.join(tmp_result, "_".join(["aaa", "raw.txt"]))))

    def test_merge_and_stat(self):
        """``_merge_and_stat`` creates the per-genome stat and result folders."""
        from unittest.mock import patch

        # Stub su.stat_sublocal for this test only.  A plain assignment left
        # the stub installed in the module for every later test.
        patcher = patch.object(su, "stat_sublocal",
                               self.mock.mock_stat_sublocal, create=True)
        patcher.start()
        self.addCleanup(patcher.stop)
        os.mkdir(os.path.join(self.gffs, "aaa.gff_folder"))
        gen_file(os.path.join(self.gffs, "aaa.gff_folder/aaa.gff"), "test")
        os.mkdir(os.path.join(self.out, "psortb_results"))
        gen_file(os.path.join(self.out, "aaa_raw.txt"), "test")
        gen_file(os.path.join(self.out, "aaa_table.csv"), "test")
        self.sub._merge_and_stat(self.gffs, self.out, self.test_folder,
                                 self.stat)
        self.assertTrue(os.path.exists(os.path.join(self.stat, "aaa")))
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "aaa")))

    def test_compare_cds_tran(self):
        """``_compare_cds_tran`` writes the expected CDS to ``tmp_cds.gff``."""
        gff_file = os.path.join(self.test_folder, "aaa.gff")
        tran_file = os.path.join(self.test_folder, "aaa_transcript.gff")
        gen_file(gff_file, self.example.gff_file)
        gen_file(tran_file, self.example.tran_file)
        self.sub._compare_cds_tran(gff_file, tran_file)
        # Only the first element of the returned pair is checked.
        datas, _ = extract_info("test_folder/output/all_CDSs/tmp_cds.gff",
                                "file")
        self.assertEqual("\n".join(datas), 'aaa\tRefSeq\tCDS\t3\t17\t.\t+\t.')
class TestMergeRNAplexRNAup(unittest.TestCase):
    """Tests for the ``mrr`` functions called below, using ``Example`` data.

    A scratch directory named ``test_project`` is recreated before every
    test and removed again afterwards.
    """

    # Result rows that both merge tests expect to find in the merged output.
    _SRNA0_ROW = [
        'sRNA_0', 'aaa', '6-15', '7-15', '7-15', '+', 'gene0', 'cds0',
        'AAA_00001', '100-150', '89-50', '89-50', '+', '-6.5', '1', '-6.5',
        '1',
    ]
    _SRNA1_ROW = [
        'sRNA_1', 'aaa', '1258-2234', '1259-1267', '1259-1267', '+', 'gene2',
        'cds2', 'AAA_00003', '2348-2934', '2337-50', '2337-50', '+', '-10.5',
        '1', '-10.5', '1',
    ]
    _SRNA2_ROW = [
        'sRNA_2', 'aaa', '3544-6517', '6508-6516', '6508-6516', '-', 'gene0',
        'cds0', 'AAA_00001', '100-150', '89-50', '89-50', '+', '-23.5', '1',
        '-23.5', '1',
    ]

    def setUp(self):
        """Start from an empty ``test_project`` folder and fresh fixtures."""
        folder = "test_project"
        self.test_folder = folder
        if os.path.exists(folder):
            shutil.rmtree(folder)
        os.mkdir(folder)
        self.example = Example()
        self.mock_args = MockClass()

    def tearDown(self):
        """Delete the scratch folder if it is still present."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def _target_args(self):
        """Return mock arguments with ``top``, ``tar_start`` and ``tar_end``."""
        args_tar = self.mock_args.mock()
        args_tar.top = 2
        args_tar.tar_start = 20
        args_tar.tar_end = 15
        return args_tar

    @staticmethod
    def _count_matches(expected_rows, produced_rows):
        """Count the (expected, produced) pairs that compare equal."""
        return sum(1 for wanted in expected_rows
                   for produced in produced_rows if wanted == produced)

    def test_detect_energy(self):
        """``detect_energy`` must leave the expected energy in ``srna``."""
        for initial, expected in ((-2, -5.3), (-8, -8.0)):
            srna = {"energy": initial}
            mrr.detect_energy(self.example.out_rna_txt, srna)
            self.assertDictEqual(srna, {'energy': expected})

    def test_print_rank_one(self):
        """Compare ``print_rank_one`` output with ``Example.out_print``."""
        buffer = StringIO()
        mrr.print_rank_one(self.example.srnas, buffer, "RNAplex",
                           self.example.gffs, self.example.srna_gffs,
                           self._target_args(), 50)
        printed = convert_dict(buffer.getvalue().split("\n"))
        # Entries with an empty key are dropped before comparing.
        non_empty = {key: value for key, value in printed.items()
                     if len(key) > 0}
        expected = convert_dict(self.example.out_print.split("\n"))
        self.assertDictEqual(non_empty, expected)

    def test_read_table(self):
        """``read_table`` must return the expected per-method hit tables."""
        rnaplex = os.path.join(self.test_folder, "rnaplex")
        rnaup = os.path.join(self.test_folder, "rnaup")
        gen_file(rnaplex, self.example.rnaplex)
        gen_file(rnaup, self.example.rnaup)

        srnas = mrr.read_table(self.example.srna_gffs, rnaplex, rnaup, None,
                               self.example.genes, self.example.gffs, ["CDS"])

        # Target description that most expected hits have in common.
        cds0 = {'gene_id': 'gene0', 'target_id': 'cds0',
                'target_locus': 'AAA_00001', 'detail': '100-150_+'}
        expected = {
            'IntaRNA': {},
            'RNAup': {
                'srna0': [
                    dict(cds0, srna_pos='20,25', energy=-4.87,
                         tar_pos='571,576'),
                    {'srna_pos': '11,26', 'energy': -5.91,
                     'tar_pos': '14,30', 'gene_id': 'NA',
                     'target_id': 'cds1', 'target_locus': 'AAA_00003',
                     'detail': '2348-2934_+'},
                ],
            },
            'RNAplex': {
                'srna0': [dict(cds0, srna_pos='20,25', energy=-5.3,
                               tar_pos='571,576')],
                'srna1': [dict(cds0, srna_pos='24,31', energy=-1.91,
                               tar_pos='163,170')],
            },
        }
        self.assertDictEqual(srnas, expected)

    def test_get_srna_name(self):
        """``get_srna_name`` returns a name and an entry with ``start``."""
        found = mrr.get_srna_name(self.example.srna_gffs, "srna0")
        self.assertEqual(found[0], 'sRNA_0')
        self.assertEqual(found[1].start, 6)

    def test_get_target_info(self):
        """``get_target_info`` returns an entry whose ``start`` is 100."""
        target = {"gene_id": "gene0", "detail": "100-150_+",
                  "target_id": "cds0", "target_locus": "AAA_00001",
                  "energy": -6.5}
        found = mrr.get_target_info(self.example.gffs, target)
        self.assertEqual(found.start, 100)

    def test_merge_result(self):
        """All three expected rows appear in the return value and ``merges``."""
        args_tar = self._target_args()
        merges = []
        methods = ["RNAup", "RNAplex"]
        overlap = mrr.merge_result(self.example.srnas, self.example.srna_gffs,
                                   args_tar, self.example.gffs, merges, 50,
                                   methods)
        expected_rows = [self._SRNA0_ROW, self._SRNA1_ROW, self._SRNA2_ROW]
        self.assertEqual(self._count_matches(expected_rows, overlap), 3)
        self.assertEqual(self._count_matches(expected_rows, merges), 3)

    def test_merge_last(self):
        """``merge_last`` must append all three expected rows to ``merges``."""
        args_tar = self._target_args()

        def hit(gene_id, detail, target_id, locus, energy, rank):
            # Build a fresh hit record; positions are the same for all hits.
            return {
                "gene_id": gene_id,
                "detail": detail,
                "target_id": target_id,
                "target_locus": locus,
                "energy": energy,
                "rank": rank,
                "srna_pos": "2,10",
                "tar_pos": "10,15",
            }

        def gene0_hit(energy, rank):
            return hit("gene0", "100-150_+", "cds0", "AAA_00001",
                       energy, rank)

        def gene2_hit(energy, rank):
            return hit("gene2", "2348-2934_+", "cds2", "AAA_00003",
                       energy, rank)

        srnas = {
            "RNAplex": {
                "srna0": [gene0_hit(-6.5, 1)],
                "srna1": [gene2_hit(-10.5, 1)],
                "srna2": [gene0_hit(-23.5, 1), gene2_hit(-6.5, 2)],
            },
            "RNAup": {
                "srna0": [gene0_hit(-6.5, 1)],
                "srna1": [gene2_hit(-10.5, 1)],
                "srna2": [gene0_hit(-23.5, 1)],
            },
        }
        merges = []
        mrr.merge_last(srnas, self.example.srna_gffs, args_tar,
                       self.example.gffs, merges, 50, "RNAplex", "RNAup", 2,
                       None, False)
        expected_rows = [self._SRNA1_ROW, self._SRNA2_ROW, self._SRNA0_ROW]
        self.assertEqual(self._count_matches(expected_rows, merges), 3)
Exemplo n.º 37
0
class TestMergeRNAplexRNAup(unittest.TestCase):
    """Tests for the ``mrr`` functions called below, using ``Example`` data.

    A scratch directory named ``test_project`` is recreated before every
    test and removed again afterwards.
    """

    def setUp(self):
        """Start from an empty ``test_project`` folder and fresh fixtures."""
        folder = "test_project"
        self.test_folder = folder
        if os.path.exists(folder):
            shutil.rmtree(folder)
        os.mkdir(folder)
        self.example = Example()
        self.mock_args = MockClass()

    def tearDown(self):
        """Delete the scratch folder if it is still present."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def _target_args(self):
        """Return mock arguments with ``top``, ``tar_start`` and ``tar_end``."""
        args_tar = self.mock_args.mock()
        args_tar.top = 2
        args_tar.tar_start = 20
        args_tar.tar_end = 15
        return args_tar

    @staticmethod
    def _count_matches(expected_rows, produced_rows):
        """Count the (expected, produced) pairs that compare equal."""
        return sum(1 for wanted in expected_rows
                   for produced in produced_rows if wanted == produced)

    def test_detect_energy(self):
        """``detect_energy`` must leave the expected energy in ``srna``."""
        for initial, expected in ((-2, -5.3), (-8, -8.0)):
            srna = {"energy": initial}
            mrr.detect_energy(self.example.out_rna_txt, srna)
            self.assertDictEqual(srna, {'energy': expected})

    def test_print_rank_one(self):
        """Compare ``print_rank_one`` output with ``Example.out_print``."""
        buffer = StringIO()
        mrr.print_rank_one(self.example.srnas, buffer, "RNAplex",
                           self.example.gffs, self.example.srna_gffs,
                           self._target_args())
        printed = convert_dict(buffer.getvalue().split("\n"))
        expected = convert_dict(self.example.out_print.split("\n"))
        self.assertDictEqual(printed, expected)

    def test_read_table(self):
        """``read_table`` must return the expected per-method hit tables."""
        rnaplex = os.path.join(self.test_folder, "rnaplex")
        rnaup = os.path.join(self.test_folder, "rnaup")
        gen_file(rnaplex, self.example.rnaplex)
        gen_file(rnaup, self.example.rnaup)

        srnas = mrr.read_table(self.example.gffs, rnaplex, rnaup)

        expected = {
            'RNAup': {
                'srna352': [
                    {'target': 'srna1023', 'energy': 0},
                    {'tar_pos': '571,576', 'target': 'SAOUHSC_00001|dnaA',
                     'energy': -4.87, 'srna_pos': '20,25'},
                    {'tar_pos': '14,30', 'target': 'SAOUHSC_00002',
                     'energy': -5.91, 'srna_pos': '11,26'},
                ],
            },
            'RNAplex': {
                'srna1023': [
                    {'tar_pos': '571,576', 'target': 'SAOUHSC_00001|dnaA',
                     'energy': -5.3, 'srna_pos': '20,25'},
                ],
                'srna352': [
                    {'tar_pos': '163,170', 'target': 'SAOUHSC_00001|dnaA',
                     'energy': -1.91, 'srna_pos': '24,31'},
                ],
            },
        }
        self.assertDictEqual(srnas, expected)

    def test_get_srna_name(self):
        """``get_srna_name`` returns a name and an entry with ``start``."""
        found = mrr.get_srna_name(self.example.srna_gffs, "srna0")
        self.assertEqual(found[0], 'sRNA_0')
        self.assertEqual(found[1].start, 6)

    def test_get_target_info(self):
        """``get_target_info`` returns an entry whose ``start`` is 100."""
        found = mrr.get_target_info(self.example.gffs, "AAA_00001")
        self.assertEqual(found.start, 100)

    def test_merge_base_rnaplex(self):
        """All four expected rows appear in the return value and ``merges``."""
        args_tar = self._target_args()
        merges = []
        overlap = mrr.merge_base_rnaplex(
            self.example.srnas, self.example.srna_gffs, args_tar,
            self.example.gffs, merges)
        expected_rows = [
            ['sRNA_0', 'aaa', '6-15', '7-15', '7-15', '+', 'AAA_00001',
             '100-150', '89-94', '89-94', '+', '-6.5', '1', '-6.5', '1'],
            ['sRNA_0', 'aaa', '6-15', '7-12', '7-15', '+', 'AAA_00002|dnaA',
             '2348-2934', '2330-2342', '2337-2342', '+', '-3.5', '2', '-3.5',
             '2'],
            ['sRNA_1', 'aaa', '1258-2234', '1259-1267', '1259-1267', '+',
             'AAA_00003', '5544-5597', '5550-5545', '5550-5545', '-',
             '-10.5', '1', '-10.5', '1'],
            ['sRNA_2', 'aaa', '3544-6517', '6508-6516', '6508-6516', '-',
             'AAA_00001', '100-150', '89-94', '89-94', '+', '-23.5', '1',
             '-23.5', '1'],
        ]
        self.assertEqual(self._count_matches(expected_rows, overlap), 4)
        self.assertEqual(self._count_matches(expected_rows, merges), 4)

    def test_merge_base_rnaup(self):
        """``merge_base_rnaup`` must append all four expected rows."""
        args_tar = self._target_args()

        def hit(target, energy, rank):
            # Build a fresh hit record; positions are the same for all hits.
            return {"target": target, "energy": energy, "rank": rank,
                    "srna_pos": "2,10", "tar_pos": "10,15"}

        srnas = {
            "RNAplex": {
                "srna0": [hit("AAA_00001", -6.5, 1),
                          hit("AAA_00002|dnaA", -3.5, 2)],
                "srna1": [hit("AAA_00003", -10.5, 1)],
                "srna2": [hit("AAA_00001", -23.5, 1),
                          hit("AAA_00002|dnaA", -3.43, 3),
                          hit("AAA_00003", -6.5, 2)],
            },
            "RNAup": {
                "srna0": [hit("AAA_00001", -6.5, 1),
                          hit("AAA_00002|dnaA", -3.5, 2)],
                "srna1": [hit("AAA_00003", -10.5, 1)],
                "srna2": [hit("AAA_00001", -23.5, 1)],
            },
        }
        merges = []
        mrr.merge_base_rnaup(srnas, self.example.srna_gffs, args_tar,
                             self.example.gffs, merges)
        expected_rows = [
            ['sRNA_1', 'aaa', '1258-2234', '1259-1267', '1259-1267', '+',
             'AAA_00003', '5544-5597', '5550-5545', '5550-5545', '-',
             '-10.5', '1', '-10.5', '1'],
            ['sRNA_2', 'aaa', '3544-6517', '6508-6516', '6508-6516', '-',
             'AAA_00001', '100-150', '89-94', '89-94', '+', '-23.5', '1',
             '-23.5', '1'],
            ['sRNA_0', 'aaa', '6-15', '7-15', '7-15', '+', 'AAA_00001',
             '100-150', '89-94', '89-94', '+', '-6.5', '1', '-6.5', '1'],
            ['sRNA_0', 'aaa', '6-15', '7-15', '7-15', '+', 'AAA_00002|dnaA',
             '2348-2934', '2337-2342', '2337-2342', '+', '-3.5', '2', '-3.5',
             '2'],
        ]
        self.assertEqual(self._count_matches(expected_rows, merges), 4)
Exemplo n.º 38
0
class TestRibos(unittest.TestCase):
    """Tests for ``Ribos._scan_extract_rfam`` and ``Ribos._merge_results``.

    ``setUp`` builds a directory tree under ``test_folder`` and constructs a
    ``Ribos`` instance from mock arguments; ``tearDown`` removes the tree.
    """

    def setUp(self):
        """Create fixtures, the working directory tree and ``self.ribo``."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.gffs = os.path.join(self.test_folder, "gffs")
        self.fastas = os.path.join(self.test_folder, "fastas")
        self.out_folder = os.path.join(self.test_folder, "output")
        self.database = os.path.join(self.test_folder, "database")
        self.seq_path = os.path.join(self.test_folder, "seqs")
        self.tables = os.path.join(self.out_folder, "tables")
        self.stat = os.path.join(self.out_folder, "statistics")
        self.scan = os.path.join(self.test_folder, "scan")
        self.tsss = os.path.join(self.test_folder, "tsss")
        self.trans = os.path.join(self.test_folder, "trans")
        self.out_gff = os.path.join(self.out_folder, "gffs")
        if not os.path.exists(self.test_folder):
            # Parents are listed before their children because os.mkdir
            # creates only one level at a time.
            for folder in (
                    self.test_folder,
                    self.tsss,
                    os.path.join(self.tsss, "tmp"),
                    self.trans,
                    os.path.join(self.trans, "tmp"),
                    self.gffs,
                    os.path.join(self.gffs, "tmp"),
                    self.fastas,
                    os.path.join(self.fastas, "tmp"),
                    self.out_folder,
                    self.database,
                    self.seq_path,
                    os.path.join(self.out_folder, "tmp_table"),
                    os.path.join(self.out_folder, "tmp_scan"),
                    os.path.join(self.out_folder, "tmp_fasta"),
                    os.path.join(self.out_folder, "scan_Rfam"),
                    self.tables,
                    self.scan,
                    self.stat,
                    self.out_gff):
                os.mkdir(folder)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.ribos_out_folder = self.out_folder
        args.database = self.database
        args.tsss = self.tsss
        args.trans = self.trans
        args.program = 'riboswtich'
        self.ribo = Ribos(args)

    def tearDown(self):
        """Remove the working directory tree if it exists."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_scan_extract_rfam(self):
        """Run ``_scan_extract_rfam`` with the external steps mocked.

        Afterwards ``prefixs`` must be ``["test"]`` and
        ``test_regenerate.fa`` must exist in the ``tmp_fasta`` folder.
        """
        self.ribo._run_cmscan = self.mock.mock_run_cmscan
        rb.modify_table = self.mock.mock_modify_table
        rb.regenerate_seq = self.mock.mock_regenerate_seq
        rb.reextract_rbs = self.mock.mock_reextract_rbs
        prefixs = []
        gen_file(os.path.join(self.gffs, "tmp/test.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.fastas, "tmp/test.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.seq_path, "test.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.tsss, "tmp/test_TSS.gff"),
                 self.example.tss_file)
        gen_file(os.path.join(self.trans, "tmp/test_transcript.gff"),
                 self.example.tran_file)
        gen_file(os.path.join(self.out_folder, "tmp_fasta", "test.fa"),
                 self.example.fasta_file)
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.fastas = self.fastas
        args.out_folder = self.out_folder
        args.gffs = self.gffs
        args.fuzzy = 5
        args.fuzzy_rbs = 2
        args.utr = True
        args.without_rbs = False
        args.rbs_seq = ["AGGAGG"]
        args.output_all = "test"
        args.cutoff = "e_0.01"
        tmp_files = {
            "fasta": os.path.join(self.out_folder, "tmp_fasta"),
            "scan": "tmp_scan",
            "table": os.path.join(self.out_folder, "tmp_table")
        }
        rfam = "Rfam_.cm"
        suffixs = {
            "csv": "test.csv",
            "txt": "test_prescan.txt",
            "re_txt": "test_scan.txt",
            "re_csv": "test_scan.csv"
        }
        # Close the log handle before tearDown deletes the folder.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.ribo._scan_extract_rfam(prefixs, args, tmp_files, suffixs,
                                         "test", rfam, log)
        self.assertListEqual(prefixs, ["test"])
        self.assertTrue(
            os.path.exists(
                os.path.join(self.out_folder, "tmp_fasta",
                             "test_regenerate.fa")))

    def test_merge_results(self):
        """Run ``_merge_results`` on generated placeholder inputs.

        ``rb.stat_and_covert2gff`` is mocked. The test makes no assertion;
        it passes when the call completes without raising.
        """
        rb.stat_and_covert2gff = self.mock.mock_stat_and_covert2gff
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gen_file(
            os.path.join(self.out_folder, "tmp_table/test_riboswitch.csv"),
            self.example.table)
        gen_file(
            os.path.join(self.out_folder,
                         "tmp_scan/test_riboswitch_prescan.txt"),
            self.example.rescan_file)
        gen_file(
            os.path.join(self.out_folder, "tmp_scan/test_riboswitch_scan.txt"),
            self.example.rescan_file)
        gen_file(os.path.join(self.test_folder, "ids"), self.example.ids)
        gen_file(os.path.join(self.tables, "test_riboswitch.csv"),
                 self.example.table)
        gen_file('test_folder/output/tmp_table/test_test_scan.csv', "test")
        gen_file(
            os.path.join("test_folder/output", "tmp_fasta",
                         "test_regenerate.fa"), "test")
        gen_file('test_folder/output/tmp_scan/test_test_prescan.txt', "test")
        gen_file('test_folder/output/tmp_scan/test_test_scan.txt', "test")
        if not os.path.exists('test_folder/output/tmp_table/test_test.csv'):
            gen_file('test_folder/output/tmp_table/test_test.csv', "test")
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.fastas = self.fastas
        args.out_folder = self.out_folder
        args.gffs = self.gffs
        args.ribos_id = os.path.join(self.test_folder, "ids")
        args.fuzzy = 3
        suffixs = {
            "csv": "test.csv",
            "txt": "test_prescan.txt",
            "re_txt": "test_scan.txt",
            "re_csv": "test_scan.csv"
        }
        tmp_files = {
            "fasta": os.path.join(self.out_folder, "tmp_fasta"),
            "scan": os.path.join(self.out_folder, "tmp_scan"),
            "table": os.path.join(self.out_folder, "tmp_table")
        }
        scan_dir = os.path.join(self.out_folder, "tmp_scan")
        rfam_dir = os.path.join(self.out_folder, "scan_Rfam")
        # Close the log handle before tearDown deletes the folder.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.ribo._merge_results(args, scan_dir, suffixs, tmp_files,
                                     scan_dir, rfam_dir, rfam_dir,
                                     os.path.join(self.out_folder, "gffs"),
                                     "riboswitch", log)
Exemplo n.º 39
0
class TestsRNAClass(unittest.TestCase):
    """Tests for the ``sc`` functions called below, using ``Example`` data.

    Each test runs with a ``test_folder`` directory that is created on
    demand in ``setUp`` and removed in ``tearDown``.
    """

    def setUp(self):
        """Create the fixtures and make sure ``test_folder`` exists."""
        folder = "test_folder"
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = folder
        if not os.path.exists(folder):
            os.mkdir(folder)

    def tearDown(self):
        """Delete ``test_folder`` if it is still present."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    @staticmethod
    def _class_index():
        """Return a new copy of the class-number index used by the tests."""
        return {
            'sRNA_hit': 5,
            '2d_energy': 1,
            'sRNA_no_hit': 4,
            'nr_no_hit': 3,
            'with_TSS': 2,
        }

    def test_initiate(self):
        """``initiate`` must write ``"1testtest\\n"`` to the output stream."""
        stream = StringIO()
        sc.initiate("test", ["test"], "test_name", 0, {}, stream, "testtest")
        self.assertEqual(stream.getvalue(), "1testtest\n")

    def test_print_stat_title(self):
        """Check the printed title, the class count and the returned index."""
        stat_stream = StringIO()
        checks = {"limit": False, "first": True, "utr": False, "inter": False}
        srna_datas = {"aaa": self.example.srnas, "all": self.example.srnas}
        args = self.mock_args.mock()
        args.energy = 0
        args.nr_hits_num = 0
        args.import_info = ["tss", "sec_str", "blast_nr", "blast_srna"]

        class_num, index = sc.print_stat_title(checks, stat_stream, "aaa",
                                               srna_datas, 1, args)

        expected_title = (
            "1 - the normalized(by length of sRNA) free energy change of "
            "secondary structure below to 0\n"
            "2 - sRNA candidates start with TSS (3'UTR derived and interCDS "
            "sRNA also includes the sRNA candidates which start with "
            "processing site.)\n"
            "3 - blast can not find the homology from nr database "
            "(the cutoff is 0).\n"
            "4 - blast can not find the homology from sRNA database.\n"
            "5 - blast can find the homology from sRNA database.\n"
            "All strains:\n"
        )
        self.assertEqual(stat_stream.getvalue(), expected_title)
        self.assertEqual(class_num, 5)
        self.assertDictEqual(index, self._class_index())

    def test_import_class(self):
        """``import_class`` returns 1 and fills ``class_4`` of the result."""
        index = self._class_index()
        num_srna = 0
        datas_srna = {}
        datas = {"aaa": self.example.srnas}
        num = sc.import_class(5, datas_srna, datas, index, num_srna, "aaa",
                              "UTR_derived", "5utr", 0, 0)
        self.assertEqual(num, 1)
        self.assertEqual(datas_srna["class_4"][0].start, 230)

    def test_import_data(self):
        """``import_data`` sorts the entries into the expected classes."""
        datas = {"aaa": self.example.srnas, "all": self.example.srnas}
        index = self._class_index()
        num_srna = {
            "total": 0,
            "intergenic": 0,
            "5'UTR_derived": 0,
            "3'UTR_derived": 0,
            "interCDS": 0,
            "in_CDS": 0,
        }
        checks = {
            "limit": False,
            "first": True,
            "utr": True,
            "inter": True,
            "in_CDS": True,
            "antisense": False,
        }
        datas_rna = sc.import_data(5, datas, index, num_srna, "aaa", checks,
                                   0, 0)
        # (category, class key, expected start of the first entry)
        expectations = (
            ("5'UTR_derived", "class_4", 230),
            ("interCDS", "class_1", 140),
            ("in_CDS", "class_1", 6166),
            ("intergenic", "class_5", 5166),
        )
        for category, class_key, start in expectations:
            self.assertEqual(datas_rna[category][class_key][0].start, start)

    def test_print_intersection(self):
        """Check the statistic line and the file ``print_intersection`` wrote."""
        num_srna = {
            "total": 3,
            "intergenic": 1,
            "5'UTR_derived": 1,
            "3'UTR_derived": 0,
            "interCDS": 1,
        }
        gff_name = os.path.join(self.test_folder, "test")
        stat_stream = StringIO()
        keys = ["class_1", "class_4", "class_2", "class_3", "class_5"]
        class_names = ("class_1", "class_2", "class_3", "class_4", "class_5")
        datas = {name: self.example.srnas for name in class_names}

        sc.print_intersection(datas, keys, 3, gff_name, "total", stat_stream)

        expected_line = ("\tclass_1 and class_4 and class_2 and class_3 "
                         "and class_5 = 4(1.3333333333333333)\n")
        self.assertEqual(stat_stream.getvalue(), expected_line)
        # Second element of the returned pair is not used by this test.
        results, _ = extract_info(
            os.path.join(self.test_folder, "test"), "file")
        self.assertEqual("\n".join(results), self.example.gff_info)

    def test_read_file(self):
        """``read_file`` returns the entries, strain list and check flags."""
        srna_file = os.path.join(self.test_folder, "srna.gff")
        gen_file(srna_file, self.example.gff_file)

        srna_datas, strains, checks = sc.read_file(srna_file)

        for strain, position, start in (("aaa", 0, 140),
                                        ("aaa", 1, 230),
                                        ("bbb", 0, 5166)):
            self.assertEqual(srna_datas[strain][position].start, start)
        self.assertListEqual(strains, ['all', 'aaa', 'bbb'])
        expected_checks = {
            'inter': True,
            'limit': False,
            'utr': True,
            'antisense': False,
            'in_CDS': True,
            'first': True,
        }
        self.assertDictEqual(checks, expected_checks)

    def test_sort_keys(self):
        """``sort_keys`` returns the class keys in ascending order."""
        ordered = sc.sort_keys(["class_3", "class_1", "class_5"])
        self.assertListEqual(ordered, ['class_1', 'class_3', 'class_5'])

    def test_classify_srna(self):
        """Run ``classify_srna`` end to end; passes if nothing raises."""
        stat_path = os.path.join(self.test_folder, "stat")
        srna_file = os.path.join(self.test_folder, "srna.gff")
        gen_file(srna_file, self.example.gff_file)
        args = self.mock_args.mock()
        args.energy = 0
        args.nr_hits_num = 0
        args.in_cds = True
        args.import_info = ["tss", "sec_str"]
        sc.classify_srna(srna_file, self.test_folder, stat_path, args)
Exemplo n.º 40
0
 def setUp(self):
     """Create the fixtures and make sure ``test_folder`` exists."""
     folder = "test_folder"
     self.example = Example()
     self.mock_args = MockClass()
     self.test_folder = folder
     # Only create the directory when it is missing.
     if not os.path.exists(folder):
         os.mkdir(folder)
Exemplo n.º 41
0
class TestDetectUTR(unittest.TestCase):
    def setUp(self):
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_import_utr(self):
        args = self.mock_args.mock()
        utr_all = {"pri": [], "all": [], "sec": []}
        utr_strain = {
            "pri": {
                "aaa": []
            },
            "all": {
                "aaa": []
            },
            "sec": {
                "aaa": []
            }
        }
        args.source = True
        args.base_5utr = "both"
        args.fuzzy_5utr = 5
        detect = du.import_utr(self.example.tss_fit, utr_strain, utr_all, 140,
                               340, self.example.tas, 200, args)
        self.assertTrue(detect[0])
        self.assertDictEqual(utr_strain, {
            'pri': {
                'aaa': [200]
            },
            'all': {
                'aaa': [200]
            },
            'sec': {
                'aaa': []
            }
        })
        self.assertDictEqual(utr_all, {'pri': [200], 'all': [200], 'sec': []})
        detect = du.import_utr(self.example.tss_nofit, utr_strain, utr_all,
                               1140, 1190, self.example.tas, 50, args)
        self.assertFalse(detect[0])
        self.assertDictEqual(utr_strain, {
            'pri': {
                'aaa': [200]
            },
            'all': {
                'aaa': [200]
            },
            'sec': {
                'aaa': []
            }
        })
        self.assertDictEqual(utr_all, {'pri': [200], 'all': [200], 'sec': []})

    def test_detect_cds(self):
        nears = []
        names = []
        checks = []
        for gene in self.example.genes:
            near_cds, cds_name, check_utr = du.detect_cds(
                self.example.cdss, gene)
            nears.append(near_cds.start)
            names.append(cds_name)
            checks.append(check_utr)
        self.assertEqual(set(nears), set([148, 220, 5400]))
        self.assertEqual(set(names),
                         set(['YP_000001', 'AAA_00002', 'CDS:5400-5800_r']))
        self.assertEqual(set(checks), set([True, True, True]))

    def test_check_associated_TSSpredator(self):
        check_utr, cds_name, near_cds = du.check_associated_TSSpredator(
            self.example.genes, self.example.tss_fit, self.example.cdss, False,
            "NA", "AAA_00001")
        self.assertTrue(check_utr)
        self.assertEqual(cds_name, "YP_000001")
        self.assertEqual(near_cds.start, 148)
        check_utr, cds_name, near_cds = du.check_associated_TSSpredator(
            self.example.genes, self.example.tss_nofit, self.example.cdss,
            False, "NA", "AAA_01001")
        self.assertFalse(check_utr)
        self.assertEqual(cds_name, "NA")
        self.assertEqual(near_cds, None)

    def test_get_5utr_from_TSSpredator(self):
        utr_datas = du.get_5utr_from_TSSpredator(self.example.tss_fit,
                                                 self.example.genes,
                                                 self.example.cdss)
        self.assertTrue(utr_datas["check"])
        self.assertEqual(utr_datas["cds_name"], 'YP_000001')
        self.assertEqual(utr_datas["locus"], 'AAA_00001')
        self.assertEqual(utr_datas["near_cds"].start, 148)
        utr_datas = du.get_5utr_from_TSSpredator(self.example.tss_nofit,
                                                 self.example.genes,
                                                 self.example.cdss)
        self.assertFalse(utr_datas["check"])
        self.assertEqual(utr_datas["cds_name"], None)
        self.assertEqual(utr_datas["locus"], None)
        self.assertEqual(utr_datas["near_cds"], None)

    def test_get_5utr_from_other(self):
        utr_datas = du.get_5utr_from_other(self.example.tss_fit,
                                           self.example.genes,
                                           self.example.cdss, 300)
        self.assertTrue(utr_datas["check"])
        self.assertEqual(utr_datas["cds_name"], 'YP_000001')
        self.assertEqual(utr_datas["locus"], 'AAA_00001')
        self.assertEqual(utr_datas["near_cds"].start, 148)
        utr_datas = du.get_5utr_from_other(self.example.tss_nofit,
                                           self.example.genes,
                                           self.example.cdss, 300)
        self.assertFalse(utr_datas["check"])
        self.assertEqual(utr_datas["cds_name"], None)
        self.assertEqual(utr_datas["locus"], None)
        self.assertEqual(utr_datas["near_cds"], None)

    def test_compare_ta(self):
        args = self.mock_args.mock()
        args.length = 300
        out = StringIO()
        utr_all = {"pri": [], "all": [], "sec": []}
        utr_strain = {
            "pri": {
                "aaa": []
            },
            "all": {
                "aaa": []
            },
            "sec": {
                "aaa": []
            }
        }
        utr_tss = []
        num_utr = 0
        du.compare_ta(self.example.tas, self.example.genes, self.example.cdss,
                      utr_strain, utr_all, out, args, utr_tss, num_utr)
        self.assertEqual(set(out.getvalue().split("\n")[:-1]),
                         set([self.example.out_5utr]))
        out.close()

    def test_detect_5utr(self):
        args = self.mock_args.mock()
        du.read_file = Mock_func().mock_read_file
        du.plot = Mock_func().mock_plot
        out_file = os.path.join(self.test_folder, "5utr.gff")
        args.source = True
        args.base_5utr = "both"
        args.length = 300
        du.detect_5utr("test.tss", "test.gff", "test.ta", out_file, args)
        header = ["##gff-version 3"]
        args.source = False
        args.base_5utr = "both"
        du.detect_5utr("test.tss", "test.gff", "test.ta", out_file, args)
        datas = import_data(out_file)
        ref = header + [self.example.out_5utr_other]
        self.assertEqual(datas[1], ref[1])
        args.base_5utr = "transcript"
        du.detect_5utr("test.tss", "test.gff", "test.ta", out_file, args)
        self.assertEqual(set(datas), set(ref))
        args.source = True
        args.base_5utr = "both"
        du.detect_5utr("test.tss", "test.gff", "test.ta", out_file, args)
        datas = import_data(out_file)
        ref = header + [self.example.out_5utr_tsspredator]
        self.assertListEqual(datas, ref)

    def test_compare_term(self):
        ta_dict = {
            "seq_id": "aaa",
            "source": "Refseq",
            "feature": "TSS",
            "start": 138,
            "end": 540,
            "phase": ".",
            "strand": "+",
            "score": "."
        }
        attributes_ta = {"ID": "tran0", "Name": "Transcript_0"}
        ta = Create_generator(ta_dict, attributes_ta, "gff")
        term = du.compare_term(ta, self.example.terms, 5)
        self.assertEqual(term.start, 530)

    def test_get_3utr(self):
        ta_dict = {
            "seq_id": "aaa",
            "source": "Refseq",
            "feature": "TSS",
            "start": 138,
            "end": 540,
            "phase": ".",
            "strand": "+",
            "score": "."
        }
        attributes_ta = {"ID": "tran0", "Name": "Transcript_0"}
        ta = Create_generator(ta_dict, attributes_ta, "gff")
        cds_dict = {
            "seq_id": "aaa",
            "source": "Refseq",
            "feature": "CDS",
            "start": 150,
            "end": 500,
            "phase": ".",
            "strand": "+",
            "score": "."
        }
        attributes_cds = {"ID": "cds0", "Name": "CDS_0"}
        cds = Create_generator(cds_dict, attributes_cds, "gff")
        attributes = ["ID=3utr0"]
        out = StringIO()
        utr_all = []
        utr_strain = {"aaa": []}
        args = self.mock_args.mock()
        args.length = 300
        args.base_3utr = "transcript"
        args.fuzzy_3utr = 10
        utr_ta = []
        du.get_3utr(ta, cds, utr_all, utr_strain, attributes, 0, out, args,
                    utr_ta)
        self.assertEqual(set(out.getvalue().split("\n")[:-1]),
                         set(self.example.out_3utr.split("\n")))
        out.close()

    def test_get_near_cds(self):
        ta_dict = {
            "seq_id": "aaa",
            "source": "Refseq",
            "feature": "TSS",
            "start": 138,
            "end": 540,
            "phase": ".",
            "strand": "+",
            "score": "."
        }
        attributes_ta = {"ID": "tran0", "Name": "Transcript_0"}
        ta = Create_generator(ta_dict, attributes_ta, "gff")
        attributes = ["ID=3utr0"]
        near_cds = du.get_near_cds(self.example.cdss, self.example.genes, ta,
                                   attributes, 300)
        self.assertEqual(near_cds.start, 148)

    def test_detect_3utr(self):
        args = self.mock_args.mock()
        args.fuzzy = 5
        args.base_3utr = "transcript"
        args.fuzzy_3utr = 10
        args.length = 300
        du.read_file = Mock_func().mock_read_file
        du.plot = Mock_func().mock_plot
        out_file = os.path.join(self.test_folder, "3utr.gff")
        du.detect_3utr("test.ta", "test.gff", "test.term", out_file, args)
        datas = import_data(out_file)
        self.assertEqual(set(datas),
                         set(self.example.out_3utr_gff.split("\n")))
Exemplo n.º 42
0
class TestCircRNA(unittest.TestCase):

    def setUp(self):
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_get_feature(self):
        attributes_cds = {"ID": "cds0", "Name": "CDS_0", "locus_tag": "AAA_00001",
                           "protein_id": "YP_918384.3"}
        attributes = circ.get_feature(Create_generator(self.example.cds_dict,
                                                       attributes_cds, "gff"))
        self.assertEqual(attributes, "AAA_00001")
        attributes_cds = {"ID": "cds0", "Name": "CDS_0", "protein_id": "YP_918384.3"}
        attributes = circ.get_feature(Create_generator(self.example.cds_dict,
                                                       attributes_cds, "gff"))
        self.assertEqual(attributes, "YP_918384.3")
        attributes_cds = {"ID": "cds0", "Name": "CDS_0"}
        attributes = circ.get_feature(Create_generator(self.example.cds_dict,
                                                       attributes_cds, "gff"))
        self.assertEqual(attributes, "cds0:122-267_f")

    def test_detect_conflict(self):
        circ_dict = {"seq_id": "aaa", "source": "Refseq", "feature": "circRNA", "start": 100,
                     "end": 467, "phase": ".", "strand": "+", "score": ".", "support": 30,
                     "start_site": 30, "end_site": 35, "situation": "P", "splice_type": "C"}
        attributes_circ = {"ID": "circrna0", "Name": "circRNA_0"}
        circrna = Create_generator(circ_dict, attributes_circ, "circ")
        gffs = [Create_generator(self.example.cds_dict, self.example.attributes_cds, "gff")]
        args = self.mock_args.mock()
        args.start_ratio = 0.3
        args.end_ratio = 0.3
        args.support = 5
        out = StringIO()
        out_best = StringIO()
        circ.detect_conflict(gffs, circrna, 0, out, out_best, args)
        self.assertEqual(out.getvalue(),
                         "circRNA_0\taaa\t+\t100\t467\tAAA_00001\t30\t1.0\t0.8571428571428571\n")
        out.close()

    def test_get_circrna(self):
        circs = []
        gffs = []
        for index in range(0, 5):
            circs.append(Create_generator(self.example.circ_dict[index],
                                          self.example.attributes_circ[index], "circ"))
        for index in range(0, 3):
            gffs.append(Create_generator(self.example.gffs_dict[index],
                                         self.example.attributes_gffs[index], "gff"))
        out = StringIO()
        out_best = StringIO()
        args = self.mock_args.mock()
        args.start_ratio = 0.3
        args.end_ratio = 0.3
        args.support = 5
        nums = circ.get_circrna(circs, gffs, 50, out, out_best, args)
        self.assertDictEqual(nums["support"], {'aaa': {0: 2, 20: 1, 5: 2, 25: 1, 10: 2, 30: 1, 15: 1},
                                               'all': {0: 3, 20: 1, 5: 3, 25: 1, 10: 2, 30: 1, 15: 1},
                                               'bbb': {0: 1, 5: 1}})
        self.assertDictEqual(nums["circular"], {'bbb': 1, 'aaa': 2, 'all': 3})
        self.assertDictEqual(nums["conflict"], {'bbb': {0: 1, 5: 1},
                                                'aaa': {},
                                                'all': {0: 1, 5: 1}})

    def test_detect_circrna(self):
        out_file = os.path.join(self.test_folder, "out_all.csv")
        stat_file = os.path.join(self.test_folder, "stat.csv")
        circ.read_file = Mock_read_file().read_file
        args = self.mock_args.mock()
        args.start_ratio = 0.5
        args.end_ratio = 0.5
        args.support = 5
        args.hypo = True
        circ.detect_circrna("test.circ", "test.gff", out_file, args, stat_file)
        circs = import_data(out_file)
        stats = import_data(stat_file)
        self.assertEqual(set(circs), set(self.example.out_file.split("\n")))
        self.assertEqual(set(stats), set(self.example.stat_file.split("\n")))
Exemplo n.º 43
0
class TestsRNADetection(unittest.TestCase):
    def setUp(self):
        self.mock_args = MockClass()
        self.example = Example()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.gffs = "test_folder/gffs"
        self.tsss = "test_folder/tsss"
        self.sorf = "test_folder/sORF"
        self.out = "test_folder/output"
        self.trans = "test_folder/trans"
        self.fastas = "test_folder/fastas"
        self.tex = "test_folder/tex"
        self.frag = "test_folder/frag"
        self.pros = "test_folder/pros"
        self.terms = "test_folder/terms"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
            os.mkdir(self.gffs)
            os.mkdir(self.tsss)
            os.mkdir(os.path.join(self.tsss, "tmp"))
            os.mkdir(self.out)
            os.mkdir(self.trans)
            os.mkdir(os.path.join(self.trans, "tmp"))
            os.mkdir(self.fastas)
            os.mkdir(os.path.join(self.fastas, "tmp"))
            os.mkdir(self.tex)
            os.mkdir(self.frag)
            os.mkdir(self.pros)
            os.mkdir(os.path.join(self.pros, "tmp"))
            os.mkdir(self.sorf)
            os.mkdir(os.path.join(self.sorf, "tmp"))
            os.mkdir(self.terms)
        args = self.mock_args.mock()
        args.tss_folder = self.tsss
        args.pro_folder = self.pros
        args.out_folder = self.out
        args.sorf_file = self.sorf
        args.fastas = self.fastas
        args.trans = self.trans
        args.terms = self.terms
        self.srna = sRNADetection(args)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        os.chdir(current_path)
        if os.path.exists("tmp"):
            shutil.rmtree("tmp")
        if os.path.exists("tmp_srna.csv"):
            os.remove("tmp_srna.csv")
        if os.path.exists("tmp_srna.gff"):
            os.remove("tmp_srna.gff")
        if os.path.exists("tmp_blast.txt"):
            os.remove("tmp_blast.txt")

    def test_check_folder_exist(self):
        path_ = self.srna._check_folder_exist(self.sorf)
        self.assertEqual(path_, "test_folder/sORF/tmp")

    def test_formatdb(self):
        database = "test_folder/test.fa"
        gen_file(database, "test")
        sr.change_format = self.mock.mock_change_format
        self.srna._run_format = self.mock.mock_run_format
        self.srna._formatdb(database, "type_", self.out, "blast_path", "sRNA")
        self.assertTrue(os.path.exists(os.path.join(self.out, "log.txt")))

    def test_check_necessary_file(self):
        self.srna.multiparser = Mock_multiparser
        self.srna._check_gff = self.mock.mock_check_gff
        args = self.mock_args.mock()
        args.trans = self.trans
        args.tsss = self.tsss
        args.pros = self.pros
        args.import_info = ["tss", "blast_nr", "blast_srna", "sec_str", "sorf"]
        args.fastas = self.fastas
        args.terms = self.terms
        args.sorf_file = self.sorf
        args.gffs = self.gffs
        args.tex_wigs = self.tex
        args.frag_wigs = self.frag
        args.utr_srna = True
        self.srna._check_necessary_file(args)

    def test_run_program(self):
        self.srna.multiparser = Mock_multiparser
        self.srna._check_gff = self.mock.mock_check_gff
        self.srna._run_normal = self.mock.mock_run_normal
        self.srna._run_utrsrna = self.mock.mock_run_utrsrna
        self.srna._merge_tex_frag_datas = self.mock.mock_merge_tex_frag_datas
        sr.filter_frag = self.mock.mock_run_filter_frag
        sr.merge_srna_gff = self.mock.mock_merge_srna_gff
        sr.merge_srna_table = self.mock.mock_merge_srna_table
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.sorf_file)
        gen_file(os.path.join(self.trans, "test_transcript.gff"),
                 self.example.sorf_file)
        gen_file(os.path.join(self.tsss, "test_TSS.gff"),
                 self.example.sorf_file)
        gen_file(os.path.join(self.tsss, "test_processing.gff"),
                 self.example.sorf_file)
        fuzzy_tsss = {"inter": 3}
        args = self.mock_args.mock()
        args.import_info = ["tss", "blast_nr", "blast_srna", "sec_str", "sorf"]
        args.trans = self.trans
        args.tsss = self.tsss
        args.pros = self.pros
        args.max_len = 300
        args.min_len = 30
        args.tex_notex = "tex_notex"
        args.fuzzy_tsss = fuzzy_tsss
        args.out_folder = self.out
        args.table_best = True
        args.wig_path = "wig_path"
        args.merge_wigs = "merge"
        args.libs = "libs"
        args.gffs = self.gffs
        args.in_cds = False
        args.utr_srna = True
        args.cutoff_overlap = 0.5
        prefixs = self.srna._run_program(args)
        self.assertListEqual(prefixs, ['test'])

    def test_get_seq_sec(self):
        sr.extract_energy = self.mock.mock_extract_energy
        self.srna.helper.get_seq = self.mock.mock_get_seq
        self.srna._run_RNAfold = self.mock.mock_run_RNAfold
        os.mkdir(os.path.join(self.out, "tmp_srna"))
        gen_file(os.path.join(self.fastas, "test.fa"), ">test\nAAATTTGGGCCC")
        datas = self.srna._get_seq_sec(self.fastas, self.out, "test",
                                       self.test_folder, self.test_folder,
                                       "vienna_path")
        self.assertEqual(datas["sec"].split("/")[-1], "test_folder")
        self.assertEqual(datas["dot"].split("/")[-1], "test_folder")
        self.assertEqual(datas["main"].split("/")[-1],
                         datas["tmp"].split("/")[-4])
        self.assertEqual(datas["tmp"].split("/")[-1], "tmp_srna")

    def test_replot_sec_to_pdf(self):
        self.srna._run_replot = self.mock.mock_run_replot
        self.srna._convert_pdf = self.mock.mock_convert_pdf
        gen_file(os.path.join(self.tsss, "test.rss.pdf"), "test")
        gen_file(os.path.join(self.tsss, "test.dp.pdf"), "test")
        tmp_paths = {"dot": self.out, "sec": self.fastas, "tmp": self.tsss}
        self.srna._replot_sec_to_pdf("vienna_util", tmp_paths, "ps2pdf14_path",
                                     "test")
        self.assertTrue(
            os.path.exists(os.path.join(tmp_paths["dot"], "test/test.dp.pdf")))
        self.assertTrue(
            os.path.exists(os.path.join(tmp_paths["sec"],
                                        "test/test.rss.pdf")))

    def test_plot_mountain(self):
        self.srna._run_mountain = self.mock.mock_run_mountain
        tmp_paths = {"main": self.test_folder, "tmp": self.tsss}
        moun_path = "fastas"
        gen_file(os.path.join(tmp_paths["tmp"], "test.dp.ps"), "test")
        self.srna._plot_mountain(True, moun_path, tmp_paths, "test",
                                 "vienna_util")
        self.assertTrue("test_folder/fastas/test/test.mountain.pdf")

    def test_compute_2d_and_energy(self):
        sr.extract_energy = self.mock.mock_extract_energy
        sr.change_format = self.mock.mock_change_format
        self.srna._run_replot = self.mock.mock_run_replot
        self.srna._convert_pdf = self.mock.mock_convert_pdf
        self.srna._run_mountain = self.mock.mock_run_mountain
        os.mkdir(os.path.join(self.out, "mountain_plot"))
        sec_path = os.path.join(self.out, "sec_structure")
        os.mkdir(sec_path)
        os.mkdir(os.path.join(sec_path, "sec_plot"))
        os.mkdir(os.path.join(sec_path, "dot_plot"))
        tmp_paths = {
            "dot": self.out,
            "sec": self.fastas,
            "tmp": self.tsss,
            "main": self.test_folder
        }
        gen_file(os.path.join(self.fastas, "tmp/test.fa"),
                 ">test\nAAATTTGGGCCC")
        gen_file(os.path.join(self.out, "tmp_basic_test"),
                 self.example.srna_file)
        gen_file(os.path.join(self.out, "tmp_energy_test"), "test")
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.fastas = self.fastas
        args.vienna_path = "test"
        args.vienna_util = "test"
        args.mountain = True
        args.ps2pdf14_path = "test"
        self.srna._compute_2d_and_energy(args, ["test"])
        datas = import_data(os.path.join(self.out, "tmp_basic_test"))
        self.assertEqual("\n".join(datas), "test")

    def test_blast(self):
        sr.extract_blast = self.mock.mock_extract_blast
        self.srna._run_blast = self.mock.mock_run_blast
        self.srna._run_format = self.mock.mock_run_format
        gen_file(os.path.join(self.out, "tmp_basic_test"),
                 self.example.srna_file)
        gen_file(os.path.join(self.out, "tmp_nr_test"), "test")
        gen_file(os.path.join(self.fastas, "tmp/test.fa"),
                 ">test\nAAATTTGGGCCC")
        args = self.mock_args.mock()
        args.blast_path = "test"
        args.fastas = self.fastas
        args.out_folder = self.out
        self.srna._blast("database", False, "dna", args, ["test"], "blast_all",
                         "nr", 0.0001)
        datas = import_data(os.path.join(self.out, "tmp_basic_test"))
        self.assertEqual("\n".join(datas), "test")

    def test_class_srna(self):
        sr.classify_srna = self.mock.mock_classify_srna
        sr.gen_srna_table = self.mock.mock_gen_srna_table
        gff_out = os.path.join(self.out, "gffs")
        table_out = os.path.join(self.out, "tables")
        stat_out = os.path.join(self.out, "stat")
        os.mkdir(gff_out)
        os.mkdir(table_out)
        os.mkdir(stat_out)
        os.mkdir(os.path.join(table_out, "for_class"))
        os.mkdir(os.path.join(gff_out, "for_class"))
        args = self.mock_args.mock()
        args.max_len = 300
        args.min_len = 30
        args.import_info = ["tss", "blast_nr", "blast_srna", "sec_str", "sorf"]
        self.srna._class_srna(["test"], args)
        self.assertTrue(os.path.exists(os.path.join(gff_out,
                                                    "for_class/test")))
        self.assertTrue(
            os.path.exists(os.path.join(table_out, "for_class/test")))

    def test_filter_srna(self):
        sr.classify_srna = self.mock.mock_classify_srna
        sr.gen_srna_table = self.mock.mock_gen_srna_table
        sr.extract_blast = self.mock.mock_extract_blast
        self.srna._run_blast = self.mock.mock_run_blast
        self.srna._run_format = self.mock.mock_run_format
        sr.extract_energy = self.mock.mock_extract_energy
        sr.change_format = self.mock.mock_change_format
        self.srna._run_replot = self.mock.mock_run_replot
        self.srna._convert_pdf = self.mock.mock_convert_pdf
        self.srna._run_mountain = self.mock.mock_run_mountain
        self.srna.multiparser = Mock_multiparser
        self.srna._check_gff = self.mock.mock_check_gff
        self.srna._run_normal = self.mock.mock_run_normal
        self.srna._run_utrsrna = self.mock.mock_run_utrsrna
        sr.merge_srna_gff = self.mock.mock_merge_srna_gff
        sr.merge_srna_table = self.mock.mock_merge_srna_table
        sr.extract_energy = self.mock.mock_extract_energy
        self.srna.helper.get_seq = self.mock.mock_get_seq
        self.srna._run_RNAfold = self.mock.mock_run_RNAfold
        stat_out = os.path.join(self.out, "stat")
        if "mountain_plot" not in os.listdir(self.out):
            os.mkdir(os.path.join(self.out, "mountain_plot"))
        sec_path = os.path.join(self.out, "sec_structure")
        if "sec_structure" not in os.listdir(self.out):
            os.mkdir(sec_path)
            os.mkdir(os.path.join(sec_path, "sec_plot"))
            os.mkdir(os.path.join(sec_path, "dot_plot"))
        gen_file(os.path.join(self.fastas, "tmp/test.fa"),
                 ">test\nAAATTTGGGCCC")
        gen_file(os.path.join(self.out, "sRNA_seq_test"),
                 ">test\nAAATTTGGGCCC")
        gen_file(os.path.join(self.out, "tmp_basic_test"),
                 self.example.srna_file)
        gen_file(os.path.join(self.out, "tmp_energy_test"), "test")
        gen_file(os.path.join(self.out, "tmp_nr_test"), "test")
        gen_file(os.path.join(self.out, "tmp_sRNA_test"), "test")
        gen_file(os.path.join(self.out, "tmp_sRNA_test.csv"), "test")
        gen_file(os.path.join(self.test_folder, "srna"), "test")
        gen_file(os.path.join(self.test_folder, "nr"), "test")
        sr.blast_class = self.mock.mock_blast_class
        sr.srna_sorf_comparison = self.mock.mock_srna_sorf_comparison
        args = self.mock_args.mock()
        args.import_info = ["tss", "blast_nr", "blast_srna", "sec_str", "sorf"]
        args.out_folder = self.out
        args.fastas = self.fastas
        args.vienna_path = "test"
        args.vienna_util = "test"
        args.table_best = True
        args.in_cds = False
        args.ps2pdf14_path = "test"
        args.sorf_file = self.sorf
        args.mountain = True
        args.nr_database = os.path.join(self.test_folder, "nr")
        args.srna_database = os.path.join(self.test_folder, "srna")
        args.blast_path = "blast_path"
        args.nr_format = False
        args.srna_format = False
        args.e_nr = 0
        args.e_srna = 0
        self.srna._filter_srna(args, ["test"])
        datas = import_data(os.path.join(self.out, "tmp_basic_test"))
        self.assertEqual("\n".join(datas), "test")
Exemplo n.º 44
0
class TestSNPCalling(unittest.TestCase):
    def setUp(self):
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.fasta = os.path.join(self.test_folder, "fasta")
        self.snp_folder = os.path.join(self.test_folder, "snp")
        self.table = os.path.join(self.test_folder, "table")
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
            os.mkdir(self.fasta)
            os.mkdir(self.snp_folder)
            os.mkdir(self.table)
            os.mkdir(os.path.join(self.test_folder, "compare_reference"))
            os.mkdir(os.path.join(self.test_folder, "compare_reference/seqs"))
            os.mkdir(
                os.path.join(self.test_folder,
                             "compare_reference/seqs/with_BAQ"))
            os.mkdir(
                os.path.join(self.test_folder, "compare_reference/statistics"))
        args = self.mock_args.mock()
        args.types = "reference"
        args.out_folder = self.test_folder
        args.fastas = self.fasta
        self.snp = SNPCalling(args)
        self.mock = Mock_func()

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_transcript_snp(self):
        fasta = os.path.join(self.test_folder, "NC_007795.1.fa")
        gen_file(fasta, self.example.fasta)
        snp = os.path.join(self.test_folder, "NC_007795.1.csv")
        gen_file(snp, self.example.snp)
        args = self.mock_args.mock()
        args.depth = 5
        args.fraction = 0.3
        args.quality = 2
        args.depth_s = "n_10"
        args.depth_b = "a_2"
        args.dp4_sum = "n_10"
        args.dp4_frac = 0.5
        args.idv = "n_10"
        args.imf = 0.5
        args.filters = ["VDB_s0.1"]
        args.min_sample = 2
        os.mkdir(
            os.path.join(self.test_folder,
                         "compare_reference/seqs/with_BAQ/test"))
        depth_file = os.path.join(self.test_folder, "tmp_depth")
        gen_file(depth_file, self.example.depth_file)
        self.snp._transcript_snp(fasta, snp, "test", "with", "test", 10,
                                 self.table, args)
        datas = import_data(
            os.path.join(
                self.test_folder,
                "compare_reference/statistics/stat_test_with_BAQ_SNP_best.csv")
        )
        self.assertEqual("\n".join(datas), self.example.out_stat)
        datas = import_data(
            os.path.join(
                self.test_folder,
                "compare_reference/seqs/with_BAQ/test/test_NC_007795.1_1_1.fa")
        )
        self.assertEqual("\n".join(datas),
                         ">NC_007795.1\nAaTTGaaTCCCGAACGACAGTTAT")
        os.remove("test_seq_reference.csv")
        os.remove("test_best.vcf")
        os.remove("test_NC_007795.1_SNP_QUAL_best.png")
        os.remove("test_NC_007795.1_SNP_QUAL_raw.png")

    def test_run_sub(self):
        self.snp._run_tools = self.mock.mock_run_tools
        self.snp._transcript_snp = self.mock.mock_transcript_snp
        file_prefixs = {"raw_prefix": "test", "table_prefix": "test"}
        args = self.mock_args.mock()
        self.snp._run_sub(args, "fasta", "with", file_prefixs, "test",
                          self.test_folder, 10)
        self.assertTrue(
            os.path.exists(
                os.path.join(self.test_folder,
                             "compare_reference/seqs/with_BAQ/test")))

    def test_run_program(self):
        self.snp._run_sub = self.mock.mock_run_sub
        args = self.mock_args.mock()
        args.program = ["with_BAQ"]
        self.snp._run_program("fasta", "test", "test", 10, "table", args)
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "test")))

    def test_detect_fasta(self):
        datas = self.snp._detect_fasta("test.fa")
        self.assertEqual(datas, (True, 'test'))

    def test_merge_bams(self):
        args = self.mock_args.mock()
        args.frag_bams = os.path.join(self.test_folder, "frag_bams")
        args.normal_bams = os.path.join(self.test_folder, "tex_bams")
        os.mkdir(args.normal_bams)
        os.mkdir(args.frag_bams)
        self.snp._run_bam = self.mock.mock_run_bam
        gen_file(os.path.join(args.normal_bams, "tex.bam"), "test")
        gen_file(os.path.join(args.normal_bams, "notex.bam"), "test")
        gen_file(os.path.join(args.frag_bams, "farg.bam"), "test")
        args.bams = [args.frag_bams, args.normal_bams]
        args.samtools_path = "test"
        num = self.snp._merge_bams(args)
        self.assertEqual(num, 2)

    def test_modify_header(self):
        gen_file(os.path.join(self.fasta, "test.fa"),
                 ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        self.snp._modify_header(self.fasta)
        datas = import_data(os.path.join(self.fasta, "test.fa"))
        self.assertEqual("\n".join(datas), ">DDD\nAATTAATTGGCC")

    def test_get_genome_name(self):
        self.snp._get_header = self.mock.mock_get_header
        gen_file(os.path.join(self.test_folder, "header"), self.example.bam)
        args = self.mock_args.mock()
        args.samtools_path = "test"
        seq_names = self.snp._get_genome_name(args)

    def test_run_snp_calling(self):
        self.snp._get_header = self.mock.mock_get_header
        self.snp._run_bam = self.mock.mock_run_bam
        self.snp._run_sub = self.mock.mock_run_sub
        self.snp._run_tools = self.mock.mock_run_tools
        self.snp._transcript_snp = self.mock.mock_transcript_snp
        gen_file(os.path.join(self.fasta, "test.fa"),
                 ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        gen_file(os.path.join(self.test_folder, "header"), self.example.bam)
        gen_file(os.path.join(self.test_folder, "whole_reads.bam"), "test")
        gen_file(os.path.join(self.test_folder, "whole_reads_sorted.bam"),
                 "test")
        args = self.mock_args.mock()
        args.types = "reference"
        args.program = ["with_BAQ"]
        args.bams = [
            os.path.join(self.test_folder, "frag_bams"),
            os.path.join(self.test_folder, "tex_bams")
        ]
        args.frag_bams = os.path.join(self.test_folder, "frag_bams")
        args.normal_bams = os.path.join(self.test_folder, "tex_bams")
        os.mkdir(args.normal_bams)
        os.mkdir(args.frag_bams)
        gen_file(os.path.join(args.normal_bams, "tex.bam"), "test")
        gen_file(os.path.join(args.normal_bams, "notex.bam"), "test")
        gen_file(os.path.join(args.frag_bams, "farg.bam"), "test")
        args.samtools_path = "test"
        self.snp.run_snp_calling(args)
Exemplo n.º 45
0
class TestTerminator(unittest.TestCase):
    """Exercise ``Terminator`` against a scratch ``test_folder`` tree."""

    def _patch_module(self, module, name, replacement):
        """Set ``module.name`` to ``replacement`` for the current test only.

        The previous value (or the absence) of the attribute is restored by a
        registered cleanup, so a mock installed by one test does not leak
        into tests that run later in the same process.
        """
        if hasattr(module, name):
            self.addCleanup(setattr, module, name, getattr(module, name))
        else:
            self.addCleanup(delattr, module, name)
        setattr(module, name, replacement)

    def setUp(self):
        """Create a fresh scratch tree and the ``Terminator`` under test."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.mock_parser = Mock_Multiparser()
        self.test_folder = "test_folder"
        self.out = "test_folder/output"
        self.fastas = "test_folder/fastas"
        self.gffs = "test_folder/gffs"
        self.srnas = "test_folder/srnas"
        self.trans = "test_folder/trans"
        # A folder left behind by an aborted run used to make the whole
        # directory setup be skipped; always start from a clean tree.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        for folder in (self.test_folder, self.out, self.fastas, self.gffs,
                       self.srnas, self.trans,
                       os.path.join(self.out, "tables"),
                       os.path.join(self.out, "gffs"),
                       os.path.join(self.gffs, "tmp"),
                       os.path.join(self.fastas, "tmp")):
            os.mkdir(folder)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.trans = self.trans
        args.out_folder = self.out
        args.srnas = self.srnas
        self.term = Terminator(args)

    def tearDown(self):
        """Remove the scratch tree and the temporary folders in the cwd."""
        for folder in (self.test_folder, "tmp_transterm", "tmp_term_table",
                       "tmp_merge_gff"):
            if os.path.exists(folder):
                shutil.rmtree(folder)

    def test_convert_gff2rntptt(self):
        """``_convert_gff2rntptt`` reports the file type and prefix found."""
        os.mkdir(os.path.join(self.srnas, "tmp"))
        gen_file(os.path.join(self.gffs, "aaa.gff"), self.example.gff_file)
        gen_file(os.path.join(self.srnas, "aaa_sRNA.gff"),
                 self.example.srna_file)
        gen_file(os.path.join(self.fastas, "aaa.fa"), self.example.fasta_file)
        file_types, prefixs = self.term._convert_gff2rntptt(
            self.gffs, self.fastas, self.srnas)
        self.assertDictEqual(file_types, {'aaa': 'srna'})
        self.assertListEqual(prefixs, ['aaa'])

    def test_combine_annotation(self):
        """Two ptt files are combined without their first three lines."""
        test1 = os.path.join(self.test_folder, "test1.ptt")
        test2 = os.path.join(self.test_folder, "test2.ptt")
        gen_file(test1, self.example.ptt)
        gen_file(test2, self.example.ptt)
        files = [test1, test2]
        combine_file = os.path.join(self.test_folder, "combine")
        self.term._combine_annotation(combine_file, files)
        datas = import_data(combine_file)
        result = self.example.ptt.split("\n")[3:]
        self.assertEqual("\n".join(datas), "\n".join(result + result))

    def test_run_TransTermHP(self):
        """``_run_TransTermHP`` creates a per-genome folder in ``hp_folder``."""
        self.term._TransTermHP = self.mock.mock_TransTermHP
        os.mkdir(os.path.join(self.gffs, "tmp/combine"))
        gen_file(os.path.join(self.gffs, "tmp/combine/aaa.ptt"),
                 self.example.ptt)
        gen_file(os.path.join(self.fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.hp_folder = self.out
        self.term._run_TransTermHP(args)
        self.assertTrue(os.path.exists(os.path.join(self.out, "aaa")))

    def test_convert_to_gff(self):
        """A ``.bag`` result is converted into the expected gff content."""
        self.term.multiparser = self.mock_parser
        hp_folder = os.path.join(self.out, "aaa")
        os.mkdir(hp_folder)
        gen_file(os.path.join(hp_folder, "aaa_best_terminator_after_gene.bag"),
                 self.example.bag)
        os.mkdir("tmp_transterm")
        args = self.mock_args.mock()
        args.hp_folder = self.out
        args.gffs = self.gffs
        self.term._convert_to_gff(["aaa"], args)
        datas = import_data(os.getcwd() + "/tmp_transterm/aaa_transtermhp.gff")
        self.assertEqual("\n".join(datas), self.example.gff_bag)

    def test_merge_sRNA(self):
        """``_merge_sRNA`` returns a path ending in ``tmp_merge_gff``."""
        os.mkdir(os.path.join(self.srnas, "tmp"))
        self.term.multiparser = self.mock_parser
        gen_file(os.path.join(self.gffs, "aaa.gff"), self.example.gff_file)
        gen_file(os.path.join(self.srnas, "tmp/aaa_sRNA.gff"),
                 self.example.srna_file)
        merge = self.term._merge_sRNA(self.srnas, ["aaa"], self.gffs)
        self.assertEqual(merge.split("/")[-1], "tmp_merge_gff")
        shutil.rmtree("tmp_merge_gff")

    def test_move_file(self):
        """Run ``_move_file`` on one terminator gff and one raw csv table."""
        term_outfolder = self.gffs
        csv_outfolder = self.out
        gen_file(os.path.join(term_outfolder, "aaa_term.gff"),
                 self.example.term_file)
        if (not os.path.exists("tmp_term_table")):
            os.mkdir("tmp_term_table")
        gen_file("tmp_term_table/aaa_term_raw.csv", "test")
        self.term._move_file(term_outfolder, csv_outfolder)
        shutil.rmtree("tmp_term_table")
        # NOTE(review): assertTrue() on a non-empty string always passes, so
        # these two checks do not verify that the files exist.  They were
        # presumably meant to be wrapped in os.path.exists() -- TODO confirm
        # the paths against _move_file before tightening them.
        self.assertTrue(
            "test_folder/output/gffs/all_candidates/aaa_term_all.gff")
        self.assertTrue(
            "test_folder/output/tables/all_candidates/aaa_term_all.csv")

    def test_compute_intersection_forward_reverse(self):
        """Run the intersection step with all sequence helpers mocked."""
        self.term.multiparser = self.mock_parser
        self._patch_module(te, "intergenic_seq", self.mock.mock_intergenic_seq)
        self._patch_module(te, "poly_t", self.mock.mock_poly_t)
        self._patch_module(te, "detect_coverage",
                           self.mock.mock_detect_coverage)
        self.term._run_rnafold = self.mock.mock_run_rnafold
        args = self.mock_args.mock()
        args.trans = self.trans
        args.fastas = self.fastas
        args.tex_notex = "tex_notex"
        args.libs = "libs"
        args.replicates = "rep"
        args.RNAfold_path = "test"
        self.term._compute_intersection_forward_reverse(["aaa"],
                                                        self.test_folder,
                                                        "wig_path",
                                                        "merge_wigs", args)
        # NOTE(review): os.path.join() returns a non-empty string, so these
        # assertions always pass and do not check for the files on disk.
        self.assertTrue(os.path.join(self.out, "inter_seq_aaa"))
        self.assertTrue(os.path.join(self.out, "inter_sec_aaa"))

    def test_compute_stat(self):
        """``_compute_stat`` leaves the candidate csv files under tables."""
        term_outfolder = os.path.join(self.out, "gffs")
        csv_outfolder = os.path.join(self.out, "tables")
        self._patch_module(te, "stat_term", self.mock.mock_stat_term)
        gen_file(
            os.path.join(term_outfolder, "all_candidates/aaa_term_all.gff"),
            self.example.term_file)
        gen_file(os.path.join(term_outfolder, "best_candidates/aaa_term.csv"),
                 self.example.term_file)
        gen_file(
            os.path.join(term_outfolder, "expressed_candidates/aaa_term.csv"),
            self.example.term_file)
        gen_file(
            os.path.join(term_outfolder,
                         "non_expressed_candidates/aaa_term.csv"),
            self.example.term_file)
        args = self.mock_args.mock()
        args.stat = True
        args.out_folder = self.out
        self.term._compute_stat(args)
        for candidate in ("expressed_candidates/aaa_term.csv",
                          "best_candidates/aaa_term.csv",
                          "non_expressed_candidates/aaa_term.csv"):
            self.assertTrue(
                os.path.exists(os.path.join(csv_outfolder, candidate)))

    def test_run_terminator(self):
        """``run_terminator`` produces the candidate folders for gffs/tables."""
        self._patch_module(te, "stat_term", self.mock.mock_stat_term)
        self._patch_module(te, "intergenic_seq", self.mock.mock_intergenic_seq)
        self._patch_module(te, "poly_t", self.mock.mock_poly_t)
        self._patch_module(te, "detect_coverage",
                           self.mock.mock_detect_coverage)
        # Instance-level replacements need no cleanup: setUp builds a new
        # Terminator for every test.
        self.term.multiparser = self.mock_parser
        self.term._run_rnafold = self.mock.mock_run_rnafold
        self.term._TransTermHP = self.mock.mock_TransTermHP
        self.term._compare_term_tran = self.mock.mock_compare_term_tran
        self.term._remove_tmp_file = self.mock.mock_remove_tmp_file
        os.mkdir(os.path.join(self.srnas, "tmp"))
        os.mkdir(os.path.join(self.trans, "tmp"))
        gen_file(os.path.join(self.gffs, "tmp/aaa.gff"), self.example.gff_file)
        gen_file(os.path.join(self.fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.srnas, "tmp/aaa_sRNA.gff"),
                 self.example.srna_file)
        gen_file(os.path.join(self.trans, "tmp/aaa_transcript.gff"),
                 self.example.tran_file)
        tex_wigs = os.path.join(self.test_folder, "tex")
        frag_wigs = os.path.join(self.test_folder, "frag")
        os.mkdir(tex_wigs)
        os.mkdir(frag_wigs)
        gen_file(os.path.join(frag_wigs, "frag.wig"), "text")
        gen_file(os.path.join(tex_wigs, "tex.wig"), "text")
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.fastas = self.fastas
        args.gffs = self.gffs
        args.trans = self.trans
        args.srnas = self.srnas
        args.tex_wigs = tex_wigs
        args.frag_wigs = frag_wigs
        args.hp_folder = self.test_folder
        args.tex_notex = "tex_notex"
        args.wig_path = self.test_folder
        args.merge_wigs = self.test_folder
        args.RNAfold_path = "RNAfold_path"
        args.stat = True
        args.fuzzy_up_ta = 2
        args.fuzzy_up_gene = 2
        args.fuzzy_down_ta = 2
        args.fuzzy_down_gene = 2
        self.term.run_terminator(args)
        for produced in ("tables/all_candidates",
                         "tables/expressed_candidates",
                         "tables/best_candidates",
                         "gffs/all_candidates",
                         "gffs/expressed_candidates",
                         "gffs/best_candidates"):
            self.assertTrue(os.path.exists(os.path.join(self.out, produced)))
Exemplo n.º 46
0
class TestsTSSpredator(unittest.TestCase):
    """Exercise ``TSSpredator`` against a scratch ``test_folder`` tree."""

    def _patch_module(self, module, name, replacement):
        """Set ``module.name`` to ``replacement`` for the current test only.

        The previous value (or the absence) of the attribute is restored by a
        registered cleanup, so a mock installed by one test does not leak
        into tests that run later in the same process.
        """
        if hasattr(module, name):
            self.addCleanup(setattr, module, name, getattr(module, name))
        else:
            self.addCleanup(delattr, module, name)
        setattr(module, name, replacement)

    def setUp(self):
        """Create a fresh scratch tree and the ``TSSpredator`` under test."""
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.mock_parser = Mock_Multiparser()
        self.example = Example()
        self.test_folder = "test_folder"
        self.trans = "test_folder/trans"
        self.out = "test_folder/output"
        self.wigs = "test_folder/wigs"
        self.gffs = "test_folder/gffs"
        self.tsss = "test_folder/tsss"
        self.fastas = "test_folder/fastas"
        self.manual = "test_folder/manuals"
        # A folder left behind by an aborted run used to make the whole
        # directory setup be skipped; always start from a clean tree.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        for folder in (self.test_folder, self.trans, self.out, self.wigs,
                       self.gffs, self.tsss, self.fastas, self.manual):
            os.mkdir(folder)
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.ta_files = self.trans
        args.gffs = self.gffs
        args.wig_folder = self.wigs
        args.fastas = self.fastas
        args.manual = self.manual
        self.tss = TSSpredator(args)

    def tearDown(self):
        """Remove the scratch tree and the ``tmp`` folder in the cwd."""
        for folder in (self.test_folder, "tmp"):
            if os.path.exists(folder):
                shutil.rmtree(folder)

    def test_print_lib(self):
        """``_print_lib`` writes one ``<prefix>_<cond><rep> = <wig>`` line each."""
        out = StringIO()
        lib_list = [{
            "condition": 1,
            "replicate": "a",
            "wig": "test_1.wig"
        }, {
            "condition": 2,
            "replicate": "a",
            "wig": "test_2.wig"
        }]
        self.tss._print_lib(2, lib_list, out, self.wigs, "test", ["a"])
        self.assertEqual(out.getvalue(),
                         ("test_1a = test_folder/wigs/test_1.wig\n"
                          "test_2a = test_folder/wigs/test_2.wig\n"))

    def test_import_lib(self):
        """``_import_lib`` emits the annotation, TEX wig and genome lines."""
        out = StringIO()
        libs = [
            "test1_forward.wig:notex:1:a:+", "test1_reverse.wig:notex:1:a:-",
            "test1_TEX_forward.wig:tex:1:a:+",
            "test1_TEX_reverse.wig:tex:1:a:-"
        ]
        for wig in ("test1_forward.wig_STRAIN_test.wig",
                    "test1_reverse.wig_STRAIN_test.wig",
                    "test1_TEX_forward.wig_STRAIN_test.wig",
                    "test1_TEX_reverse.wig_STRAIN_test.wig"):
            gen_file(os.path.join(self.wigs, wig), "test")
        self.tss._import_lib(libs, self.wigs, "test", out, "test.gff", "TSS",
                             "test.fa")
        self.assertListEqual(out.getvalue().split("\n"), [
            "annotation_1 = test.gff",
            "fivePrimeMinus_1a = test_folder/wigs/test1_TEX_reverse.wig",
            "fivePrimePlus_1a = test_folder/wigs/test1_TEX_forward.wig",
            "genome_1 = test.fa", ""
        ])

    def test_gen_config(self):
        """``_gen_config`` writes the expected configuration file."""
        os.mkdir(os.path.join(self.out, "MasterTables"))
        os.mkdir(os.path.join(self.wigs, "tmp"))
        config_file = os.path.join(self.test_folder, "config")
        libs = [
            "test1_forward.wig:notex:1:a:+", "test1_reverse.wig:notex:1:a:-",
            "test1_TEX_forward.wig:tex:1:a:+",
            "test1_TEX_reverse.wig:tex:1:a:-"
        ]
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.program = "TSS"
        args.height = 0.3
        args.height_reduction = 0.2
        args.factor = 2.0
        args.factor_reduction = 0.5
        args.base_height = 0.00
        args.enrichment_factor = 2.0
        args.processing_factor = 1.5
        args.utr_length = 300
        args.cluster = 3
        args.repmatch = ["all_2"]
        args.libs = libs
        args.output_prefixs = ["test1"]
        args.specify_strains = None
        self.tss._gen_config("test", args, self.gffs + "/tmp/test.gff",
                             self.wigs + "/tmp", self.fastas + "/tmp/test.fa",
                             config_file)
        datas = import_data(config_file)
        self.assertEqual("\n".join(datas), self.example.config)

    def test_set_gen_config(self):
        """``_set_gen_config`` writes ``config_test.ini`` with expected text."""
        os.mkdir(os.path.join(self.fastas, "tmp"))
        os.mkdir(os.path.join(self.gffs, "tmp"))
        os.mkdir(os.path.join(self.wigs, "tmp"))
        os.mkdir(os.path.join(self.out, "MasterTables"))
        gen_file(os.path.join(self.fastas, "tmp/test.fa"), "test")
        gen_file(os.path.join(self.gffs, "tmp/test.gff"), "test")
        for wig in ("tmp/test1_forward.wig_STRAIN_test.wig",
                    "tmp/test1_reverse.wig_STRAIN_test.wig",
                    "tmp/test1_TEX_forward.wig_STRAIN_test.wig",
                    "tmp/test1_TEX_reverse.wig_STRAIN_test.wig"):
            gen_file(os.path.join(self.wigs, wig), "test")
        libs = [
            "test1_forward.wig:notex:1:a:+", "test1_reverse.wig:notex:1:a:-",
            "test1_TEX_forward.wig:tex:1:a:+",
            "test1_TEX_reverse.wig:tex:1:a:-"
        ]
        args = self.mock_args.mock()
        args.program = "TSS"
        args.height = 0.3
        args.height_reduction = 0.2
        args.factor = 2.0
        args.factor_reduction = 0.5
        args.base_height = 0.00
        args.enrichment_factor = 2.0
        args.processing_factor = 1.5
        args.utr_length = 300
        args.libs = libs
        args.out_folder = self.out
        args.cluster = 3
        args.repmatch = ["all_2"]
        args.specify_strains = None
        args.output_prefixs = ["test1"]
        self.tss._set_gen_config(args, self.test_folder)
        datas = import_data(os.path.join(self.test_folder, "config_test.ini"))
        self.assertEqual("\n".join(datas), self.example.config)

    def test_convert_gff(self):
        """A MasterTable is converted into the expected TSS gff content."""
        os.mkdir(os.path.join(self.out, "gffs"))
        os.mkdir(os.path.join(self.out, "MasterTables"))
        os.mkdir(os.path.join(self.out, "MasterTables/MasterTable_test"))
        gen_file(
            os.path.join(self.out,
                         "MasterTables/MasterTable_test/MasterTable.tsv"),
            self.example.master)
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.program = "TSS"
        self.tss._convert_gff(["test"], args)
        datas = import_data(os.path.join(self.out, "gffs/test_TSS.gff"))
        self.assertEqual("\n".join(datas), self.example.master_gff)

    def test_merge_wigs(self):
        """Forward and reverse wigs are concatenated into ``tmp/merge_*``."""
        gen_file(os.path.join(self.wigs, "test1_forward.wig"), "test_f")
        gen_file(os.path.join(self.wigs, "test1_reverse.wig"), "test_r")
        gen_file(os.path.join(self.wigs, "test1_TEX_forward.wig"), "test_f")
        gen_file(os.path.join(self.wigs, "test1_TEX_reverse.wig"), "test_r")
        libs = [
            "test1_forward.wig:notex:1:a:+", "test1_reverse.wig:notex:1:a:-",
            "test1_TEX_forward.wig:tex:1:a:+",
            "test1_TEX_reverse.wig:tex:1:a:-"
        ]
        self.tss._merge_wigs(self.wigs, "test", libs)
        datas = import_data(os.path.join("tmp", "merge_forward.wig"))
        self.assertEqual("\n".join(datas), "test_ftest_f")
        datas = import_data(os.path.join("tmp", "merge_reverse.wig"))
        self.assertEqual("\n".join(datas), "test_rtest_r")
        shutil.rmtree("tmp")

    def test_check_orphan(self):
        """``_check_orphan`` leaves ``test_TSS.gff`` in the output gffs."""
        os.mkdir(os.path.join(self.out, "gffs"))
        gen_file(os.path.join(self.wigs, "test1_forward.wig"), "test_f")
        gen_file(os.path.join(self.wigs, "test1_reverse.wig"), "test_r")
        gen_file(os.path.join(self.wigs, "test1_TEX_forward.wig"), "test_f")
        gen_file(os.path.join(self.wigs, "test1_TEX_reverse.wig"), "test_r")
        self._patch_module(ts, "check_orphan", self.mock.mock_check_orphan)
        libs = [
            "test1_TEX_forward.wig:tex:1:a:+",
            "test1_TEX_reverse.wig:tex:1:a:-", "test1_forward.wig:notex:1:a:+",
            "test1_reverse.wig:notex:1:a:-"
        ]
        args = self.mock_args.mock()
        args.program = "TSS"
        args.gffs = self.gffs
        args.libs = libs
        self.tss._check_orphan(["test"], self.wigs, args)
        self.assertTrue(
            os.path.exists(os.path.join(self.out, "gffs/test_TSS.gff")))

    def test_low_expression(self):
        """``_low_expression`` writes the cutoff statistics table."""
        self._patch_module(ts, "filter_low_expression",
                           self.mock.mock_filter_low_expression)
        gen_file(os.path.join(self.wigs, "test1_forward.wig"), "test_f")
        gen_file(os.path.join(self.wigs, "test1_reverse.wig"), "test_r")
        gen_file(os.path.join(self.wigs, "test1_TEX_forward.wig"), "test_f")
        gen_file(os.path.join(self.wigs, "test1_TEX_reverse.wig"), "test_r")
        gen_file(os.path.join(self.gffs, "test_TSS.gff"),
                 self.example.tss_file)
        os.mkdir(os.path.join(self.out, "statistics"))
        os.mkdir(os.path.join(self.out, "statistics/test"))
        libs = [
            "test1_TEX_forward.wig:tex:1:a:+",
            "test1_TEX_reverse.wig:tex:1:a:-", "test1_forward.wig:notex:1:a:+",
            "test1_reverse.wig:notex:1:a:-"
        ]
        args = self.mock_args.mock()
        args.manual = "manual"
        args.libs = libs
        args.wig_folder = self.wigs
        args.program = "TSS"
        args.cluster = 3
        self.tss._low_expression(args, self.gffs)
        shutil.rmtree("tmp")
        datas = import_data(
            os.path.join(
                self.out,
                "statistics/test/stat_test_low_expression_cutoff.csv"))
        self.assertEqual("\n".join(datas),
                         "Genome\tCutoff_coverage\ntest\t100")

    def test_merge_manual(self):
        """``_merge_manual`` yields the comparison table and the TSS gff."""
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.tss_file)
        os.mkdir(os.path.join(self.out, "statistics"))
        os.mkdir(os.path.join(self.out, "statistics/test"))
        os.mkdir(os.path.join(self.out, "gffs"))
        self._patch_module(ts, "merge_manual_predict_tss",
                           self.mock.mock_merge_manual_predict_tss)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.manual = "test_folder/manuals/tmp"
        os.mkdir(args.manual)
        gen_file("test_folder/manuals/tmp/test.gff", "test")
        args.wig_folder = self.wigs
        args.out_folder = self.out
        args.program = "TSS"
        args.utr_length = 300
        args.libs = "libs"
        args.cluster = 3
        self.tss._merge_manual(["test"], args)
        self.assertTrue(
            os.path.exists(
                os.path.join(
                    self.out,
                    "statistics/test/stat_compare_TSSpredator_manual_test.csv")
            ))
        self.assertTrue(
            os.path.exists(os.path.join(self.out, "gffs/test_TSS.gff")))
        shutil.rmtree(args.manual)

    def test_deal_with_overlap(self):
        """``test_TSS.gff`` still exists after ``_deal_with_overlap``."""
        self._patch_module(ts, "filter_tss_pro", self.mock.mock_filter_tss_pro)
        gen_file(os.path.join(self.out, "test_TSS.gff"), self.example.tss_file)
        gen_file(os.path.join(self.test_folder, "test_processing.gff"),
                 self.example.tss_file)
        args = self.mock_args.mock()
        args.overlap_feature = "overlap"
        args.program = "TSS"
        args.cluster = 3
        args.overlap_gffs = self.test_folder
        self.tss._deal_with_overlap(self.out, args)
        self.assertTrue(os.path.exists(os.path.join(self.out, "test_TSS.gff")))

    def test_stat_tss(self):
        """``_stat_tss`` leaves the venn and class plots in statistics."""
        self._patch_module(ts, "stat_tsspredator",
                           self.mock.mock_stat_tsspredator)
        self._patch_module(ts, "plot_venn", self.mock.mock_plot_venn)
        os.mkdir(os.path.join(self.out, "statistics"))
        os.mkdir(os.path.join(self.out, "statistics/test"))
        self.tss._stat_tss(["test"], "TSS")
        for plot in ("statistics/test/test_venn.png",
                     "statistics/test/test_class.png"):
            self.assertTrue(os.path.exists(os.path.join(self.out, plot)))

    def test_validate(self):
        """Run ``_validate`` with ``validate_gff`` mocked.

        No assertion is made; the test passes when the call does not raise.
        """
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.tss_file)
        os.mkdir(os.path.join(self.out, "gffs"))
        self._patch_module(ts, "validate_gff", self.mock.mock_validate_gff)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.utr_length = 300
        args.out_folder = self.out
        args.program = "tss"
        self.tss._validate(["test"], args)

    def test_compare_ta(self):
        """``_compare_ta`` leaves the transcript gff and the TSS gff."""
        self.tss.multiparser = self.mock_parser
        self._patch_module(ts, "stat_ta_tss", self.mock.mock_stat_ta_tss)
        ta_path = os.path.join(self.trans, "tmp")
        os.mkdir(ta_path)
        os.mkdir(os.path.join(self.out, "gffs"))
        gen_file(os.path.join(ta_path, "test_transcript.gff"),
                 self.example.tran_file)
        args = self.mock_args.mock()
        args.fuzzy = 3
        args.trans = self.trans
        args.gffs = self.gffs
        self.tss._compare_ta(["test"], args)
        self.assertTrue(
            os.path.exists(os.path.join(self.trans, "test_transcript.gff")))
        self.assertTrue(
            os.path.exists(os.path.join(self.out, "gffs/test_TSS.gff")))

    def test_run_tsspredator(self):
        """``run_tsspredator`` yields the TSS gff and the manual comparison."""
        for folder in ("gffs", "statistics", "statistics/test",
                       "MasterTables", "MasterTables/MasterTable_test",
                       "configs"):
            os.mkdir(os.path.join(self.out, folder))
        # Module-level helpers are swapped for mocks and restored afterwards.
        for helper in ("stat_tsspredator", "plot_venn", "validate_gff",
                       "stat_ta_tss", "filter_tss_pro",
                       "merge_manual_predict_tss", "filter_low_expression",
                       "check_orphan"):
            self._patch_module(ts, helper,
                               getattr(self.mock, "mock_" + helper))
        self.tss._start_to_run = self.mock.mock_start_to_run
        gen_file(os.path.join(self.wigs, "test1_forward.wig"),
                 self.example.wig_f)
        gen_file(os.path.join(self.wigs, "test1_reverse.wig"),
                 self.example.wig_r)
        gen_file(os.path.join(self.wigs, "test1_TEX_forward.wig"),
                 self.example.wig_f)
        gen_file(os.path.join(self.wigs, "test1_TEX_reverse.wig"),
                 self.example.wig_r)
        gen_file(os.path.join(self.trans, "test_transcript.gff"),
                 self.example.tran_file)
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gen_file(os.path.join(self.fastas, "test.fa"),
                 ">test\nAAATATATATATATAAATTTATATATATATA")
        libs = [
            "test1_forward.wig:notex:1:a:+", "test1_reverse.wig:notex:1:a:-",
            "test1_TEX_forward.wig:tex:1:a:+",
            "test1_TEX_reverse.wig:tex:1:a:-"
        ]
        args = self.mock_args.mock()
        args.tsspredator_path = "test"
        args.program = "TSS"
        args.height = 0.3
        args.height_reduction = 0.2
        args.factor = 2.0
        args.factor_reduction = 0.5
        args.base_height = 0.00
        args.enrichment_factor = 2.0
        args.processing_factor = 1.5
        args.utr_length = 300
        args.libs = libs
        args.out_folder = self.out
        args.cluster = 3
        args.repmatch = "all_2"
        args.output_prefixs = "test"
        args.check_orphan = True
        args.manual = "test_folder/manuals"
        args.remove_low_expression = True
        gen_file("test_folder/manuals/test.gff", self.example.tss_file)
        args.overlap_feature = "TSS"
        args.stat = True
        args.overlap_gffs = self.gffs
        args.validate = True
        args.fuzzy = 2
        args.specify_strains = None
        self.tss.run_tsspredator(args)
        self.assertTrue(
            os.path.exists(os.path.join(self.out, "gffs/test_TSS.gff")))
        self.assertTrue(
            os.path.exists(
                os.path.join(
                    self.out,
                    "statistics/test/stat_compare_TSSpredator_manual_test.csv")
            ))
Exemplo n.º 47
0
class TestMergesRNA(unittest.TestCase):
    def setUp(self):
        """Create the fixture helpers and make sure the scratch folder exists."""
        self.mock_args = MockClass()
        self.example = Example()
        self.test_folder = "test_folder"
        if os.path.exists(self.test_folder):
            return
        os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch folder if it is still present."""
        scratch = self.test_folder
        if not os.path.exists(scratch):
            return
        shutil.rmtree(scratch)

    def test_modify_attributes(self):
        """Check the ``sRNA_type`` left behind by ``ms.modify_attributes``.

        Three calls are made: an antisense target and a 5utr target with mode
        "pre", then the antisense target again with mode "current".
        """
        def make_feature():
            # All entries in this test share the same location on "aaa".
            return {
                "seq_id": "aaa",
                "source": "Refseq",
                "feature": "sRNA",
                "start": 3,
                "end": 33,
                "phase": ".",
                "strand": "+",
                "score": "."
            }

        def make_attributes(srna_type):
            return {"ID": "sRNA0", "Name": "srna_0", "sRNA_type": srna_type}

        pre_dict = make_feature()
        attributes_pre = make_attributes("5utr")
        tar1_dict = make_feature()
        attributes_tar1 = make_attributes("antisense")
        tar2_dict = make_feature()
        attributes_tar2 = make_attributes("5utr")

        # Mode "pre" against the antisense target.
        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar1 = Create_generator(tar1_dict, attributes_tar1, "gff")
        ms.modify_attributes(pre, tar1, "UTR", "pre")
        self.assertEqual(pre.attributes["sRNA_type"], "5utr")

        # Mode "pre" against the 5utr target.
        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar2 = Create_generator(tar2_dict, attributes_tar2, "gff")
        ms.modify_attributes(pre, tar2, "UTR", "pre")
        self.assertEqual(pre.attributes["sRNA_type"], "5utr")

        # Mode "current" against the antisense target: both entries end up
        # carrying "5utr".
        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar1 = Create_generator(tar1_dict, attributes_tar1, "gff")
        ms.modify_attributes(pre, tar1, "UTR", "current")
        self.assertEqual(pre.attributes["sRNA_type"], "5utr")
        self.assertEqual(tar1.attributes["sRNA_type"], "5utr")

    def test_detect_overlap(self):
        """``ms.detect_overlap`` is true for 3-33 vs 3-33, false for 53-233."""
        def make_feature(start, end):
            return {
                "seq_id": "aaa",
                "source": "Refseq",
                "feature": "sRNA",
                "start": start,
                "end": end,
                "phase": ".",
                "strand": "+",
                "score": "."
            }

        def make_attributes(srna_type):
            return {"ID": "sRNA0", "Name": "srna_0", "sRNA_type": srna_type}

        pre_dict = make_feature(3, 33)
        attributes_pre = make_attributes("5utr")
        tar1_dict = make_feature(3, 33)
        attributes_tar1 = make_attributes("3utr")
        tar2_dict = make_feature(53, 233)
        attributes_tar2 = make_attributes("5utr")

        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar1 = Create_generator(tar1_dict, attributes_tar1, "gff")
        tar2 = Create_generator(tar2_dict, attributes_tar2, "gff")

        # Same coordinates as ``pre``.
        same_span = ms.detect_overlap(tar1, pre, "UTR", False)
        self.assertTrue(same_span)
        # Starts after ``pre`` ends.
        disjoint_span = ms.detect_overlap(tar2, pre, "UTR", False)
        self.assertFalse(disjoint_span)

    def test_modify_overlap(self):
        """``ms.modify_overlap`` merges TSS/cleavage info, keeping 3-33."""
        def make_feature(start, end):
            return {
                "seq_id": "aaa",
                "source": "Refseq",
                "feature": "sRNA",
                "start": start,
                "end": end,
                "phase": ".",
                "strand": "+",
                "score": "."
            }

        def make_attributes(srna_type, tss, start_cleavage, end_cleavage):
            return {
                "ID": "sRNA0",
                "Name": "srna_0",
                "sRNA_type": srna_type,
                "with_TSS": tss,
                "start_cleavage": start_cleavage,
                "end_cleavage": end_cleavage
            }

        pre_dict = make_feature(3, 33)
        attributes_pre = make_attributes(
            "5utr", "NA", "cleavage_1,cleavage_2", "NA")
        tar_dict = make_feature(5, 30)
        attributes_tar = make_attributes(
            "3utr", "TSS_1", "cleavage3", "cleavage10")

        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar = Create_generator(tar_dict, attributes_tar, "gff")
        merged = ms.modify_overlap(pre, tar)

        merged_attributes = merged.attributes
        self.assertEqual(merged_attributes["with_TSS"], "TSS_1")
        self.assertEqual(merged_attributes["start_cleavage"],
                         "cleavage_1,cleavage_2,cleavage3")
        self.assertEqual(merged_attributes["end_cleavage"], "cleavage10")
        self.assertEqual(merged.start, 3)
        self.assertEqual(merged.end, 33)

    def test_merge_srna(self):
        """``ms.merge_srna`` on the UTR and the intergenic example sets."""
        # UTR-derived examples collapse into two entries.
        utr_srnas = ms.merge_srna(self.example.srnas_utr, "UTR")
        self.assertEqual(len(utr_srnas), 2)
        self.assertEqual(utr_srnas[0].start, 3)
        self.assertEqual(utr_srnas[1].start, 54)
        self.assertEqual(utr_srnas[0].attributes["with_TSS"], "TSS_1")
        self.assertEqual(utr_srnas[1].attributes["with_TSS"], "TSS_3")
        self.assertEqual(utr_srnas[0].attributes["start_cleavage"],
                         "cleavage_1,cleavage_2,cleavage_3")
        self.assertEqual(utr_srnas[1].attributes["start_cleavage"],
                         "cleavage_4")

        # Intergenic examples: only the TSS attribute is checked.
        inter_srnas = ms.merge_srna(self.example.srnas_int, "inter")
        self.assertEqual(inter_srnas[0].attributes["with_TSS"], "TSS_1")
        self.assertEqual(inter_srnas[1].attributes["with_TSS"], "NA")

    def test_merge_srna_gff(self):
        """Run ``ms.merge_srna_gff`` with a stubbed GFF reader and check output.

        ``ms.read_gff`` is replaced by ``Mock_func().mock_read_gff`` for the
        duration of this test only; the original function is restored on
        cleanup so the stub cannot leak into other tests.
        """
        out_file = os.path.join(self.test_folder, "test_out")
        gff_file = os.path.join(self.test_folder, "aaa.gff")
        gen_file(gff_file, self.example.gff_file)
        # Register the restore before patching so it runs even if the
        # call under test raises.
        if hasattr(ms, "read_gff"):
            self.addCleanup(setattr, ms, "read_gff", ms.read_gff)
        ms.read_gff = Mock_func().mock_read_gff
        gffs = {"merge": out_file, "utr": "UTR", "normal": "inter"}
        ms.merge_srna_gff(gffs, False, 0.5, gff_file)
        datas, attributes = extract_info(out_file, "file")
        self.assertListEqual(datas,
                             ['aaa\tANNOgesic\tncRNA\t54\t254\t.\t+\t.'])
        # Attribute order in the written file is not significant.
        self.assertEqual(
            set(attributes[0]),
            {
                'overlap_cds=NA', 'Name=sRNA_00000', 'ID=aaa_srna0',
                'sRNA_type=intergenic', 'end_cleavage=cleavage_40',
                'with_TSS=TSS_3', 'overlap_percent=NA'
            })

    def test_compare_table(self):
        """Check the table row written by ``ms.compare_table`` for one sRNA.

        ``ms.replicate_comparison`` is stubbed for this test only and
        restored on cleanup.  The ``tmp_median`` helper file created in the
        current working directory is removed even when the call or the
        assertion fails.
        """
        # Register the restore before patching so the stub never leaks.
        if hasattr(ms, "replicate_comparison"):
            self.addCleanup(setattr, ms, "replicate_comparison",
                            ms.replicate_comparison)
        ms.replicate_comparison = Mock_func().mock_replicate_comparison
        wigs = {"aaa": {"frag_1": {"track_1|+|frag": [100, 30, 23, 21, 21]}}}
        tables = [{
            "strain": "aaa",
            "name": "sRNA_1",
            "start": 3,
            "end": 4,
            "strand": "+",
            "libs": "track_1",
            "detect": "True",
            "avg": 30,
            "high": 100,
            "low": 20,
            "detail": "detail"
        }]
        srna_dict = {
            "seq_id": "aaa",
            "source": "Refseq",
            "feature": "sRNA",
            "start": 3,
            "end": 4,
            "phase": ".",
            "strand": "+",
            "score": "."
        }
        attributes_srna = {
            "ID": "sRNA0",
            "Name": "srna_0",
            "sRNA_type": "3utr",
            "with_TSS": "TSS_1",
            "start_cleavage": "cleavage3",
            "end_cleavage": "cleavage10",
            "overlap_cds": "CDS1",
            "overlap_percent": "0.01415"
        }
        srna = Create_generator(srna_dict, attributes_srna, "gff")
        tss_dict = {
            "seq_id": "aaa",
            "source": "Refseq",
            "feature": "TSS",
            "start": 3,
            "end": 3,
            "phase": ".",
            "strand": "+",
            "score": "."
        }
        attributes_tss = {"ID": "tss0", "Name": "TSS_0", "type": "Orphan"}
        tss = Create_generator(tss_dict, attributes_tss, "gff")
        out = StringIO()
        args = self.mock_args.mock()
        args.replicates = {"tex": 1, "frag": 1}
        args.texs = texs = {"track_tex_track_notex": 0}
        args.out_folder = os.getcwd()
        args.table_best = True
        args.tex_notex = 2
        gen_file("tmp_median", "aaa\t3utr\ttrack_1\t10")
        try:
            ms.compare_table(srna, tables, "utr", wigs, wigs, texs, out,
                             [tss], args)
            self.assertEqual(
                out.getvalue(),
                "aaa\tsrna_0\t3\t4\t+\tfrag_1\t1\tTSS_1;cleavage3\tcleavage10\t22.0\t23\t21\ttrack_1(avg=22.0;high=23;low=21)\tCDS1\t0.01415\n"
            )
        finally:
            # Never leave the scratch file behind in the working directory.
            if os.path.exists("tmp_median"):
                os.remove("tmp_median")

    def test_get_coverage(self):
        """Check the coverage record ``ms.get_coverage`` builds for one track."""
        track_values = [100, 30, 23, 21, 21]
        wigs = {"aaa": {"frag_1": {"track_1|+|frag": track_values}}}
        coverage = ms.get_coverage(wigs, self.example.srnas_int[0])
        expected_record = {
            'track': 'track_1',
            'type': 'frag',
            'pos': 0,
            'avg': 1.3548387096774193,
            'high': 21,
            'low': 21,
            'final_start': 3,
            'final_end': 33
        }
        self.assertEqual(coverage["frag_1"], [expected_record])

    def test_get_tss_pro(self):
        """Check the pair of strings ``ms.get_tss_pro`` returns in "utr" mode."""
        srna = Create_generator(
            {"seq_id": "aaa", "source": "Refseq", "feature": "sRNA",
             "start": 3, "end": 4, "phase": ".", "strand": "+",
             "score": "."},
            {"ID": "sRNA0", "Name": "srna_0", "sRNA_type": "3utr",
             "with_TSS": "TSS_1", "start_cleavage": "cleavage3",
             "end_cleavage": "cleavage10"},
            "gff")
        expected = ('TSS_1;cleavage3', 'cleavage10')
        self.assertEqual(ms.get_tss_pro("utr", srna), expected)
Exemplo n.º 48
0
class TestSNPCalling(unittest.TestCase):
    """Tests for ``SNPCalling``.

    Every test works inside a scratch ``test_folder`` that is created in
    ``setUp`` and deleted in ``tearDown``.  Methods that would start
    external programs are replaced on the instance by ``Mock_func`` stubs.
    """

    def setUp(self):
        """Create the scratch folder layout and the object under test."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.fasta = os.path.join(self.test_folder, "fasta")
        self.snp_folder = os.path.join(self.test_folder, "snp")
        self.table = os.path.join(self.test_folder, "table")
        if (not os.path.exists(self.test_folder)):
            # Parents are listed before their children because os.mkdir
            # does not create intermediate folders.
            folders = (
                self.test_folder,
                self.fasta,
                self.snp_folder,
                self.table,
                os.path.join(self.test_folder,
                             "compare_related_and_reference_genomes"),
                os.path.join(self.test_folder,
                             "compare_related_and_reference_genomes/seqs"),
                os.path.join(
                    self.test_folder,
                    "compare_related_and_reference_genomes/seqs/with_BAQ"),
                os.path.join(
                    self.test_folder,
                    "compare_related_and_reference_genomes/statistics"),
                os.path.join(
                    self.test_folder,
                    "compare_related_and_reference_genomes/SNP_raw_outputs"),
            )
            for folder in folders:
                os.mkdir(folder)
        args = self.mock_args.mock()
        args.types = "related_genome"
        args.out_folder = self.test_folder
        args.fastas = self.fasta
        self.snp = SNPCalling(args)
        self.mock = Mock_func()

    def tearDown(self):
        """Delete the scratch folder."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_transcript_snp(self):
        """Check the statistics table and sequence written by ``_transcript_snp``."""
        fasta = os.path.join(self.test_folder, "NC_007795.1.fa")
        gen_file(fasta, self.example.fasta)
        snp_folder = os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/SNP_raw_outputs/test")
        os.mkdir(snp_folder)
        snp = os.path.join(snp_folder, "test_with_BAQ_NC_007795.1.vcf")
        gen_file(snp, self.example.snp)
        args = self.mock_args.mock()
        args.depth = 5
        args.fraction = 0.3
        args.quality = 2
        args.depth_s = "n_10"
        args.depth_b = "a_2"
        args.dp4_sum = "n_10"
        args.dp4_frac = 0.5
        args.idv = "n_10"
        args.imf = 0.5
        args.filters = ["VDB_s0.1"]
        args.min_sample = 2
        os.mkdir(
            os.path.join(
                self.test_folder,
                "compare_related_and_reference_genomes/seqs/with_BAQ/test"))
        depth_file = os.path.join(self.test_folder, "tmp_depthNC_007795.1")
        gen_file(depth_file, self.example.depth_file)
        bam_datas = [{
            "sample": "NC_007795.1",
            "bam_number": 1,
            "bams": "test",
            "rep": 1
        }]
        self.snp._transcript_snp(
            fasta,
            "test",
            "with",
            "test",
            bam_datas,
            self.table,
            args,
        )
        datas = import_data(
            os.path.join(
                self.test_folder,
                "compare_related_and_reference_genomes/statistics/stat_test_with_BAQ_NC_007795.1_SNP_best.csv"
            ))
        self.assertEqual("\n".join(datas), self.example.out_stat)
        datas = import_data(
            os.path.join(
                self.test_folder,
                "compare_related_and_reference_genomes/seqs/with_BAQ/test/test_NC_007795.1_NC_007795.1_1_1.fa"
            ))
        self.assertEqual("\n".join(datas),
                         ">NC_007795.1\nAaTTGaaTCCCGAACGACAGTTAT")
        # These files end up in the current working directory; removing
        # them unguarded also fails the test if one was not produced.
        os.remove("test_NC_007795.1_seq_reference.csv")
        os.remove("test_NC_007795.1_best.vcf")
        os.remove("test_NC_007795.1_NC_007795.1_SNP_QUAL_best.png")
        os.remove("test_NC_007795.1_NC_007795.1_SNP_QUAL_raw.png")

    def test_run_program(self):
        """``_run_program`` with a stubbed ``_run_sub`` leaves a ``test`` entry."""
        self.snp._run_sub = self.mock.mock_run_sub
        args = self.mock_args.mock()
        bam_datas = [{
            "sample": "NC_007795.1",
            "bam_number": 1,
            "bams": "test",
            "rep": 1
        }]
        args.program = ["with_BAQ"]
        # Close the log deterministically so tearDown can delete the folder.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.snp._run_program("fasta", bam_datas, args, log)
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "test")))

    def test_merge_bams(self):
        """``_merge_bams`` with a stubbed ``_run_bam`` sets ``bam_number`` to 1."""
        args = self.mock_args.mock()
        args.frag_bams = os.path.join(self.test_folder, "frag_bams")
        args.normal_bams = os.path.join(self.test_folder, "tex_bams")
        os.mkdir(args.normal_bams)
        os.mkdir(args.frag_bams)
        bam_datas = [{
            "sample": "NC_007795.1",
            "bam_number": 0,
            "bams": "test",
            "rep": 1
        }]
        self.snp._run_bam = self.mock.mock_run_bam
        gen_file(os.path.join(args.normal_bams, "tex.bam"), "test")
        gen_file(os.path.join(args.normal_bams, "notex.bam"), "test")
        gen_file(os.path.join(args.frag_bams, "farg.bam"), "test")
        args.bams = [args.frag_bams, args.normal_bams]
        args.samtools_path = "test"
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.snp._merge_bams(args, bam_datas, log)
        self.assertEqual(bam_datas[0]["bam_number"], 1)

    def test_modify_header(self):
        """``_modify_header`` rewrites ``>AAA|BBB|CCC|DDD|EEE`` to ``>DDD``."""
        gen_file(os.path.join(self.fasta, "test.fa"),
                 ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        self.snp._modify_header(self.fasta)
        datas = import_data(os.path.join(self.fasta, "test.fa"))
        self.assertEqual("\n".join(datas), ">DDD\nAATTAATTGGCC")

    def test_get_genome_name(self):
        """Smoke test: ``_get_genome_name`` runs with a stubbed ``_get_header``."""
        self.snp._get_header = self.mock.mock_get_header
        gen_file(os.path.join(self.test_folder, "header"), self.example.bam)
        args = self.mock_args.mock()
        args.samtools_path = "test"
        bam_datas = [{
            "sample": "NC_007795.1",
            "bam_number": 0,
            "bams": "test",
            "rep": 1
        }]
        # NOTE(review): the return value is not checked; this only verifies
        # that the call completes without raising.
        self.snp._get_genome_name(args, bam_datas)

    def test_run_snp_calling(self):
        """Smoke test: ``run_snp_calling`` runs with every tool call stubbed."""
        self.snp._get_header = self.mock.mock_get_header
        self.snp._run_bam = self.mock.mock_run_bam
        self.snp._run_sub = self.mock.mock_run_sub
        self.snp._run_tools = self.mock.mock_run_tools
        self.snp._transcript_snp = self.mock.mock_transcript_snp
        gen_file(os.path.join(self.fasta, "test.fa"),
                 ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        gen_file(os.path.join(self.test_folder, "header"), self.example.bam)
        gen_file(os.path.join(self.test_folder, "whole_reads.bam"), "test")
        gen_file(os.path.join(self.test_folder, "whole_reads_sorted.bam"),
                 "test")
        gen_file(os.path.join(self.test_folder, "tmp_bcf"), "test")
        gen_file(os.path.join(self.fasta, "all.fa.fai"), "test")
        args = self.mock_args.mock()
        args.types = "reference"
        args.program = ["with_BAQ"]
        args.bams = [
            "a1:" + os.path.join(self.test_folder, "frag_bams"),
            "a2:" + os.path.join(self.test_folder, "tex_bams")
        ]
        args.frag_bams = os.path.join(self.test_folder, "frag_bams")
        args.normal_bams = os.path.join(self.test_folder, "tex_bams")
        os.mkdir(args.normal_bams)
        os.mkdir(args.frag_bams)
        gen_file(os.path.join(args.normal_bams, "tex.bam"), "test")
        gen_file(os.path.join(args.normal_bams, "notex.bam"), "test")
        gen_file(os.path.join(args.frag_bams, "farg.bam"), "test")
        args.samtools_path = "test"
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.snp.run_snp_calling(args, log)
Exemplo n.º 49
0
class TestPPI(unittest.TestCase):
    """Tests for ``PPINetwork``.

    Every test works inside a scratch ``test_folder`` that is created in
    ``setUp`` and deleted in ``tearDown``.  Downloads are avoided by
    replacing ``_run_wget`` on the instance with a ``Mock_func`` stub.
    """

    def setUp(self):
        """Create the scratch folder layout and the object under test."""
        self.test_folder = "test_folder"
        self.mock_args = MockClass()
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
            os.mkdir(os.path.join(self.test_folder, "tmp_specific"))
            os.mkdir(os.path.join(self.test_folder, "tmp_nospecific"))
            os.mkdir(os.path.join(self.test_folder, "with_strain"))
            os.mkdir(os.path.join(self.test_folder, "with_strain/test_ptt"))
            os.mkdir(os.path.join(self.test_folder, "without_strain"))
            os.mkdir(os.path.join(self.test_folder, "without_strain/test_ptt"))
            os.mkdir(os.path.join(self.test_folder, "all_results"))
            os.mkdir(os.path.join(self.test_folder, "best_results"))
            os.mkdir(os.path.join(self.test_folder, "figures"))
        self.ppi = PPINetwork(self.test_folder)
        self.mock = Mock_func()
        self.example = Example()

    def tearDown(self):
        """Delete the scratch folder."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    @staticmethod
    def _strain_id(file_name):
        """Return the strain description dict shared by several tests."""
        return {
            "file": file_name,
            "ptt": "test_ptt",
            "string": "test_string",
            "pie": "test_pie"
        }

    @staticmethod
    def _blank_files(id_list):
        """Return a ``files`` dict whose handles are empty placeholders."""
        return {
            "id_list": id_list,
            "id_log": "",
            "pubmed_log": "",
            "all_specific": "",
            "best_specific": "",
            "all_nospecific": "",
            "best_nospecific": "",
            "action_log": ""
        }

    def _stream_files(self):
        """Return a ``files`` dict whose four outputs are fresh ``StringIO``s."""
        return {
            "id_list": self.test_folder,
            "id_log": "test",
            "pubmed_log": "test",
            "all_specific": StringIO(),
            "best_specific": StringIO(),
            "all_nospecific": StringIO(),
            "best_nospecific": StringIO()
        }

    def _result_paths(self):
        """Return the all/fig/best result folders inside the scratch folder."""
        return {
            "all": os.path.join(self.test_folder, "all_results"),
            "fig": os.path.join(self.test_folder, "figures"),
            "best": os.path.join(self.test_folder, "best_results")
        }

    def test_wget_id(self):
        """``_wget_id`` reports success when the download is stubbed."""
        self.ppi._run_wget = self.mock.mock_run_wget
        strain = "test_strain"
        locus = "test_locus"
        strain_id = {
            "ptt": "test_strain",
            "string": "string_test",
            "file": "file_test"
        }
        files = {"id_list": "test", "id_log": "test"}
        detect = self.ppi._wget_id(strain, locus, strain_id, files)
        self.assertTrue(detect)

    def test_retrieve_id(self):
        """Smoke test: ``_retrieve_id`` runs with the download stubbed."""
        self.ppi._run_wget = self.mock.mock_run_wget
        strain_id = {
            "ptt": "test_strain",
            "string": "string_test",
            "file": "file_test"
        }
        files = {"id_list": "test", "id_log": "test"}
        genes = [{"strain": "test_strain", "locus_tag": "test_locus"}]
        self.ppi._retrieve_id(strain_id, genes, files)

    def test_get_prefer_name(self):
        """``_get_prefer_name`` returns the last column of the id file."""
        row_a = "999.aaa"
        files = {"id_list": self.test_folder}
        gen_file(os.path.join(self.test_folder, "aaa"),
                 "999.aaa\t222\t333\ttest_aaa")
        name = self.ppi._get_prefer_name(row_a, "test", files, "test")
        self.assertEqual(name, "test_aaa")

    def test_get_pubmed(self):
        """Check the per-pair CSV files written by ``_get_pubmed``."""
        self.ppi._run_wget = self.mock.mock_run_wget
        files = self._stream_files()
        row = self.example.ppi_line.split("\t")
        strain_id = self._strain_id("test_file")
        mode = "interaction"
        actor = "test_A"
        id_file = "SAOUHSC_01684"
        ptt = "test_ptt"
        gen_file(os.path.join(self.test_folder, "SAOUHSC_01684"),
                 "93061.SAOUHSC_01684\t93061.SAOUHSC_01683\t333\ttest_aaa")
        gen_file(os.path.join(self.test_folder, "SAOUHSC_01683"),
                 "93061.SAOUHSC_01683\t93061.SAOUHSC_01684\t333\ttest_bbb")
        paths = {
            "all": self.test_folder,
            "fig": self.test_folder,
            "best": self.test_folder
        }
        first_output = {
            "specific_all": True,
            "specific_best": True,
            "nospecific_all": True,
            "nospecific_best": True
        }
        args = self.mock_args.mock()
        args.out_folder = self.test_folder
        args.querys = "all"
        args.no_specific = True
        args.score = 19
        self.ppi._get_pubmed(row, strain_id, mode, actor, id_file,
                             first_output, ptt, files, paths, args)
        data = import_data(
            "test_folder/without_strain/test_ptt/test_aaa_test_bbb.csv")
        self.assertEqual("\n".join(data), self.example.with_out)
        data = import_data(
            "test_folder/with_strain/test_ptt/test_aaa_test_bbb.csv")
        self.assertEqual("\n".join(data), self.example.with_out)

    def test_merge_information(self):
        """``_merge_information`` writes the same line to both outputs."""
        first_output = {
            "specific_all": True,
            "specific_best": True,
            "nospecific_all": True,
            "nospecific_best": True
        }
        out_all = StringIO()
        out_best = StringIO()
        row_a = self.example.ppi_line.split("\t")
        score = 111
        id_file = "SAOUHSC_01684"
        id_folder = self.test_folder
        file_type = "specific"
        all_folder = os.path.join(self.test_folder, "with_strain")
        best_folder = os.path.join(self.test_folder, "without_strain")
        ptt = "test_ptt"
        filename = os.path.join(self.test_folder, "SAOUHSC_01684")
        gen_file(filename, "93061.SAOUHSC_01684\t1000\t333\ttest_aaa")
        self.ppi._merge_information(first_output, filename, out_all, out_best,
                                    row_a, score, id_file, id_folder,
                                    file_type, all_folder, best_folder, ptt)
        self.assertEqual(out_all.getvalue(), self.example.merge_out + "\n")
        self.assertEqual(out_best.getvalue(), self.example.merge_out + "\n")

    def test_detect_protein(self):
        """``_detect_protein`` lists the three genes of the example ptt file."""
        gen_file(os.path.join(self.test_folder, "test"), self.example.ptt_file)
        strain_id = self._strain_id("test")
        args = self.mock_args.mock()
        args.ptts = self.test_folder
        args.querys = "all"
        genes = self.ppi._detect_protein(strain_id, args)
        self.assertListEqual(genes, [{
            'strain': 'Staphylococcus_aureus_HG003',
            'locus_tag': 'SAOUHSC_00001'
        }, {
            'strain': 'Staphylococcus_aureus_HG003',
            'locus_tag': 'SAOUHSC_00002'
        }, {
            'strain': 'Staphylococcus_aureus_HG003',
            'locus_tag': 'SAOUHSC_00003'
        }])

    def test_setup_nospecific(self):
        """``_setup_nospecific`` creates the ``without_strain`` result folders."""
        paths = self._result_paths()
        strain_id = self._strain_id("test")
        files = self._stream_files()
        self.ppi._setup_nospecific(paths, strain_id, files)
        # Close whatever is stored under these keys after the call.
        files["all_nospecific"].close()
        files["best_nospecific"].close()
        self.assertTrue(
            os.path.exists("test_folder/all_results/without_strain/test_ptt"))
        self.assertTrue(
            os.path.exists("test_folder/best_results/without_strain/test_ptt"))
        self.assertTrue(
            os.path.exists("test_folder/figures/without_strain/test_ptt"))

    def test_setup_folder_and_read_file(self):
        """Check folders created and genes returned by the setup helper."""
        paths = self._result_paths()
        strain_id = self._strain_id("test.ptt")
        files = self._blank_files(self.test_folder)
        gen_file(os.path.join(self.test_folder, "test.ptt"),
                 self.example.ptt_file)
        args = self.mock_args.mock()
        args.querys = "all"
        args.no_specific = True
        args.out_folder = self.test_folder
        args.ptts = self.test_folder
        genes = self.ppi._setup_folder_and_read_file(strain_id, "", files,
                                                     paths, args)
        # Close whatever is stored under these keys after the call.
        for index in ("all_specific", "all_nospecific", "best_specific",
                      "best_nospecific", "id_log", "action_log", "pubmed_log"):
            files[index].close()
        self.assertTrue(os.path.exists("test_folder/best_results/test"))
        self.assertTrue(os.path.exists("test_folder/all_results/test"))
        self.assertListEqual(genes, [{
            'locus_tag': 'SAOUHSC_00001',
            'strain': 'Staphylococcus_aureus_HG003'
        }, {
            'locus_tag': 'SAOUHSC_00002',
            'strain': 'Staphylococcus_aureus_HG003'
        }, {
            'locus_tag': 'SAOUHSC_00003',
            'strain': 'Staphylococcus_aureus_HG003'
        }])

    def test_wget_actions(self):
        """Smoke test: ``_wget_actions`` runs with the download stubbed."""
        gen_file(os.path.join(self.test_folder, "test.txt"), "93061\ttest")
        self.ppi._run_wget = self.mock.mock_run_wget
        files = self._blank_files(self.test_folder)
        strain_id = self._strain_id("test.ptt")
        id_file = "test.txt"
        self.ppi._wget_actions(files, id_file, strain_id, self.test_folder)

    def test_retrieve_actions(self):
        """Smoke test: ``_retrieve_actions`` runs with the download stubbed."""
        self.ppi._run_wget = self.mock.mock_run_wget
        files = self._blank_files(
            os.path.join(self.test_folder, "tmp_specific"))
        strain_id = self._strain_id("test.ptt")
        paths = self._result_paths()
        gen_file(os.path.join(self.test_folder, "tmp_specific/test.txt"),
                 "93061\ttest")
        gen_file(os.path.join(self.test_folder, "tmp_action"),
                 self.example.ppi_line)
        args = self.mock_args.mock()
        args.no_specific = True
        args.querys = "all"
        args.out_folder = self.test_folder
        self.ppi._retrieve_actions(files, strain_id, paths, args)
Exemplo n.º 50
0
class TestsRNATargetPrediction(unittest.TestCase):
    """Tests for ``sRNATargetPrediction``.

    Every test works inside a scratch ``test_folder`` that is created in
    ``setUp`` and deleted in ``tearDown``.  The methods that would start
    RNAplex, RNAplfold or RNAup are replaced by ``Mock_func`` stubs.
    """

    def setUp(self):
        """Create the scratch folder layout and the object under test."""
        self.mock_args = MockClass()
        self.example = Example()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.gffs = "test_folder/gffs"
        self.srnas = "test_folder/srnas"
        self.out = "test_folder/output"
        self.fastas = "test_folder/fastas"
        self.seq = "test_folder/output/sRNA_seqs"
        self.rnaup = "test_folder/output/RNAup_results"
        self.rnaplex = "test_folder/output/RNAplex_results"
        self.merge = "test_folder/output/merged_results"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
            os.mkdir(self.gffs)
            os.mkdir(self.out)
            os.mkdir(self.srnas)
            os.mkdir(self.fastas)
            os.mkdir(self.rnaup)
            os.mkdir(self.rnaplex)
            os.mkdir(self.seq)
            os.mkdir(self.merge)
            os.mkdir(os.path.join(self.rnaup, "test"))
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.srnas = self.srnas
        args.fastas = self.fastas
        args.gffs = self.gffs
        self.star = sRNATargetPrediction(args)

    def tearDown(self):
        """Delete the scratch folder."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_sort_srna_fasta(self):
        """``_sort_srna_fasta`` writes the records shortest sequence first."""
        fasta = os.path.join(self.fastas, "test.fa")
        gen_file(fasta, ">aaa\nAAAAAAAA\n>bbb\nCCCC\n>ccc\nGGGGGGGGGGGG")
        self.star._sort_srna_fasta(fasta, "test", self.test_folder)
        datas = import_data(
            os.path.join(self.test_folder, "tmp_srna_target_test_sRNA.fa"))
        self.assertListEqual(datas, ['>bbb', 'CCCC', '>aaa', 'AAAAAAAA',
                                     '>ccc', 'GGGGGGGGGGGG'])

    def test_read_fasta(self):
        """``_read_fasta`` returns the sequence without the header line."""
        fasta = os.path.join(self.fastas, "test.fa")
        gen_file(fasta, ">aaa\nAAAAAAAA")
        seq = self.star._read_fasta(fasta)
        self.assertEqual(seq, "AAAAAAAA")

    def test_get_specific_seq(self):
        """``_get_specific_seq`` writes the sequence of the queried sRNA."""
        srna_file = os.path.join(self.test_folder, "aaa_sRNA.gff")
        seq_file = os.path.join(self.test_folder, "aaa.fa")
        srna_out = os.path.join(self.test_folder, "out")
        gen_file(srna_file, self.example.srna_file)
        gen_file(seq_file, self.example.seq_file)
        self.star._get_specific_seq(srna_file, seq_file, srna_out,
                                    ["aaa:5:8:+"])
        datas = import_data(srna_out)
        self.assertEqual("\n".join(datas), '>srna0|aaa|5|8|+\nTAAT')

    def test_gen_seq(self):
        """``_gen_seq`` writes both the sRNA and the target FASTA files."""
        srna_seq = os.path.join(self.out, "sRNA_seqs")
        tar_seq = os.path.join(self.out, "target_seqs")
        os.mkdir(os.path.join(self.srnas, "tmp"))
        os.mkdir(os.path.join(self.fastas, "tmp"))
        os.mkdir(os.path.join(self.gffs, "tmp"))
        os.mkdir(tar_seq)
        gen_file(os.path.join(self.srnas, "tmp", "aaa_sRNA.gff"),
                 self.example.srna_file)
        gen_file(os.path.join(self.fastas, "tmp", "aaa.fa"),
                 self.example.seq_file)
        gen_file(os.path.join(self.gffs, "tmp", "aaa.gff"),
                 self.example.gff_file)
        args = self.mock_args.mock()
        args.query = ["aaa:5:8:+"]
        args.features = ["CDS"]
        args.tar_start = 3
        args.tar_end = 5
        self.star._gen_seq(["aaa"], args)
        datas = import_data(os.path.join(srna_seq, "aaa_sRNA.fa"))
        self.assertEqual("\n".join(datas), '>srna0|aaa|5|8|+\nTAAT')
        datas = import_data(os.path.join(tar_seq, "aaa_target_1.fa"))
        self.assertEqual("\n".join(datas),
                         '>AAA_000001_cds0_12-16_+\nTAAATTCC')

    def test_rna_plex(self):
        """``_rna_plex`` with stubbed tools leaves the prepared result as is."""
        self.star._run_rnaplex = self.mock.mock_run_rnaplex
        self.star._run_rnaplfold = self.mock.mock_run_rnaplfold
        os.mkdir("test_folder/test")
        gen_file("test_folder/test/test_RNAplex.txt", "test")
        gen_file(os.path.join(self.test_folder, "aaa_RNAplex.txt"),
                 self.example.rnaplex)
        args = self.mock_args.mock()
        args.vienna_path = "test"
        args.win_size_s = 5
        args.win_size_t = 5
        args.span_s = 5
        args.span_t = 5
        args.unstr_region_rnaplex_s = 5
        args.rnaplfold_path = None
        args.unstr_region_rnaplex_t = 5
        # Close the log deterministically so tearDown can delete the folder.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.star._rna_plex(["test"], args, log)
        datas = import_data("test_folder/test/test_RNAplex.txt")
        self.assertEqual("\n".join(datas), "test")

    def test_get_continue(self):
        """``_get_continue`` returns the sRNA names found in an RNAup output."""
        out_rnaup = os.path.join(self.test_folder, "rnaup.txt")
        gen_file(out_rnaup, self.example.rnaup)
        srnas = self.star._get_continue(out_rnaup)
        self.assertListEqual(srnas, ["srna437"])

    def test_rnaup(self):
        """``_rnaup`` with a stubbed runner writes the temporary sRNA FASTA."""
        self.star._run_rnaup = self.mock.mock_run_rnaup
        gen_file(
            os.path.join(self.out, "sRNA_seqs/tmp_srna_target_test_sRNA.fa"),
            ">srna0|aaa|5|8|+\nAAATTAATTAAATTCCGGCCGGCCGG")
        gen_file(os.path.join(self.gffs, "test_target.fa"),
                 ">AAA_000001|CDS_00000\nAAATTAATTAAATTCCGGCCGGCCGG")
        args = self.mock_args.mock()
        args.srnas = self.srnas
        args.fastas = self.fastas
        args.gffs = self.gffs
        args.vienna_path = "test"
        args.out_folder = self.out
        args.core_up = 4
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.star._rnaup(["test"], args, log)
        datas = import_data(os.path.join(self.out, "tmp_srna_target1.fa"))
        self.assertEqual("\n".join(datas),
                         ">srna0|aaa|5|8|+\nAAATTAATTAAATTCCGGCCGGCCGG")
Exemplo n.º 51
0
class TestOperonDetection(unittest.TestCase):
    """Tests for ``OperonDetection``.

    Every test works inside a scratch ``test_folder`` that is created in
    ``setUp`` and deleted in ``tearDown``.  Functions of the ``op`` module
    are replaced by ``Mock_func`` stubs for the duration of a single test
    and restored afterwards.
    """

    def setUp(self):
        """Create the scratch folder layout and the object under test."""
        self.test_folder = "test_folder"
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.tsss = os.path.join(self.test_folder, "tsss")
        self.trans = os.path.join(self.test_folder, "trans")
        self.utr5s = os.path.join(self.test_folder, "utr5s")
        self.utr3s = os.path.join(self.test_folder, "utr3s")
        self.output = os.path.join(self.test_folder, "output")
        self.gffs = os.path.join(self.test_folder, "gffs")
        self.out_gff = os.path.join(self.output, "gffs")
        self.stat = os.path.join(self.test_folder, "stat")
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
            os.mkdir(self.gffs)
            os.mkdir(self.tsss)
            os.mkdir(self.stat)
            os.mkdir(os.path.join(self.tsss, "tmp"))
            os.mkdir(self.trans)
            os.mkdir(os.path.join(self.trans, "tmp"))
            os.mkdir(self.utr5s)
            os.mkdir(os.path.join(self.utr5s, "tmp"))
            os.mkdir(self.utr3s)
            os.mkdir(os.path.join(self.utr3s, "tmp"))
            os.mkdir(self.output)
            os.mkdir(self.out_gff)
            os.mkdir(os.path.join(self.output, "tables"))
        args = self.mock_args.mock()
        args.tsss = self.tsss
        args.trans = self.trans
        args.utr5s = self.utr5s
        args.utr3s = self.utr3s
        args.output_folder = self.output
        args.terms = None
        self.operon = OperonDetection(args)

    def tearDown(self):
        """Delete the scratch folder."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _patch_op(self, name, replacement):
        """Set ``op.<name>`` to *replacement* and undo it on test cleanup."""
        missing = object()
        original = getattr(op, name, missing)
        setattr(op, name, replacement)
        if original is missing:
            # The attribute did not exist before: remove it again.
            self.addCleanup(delattr, op, name)
        else:
            self.addCleanup(setattr, op, name, original)

    def test_detect_operon(self):
        """``_detect_operon`` with a stubbed ``op.operon`` writes the table."""
        self._patch_op("operon", self.mock.mock_operon)
        gen_file(os.path.join(self.tsss, "tmp", "test_TSS.gff"), "test")
        gen_file(os.path.join(self.trans, "tmp", "test_transcript.gff"),
                 "test")
        gen_file(os.path.join(self.gffs, "test.gff"), "test")
        args = self.mock_args.mock()
        args.gffs = self.out_gff
        args.term_fuzzy = 3
        args.tss_fuzzy = 3
        args.length = 100
        self.operon._detect_operon(["test"], args)
        self.assertTrue(
            os.path.exists(
                os.path.join(self.output, "tables", "operon_test.csv")))

    def test_stat(self):
        """``_stat`` with a stubbed ``op.stat`` writes the statistics file."""
        self._patch_op("stat", self.mock.mock_stat)
        table_file = os.path.join(self.output, "tables", "operon_test.csv")
        if not os.path.exists(table_file):
            gen_file(table_file, "test")
        self.operon._stat(os.path.join(self.output, "tables"), self.stat)
        self.assertTrue(
            os.path.exists(os.path.join(self.stat, "stat_operon_test.csv")))

    def test_combine_gff(self):
        """``_combine_gff`` with a stubbed ``op.combine_gff`` writes the GFF."""
        self._patch_op("combine_gff", self.mock.mock_combine_gff)
        gen_file(os.path.join(self.tsss, "tmp", "test_TSS.gff"), "test")
        gen_file(os.path.join(self.trans, "tmp", "test_transcript.gff"),
                 "test")
        gen_file(os.path.join(self.gffs, "test.gff"), "test")
        gen_file(os.path.join(self.utr5s, "tmp", "test_5UTR.gff"), "test")
        gen_file(os.path.join(self.utr3s, "tmp", "test_3UTR.gff"), "test")
        args = self.mock_args.mock()
        args.gffs = self.out_gff
        args.term_fuzzy = 3
        args.tss_fuzzy = 3
        self.operon._combine_gff(["test"], args)
        self.assertTrue(
            os.path.exists(os.path.join(self.out_gff, "test_operon.gff")))
Exemplo n.º 52
0
class TestRATT(unittest.TestCase):
    """Tests for the ``RATT`` wrapper, run inside a scratch ``test_folder``.

    ``setUp`` builds the folder tree and a ``RATT`` instance configured with
    it; ``tearDown`` removes the tree again.
    """

    def setUp(self):
        """Create the scratch folders, the RATT instance and the fixtures."""
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.ref_embls = "test_folder/embls"
        self.output_path = "test_folder/output"
        self.tar_fastas = "test_folder/tar_fasta"
        self.ref_fastas = "test_folder/ref_fasta"
        self.gff_outfolder = "test_folder/gffs"
        self.ref_gbk = "test_folder/gbk"
        # exist_ok: a stale test_folder left behind by an aborted run must not
        # prevent the sub-folders from being created.
        for folder in (self.test_folder, self.ref_embls, self.ref_gbk,
                       self.output_path, self.tar_fastas, self.ref_fastas,
                       self.gff_outfolder):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.output_path = self.output_path
        args.ref_embls = self.ref_embls
        args.ref_gbk = self.ref_gbk
        args.tar_fastas = self.tar_fastas
        args.ref_fastas = self.ref_fastas
        args.gff_outfolder = self.gff_outfolder
        self.ratt = RATT(args)
        self.example = Example()

    def tearDown(self):
        """Remove the scratch folder tree if it exists."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_convert_to_pttrnt(self):
        """``_convert_to_pttrnt`` must write the expected .rnt and .ptt."""
        files = ["aaa.gff"]
        gen_file(os.path.join(self.test_folder, "aaa.gff"),
                 self.example.gff_file)
        os.mkdir(os.path.join(self.tar_fastas, "tmp"))
        gen_file(os.path.join(self.tar_fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        # The context manager closes the log handle even if the call raises.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.ratt._convert_to_pttrnt(self.test_folder, files, log)
        data = import_data(os.path.join(self.test_folder, "aaa.rnt"))
        self.assertEqual("\n".join(data), self.example.rnt_file)
        data = import_data(os.path.join(self.test_folder, "aaa.ptt"))
        self.assertEqual("\n".join(data), self.example.ptt_file)

    def test_convert_to_gff(self):
        """``_convert_to_gff`` must write the GFF to both output folders."""
        files = ["aaa.gff"]
        ratt_result = "chromosome.aaa.final.embl"
        gen_file(os.path.join(self.output_path, ratt_result),
                 self.example.embl_file)
        args = self.mock_args.mock()
        args.output_path = self.output_path
        args.gff_outfolder = self.gff_outfolder
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.ratt._convert_to_gff(ratt_result, args, files, log)
        data = import_data(os.path.join(self.output_path, "aaa.gff"))
        self.assertEqual("\n".join(data), self.example.embl_gff)
        data = import_data(os.path.join(self.gff_outfolder, "aaa.gff"))
        self.assertEqual("\n".join(data), self.example.embl_gff)

    def test_parser_embl_gbk(self):
        """``_parser_embl_gbk`` must split the sample GenBank into records."""
        files = [os.path.join(self.test_folder, "aaa.gbk")]
        gen_file(os.path.join(self.test_folder, "aaa.gbk"),
                 self.example.gbk_file)
        self.ratt._parser_embl_gbk(files)
        # The sample text holds two records separated by "//".
        data = import_data(os.path.join(self.ref_gbk,
                           "gbk_tmp/NC_007795.1.gbk"))
        self.assertEqual(
            "\n".join(data),
            self.example.gbk_file.split("//")[0] + "//")
        data = import_data(os.path.join(
            self.ref_gbk, "gbk_tmp/NC_007799.1.gbk"))
        self.assertEqual(
            "\n".join(data),
            self.example.gbk_file.split("//")[1].strip() + "\n//")

    def test_convert_embl(self):
        """``_convert_embl`` must return and create the ``gbk_tmp`` folder."""
        gen_file(os.path.join(self.test_folder, "aaa.gbk"),
                 self.example.gbk_file.split("//")[0])
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            out = self.ratt._convert_embl(self.test_folder, log)
        self.assertEqual(out, "test_folder/gbk/gbk_tmp")
        self.assertTrue(os.path.exists("test_folder/gbk/gbk_tmp"))

    def test_format_and_run(self):
        """Smoke test: ``_format_and_run`` runs with ``_run_ratt`` mocked."""
        self.ratt._run_ratt = Mock_func().mock_run_ratt
        args = self.mock_args.mock()
        # Previously a bare ``args.output_path`` expression with no effect;
        # set it like the other tests of this class do.
        args.output_path = self.output_path
        args.pairs = ["NC_007795.1:Staphylococcus_aureus_HG003"]
        args.element = "chromosome"
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.ratt._format_and_run(args, log)

    def test_annotation_transfer(self):
        """``annotation_transfer`` must leave bbb.gff/.rnt/.ptt in the
        GFF output folder (with ``_run_ratt`` mocked)."""
        gen_file(os.path.join(self.ref_fastas, "aaa.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.tar_fastas, "bbb.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.ref_embls, "aaa.gbk"),
                 self.example.gbk_file.split("//")[0])
        self.ratt._run_ratt = Mock_func().mock_run_ratt
        args = self.mock_args.mock()
        args.element = "element"
        args.ref_embls = self.ref_embls
        args.tar_fastas = self.tar_fastas
        args.ref_fastas = self.ref_fastas
        args.output_path = self.output_path
        args.gff_outfolder = self.gff_outfolder
        args.pairs = ["aaa:bbb"]
        args.convert = True
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.ratt.annotation_transfer(args, log)
        for suffix in ("gff", "rnt", "ptt"):
            self.assertTrue(os.path.exists(
                os.path.join(self.gff_outfolder, "bbb." + suffix)))
Exemplo n.º 53
0
class TestsORFDetection(unittest.TestCase):
    def setUp(self):
        """Create the fixtures and the scratch folder tree for each test."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.fasta = "test_folder/fasta"
        self.wigs = "test_folder/wig"
        self.gff = "test_folder/gff"
        # exist_ok: a stale test_folder left behind by an aborted run must not
        # prevent the sub-folders from being created.
        for folder in (self.test_folder, self.fasta, self.wigs, self.gff):
            os.makedirs(folder, exist_ok=True)

    def tearDown(self):
        """Delete the scratch folder used by the test, if it is present."""
        scratch = self.test_folder
        if not os.path.exists(scratch):
            return
        shutil.rmtree(scratch)

    def test_get_coverage(self):
        """Compare ``sd.get_coverage`` with the expected coverage record."""
        cutoff_settings = {"3utr": "median", "5utr": "median",
                           "inter": 5, "interCDS": "median"}
        # One strain, one track; each region type has its own median value.
        medians = {"aaa": {
            "3utr": {"track_1": {"median": 3}},
            "5utr": {"track_1": {"median": 6}},
            "interCDS": {"track_1": {"median": 2}},
            "inter": {"track_1": {"median": 5}},
        }}
        track_cutoffs = {"track_1": 0}
        candidate = {
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": [1],
        }
        expected = {"frag_1": [{
            "low": 2, "avg": 33.4, "high": 100, "pos": 2,
            "track": "track_1", "type": "frag",
        }]}
        observed = sd.get_coverage(candidate, self.example.wigs, "+",
                                   cutoff_settings, medians, track_cutoffs,
                                   10)
        self.assertDictEqual(observed, expected)

    def test_detect_rbs_site(self):
        """``sd.detect_rbs_site`` must return ``[1]`` for the sample input."""
        settings = self.mock_args.mock()
        for option, value in (("max_len", 20), ("min_len", 3),
                              ("fuzzy_rbs", 2), ("rbs_seq", ["AGGAGG"])):
            setattr(settings, option, value)
        sites = sd.detect_rbs_site("AGGAGGCCGCTATGCCACACGT", 2,
                                   self.example.tas[0], settings)
        self.assertListEqual(sites, [1])

    def test_detect_start_stop(self):
        """Check ``sd.detect_start_stop`` on the "+" and then the "-" strand.

        The second half flips the strand of the ``self.example.tas[0]``
        fixture. The flip is undone in a ``finally`` block so that a failing
        assertion cannot leave the fixture on the reverse strand.
        """
        args = self.mock_args.mock()
        args.start_codon = ["ATG"]
        args.stop_codon = ["TTA"]
        args.max_len = 20
        args.min_len = 3
        args.fuzzy_rbs = 2
        args.rbs_seq = ["AGGAGG"]
        args.multi_stop = True

        # Forward strand.
        seq = {"aaa": "TAGGAGGCCGCTATGCCATTA"}
        sorf = sd.detect_start_stop(self.example.tas, seq, args)
        self.assertListEqual(sorf, [{
            'strand': '+',
            'type': 'intergenic',
            'starts': ['13'],
            'print': False,
            'seq': 'ATGCCATTA',
            'ends': ['21'],
            'end': 21,
            'start': 13,
            'rbs': [2],
            'strain': 'aaa'
        }])

        # Reverse strand, using the same transcript fixture.
        seq = {"aaa": "TTAAAGGCATTATCCTCCTA"}
        self.example.tas[0].strand = "-"
        try:
            sorf = sd.detect_start_stop(self.example.tas, seq, args)
            self.assertListEqual(sorf, [{
                'end': 10,
                'starts': ['2'],
                'strain': 'aaa',
                'ends': ['10'],
                'type': 'intergenic',
                'print': False,
                'seq': 'TAAAGGCAT',
                'rbs': [19],
                'strand': '-',
                'start': 2
            }])
        finally:
            self.example.tas[0].strand = "+"

    def test_read_data(self):
        """``sd.read_data`` must load the sample intergenic GFF and FASTA."""
        inter_path = os.path.join(self.test_folder, "inter")
        fasta_path = os.path.join(self.test_folder, "fa")
        gen_file(inter_path, self.example.inter)
        gen_file(fasta_path, ">aaa\nATATACCGATC")
        # No TSS or sRNA files are supplied; only the first and last of the
        # four returned values are checked.
        inters, _tsss, _srnas, genome = sd.read_data(
            inter_path, None, None, fasta_path, True)
        self.assertEqual(inters[0].start, 2)
        self.assertDictEqual(genome, {'aaa': 'ATATACCGATC'})

    def test_check_tss(self):
        """``sd.check_tss`` must update the ``checks`` flags in place."""
        candidate = {
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": [1], "with_TSS": [],
        }
        flags = {"start": False, "rbs": False, "import": False}
        sd.check_tss(candidate, self.example.tsss[0], 300, flags)
        expected = {"start": True, "rbs": [1], "import": True}
        self.assertDictEqual(flags, expected)

    def test_compare_sorf_tss(self):
        """``sd.compare_sorf_tss`` must attach the TSS to the candidate.

        Both returned lists are expected to hold the same annotated record.
        """
        candidates = [{
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": [1],
        }]
        settings = self.mock_args.mock()
        settings.utr_length = 300
        settings.noafter_tss = False
        settings.no_tss = False
        expected = [{
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": [1],
            "start_TSS": "1_+", "with_TSS": ["TSS:1_+"],
        }]
        all_hits, best_hits = sd.compare_sorf_tss(
            candidates, self.example.tsss, "tss", settings)
        self.assertListEqual(all_hits, expected)
        self.assertListEqual(best_hits, expected)

    def test_compare_sorf_srna(self):
        """``sd.compare_sorf_srna`` must add the ``srna`` entry in place."""
        candidates = [{
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": [1],
        }]
        expected = [{
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": [1],
            "srna": ["sRNA:5-8_+"],
        }]
        sd.compare_sorf_srna(candidates, self.example.srnas, "test")
        self.assertListEqual(candidates, expected)

    def test_import_overlap(self):
        """``sd.import_overlap`` must merge the second sORF into ``final``."""

        def first_candidate():
            # Fresh dict (and fresh lists) per call: the merged record is
            # modified in place and must not share state with the input.
            return {
                "strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "srna": ["NA"], "starts": ["2"], "ends": ["10"],
                "seq": "ATGTA", "type": "3utr", "print": False,
                "rbs": [1], "start_TSS": "1",
            }

        previous = first_candidate()
        merged = first_candidate()
        overlapping = {
            "strain": "aaa", "strand": "+", "start": 5, "end": 15,
            "srna": ["NA"], "starts": ["5"], "ends": ["15"],
            "seq": "ATGTA", "type": "3utr", "print": False,
            "rbs": [2], "start_TSS": "2",
        }
        expected = {
            "strain": "aaa", "strand": "+", "start": 2, "end": 15,
            "srna": ["NA"], "starts": ["2", "5"], "ends": ["10", "15"],
            "seq": "ATGTA", "type": "3utr", "print": False,
            "rbs": [1, 2], "start_TSS": "1",
            "candidate": ["2-6_TSS:1_RBS:1", "5-15_TSS:2_RBS:2"],
        }
        sd.import_overlap(overlapping, merged, previous, True)
        self.assertDictEqual(merged, expected)

    def test_merge(self):
        """``sd.merge`` must fuse the two overlapping sORFs into one."""
        genome = {"aaa": "TAGGAGGCCGCTATGCCATTA"}
        upstream = {
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": [1],
            "srna": ["sRNA1"], "start_TSS": "1",
        }
        downstream = {
            "strain": "aaa", "strand": "+", "start": 5, "end": 15,
            "starts": ["5"], "ends": ["15"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": [2],
            "srna": ["sRNA2"], "start_TSS": "2",
        }
        expected = {
            "strain": "aaa", "strand": "+", "start": 2, "end": 15,
            "starts": ["2", "5"], "ends": ["10", "6", "15"],
            "seq": "AGGAGGCCGCTATG", "type": "3utr", "rbs": [1, 2],
            "srna": ["sRNA1", "sRNA2"], "start_TSS": "1",
            "candidate": ["2-6_TSS:1_RBS:1", "5-15_TSS:2_RBS:2"],
        }
        merged = sd.merge([upstream, downstream], genome)
        self.assertDictEqual(merged[0], expected)

    def test_assign_utr_cutoff(self):
        """A ``"median"`` setting must yield the track's median (50)."""
        settings = {"3utr": "median", "5utr": 20,
                    "interCDS": 11, "intergenic": 59}
        track_stats = {"track": {"median": 50, "mean": 20}}
        result = sd.assign_utr_cutoff(settings, "3utr", track_stats,
                                      "track", 10)
        self.assertEqual(result, 50)

    def test_get_cutoff(self):
        """``sd.get_cutoff`` must return 50 for the 3'UTR sample sORF."""
        candidate = {
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": [1], "start_TSS": "1",
        }
        settings = {"3utr": "median", "5utr": 20,
                    "interCDS": 11, "intergenic": 59}
        stats = {"aaa": {"3utr": {"track_1": {"median": 50, "mean": 20}}}}
        result = sd.get_cutoff(candidate, "track_1", settings, stats, 10)
        self.assertEqual(result, 50)

    def test_get_attribute(self):
        """Compare ``sd.get_attribute`` with the expected attribute string."""
        candidate = {
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": ["1"],
            "start_TSS": "1", "with_TSS": "NA", "srna": "NA", "shift": 1,
        }
        expected = "ID=aaa_sorf1;Name=sORF_sORF_1;start_TSS=4;with_TSS=N,A;sORF_type=3utr;sRNA=N,A;rbs=1;frame_shift=1"
        attribute = sd.get_attribute(1, "sORF_1", "4", candidate, "utr")
        self.assertEqual(attribute, expected)

    def test_print_file(self):
        """``sd.print_file`` must write one GFF line and one table line."""
        gff_buffer = StringIO()
        table_buffer = StringIO()
        candidate = {
            "strain": "aaa", "strand": "+", "start": 10, "end": 15,
            "starts": ["10"], "ends": ["15"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": ["3"],
            "start_TSS": "1", "with_TSS": ["NA"], "srna": ["NA"],
            "candidate": ["AAA"], "shift": 1,
        }
        coverage = {
            "best": 20, "high": 50, "low": 10, "start": 1, "end": 10,
            "track": "track_1", "detail": [],
            "conds": {"frag": "track_1"},
        }
        settings = self.mock_args.mock()
        settings.table_best = True
        settings.print_all = True
        expected_gff = "aaa\tANNOgesic\tsORF\t10\t15\t.\t+\t.\tID=aaa_sorf1;Name=sORF_00001;start_TSS=1;with_TSS=NA;sORF_type=3utr;sRNA=NA;rbs=RBS_3;frame_shift=1\n"
        expected_table = "aaa\tsORF_00001\t10\t15\t+\t3'UTR_derived\tNA\tRBS_3\t10\t15\tNA\t1\tFragmented\t20\t\tATGTA\tAAA\n"
        sd.print_file(candidate, coverage, 1, gff_buffer, table_buffer,
                      "best", settings)
        self.assertEqual(gff_buffer.getvalue(), expected_gff)
        self.assertEqual(table_buffer.getvalue(), expected_table)

    def test_print_table(self):
        """``sd.print_table`` must write the expected tab-separated row."""
        table_buffer = StringIO()
        candidate = {
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": ["1"],
            "start_TSS": "1", "with_TSS": ["NA"], "srna": ["NA"],
            "candidate": ["AAA"], "shift": 1,
        }
        coverage = {
            "best": 20, "high": 50, "low": 10, "start": 1, "end": 10,
            "track": "track_1", "detail": [],
            "conds": {"frag": "track_1"},
        }
        settings = self.mock_args.mock()
        settings.table_best = True
        settings.print_all = True
        expected_row = "aaa\tsORF_test\t2\t6\t+\t3utr\tNA\t1\t2\t10\tNA\t1\tfrag\t20\t\tATGTA\tAAA\n"
        sd.print_table(table_buffer, candidate, "test", "3utr", "frag",
                       coverage, settings)
        self.assertEqual(table_buffer.getvalue(), expected_row)

    def test_get_inter_coverage(self):
        """``sd.get_inter_coverage`` must collect the ``avg`` per track."""
        collected = {}
        regions = [{"frag": [{"track": "track_1", "avg": 22}]}]
        sd.get_inter_coverage(regions, collected)
        self.assertDictEqual(collected, {"track_1": [22]})

    def test_detect_utr_type(self):
        """``sd.detect_utr_type`` must append to ``med_inters`` in place.

        ``sd.get_coverage`` is mocked for the call and reinstated in a
        ``finally`` block, so an exception inside the call cannot leave the
        mock installed for the tests that follow.
        """
        ta_dict = {
            "seq_id": "aaa",
            "source": "intergenic",
            "feature": "Transcript",
            "start": 1,
            "end": 23,
            "phase": ".",
            "strand": "+",
            "score": "."
        }
        attributes_ta = {
            "ID": "tran0",
            "Name": "Transcript_0",
            "UTR_type": "intergenic"
        }
        transcript = Create_generator(ta_dict, attributes_ta, "gff")
        med_inters = {"aaa": {"intergenic": []}}
        sd.get_coverage = self.mock.mock_get_coverage
        try:
            sd.detect_utr_type(transcript, "intergenic", med_inters, "wigs",
                               "+", "test")
        finally:
            sd.get_coverage = get_coverage
        self.assertDictEqual(med_inters, {'aaa': {'intergenic': ["2"]}})

    def test_median_score(self):
        """``sd.median_score`` with ``"p_0.5"`` must return 11 here."""
        values = [1, 3, 11, 42, 2, 32, 111]
        result = sd.median_score(values, "p_0.5")
        self.assertEqual(result, 11)

    def test_mean_score(self):
        """``sd.mean_score`` must return the mean of the sample values."""
        num = sd.mean_score([1, 3, 11, 42, 2, 32, 111])
        # Compare with a tolerance: exact float equality would break on a
        # last-bit difference if the summation strategy ever changes.
        self.assertAlmostEqual(num, 28.857142857142858)

    def test_validate_tss(self):
        """Compare ``sd.validate_tss`` with the expected tuple."""
        candidate = {
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": ["1"],
            "start_TSS": "3", "with_TSS": ["TSS:3_+"], "srna": ["NA"],
            "candidate": ["AAA"],
        }
        expected = (["TSS:3_+"], "NA")
        result = sd.validate_tss([2], [6], candidate, 300)
        self.assertEqual(result, expected)

    def test_validate_srna(self):
        """``sd.validate_srna`` must return the sample sRNA entry."""
        candidate = {
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": ["1"],
            "start_TSS": "1", "with_TSS": ["TSS:3_+"],
            "srna": ["sRNA:2-5_+"], "candidate": ["AAA"],
        }
        kept = sd.validate_srna([2], [6], candidate)
        self.assertListEqual(kept, ["sRNA:2-5_+"])

    def test_get_best(self):
        """``sd.get_best`` must return the single sample candidate."""
        candidates = [{
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": ["1"],
            "start_TSS": "1", "with_TSS": ["TSS:3_+"],
            "srna": ["sRNA:2-5_+"], "candidate": ["2-6_TSS:3_RBS:1"],
        }]
        settings = self.mock_args.mock()
        settings.table_best = True
        settings.no_srna = True
        settings.utr_length = 300
        # Written out separately so the expectation does not alias the input.
        expected = [{
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": ["1"],
            "start_TSS": "1", "with_TSS": ["TSS:3_+"],
            "srna": ["sRNA:2-5_+"], "candidate": ["2-6_TSS:3_RBS:1"],
        }]
        best = sd.get_best(candidates, "tss", "srna", settings)
        self.assertListEqual(best, expected)

    def test_coverage_and_output(self):
        """Check the GFF and table text written by ``sd.coverage_and_output``.

        ``sd.get_coverage`` is mocked for the call and reinstated in a
        ``finally`` block so an exception inside the call cannot leave the
        mock installed for the tests that follow.
        """
        out_t = StringIO()
        out_g = StringIO()
        sorfs = [{
            "strain": "aaa",
            "strand": "+",
            "start": 10,
            "end": 15,
            "starts": [str(10)],
            "ends": [str(15)],
            "seq": "ATGTA",
            "type": "3utr",
            "print": False,
            "rbs": [1],
            "start_TSS": "1",
            "with_TSS": ["TSS:3_+"],
            "srna": ["sRNA:2-5_+"],
            "candidate": ["2-6_TSS:3_RBS:1"]
        }]
        seq = {"aaa": "TAGGAGGCCGCTATGCCATTA"}
        wigs = {"forward": "wigs_f", "reverse": "wigs_r"}
        args = self.mock_args.mock()
        args.print_all = True
        args.min_rbs = 0
        args.max_rbs = 20
        args.min_len = 0
        args.max_len = 300
        args.table_best = True
        args.background = 10
        sd.get_coverage = self.mock.mock_get_coverage
        # NOTE(review): replicate_comparison stays mocked after this test, as
        # before; other tests may rely on that - confirm before restoring it.
        sd.replicate_comparison = self.mock.mock_replicate_comparison
        try:
            sd.coverage_and_output(sorfs, "median", wigs, out_g, out_t,
                                   "best", seq, "cover", args, "texs",
                                   "final")
        finally:
            sd.get_coverage = copy.deepcopy(get_coverage)
        self.assertEqual(out_g.getvalue(), (
            "##gff-version 3\naaa\tANNOgesic\tsORF\t10\t15\t."
            "\t+\t.\tID=aaa_sorf0;Name=sORF_00000;start_TSS=1;"
            "with_TSS=TSS:3_+;sORF_type=3utr;sRNA=NA;rbs=RBS_1;frame_shift=1\n"
        ))
        # Line 0 of the table output is skipped; only line 1 is compared.
        self.assertEqual(out_t.getvalue().split("\n")[1], (
            "aaa\tsORF_00000\t10\t15\t+\t3'UTR_derived\tTSS:3_+"
            "\tRBS_1\t10\t15\tNA\t1\tFragmented\t20\t\tGCTATG\t10-15_TSS:3_+_RBS:1"
        ))

    def test_detect_inter_type(self):
        """Compare ``sd.detect_inter_type`` with the expected per-type lists.

        ``sd.get_coverage`` is mocked for the call and reinstated in a
        ``finally`` block; previously it was only restored after a passing
        assertion, so a failure left the mock installed.
        """
        inter_dict = {
            "seq_id": "aaa",
            "source": "UTR_derived",
            "feature": "Transcript",
            "start": 1,
            "end": 23,
            "phase": ".",
            "strand": "+",
            "score": "."
        }
        attributes_inter = {
            "ID": "tran0",
            "Name": "Transcript_0",
            "UTR_type": "3utr"
        }
        inters = [Create_generator(inter_dict, attributes_inter, "gff")]
        wigs = {"forward": "wigs_f", "reverse": "wigs_r"}
        sd.get_coverage = self.mock.mock_get_coverage
        try:
            data = sd.detect_inter_type(inters, wigs, "test")
        finally:
            sd.get_coverage = copy.deepcopy(get_coverage)
        self.assertDictEqual(
            data, {'aaa': {
                'interCDS': [],
                '5utr': [],
                '3utr': ['2']
            }})

    def test_set_median(self):
        """``sd.set_median`` must fill ``mediandict`` in place."""
        medians = {}
        track_values = {"aaa": {"3utr": {"track_1": [1, 3, 4, 2, 55]}}}
        settings = {"3utr": "p_0.5", "5utr": "p_0.5", "interCDS": "n_100"}
        expected = {"aaa": {
            "3utr": {"track_1": {"median": 3}},
            "5utr": {},
            "interCDS": {},
        }}
        sd.set_median(track_values, medians, settings)
        self.assertDictEqual(medians, expected)

    def test_compute_candidate_best(self):
        """``sd.compute_candidate_best`` must add ``candidate`` in place."""
        candidates = [{
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": ["1"],
            "start_TSS": "1", "with_TSS": ["TSS:3_+"],
            "srna": ["sRNA:2-5_+"],
        }]
        expected = [{
            "strain": "aaa", "strand": "+", "start": 2, "end": 6,
            "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
            "type": "3utr", "print": False, "rbs": ["1"],
            "start_TSS": "1", "with_TSS": ["TSS:3_+"],
            "srna": ["sRNA:2-5_+"], "candidate": ["2-6_TSS:1_RBS:1"],
        }]
        sd.compute_candidate_best(candidates)
        self.assertListEqual(candidates, expected)

    def test_sorf_detection(self):
        """``sd.sorf_detection`` must write the all/best CSV and GFF files.

        Several ``sd`` functions are mocked for the run. ``read_libs``,
        ``read_wig`` and ``get_inter_coverage`` used to stay mocked after the
        test; they are now put back (together with ``get_coverage``) in a
        ``finally`` block so other tests see the real functions regardless of
        execution order or of a failure in here.
        """
        fasta = os.path.join(self.fasta, "fasta")
        srna_gff = os.path.join(self.gff, "srna.gff")
        inter_gff = os.path.join(self.gff, "inter.gff")
        tss_file = os.path.join(self.gff, "tss.gff")
        # Remember what is about to be replaced so it can be reinstated.
        saved = {name: getattr(sd, name)
                 for name in ("read_libs", "read_wig", "get_inter_coverage")}
        sd.get_coverage = self.mock.mock_get_coverage
        sd.read_libs = self.mock.mock_read_libs
        sd.read_wig = self.mock.mock_read_wig
        sd.get_inter_coverage = self.mock.mock_get_inter_coverage
        try:
            gen_file(fasta, ">aaa\nTAGGAGGCCGCTATGCCATTA")
            gen_file(srna_gff, self.example.srna)
            gen_file(inter_gff, self.example.inter)
            gen_file(tss_file, self.example.tss)
            args = self.mock_args.mock()
            args.start_codon = ["ATG"]
            args.stop_codon = ["TTA"]
            args.cutoff_5utr = "p_0.5"
            args.cutoff_intercds = "n_20"
            args.cutoff_3utr = "n_11"
            args.cutoff_inter = 50
            args.cutoff_anti = 50
            args.libs = ["frag:frag:1:a:+"]
            args.merge_wigs = "wig_folder"
            args.utr_detect = True
            args.background = 10
            args.print_all = True
            sd.sorf_detection(fasta, srna_gff, inter_gff, tss_file,
                              "wig_f_file", "wig_r_file", "test_folder/test",
                              args)
        finally:
            sd.get_coverage = copy.deepcopy(get_coverage)
            for name, func in saved.items():
                setattr(sd, name, func)
        # NOTE(review): this assignment happens after the call, so it cannot
        # influence this test; kept as-is in case later tests rely on it -
        # confirm whether it was meant to precede sorf_detection().
        sd.replicate_comparison = self.mock.mock_replicate_comparison
        for suffix in ("all.csv", "all.gff", "best.csv", "best.gff"):
            self.assertTrue(os.path.exists("test_folder/test_" + suffix))
Exemplo n.º 54
0
class TestConverter(unittest.TestCase):
    """Tests for ``Converter`` with its parsers and file reader mocked."""

    def setUp(self):
        """Create a ``Converter`` wired to mocks and load the fixture texts."""
        self.converter = Converter()
        self.example = Example()
        # Replace the collaborators of the converter with test doubles.
        self.converter.gff3parser = Mock_gff3_parser
        self.converter._print_rntptt_title = Mock_func().print_rntptt_title
        self.converter.tsspredator = Mock_TSSPredatorReader()
        self.converter._read_file = Mock_func().mock_read_file
        self.gff_file = self.example.gff_file
        self.ptt_out = self.example.ptt_out
        self.rnt_out = self.example.rnt_out
        self.srna_out = self.example.srna_out
        self.embl_file = self.example.embl_file
        self.embl_out = self.example.embl_out
        self.multi_embl = self.example.multi_embl
        self.gff_out = self.example.gff_out
        self.mastertable = self.example.mastertable
        self.tss_file = self.example.tss_file
        self.fasta_file = self.example.fasta_file
        self.transterm = self.example.transterm
        self.term_file = self.example.term_file
        self.circ_file = self.example.circrna_table
        self.circ_all = self.example.circrna_all
        self.circ_best = self.example.circrna_best
        self.test_folder = "test_folder"
        self.mock_args = MockClass()
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder created in ``setUp``."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_print_rntptt_file(self):
        """Compare the printed ptt/rnt text with the expected fixtures."""
        cdss = []
        genes = []
        rnas = []
        for gff in Example().gff_dict:
            if gff["feature"] == "gene":
                genes.append(self.converter.gff3parser.entries(self, gff))
            elif gff["feature"] == "CDS":
                cdss.append(self.converter.gff3parser.entries(self, gff))
            elif gff["feature"] == "tRNA":
                rnas.append(self.converter.gff3parser.entries(self, gff))
        out_p = StringIO()
        out_r = StringIO()
        self.converter._print_rntptt_file(out_p, cdss, genes)
        self.converter._print_rntptt_file(out_r, rnas, genes)
        # The last split element is dropped before comparing.
        self.assertEqual(out_p.getvalue().split("\n")[:-1],
                         self.ptt_out.split("\n"))
        self.assertEqual(out_r.getvalue().split("\n")[:-1],
                         self.rnt_out.split("\n"))
        out_p.close()
        out_r.close()

    def test_srna2pttrnt(self):
        """Convert an sRNA GFF file and compare the written lines as a set."""
        srna_input_file = os.path.join(self.test_folder, "srna.gff")
        srna_output_file = os.path.join(self.test_folder, "srna.out")
        with open(srna_input_file, "w") as fh:
            fh.write(self.gff_file)
        srnas = []
        self.converter._srna2rntptt(srna_input_file, srna_output_file, srnas,
                                    1234567)
        datas = import_data(srna_output_file)
        self.assertEqual(set(datas), set(self.srna_out.split("\n")))

    def test_multi_embl_pos(self):
        """Parse EMBL lines one by one and check the collected entries."""
        embls = []
        for line in self.embl_file.split("\n"):
            datas = self.converter._multi_embl_pos(line.strip())
            # "Wrong" is the marker the converter returns for skipped lines.
            if datas != "Wrong":
                embls.append(datas)
        for index in range(0, 7):
            self.assertDictEqual(embls[index], self.embl_out[index])
        for index in range(0, 2):
            self.assertDictEqual(embls[-1]["pos"][index],
                                 self.multi_embl[index])

    def test_parser_embl_data(self):
        """Parse an EMBL file into a buffer and check text and returned info."""
        embl_file = os.path.join(self.test_folder, "test.embl")
        out = StringIO()
        with open(embl_file, "w") as eh:
            for line in self.embl_file.split("\n"):
                eh.write(line + "\n")
        info = self.converter._parser_embl_data(embl_file, out)
        datas = out.getvalue().split("\n")
        self.assertEqual(set(datas[:-1]), set(self.gff_out.split("\n")))
        self.assertEqual(info[0], "NC_007795.1")
        for index in range(0, 2):
            self.assertDictEqual(info[1]["pos"][index], self.multi_embl[index])
        out.close()

    def test_multi_tss_class(self):
        """Feed every master-table entry through ``_multi_tss_class``."""
        nums = {"tss": 0, "tss_uni": 0, "class": 1}
        utrs = {"total": [], "pri": [], "sec": []}
        tss_features = {"tss_types": [], "locus_tags": [], "utr_lengths": []}
        tss_index = defaultdict(int)
        fh = StringIO(self.mastertable)
        for tss in self.converter.tsspredator.entries(fh):
            self.converter._multi_tss_class(tss, tss_index, tss_features, nums,
                                            utrs)
        fh.close()
        self.assertDictEqual(nums, {'tss_uni': 0, 'class': 5, 'tss': 2})

    def test_convert_mastertable2gff(self):
        """Convert a master table file and compare the GFF lines as a set."""
        master_file = os.path.join(self.test_folder, "test.tsv")
        with open(master_file, "w") as th:
            th.write(self.mastertable)
        out_gff = os.path.join(self.test_folder, "test.tsv_out")
        self.converter.convert_mastertable2gff(master_file, "ANNOgesic", "TSS",
                                               "aaa", out_gff)
        datas = import_data(out_gff)
        self.assertEqual(set(datas), set(self.tss_file.split("\n")))

    def test_convert_gff2rntptt(self):
        """Run the GFF-to-rnt/ptt conversion and check the outputs exist."""
        srna_input_file = os.path.join(self.test_folder, "srna.gff")
        srna_output_file = os.path.join(self.test_folder, "srna.out")
        gff_file = os.path.join(self.test_folder, "test.gff")
        rnt_file = os.path.join(self.test_folder, "test.rnt")
        ptt_file = os.path.join(self.test_folder, "test.ptt")
        fasta_file = os.path.join(self.test_folder, "test.fa")
        with open(srna_input_file, "w") as fh:
            fh.write(self.gff_file)
        with open(gff_file, "w") as fh:
            fh.write(self.gff_file)
        with open(fasta_file, "w") as fh:
            fh.write(self.fasta_file)
        self.converter.convert_gff2rntptt(gff_file, fasta_file, ptt_file,
                                          rnt_file, srna_input_file,
                                          srna_output_file)
        # Asserting on the path string itself is always true; check the
        # files on disk instead.
        self.assertTrue(os.path.exists(srna_output_file))
        self.assertTrue(os.path.exists(rnt_file))
        self.assertTrue(os.path.exists(ptt_file))

    def test_convert_embl2gff(self):
        """Convert an EMBL file to GFF and compare the body lines as a set."""
        embl_file = os.path.join(self.test_folder, "test.embl")
        gff_file = os.path.join(self.test_folder, "test.embl_out")
        with open(embl_file, "w") as eh:
            for line in self.embl_file.split("\n"):
                eh.write(line + "\n")
        self.converter.convert_embl2gff(embl_file, gff_file)
        datas = import_data(gff_file)
        # The first line and the last two lines are excluded from the check.
        self.assertEqual(set(datas[1:-2]), set(self.gff_out.split("\n")))

    def test_convert_transtermhp2gff(self):
        """Convert a TransTermHP result and compare the GFF lines as a set."""
        transterm_file = os.path.join(self.test_folder,
                                      "test_best_terminator_after_gene.bag")
        gff_file = os.path.join(self.test_folder, "transterm.gff")
        with open(transterm_file, "w") as th:
            th.write(self.transterm)
        self.converter.convert_transtermhp2gff(transterm_file, gff_file)
        datas = import_data(gff_file)
        self.assertEqual(set(datas), set(self.term_file.split("\n")))

    @staticmethod
    def get_info(datas):
        """Return the first eight tab-separated columns of each data line.

        Lines starting with ``#`` are skipped.
        """
        return ["\t".join(data.split("\t")[:8])
                for data in datas if not data.startswith("#")]

    def test_convert_circ2gff(self):
        """Convert a circRNA table and check both the full and filtered GFF."""
        circ_file = os.path.join(self.test_folder, "circ.csv")
        out_all = os.path.join(self.test_folder, "all.gff")
        out_filter = os.path.join(self.test_folder, "best.gff")
        with open(circ_file, "w") as ch:
            ch.write(self.circ_file)
        args = self.mock_args.mock()
        args.start_ratio = 0.5
        args.end_ratio = 0.5
        args.support = 5
        self.converter.convert_circ2gff(circ_file, args, out_all, out_filter)
        # Only the first eight columns are compared.
        self.assertListEqual(self.get_info(import_data(out_all)),
                             self.get_info(self.circ_all.split("\n")))
        self.assertListEqual(self.get_info(import_data(out_filter)),
                             self.get_info(self.circ_best.split("\n")))
Exemplo n.º 55
0
class TestsRNAUTR(unittest.TestCase):
    def setUp(self):
        """Create the fixture helpers and make sure the scratch folder exists."""
        self.example = Example()
        self.mock = Mock_func()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        folder_missing = not os.path.exists(self.test_folder)
        if folder_missing:
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch folder if it is still present."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_import_data(self):
        """Check the record ``sud.import_data`` builds for a '+' 3UTR input."""
        position = {"start": 4, "end": 40, "ori_start": 2, "ori_end": 3}
        expected = {
            'strain': 'aaa',
            'strand': '+',
            'start': 4,
            'end': 40,
            'utr': '3UTR',
            'start_tss': 'cds',
            'start_cleavage': 'NA',
            'end_cleavage': 'test',
            'datas': 'srna_cover',
        }
        record = sud.import_data("+", "aaa", position, "3UTR", "TSS", "cds",
                                 "srna_cover", "test")
        self.assertDictEqual(record, expected)

    def test_read_data(self):
        """Read one GFF (used for every annotation input) plus one FASTA."""
        args = self.mock_args.mock()
        # The same generated GFF file serves as all four annotation inputs.
        annotation = os.path.join(self.test_folder, "test.gff")
        for option in ("gff_file", "ta_file", "tss_file", "pro_file"):
            setattr(args, option, annotation)
        args.seq_file = os.path.join(self.test_folder, "test.fa")
        gen_file(args.gff_file, self.example.gff_file)
        gen_file(args.seq_file, self.example.seq_file)
        args.hypo = False

        cdss, tas, tsss, pros, seq = sud.read_data(args)

        for features in (cdss, tas, tsss, pros):
            self.assertEqual(features[0].start, 4)
        expected_seq = {
            'aaa':
            'ATATGACGATACGTAAACCGACCGAATATATCTTTTCACAACCAGATTACGATCGTCAT'
        }
        self.assertDictEqual(seq, expected_seq)

    def test_get_terminal(self):
        """``sud.get_terminal`` in "start" mode should append one region."""
        genome = {
            "aaa":
            "ATATGACGATACGTAAACCGACCGAATATATCTTTTCACAACCAGATTACGATCGTCAT"
        }
        collected = []
        sud.get_terminal(self.example.gffs, collected, genome, "start")
        expected = [{
            'strain': 'aaa',
            'strand': '+',
            'start': 1,
            'end': 4,
            'len_CDS': 0,
        }]
        self.assertListEqual(collected, expected)

    def test_get_inter(self):
        """``sud.get_inter`` should append one region for the example GFFs."""
        collected = []
        sud.get_inter(self.example.gffs, collected)
        expected = [{
            'strain': 'aaa',
            'strand': '+',
            'start': 14,
            'end': 20,
            'len_CDS': 10,
        }]
        self.assertListEqual(collected, expected)

    def test_set_cover_and_point(self):
        """Check the cover slice and check points stored in ``cover_results``."""
        coverage = [2, 3, 4, 1, 6, 2, 8, 3, 5, 6, 7, 5, 2, 1]
        position = {"start": 2, "end": 6, "ori_start": 2, "ori_end": 3}
        results = {"covers": None, "check_point": None}
        sud.set_cover_and_point(results, self.example.inters[0], coverage,
                                position, 5)
        expected_points = {
            'utr_start': 2,
            'utr_end': 3,
            'srna_start': 0,
            'srna_end': 12,
        }
        self.assertListEqual(results["covers"],
                             [2, 3, 4, 1, 6, 2, 8, 3, 5])
        self.assertDictEqual(results["check_point"], expected_points)

    def test_check_import_srna_covers(self):
        """Run ``sud.check_import_srna_covers`` with and without a decrease.

        With ``detect_decrease`` set, one identical entry is expected in the
        sRNA and UTR cover lists; with it cleared, the sRNA list is expected
        to stay empty.
        """
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        cover = {"type": "5utr"}
        pos = {"start": 1, "end": 25, "ori_start": 1, "ori_end": 25}
        datas = {
            "num": 0,
            "cover_tmp": {"total": 100, "ori_total": 200},
            "checks": {"detect_decrease": True},
            "final_poss": {"start": 3, "end": 23},
        }
        cover_results = {
            "cover_sets": {"high": 50, "low": 10},
            "srna_covers": {"cond_1": []},
            "utr_covers": {"cond_1": []},
            "type": "5utr",
            "intercds": "TSS",
        }

        def run_check():
            # Same call for both scenarios; only the dict contents change.
            sud.check_import_srna_covers(datas, cover_results,
                                         self.example.inters[0], "cond_1",
                                         "track", cover, pos, args, "5utr")

        # Scenario 1: a decrease was detected.
        run_check()
        expected_entry = {
            'track': 'track',
            'type': '5utr',
            'final_start': 3,
            'final_end': 23,
            'high': 50,
            'low': 10,
            'avg': 4,
            'ori_avg': 8.0,
        }
        self.assertDictEqual(datas["final_poss"], {'end': 23, 'start': 3})
        self.assertDictEqual(cover_results["srna_covers"],
                             {'cond_1': [expected_entry]})
        self.assertDictEqual(cover_results["utr_covers"],
                             cover_results["srna_covers"])

        # Scenario 2: no decrease detected, starting from empty lists again.
        datas["checks"] = {"detect_decrease": False}
        cover_results["srna_covers"] = {"cond_1": []}
        cover_results["utr_covers"] = {"cond_1": []}
        run_check()
        self.assertDictEqual(cover_results["srna_covers"], {'cond_1': []})

    def test_check_pos(self):
        """``sud.check_pos`` should set both flags for this input."""
        flags = {"srna": False, "utr": False}
        boundaries = {
            "utr_start": 1,
            "utr_end": 29,
            "srna_start": 3,
            "srna_end": 11,
        }
        sud.check_pos({"pos": 4}, boundaries, flags, 4)
        self.assertDictEqual(flags, {'srna': True, 'utr': True})

    def test_get_cover_5utr(self):
        """Check ``sud.get_cover_5utr`` for a stopping and a continuing case."""
        args = self.mock_args.mock()
        args.decrease_utr = 50
        args.fuzzy_utr = 2
        cover = 20

        # Case 1: previous 5'UTR coverage is 0 and decrease_utr is 50.
        state = {
            "num": 0,
            "cover_tmp": {"5utr": 0},
            "checks": {"detect_decrease": True},
            "final_poss": {"start": 1, "end": 26},
        }
        limits = {"high": 50, "low": 10}
        stopped = sud.get_cover_5utr(state, limits, cover,
                                     self.example.inters[0], args, 10)
        self.assertDictEqual(state["final_poss"], {'start': 1, 'end': 10})
        self.assertEqual(state["num"], 0)
        self.assertTrue(stopped)
        self.assertDictEqual(state["cover_tmp"], {'5utr': 0})
        self.assertDictEqual(limits, {'high': 50, 'low': 10})

        # Case 2: previous 5'UTR coverage is 30 and decrease_utr is 0.5.
        state = {
            "num": 0,
            "cover_tmp": {"5utr": 30},
            "checks": {"detect_decrease": True},
            "final_poss": {"start": 1, "end": 26},
        }
        limits = {"low": 10, "high": 50}
        args.decrease_utr = 0.5
        stopped = sud.get_cover_5utr(state, limits, cover,
                                     self.example.inters[0], args, 10)
        self.assertEqual(state["num"], 1)
        self.assertFalse(stopped)
        self.assertDictEqual(state["final_poss"], {'start': 1, 'end': 26})
        self.assertDictEqual(state["cover_tmp"], {'5utr': 20})
        self.assertDictEqual(limits, {'low': 20, 'high': 50})

    def test_detect_cover_utr_srna(self):
        """Check the cover entries ``sud.detect_cover_utr_srna`` records.

        ``sud.coverage_comparison`` is mocked for the duration of the test
        and put back afterwards, so the mock cannot leak into other tests.
        """
        # Register the restore first so it runs even if the test fails.
        self.addCleanup(setattr, sud, "coverage_comparison",
                        sud.coverage_comparison)
        sud.coverage_comparison = self.mock.mock_coverage_comparison
        cover_results = {
            "cover_sets": {
                "low": 10,
                "high": 50
            },
            "pos": {
                "low": 10,
                "high": 50
            },
            "covers": [20],
            "type": "5utr",
            "srna_covers": {
                "frag_1": []
            },
            "utr_covers": {
                "frag_1": []
            },
            "intercds": "TSS",
            "check_point": {
                "utr_start": 1,
                "utr_end": 29,
                "srna_start": 2,
                "srna_end": 25
            }
        }
        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 23}
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        sud.detect_cover_utr_srna(cover_results, pos, self.example.inters[0],
                                  "frag_1", "track_1", args, "frag", 2, 20,
                                  "+")
        self.assertDictEqual(
            cover_results["srna_covers"], {
                'frag_1': [{
                    'low': 20,
                    'high': 50,
                    'track': 'track_1',
                    'final_start': 2,
                    'ori_avg': 0.8695652173913043,
                    'type': 'frag',
                    'final_end': 20,
                    'avg': 1.0526315789473684
                }]
            })
        self.assertDictEqual(cover_results["utr_covers"],
                             cover_results["srna_covers"])
        self.assertDictEqual(cover_results["cover_sets"], {
            'best': 20,
            'low': 20,
            'high': 50
        })

    def test_get_coverage(self):
        """Check the covers returned by ``sud.get_coverage`` with mocked helpers.

        ``sud.coverage_comparison`` and ``sud.detect_cover_utr_srna`` are
        mocked for this test only; both are restored afterwards so that other
        tests see the real functions regardless of execution order.
        """
        # Register the restores first so they run even if the test fails.
        for helper in ("coverage_comparison", "detect_cover_utr_srna"):
            self.addCleanup(setattr, sud, helper, getattr(sud, helper))
        sud.coverage_comparison = self.mock.mock_coverage_comparison
        sud.detect_cover_utr_srna = self.mock.mock_detect_cover_utr_srna
        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25}
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        srna_covers, utr_covers = sud.get_coverage(self.example.wigs,
                                                   self.example.inters[0], pos,
                                                   "3utr", "TSS", args)
        self.assertDictEqual(
            srna_covers, {
                'frag_1': [{
                    'track': 'track_1',
                    'high': 50,
                    'final_start': 2,
                    'type': 'frag',
                    'avg': 8.052631578947368,
                    'low': 10,
                    'final_end': 3,
                    'ori_avg': 2.12
                }]
            })
        self.assertDictEqual(utr_covers, srna_covers)

    def test_get_utr_cutoff(self):
        """Check the median/mean stored by ``sud.get_utr_cutoff`` for "p_0.5"."""
        averages = [30, 60, 550, 302, 44]
        medians = {"aaa": {"5utr": {"bbb": {}}}}
        sud.get_utr_cutoff("p_0.5", medians, averages, "aaa", "5utr", "bbb")
        expected_stats = {'mean': 197.2, 'median': 60}
        self.assertDictEqual(medians,
                             {'aaa': {'5utr': {'bbb': expected_stats}}})

    def test_detect_normal(self):
        """Check the sRNA/UTR records written by ``sud.detect_normal``.

        Two scenarios are run with ``sud.get_coverage`` mocked: one without
        processing sites on ``args`` and one with ``args.pros`` set and a
        narrower length window.
        """
        # Restore the real function even when an assertion below fails.
        self.addCleanup(setattr, sud, "get_coverage", get_coverage)
        sud.get_coverage = self.mock.mock_get_coverage
        # Coverage entry expected for track "frag_1" in every record below.
        coverage = {
            'frag_1': [{
                'track': 'track_1',
                'final_start': 2,
                'avg': 41.36842105263158,
                'high': 50,
                'type': 'frag',
                'final_end': 20,
                'ori_avg': 27.52,
                'low': 10
            }]
        }
        diff = 50
        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25}
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        args.utrs = []
        args.srnas = []
        sud.detect_normal(diff, self.example.wigs, self.example.inters[0], pos,
                          "3utr", self.example.tsss[0], args)
        expected_srna = {
            'end': 20,
            'strand': '+',
            'datas': coverage,
            'end_cleavage': 'NA',
            'utr': '3utr',
            'start_cleavage': 'NA',
            'strain': 'aaa',
            'start': 2,
            'start_tss': 'TSS:1_+'
        }
        self.assertListEqual(args.srnas, [expected_srna])
        # The UTR record is expected to differ only in its start_tss field.
        self.assertListEqual(args.utrs, [dict(expected_srna, start_tss='NA')])

        # Second scenario: processing sites available, narrower length window.
        args.utrs = []
        args.srnas = []
        args.pros = self.example.pros
        args.min_len = 3
        args.max_len = 20
        pos = {"start": 2, "end": 24, "ori_start": 1, "ori_end": 25}
        sud.detect_normal(diff, self.example.wigs, self.example.inters[0], pos,
                          "3utr", self.example.tsss[0], args)
        self.assertListEqual(args.srnas, [{
            'start': 1,
            'end': 18,
            'start_tss': 'TSS:1_+',
            'datas': coverage,
            'start_cleavage': 'NA',
            'end_cleavage': 'Cleavage:18_+',
            'utr': '3utr',
            'strand': '+',
            'strain': 'aaa'
        }])

    def test_detect_3utr_pro(self):
        """Check the records ``sud.detect_3utr_pro`` adds to ``args``.

        ``sud.get_coverage`` is mocked for the call and restored afterwards.
        """
        # Restore the real function even when an assertion below fails.
        self.addCleanup(setattr, sud, "get_coverage", get_coverage)
        sud.get_coverage = self.mock.mock_get_coverage
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 1
        args.fuzzy_tsss = {"3utr": 3}
        args.pros = self.example.pros
        args.utrs = []
        args.srnas = []
        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25}
        sud.detect_3utr_pro(self.example.inters[0], pos, self.example.wigs,
                            "3utr", args)
        expected_srna = {
            'end_cleavage': 'NA',
            'end': 20,
            'start_cleavage': 'Cleavage:18_+',
            'utr': '3utr',
            'datas': {
                'frag_1': [{
                    'low': 10,
                    'final_start': 2,
                    'track': 'track_1',
                    'type': 'frag',
                    'final_end': 20,
                    'avg': 41.36842105263158,
                    'ori_avg': 27.52,
                    'high': 50
                }]
            },
            'strand': '+',
            'start_tss': 'NA',
            'start': 18,
            'strain': 'aaa'
        }
        self.assertListEqual(args.srnas, [expected_srna])
        # The UTR record is expected to differ only in start_cleavage.
        self.assertListEqual(args.utrs,
                             [dict(expected_srna, start_cleavage='NA')])

    def test_detect_twopro(self):
        """Check the records ``sud.detect_twopro`` adds for two processing sites.

        ``sud.get_coverage`` is mocked for the call and restored afterwards.
        """
        # Restore the real function even when an assertion below fails.
        self.addCleanup(setattr, sud, "get_coverage", get_coverage)
        sud.get_coverage = self.mock.mock_get_coverage
        # Two processing sites on the '+' strand, at positions 18 and 38.
        pro_dict = [{
            "seq_id": "aaa",
            "source": "tsspredator",
            "feature": "processing",
            "start": site,
            "end": site,
            "phase": ".",
            "strand": "+",
            "score": "."
        } for site in (18, 38)]
        attributes_pro = [{
            "ID": "processing0",
            "Name": "Processing_0"
        }, {
            "ID": "processing1",
            "Name": "Processing_1"
        }]
        pros = [Create_generator(entry, attributes, "gff")
                for entry, attributes in zip(pro_dict, attributes_pro)]
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 3
        args.fuzzy_tsss = {"3utr": 3}
        args.pros = pros
        args.utrs = []
        args.srnas = []
        pos = {"start": 2, "end": 50, "ori_start": 1, "ori_end": 25}
        sud.detect_twopro(self.example.inters[0], pos, self.example.wigs,
                          "interCDS", "interCDS", args)
        expected_srna = {
            'start_cleavage': 'Cleavage:18_+',
            'utr': 'interCDS',
            'datas': {
                'frag_1': [{
                    'type': 'frag',
                    'low': 10,
                    'final_start': 2,
                    'high': 50,
                    'avg': 41.36842105263158,
                    'final_end': 20,
                    'track': 'track_1',
                    'ori_avg': 27.52
                }]
            },
            'start_tss': 'NA',
            'end_cleavage': 'Cleavage:38_+',
            'strand': '+',
            'end': 38,
            'strain': 'aaa',
            'start': 18
        }
        self.assertListEqual(args.srnas, [expected_srna])
        # The UTR record is expected to differ only in start_cleavage.
        self.assertListEqual(args.utrs,
                             [dict(expected_srna, start_cleavage='NA')])

    def test_run_utr_detection(self):
        """Check the 5'UTR records ``sud.run_utr_detection`` adds to ``args``.

        ``sud.get_coverage`` is mocked for the call and restored afterwards,
        including when the call itself raises.
        """
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        args.fuzzy_tsss = {"5utr": "n_3"}
        args.utrs = []
        args.srnas = []
        args.tsss = self.example.tsss
        args.pros = self.example.pros
        # Register the restore before patching so it always runs.
        self.addCleanup(setattr, sud, "get_coverage", get_coverage)
        sud.get_coverage = self.mock.mock_get_coverage
        sud.run_utr_detection(self.example.wigs, self.example.inters[0], 2, 50,
                              "5utr", args)
        expected_srna = {
            'start': 1,
            'end': 50,
            'start_cleavage': 'NA',
            'datas': {
                'frag_1': [{
                    'high': 50,
                    'final_end': 20,
                    'avg': 41.36842105263158,
                    'low': 10,
                    'ori_avg': 27.52,
                    'final_start': 2,
                    'type': 'frag',
                    'track': 'track_1'
                }]
            },
            'start_tss': 'TSS:1_+',
            'strain': 'aaa',
            'strand': '+',
            'utr': '5utr',
            'end_cleavage': 'NA'
        }
        self.assertListEqual(args.srnas, [expected_srna])
        # The UTR record is expected to differ only in its start_tss field.
        self.assertListEqual(args.utrs, [dict(expected_srna, start_tss='NA')])

    def test_class_utr(self):
        """Check the 3'UTR records ``sud.class_utr`` adds to ``args``.

        ``sud.get_coverage`` is mocked for the call and restored afterwards,
        including when the call itself raises.
        """
        args = self.mock_args.mock()
        args.min_len = 1
        args.max_len = 300
        args.decrease_utr = 0.5
        args.fuzzy_utr = 2
        args.fuzzy_tsss = {"3utr": "p_3"}
        args.utrs = []
        args.srnas = []
        args.tsss = self.example.tsss
        args.pros = self.example.pros
        args.wig_fs = self.example.wigs
        # Register the restore before patching so it always runs.
        self.addCleanup(setattr, sud, "get_coverage", get_coverage)
        sud.get_coverage = self.mock.mock_get_coverage
        sud.class_utr(self.example.inters[0], self.example.tas[0], args,
                      args.wig_fs, args.wig_fs)

        def record(start, start_tss='NA', start_cleavage='NA'):
            # Expected record; only these three fields vary between entries.
            return {
                'end_cleavage': 'NA',
                'start_tss': start_tss,
                'utr': '3utr',
                'start_cleavage': start_cleavage,
                'end': 20,
                'start': start,
                'datas': {
                    'frag_1': [{
                        'ori_avg': 27.52,
                        'final_start': 2,
                        'avg': 41.36842105263158,
                        'track': 'track_1',
                        'type': 'frag',
                        'final_end': 20,
                        'low': 10,
                        'high': 50
                    }]
                },
                'strain': 'aaa',
                'strand': '+'
            }

        self.assertListEqual(args.srnas, [
            record(1, start_tss='TSS:1_+'),
            record(18, start_cleavage='Cleavage:18_+'),
        ])
        self.assertListEqual(args.utrs, [record(1), record(18)])

    def test_get_utr_coverage(self):
        """get_utr_coverage on a single 3'UTR entry.

        The expected mapping is strain -> UTR type -> track, and the only
        populated list holds the entry's ``ori_avg`` value (27.52).
        """
        frag_detail = {
            'final_end': 20,
            'track': 'track_1',
            'final_start': 2,
            'ori_avg': 27.52,
            'avg': 41.36842105263158,
            'type': 'frag',
            'low': 10,
            'high': 50,
        }
        utr_entry = {
            'strand': '+',
            'utr': '3utr',
            'end': 20,
            'start': 18,
            'start_tss': 'NA',
            'datas': {'frag_1': [frag_detail]},
            'end_cleavage': 'NA',
            'strain': 'aaa',
            'start_cleavage': 'NA',
        }
        expected = {
            'aaa': {
                'interCDS': {},
                '3utr': {'track_1': [27.52]},
                '5utr': {},
            },
        }
        self.assertDictEqual(sud.get_utr_coverage([utr_entry]), expected)

    def test_set_cutoff(self):
        """set_cutoff is checked twice on the same coverages.

        The first call runs with ``args.cover_notex`` configured, the second
        with ``args.cover_notex`` set to ``None``.
        """
        covers = {
            'aaa': {
                '5utr': {'track_4': [52, 11, 23]},
                'inter': {'track_3': [111]},
                'total': {'track_1': [27.52, 111]},
                '3utr': {'track_1': [27.52, 111]},
                'interCDS': {'track_2': [12, 0]},
            },
        }
        args = self.mock_args.mock()
        args.texs = {"track_4@AND@track_6": 0}
        args.coverages = {"5utr": "p_0.3", "3utr": "n_10", "interCDS": "p_0.5"}
        args.cover_notex = {"5utr": "p_0.3", "3utr": "n_10", "interCDS": "p_0.5"}

        # Pass 1: cover_notex is a dict; only track_4 is expected to get values.
        expected_with_notex = {
            'aaa': {
                '5utr': {
                    'track_4': {'median': 11, 'mean': 28.666666666666668},
                },
                'interCDS': {'track_2': {}},
                '3utr': {'track_1': {}},
            },
        }
        self.assertDictEqual(sud.set_cutoff(covers, args), expected_with_notex)

        # Pass 2: cover_notex disabled; every track is expected to get values.
        args.cover_notex = None
        expected_without_notex = {
            'aaa': {
                '3utr': {
                    'track_1': {'mean': 69.26, 'median': 10.0},
                },
                '5utr': {
                    'track_4': {'mean': 28.666666666666668, 'median': 11},
                },
                'interCDS': {
                    'track_2': {'mean': 6.0, 'median': 0},
                },
            },
        }
        self.assertDictEqual(sud.set_cutoff(covers, args),
                             expected_without_notex)

    def test_mean_score(self):
        """mean_score of [1, 3, 5, 6, 7, 8] is expected to be 5.0."""
        self.assertEqual(sud.mean_score([1, 3, 5, 6, 7, 8]), 5.0)

    def test_median_score(self):
        """median_score of the sample list with second argument 0.5 is 5."""
        values = [1, 3, 5, 6, 7, 8]
        self.assertEqual(sud.median_score(values, 0.5), 5)

    def test_detect_srna(self):
        """detect_srna writes one GFF line and one table row for a 3'UTR sRNA.

        ``sud.replicate_comparison`` is replaced by the mock for this call
        (the replacement is not undone here).
        """
        sud.replicate_comparison = self.mock.mock_replicate_comparison
        args = self.mock_args.mock()
        options = (
            ("min_len", 1),
            ("max_len", 300),
            ("decrease_utr", 0.5),
            ("fuzzy_utr", 2),
            ("coverages", "cover"),
            ("texs", "template_texs"),
            ("tex_notex", "tex_notex"),
            ("replicates", "rep"),
            ("table_best", True),
        )
        for option, value in options:
            setattr(args, option, value)
        # In-memory sinks for the GFF output and the table output.
        args.out = StringIO()
        args.out_t = StringIO()
        frag_detail = {
            'final_end': 20,
            'track': 'track_1',
            'final_start': 2,
            'ori_avg': 27.52,
            'avg': 41.36842105263158,
            'type': 'frag',
            'low': 10,
            'high': 50,
            "conds": ["frag"],
        }
        args.srnas = [{
            'strand': '+',
            'utr': '3utr',
            'end': 20,
            'start': 18,
            'start_tss': 'NA',
            'datas': {'frag_1': [frag_detail]},
            'end_cleavage': 'NA',
            'strain': 'aaa',
            'start_cleavage': 'Cleavage:18_+',
        }]
        median = {"aaa": {"3utr": 555}}
        sud.detect_srna(median, args)
        expected_gff = (
            "aaa\tANNOgesic\tncRNA\t18\t20\t.\t+\t.\t"
            "ID=aaa_srna_utr0;Name=UTR_sRNA_00000;sRNA_type=3utr;"
            "best_avg_coverage=500;best_high_coverage=700;"
            "best_low_coverage=400;with_TSS=NA;"
            "start_cleavage=Cleavage:18_+;end_cleavage=NA\n"
        )
        expected_table = (
            "aaa\t00000\t18\t20\t+\tfrag_1\ttrack_1\t500\t700\t400\t"
            "frag(avg=500;high=700;low=400)\n"
        )
        self.assertEqual(args.out.getvalue(), expected_gff)
        self.assertEqual(args.out_t.getvalue(), expected_table)

    def test_print_file(self):
        """print_file writes the GFF line and table row for positions 2-50.

        The coverage numbers in the expected text (500/700/400) are the
        ``best``/``high``/``low`` values of ``srna_datas``.
        """
        args = self.mock_args.mock()
        options = (
            ("min_len", 1),
            ("max_len", 300),
            ("decrease_utr", 0.5),
            ("fuzzy_utr", 2),
            ("coverages", "cover"),
            ("texs", "template_texs"),
            ("tex_notex", "tex_notex"),
            ("replicates", "rep"),
            ("table_best", True),
        )
        for option, value in options:
            setattr(args, option, value)
        # In-memory sinks for the GFF output and the table output.
        args.out = StringIO()
        args.out_t = StringIO()
        frag_detail = {
            'final_end': 20,
            'track': 'track_1',
            'final_start': 2,
            'ori_avg': 27.52,
            'avg': 41.36842105263158,
            'type': 'frag',
            'low': 10,
            'high': 50,
            "conds": ["frag"],
        }
        srna = {
            'strand': '+',
            'utr': '3utr',
            'end': 20,
            'start': 18,
            'start_tss': 'NA',
            'datas': {'frag_1': [frag_detail]},
            'end_cleavage': 'NA',
            'strain': 'aaa',
            'start_cleavage': 'Cleavage:18_+',
        }
        srna_datas = {
            "best": 500,
            "track": "frag",
            "high": 700,
            "low": 400,
            "start": 100,
            "end": 202,
            "conds": {"frag_1": "track_1"},
        }
        sud.print_file(0, srna, 2, 50, srna_datas, args)
        expected_gff = (
            "aaa\tANNOgesic\tncRNA\t2\t50\t.\t+\t.\t"
            "ID=aaa_srna_utr0;Name=UTR_sRNA_00000;sRNA_type=3utr;"
            "best_avg_coverage=500;best_high_coverage=700;"
            "best_low_coverage=400;with_TSS=NA;"
            "start_cleavage=Cleavage:18_+;end_cleavage=NA\n"
        )
        expected_table = (
            "aaa\t00000\t2\t50\t+\tfrag_1\ttrack_1\t500\t700\t400\t"
            "frag(avg=500;high=700;low=400)\n"
        )
        self.assertEqual(args.out.getvalue(), expected_gff)
        self.assertEqual(args.out_t.getvalue(), expected_table)
Exemplo n.º 56
0
class TestsTSSpredator(unittest.TestCase):
    """Tests for ``UTRDetection`` (the class name is kept unchanged).

    Every test works inside the scratch folder ``test_folder``, which is
    prepared in ``setUp`` and deleted in ``tearDown``.
    """

    def setUp(self):
        """Create the mocks, the scratch folder tree and the object under test."""
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.mock_parser = Mock_Multiparser()
        self.example = Example()
        self.test_folder = "test_folder"
        self.trans = "test_folder/trans"
        self.out = "test_folder/output"
        self.gffs = "test_folder/gffs"
        self.tsss = "test_folder/tsss"
        self.terms = "test_folder/terms"
        # Create every folder individually with exist_ok=True: a stale
        # ``test_folder`` left behind by an aborted run must not cause the
        # sub-folders to be skipped.
        folders = (
            self.test_folder,
            self.trans,
            os.path.join(self.trans, "tmp"),
            self.out,
            self.gffs,
            self.tsss,
            os.path.join(self.tsss, "tmp"),
            self.terms,
        )
        for folder in folders:
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.tsss = self.tsss
        args.trans = self.trans
        args.out_folder = self.out
        self.utr = UTRDetection(args)

    def tearDown(self):
        """Remove the scratch folder and everything below it."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _patch_utr_detectors(self):
        """Replace ``ut.detect_5utr``/``ut.detect_3utr`` by mocks for one test.

        The original functions are restored through ``addCleanup`` so the
        replacement cannot leak into tests that run afterwards.
        """
        self.addCleanup(setattr, ut, "detect_5utr", ut.detect_5utr)
        self.addCleanup(setattr, ut, "detect_3utr", ut.detect_3utr)
        ut.detect_5utr = self.mock.mock_detect_5utr
        ut.detect_3utr = self.mock.mock_detect_3utr

    def _make_utr_output_dirs(self):
        """Create the 5UTRs/3UTRs output trees below ``self.out``.

        Returns:
            Tuple ``(utr5_path, utr3_path, utr5_stat_path, utr3_stat_path)``.
        """
        utr5_path = os.path.join(self.out, "5UTRs")
        utr3_path = os.path.join(self.out, "3UTRs")
        utr5_stat_path = os.path.join(utr5_path, "statistics")
        utr3_stat_path = os.path.join(utr3_path, "statistics")
        for utr_path, stat_path in ((utr5_path, utr5_stat_path),
                                    (utr3_path, utr3_stat_path)):
            os.mkdir(utr_path)
            os.mkdir(os.path.join(utr_path, "gffs"))
            os.mkdir(stat_path)
        return utr5_path, utr3_path, utr5_stat_path, utr3_stat_path

    def _write_input_files(self, term_folder):
        """Write the example annotation, transcript, TSS and terminator files.

        Args:
            term_folder: folder that receives ``test_term.gff``.
        """
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gen_file(os.path.join(self.trans, "test_transcript.gff"),
                 self.example.tran_file)
        gen_file(os.path.join(self.tsss, "test_TSS.gff"),
                 self.example.tss_file)
        gen_file(os.path.join(term_folder, "test_term.gff"),
                 self.example.term_file)

    def _assert_length_plots_exist(self, utr5_stat_path, utr3_stat_path):
        """Assert that both UTR length plots were written."""
        self.assertTrue(
            os.path.exists(os.path.join(utr5_stat_path,
                                        "test_5utr_length.png")))
        self.assertTrue(
            os.path.exists(os.path.join(utr3_stat_path,
                                        "test_3utr_length.png")))

    def test_compute_utr(self):
        """_compute_utr produces the 5'UTR and 3'UTR length plots."""
        self._patch_utr_detectors()
        term_path = os.path.join(self.terms, "tmp")
        os.mkdir(term_path)
        (utr5_path, utr3_path,
         utr5_stat_path, utr3_stat_path) = self._make_utr_output_dirs()
        self._write_input_files(term_path)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.tsss = self.tsss
        args.trans = self.trans
        args.terms = self.terms
        # Context manager: the log handle is closed even if the call raises,
        # so tearDown can always delete the folder.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.utr._compute_utr(args, log)
        self._assert_length_plots_exist(utr5_stat_path, utr3_stat_path)
        shutil.rmtree(utr5_path)
        shutil.rmtree(utr3_path)

    def test_run_utr_detection(self):
        """run_utr_detection produces the 5'UTR and 3'UTR length plots."""
        self.utr._check_gff = self.mock.mock_check_gff
        self._patch_utr_detectors()
        (_utr5_path, _utr3_path,
         utr5_stat_path, utr3_stat_path) = self._make_utr_output_dirs()
        self._write_input_files(self.terms)
        args = self.mock_args.mock()
        args.tsss = self.tsss
        args.gffs = self.gffs
        args.trans = self.trans
        args.terms = self.terms
        args.out_folder = self.out
        # Context manager: the log handle is closed even if the call raises.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.utr.run_utr_detection(args, log)
        self._assert_length_plots_exist(utr5_stat_path, utr3_stat_path)
Exemplo n.º 57
0
class TestGensRNAOutput(unittest.TestCase):
    """Tests for the sRNA output functions of the module imported as ``gso``."""

    def setUp(self):
        """Create the fixtures and make sure the scratch folder exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        # exist_ok=True avoids the check-then-create race of exists()/mkdir().
        os.makedirs(self.test_folder, exist_ok=True)

    def tearDown(self):
        """Remove the scratch folder and everything below it."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_merge_info(self):
        """merge_info joins the hits of identical entries with ';'.

        Four ``aaa`` entries go in; the expected merged entry carries the
        first three hits only.
        """
        blasts = [{"strain": "aaa", "strand": "+", "start": 20, "end": 70, "hits": "111"},
                  {"strain": "aaa", "strand": "+", "start": 20, "end": 70, "hits": "222"},
                  {"strain": "aaa", "strand": "+", "start": 20, "end": 70, "hits": "333"},
                  {"strain": "aaa", "strand": "+", "start": 20, "end": 70, "hits": "444"},
                  {"strain": "bbb", "strand": "+", "start": 20, "end": 70, "hits": "555"}]
        merge = gso.merge_info(blasts)
        self.assertDictEqual(merge[0], {'hits': '111;222;333', 'start': 20, 'strand': '+', 'strain': 'aaa', 'end': 70})
        self.assertDictEqual(merge[1], {'hits': '555', 'start': 20, 'strand': '+', 'strain': 'bbb', 'end': 70})

    def test_compare_srna_table(self):
        """compare_srna_table extends ``final`` with the matching table row."""
        final = {"energy": -23, "utr": "3UTR"}
        srna_dict = {"seq_id": "aaa", "source": "Refseq", "feature": "sRNA", "start": 300,
                     "end": 367, "phase": ".", "strand": "+", "score": "."}
        attributes_srna = {"ID": "srna0", "Name": "sRNA_0"}
        srna = Create_generator(srna_dict, attributes_srna, "gff")
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        new_final = gso.compare_srna_table(self.example.srna_tables, srna, final, args)
        self.assertDictEqual(new_final, {'end_pro': 'NA', 'strand': '+', 'strain': 'aaa',
                                         'avg': 100, 'type': 'TEX+/-;Fragmented',
                                         'conds': 'tex_frag', 'candidates': '300-367',
                                         'tss_pro': 'TSS:300_+', 'start': 300, 'utr': '3UTR',
                                         'energy': -23, 'end': 367})

    def test_compare(self):
        """Each entry returned by compare equals the example entry at that index."""
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        finals = gso.compare(self.example.srnas, self.example.srna_tables,
                             self.example.nr_blasts, self.example.srna_blasts, args)
        for index, final in enumerate(finals):
            self.assertDictEqual(final, self.example.finals[index])

    def test_gen_best_srna(self):
        """The last non-comment line written by gen_best_srna is the ``bbb`` sRNA."""
        # Restore the real reader afterwards so the mock cannot leak into
        # tests that run later.
        self.addCleanup(setattr, gso, "read_gff", gso.read_gff)
        gso.read_gff = Mock_func().mock_read_gff
        args = self.mock_args.mock()
        args.min_len = 30
        args.max_len = 500
        args.nr_hits_num = 0
        args.energy = 0
        args.import_info = ["term", "sec_str", "sorf"]
        out_file = os.path.join(self.test_folder, "test.out")
        gso.gen_best_srna("test.srna", out_file, args)
        # Stays None when the file holds no data line, so the assertion below
        # fails cleanly instead of raising NameError.
        data = None
        with open(out_file) as fh:
            for line in fh:
                if not line.startswith("#"):
                    # Compare every column except the last one.
                    data = "\t".join(line.split("\t")[:-1])
        self.assertEqual(data, "bbb\tintergenic\tsRNA\t18\t50\t.\t-\t.")
Exemplo n.º 58
0
class TestsRNAIntergenic(unittest.TestCase):
    """Tests for the intergenic sRNA detection module imported as ``si``.

    Several tests replace ``si`` module functions by mocks from
    ``Mock_func``.  Only ``si.get_coverage`` is put back afterwards; the
    other replacements are not undone by these tests.
    """

    def setUp(self):
        """Create the fixtures and the scratch folders used by the tests."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.wig_folder = "test_folder/wigs"
        # exist_ok=True: a stale ``test_folder`` from an aborted run must not
        # prevent the ``wigs`` sub-folder from being created.
        os.makedirs(self.test_folder, exist_ok=True)
        os.makedirs(self.wig_folder, exist_ok=True)

    def tearDown(self):
        """Remove the scratch folder and everything below it."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    @staticmethod
    def _new_nums():
        """Return a fresh counter dict that tests assign to ``args.nums``."""
        return {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}

    @staticmethod
    def _new_cutoffs(antisense=50):
        """Return a fresh cutoff dict keyed by primary/secondary/internal/antisense/orphan.

        Args:
            antisense: value stored under ``"antisense"``; the other values
                are the same in every test.
        """
        return {
            "primary": 0,
            "secondary": 0,
            "internal": 0,
            "antisense": antisense,
            "orphan": 10
        }

    def test_read_data(self):
        """read_data reports three features per input and keeps the first start."""
        gff_file = os.path.join(self.test_folder, "anno.gff")
        tran_file = os.path.join(self.test_folder, "tran.gff")
        pro_file = os.path.join(self.test_folder, "pro.gff")
        gen_file(gff_file, self.example.gff_file)
        gen_file(tran_file, self.example.gff_file)
        gen_file(pro_file, self.example.gff_file)
        args = self.mock_args.mock()
        args.gff_file = gff_file
        args.tran_file = tran_file
        args.pro_file = pro_file
        args.ex_srna = False
        # The last two returned values are not checked by this test.
        nums, cdss, tas, pros, _genes, _ncs = si.read_data(args)
        self.assertDictEqual(nums, {'ta': 3, 'cds': 3, 'pro': 3, 'uni': 0})
        self.assertEqual(cdss[0].start, 140)
        self.assertEqual(tas[0].start, 140)
        self.assertEqual(pros[0].start, 140)

    def test_read_tss(self):
        """read_tss returns entries whose first start is 140."""
        tss_file = os.path.join(self.test_folder, "tss.gff")
        gen_file(tss_file, self.example.gff_file)
        tsss, _num_tss = si.read_tss(tss_file)
        self.assertEqual(tsss[0].start, 140)

    def test_compare_ta_cds(self):
        """compare_ta_cds flips ``detects['overlap']`` to True."""
        detects = {"overlap": False}
        gffs = copy.deepcopy(self.example.gffs)
        tas = copy.deepcopy(self.example.tas)
        si.compare_ta_cds(gffs, tas[0], detects)
        self.assertDictEqual(detects, {'overlap': True})

    def test_compare_ta_tss(self):
        """compare_ta_tss writes a GFF line and table row for positions 10-15."""
        out_table = StringIO()
        output = StringIO()
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.nums = self._new_nums()
        args.fuzzy = 20
        args.detects = {"overlap": False, "uni_with_tss": False}
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.output = output
        args.out_table = out_table
        tas = copy.deepcopy(self.example.tas)
        tsss = copy.deepcopy(self.example.tsss)
        si.get_coverage = self.mock.mock_get_coverage
        try:
            si.compare_ta_tss(10, 2, 15, tas[0], tsss[0], 50, "cutoff", 20, "",
                              args)
        finally:
            # Put the real function back even when the call raises, so the
            # mock cannot leak into other tests.
            si.get_coverage = get_coverage
        self.assertEqual(output.getvalue(),
                         ("aaa\tANNOgesic\tncRNA\t10\t15\t.\t+\t.\t"
                          "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
                          "with_TSS=TSS:170_+\n"))
        self.assertEqual(out_table.getvalue(),
                         ("aaa\t00000\t10\t15\t+\tNA\tNA\t"
                          "NA\tNA\tNA\tTSS:170_+\n"))

    def test_print_file(self):
        """print_file writes the detail list when ``table_best`` is False."""
        string = "aaa\tintergenic\tsRNA\t10\t15\t.\t+\t."
        out_table = StringIO()
        output = StringIO()
        srna_datas = {
            "high": 20,
            "low": 5,
            "best": 13,
            "conds": {
                "cond1": "test1"
            },
            "detail": [{
                "track": "test1",
                "high": 30,
                "low": 10,
                "avg": 15
            }, {
                "track": "test2",
                "high": 25,
                "low": 13,
                "avg": 20
            }]
        }
        args = self.mock_args.mock()
        args.nums = self._new_nums()
        args.out_table = out_table
        args.output = output
        args.table_best = False
        si.print_file(string, "TSS_160+", srna_datas, "intergenic", args,
                      "aaa")
        self.assertEqual(out_table.getvalue(),
                         ("aaa\t00000\t10\t15\t+\tcond1\ttest1\t13\t20\t5\t"
                          "TSS_160+\ttest1(avg=15;high=30;low=10);"
                          "test2(avg=20;high=25;low=13)\n"))
        self.assertEqual(output.getvalue(),
                         ("aaa\tintergenic\tsRNA\t10\t15\t.\t+\t.\t"
                          "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
                          "with_TSS=TSS_160+;best_avg_coverage=13;"
                          "best_high_coverage=20;best_low_coverage=5\n"))

    def test_detect_include_tss(self):
        """detect_include_tss writes a GFF line and table row for 170-230."""
        out_table = StringIO()
        output = StringIO()
        # The same dict object serves as cutoff_coverage and as notex.
        coverage = self._new_cutoffs()
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.nums = self._new_nums()
        args.fuzzy = 20
        args.detects = {"overlap": False, "uni_with_tss": False}
        args.cutoff_coverage = coverage
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.notex = coverage
        args.file_type = "frag"
        args.break_tran = False
        args.output = output
        args.out_table = out_table
        tas = copy.deepcopy(self.example.tas)
        si.get_coverage = self.mock.mock_get_coverage
        try:
            si.detect_include_tss(tas[0], args, None, args.wigs_f,
                                  args.wigs_r)
        finally:
            # Put the real function back even when the call raises.
            si.get_coverage = get_coverage
        self.assertEqual(output.getvalue(),
                         ("aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\t"
                          "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
                          "with_TSS=TSS:170_+\n"))
        self.assertEqual(out_table.getvalue(),
                         ("aaa\t00000\t170\t230\t+\tNA\tNA\tNA"
                          "\tNA\tNA\tTSS:170_+\n"))

    def test_get_differential_cover(self):
        """get_differential_cover updates ``cover_sets`` and then ``poss``."""
        checks = {"detect_diff": True, "first": True}
        cover_sets = {"diff": 30, "low": 5, "high": 35}
        cover = 20
        poss = {"stop_point": 100}
        args = self.mock_args.mock()
        args.fuzzy_inter = 10
        args.decrease_inter = 200
        si.get_differential_cover(0, checks, cover_sets, poss, cover, args, 80)
        self.assertDictEqual(cover_sets, {'diff': 20, 'low': 20, 'high': 35})
        # Second call: higher coverage, new ``poss`` and a larger fuzzy_inter.
        cover = 50
        poss = {"stop_point": 100}
        num = 20
        args.fuzzy_inter = 20
        si.get_differential_cover(num, checks, cover_sets, poss, cover, args,
                                  80)
        self.assertDictEqual(poss, {"stop_point": 80})

    def test_check_coverage_pos(self):
        """check_coverage_pos returns a falsy value and leaves ``poss`` unchanged."""
        si.coverage_comparison = self.mock.mock_coverage_comparison
        cover_sets = {"low": 20, "high": 30, "total": 90, "diff": 50}
        poss = {"high": 20, "low": 70, "stop_point": 70}
        checks = {"detect_diff": True, "first": True}
        cover = {"coverage": 50, "pos": 80}
        detect = si.check_coverage_pos(30, 100, cover, 80, cover_sets, checks,
                                       poss, "+", 5)
        self.assertFalse(detect)
        self.assertDictEqual(poss, {'high': 20, 'stop_point': 70, 'low': 70})

    def test_get_best(self):
        """get_best on the example wigs returns one ``frag_1`` entry."""
        args = self.mock_args.mock()
        args.tolerance = 5
        args.fuzzy_inter = 5
        args.decrease_inter = 50
        datas = si.get_best(self.example.wigs, "aaa", "+", 2, 20, "normal",
                            args, 10)
        self.assertDictEqual(
            datas, {
                'frag_1': [{
                    'low': -1,
                    'high': -1,
                    'avg': 30.7,
                    'pos': 21,
                    'type': 'frag',
                    'track': 'track_1'
                }]
            })

    def test_get_attribute_string(self):
        """get_attribute_string builds the GFF attribute column text."""
        srna_datas = {'best': 23, 'low': 20, 'high': 35}
        data = si.get_attribute_string(srna_datas, "TSS_100+;Cleavage_150+", 1,
                                       "sRNA_00001", "3utr", "aaa")
        self.assertEqual(data,
                         ("ID=aaa_srna1;Name=sRNA_sRNA_00001;sRNA_type=3utr;"
                          "with_TSS=TSS_100+;end_cleavage=Cleavage_150+;"
                          "best_avg_coverage=23;best_high_coverage=35;"
                          "best_low_coverage=20"))

    def test_check_pro(self):
        """check_pro returns position 190 and the ``Cleavage:190_+`` label."""
        si.replicate_comparison = self.mock.mock_replicate_comparison
        srna_datas = {"pos": 50}
        # NOTE(review): this is a set literal, not a dict; presumably harmless
        # because replicate_comparison is mocked in this test - confirm.
        texs = {"track_1@AND@track_2"}
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.replicates = "rep"
        args.texs = texs
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        pro_pos, new_srna_datas, detect_pro = si.check_pro(
            tas[0], 20, 70, srna_datas, "within", 5, self.example.wigs, 20,
            args)
        self.assertEqual(pro_pos, 190)
        self.assertDictEqual(
            new_srna_datas, {
                'best': 40,
                'high': 50,
                'low': 10,
                "pos": 5,
                "conds": {
                    "cond1": "test1"
                },
                "detail": None
            })
        self.assertEqual(detect_pro, "Cleavage:190_+")

    def test_exchange_to_pro(self):
        """exchange_to_pro detects the site and returns data with ``pos`` 190."""
        srna_datas = {"pos": 50, "best": 10, "high": 12}
        args = self.mock_args.mock()
        args.max_len = 300
        args.min_len = 30
        args.table_best = True
        args.replicates = "rep"
        args.tex_notex = "tex_notex"
        args.texs = "texs"
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.tolerance = 5
        si.replicate_comparison = self.mock.mock_replicate_comparison
        detect, srna_datas, pro = si.exchange_to_pro(args, srna_datas, tas[0],
                                                     20, 70, 10,
                                                     self.example.wigs, 20)
        self.assertTrue(detect)
        self.assertDictEqual(
            srna_datas, {
                'best': 40,
                'high': 50,
                'low': 10,
                'pos': 190,
                "conds": {
                    "cond1": "test1"
                },
                "detail": None
            })
        self.assertEqual(pro, "Cleavage:190_+")

    def test_get_tss_type(self):
        """get_tss_type returns 10 for the first example TSS."""
        coverage = self._new_cutoffs()
        si.check_break_tran = self.mock.mock_check_break_tran
        cover = si.get_tss_type(self.example.tsss[0], coverage, None, None,
                                None, False)
        self.assertEqual(cover, 10)

    def test_detect_wig_pos(self):
        """detect_wig_pos writes a GFF line and table row ending at 190."""
        si.replicate_comparison = self.mock.mock_replicate_comparison
        out_table = StringIO()
        output = StringIO()
        args = self.mock_args.mock()
        args.texs = "texs"
        args.replicates = "rep"
        args.max_len = 300
        args.min_len = 30
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tex_notex = "tex_notex"
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.table_best = True
        args.nums = self._new_nums()
        args.out_table = out_table
        args.output = output
        args.tolerance = 5
        si.detect_wig_pos(self.example.wigs, tas[0], 20, 70, "TSS_160+", 10,
                          20, args)
        self.assertEqual(output.getvalue(),
                         ("aaa\tANNOgesic\tncRNA\t20\t190\t.\t+\t.\t"
                          "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
                          "with_TSS=TSS_160+;end_cleavage=Cleavage:190_+;"
                          "best_avg_coverage=40;best_high_coverage=50;"
                          "best_low_coverage=10\n"))
        self.assertEqual(out_table.getvalue(),
                         ("aaa\t00000\t20\t190\t+\tcond1\t"
                          "test1\t40\t50\t10\t\n"))

    def test_detect_longer(self):
        """detect_longer writes a GFF line and table row for 170-230."""
        si.replicate_comparison = self.mock.mock_replicate_comparison
        si.coverage_comparison = self.mock.mock_coverage_comparison
        out_table = StringIO()
        output = StringIO()
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.nums = self._new_nums()
        args.fuzzy = 20
        args.file_type = "frag"
        args.break_tran = False
        args.detects = {"overlap": False, "uni_with_tss": False}
        args.cutoff_coverage = self._new_cutoffs()
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.notex = 20
        args.output = output
        args.out_table = out_table
        si.get_tss_type = self.mock.mock_get_tss_type
        si.detect_longer(tas[0], args, None, args.wigs_f, args.wigs_r)
        self.assertEqual(output.getvalue(),
                         ("aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\t"
                          "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
                          "with_TSS=TSS:170_+\n"))
        self.assertEqual(out_table.getvalue(),
                         ("aaa\t00000\t170\t230\t+\tNA\tNA\tNA"
                          "\tNA\tNA\tTSS:170_+\n"))

    def test_get_proper_tss(self):
        """get_proper_tss returns entries whose first start is 140."""
        tss_file = os.path.join(self.test_folder, "tss.gff")
        gen_file(tss_file, self.example.gff_file)
        coverage = self._new_cutoffs()
        tsss, _num_tss = si.get_proper_tss(tss_file, coverage)
        self.assertEqual(tsss[0].start, 140)

    def test_check_srna_condition(self):
        """check_srna_condition writes a GFF line and table row for 170-230."""
        si.replicate_comparison = self.mock.mock_replicate_comparison
        si.coverage_comparison = self.mock.mock_coverage_comparison
        out_table = StringIO()
        output = StringIO()
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.nums = self._new_nums()
        args.fuzzy = 20
        args.detects = {"overlap": False, "uni_with_tss": False}
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.file_type = "frag"
        args.break_tran = False
        # notex differs from cutoff_coverage only in the antisense value.
        args.notex = self._new_cutoffs(antisense=30)
        args.output = output
        args.cutoff_coverage = self._new_cutoffs()
        args.out_table = out_table
        si.check_srna_condition(tas[0], args, None, args.wigs_f, args.wigs_r)
        self.assertEqual(output.getvalue(),
                         ("aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\t"
                          "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
                          "with_TSS=TSS:170_+\n"))
        self.assertEqual(out_table.getvalue(),
                         ("aaa\t00000\t170\t230\t+\tNA\tNA\t"
                          "NA\tNA\tNA\tTSS:170_+\n"))

    def test_intergenic_srna(self):
        """intergenic_srna creates both the output file and the output table."""
        si.read_libs = self.mock.mock_read_libs
        si.read_wig = self.mock.mock_read_wig
        gff_file = os.path.join(self.test_folder, "aaa.gff")
        tss_file = os.path.join(self.test_folder, "aaa_TSS.gff")
        tran_file = os.path.join(self.test_folder, "aaa_tran.gff")
        pro_file = os.path.join(self.test_folder, "aaa_processing.gff")
        wig_f_file = os.path.join(self.wig_folder, "wig_f.wig")
        wig_r_file = os.path.join(self.wig_folder, "wig_r.wig")
        # All four inputs are written from the same example GFF text.
        gen_file(gff_file, self.example.gff_file)
        gen_file(tss_file, self.example.gff_file)
        gen_file(tran_file, self.example.gff_file)
        gen_file(pro_file, self.example.gff_file)
        output_file = os.path.join(self.test_folder, "output")
        output_table = os.path.join(self.test_folder, "table")
        coverage = [0, 0, 0, 50, 10]
        si.replicate_comparison = self.mock.mock_replicate_comparison
        si.coverage_comparison = self.mock.mock_coverage_comparison
        args = self.mock_args.mock()
        args.gff_file = gff_file
        args.tran_file = tran_file
        args.pro_file = pro_file
        args.tss_file = tss_file
        args.table_best = True
        args.cutoffs = coverage
        args.out_folder = self.test_folder
        args.file_type = "frag"
        args.cut_notex = coverage
        args.input_libs = "input_libs"
        args.wig_folder = self.wig_folder
        args.wig_f_file = wig_f_file
        args.wig_r_file = wig_r_file
        args.tss_source = True
        args.output_file = output_file
        args.output_table = output_table
        args.in_cds = False
        args.wigs_f = None
        args.wigs_r = None
        args.ex_srna = False
        si.intergenic_srna(args, args.input_libs, None, args.wigs_f,
                           args.wigs_r)
        self.assertTrue(os.path.exists(output_file))
        self.assertTrue(os.path.exists(output_table))
Exemplo n.º 59
0
class TestGetPolyT(unittest.TestCase):
    def setUp(self):
        """Build the fixture objects and make sure the scratch folder exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        folder_missing = not os.path.exists(self.test_folder)
        if folder_missing:
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch folder, if it is present, after each test."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_filter_term(self):
        """Check the first two entries ``gpt.filter_term`` puts in ``terms``."""

        def entry(strain, start, end, stem, miss):
            # Every dict in this test has equal left/right stem values and
            # ``print`` set to False; only the arguments vary.
            return {
                "r_stem": stem,
                "l_stem": stem,
                "miss": miss,
                "print": False,
                "strain": strain,
                "end": end,
                "start": start,
            }

        candidates = [
            entry("aaa", 30, 500, 6, 2),
            entry("aaa", 50, 400, 6, 0),
            entry("aaa", 10, 50, 3, 2),
            entry("bbb", 60, 450, 6, 0),
        ]
        kept = []
        gpt.filter_term(candidates, kept, 0.25)
        self.assertDictEqual(kept[0], entry("aaa", 30, 500, 6, 0))
        self.assertDictEqual(kept[1], entry("bbb", 60, 450, 6, 0))

    def test_check_sec(self):
        """Compare ``gpt.check_sec`` results for two dot/parenthesis strings."""
        # Each case: (structure string, second argument, expected features,
        # expected detect flags).
        cases = [
            (
                "...((((((((..))))))..))....",
                26,
                dict(rights=8, loop=2, real_miss=2, lefts=8, l_stem=0,
                     st_pos=23, r_stem=10, tmp_miss=2),
                dict(detect_l=True, conflict=False, detect_r=True),
            ),
            (
                "........))))))..))....",
                21,
                dict(r_stem=0, loop=0, lefts=0, l_stem=0, st_pos=21,
                     tmp_miss=10, rights=8, real_miss=2),
                dict(detect_r=True, conflict=False, detect_l=False),
            ),
        ]
        for structure, position, want_features, want_detect in cases:
            got_features, got_detect = gpt.check_sec(structure, position)
            self.assertDictEqual(got_features, want_features)
            self.assertDictEqual(got_detect, want_detect)

    def test_detect_candidates(self):
        """Check the candidate dicts returned by ``gpt.detect_candidates``.

        The returned list must contain exactly the four reference dicts
        below, in order. The length is asserted explicitly so that a
        shorter (or empty) result cannot pass unnoticed.
        """
        seq = "GATCGGCAGTATTAAACGTACTTTTTTTTTT"
        sec = "...((((((((....))))))..))......"
        args = self.mock_args.mock()
        args.max_loop = 10
        args.min_loop = 3
        args.max_stem = 20
        args.min_stem = 4
        args.miss_rate = 0.25
        args.at_tail = 3
        args.range_u = 6
        cands = gpt.detect_candidates(seq, sec, "test", "aaa", 30, 58,
                                      "gene_0", "gene_1", "+", args, "10-24",
                                      "70-100")
        # Fields that are identical in every expected candidate.
        shared = {
            'strain': 'aaa',
            'print': False,
            'parent_m': 'gene_1',
            'parent_p': 'gene_0',
            'detect_m': False,
            'detect_p': False,
            'loop': 4,
            'name': 'test',
            'strand': '+',
            "p_pos": "10-24",
            "m_pos": "70-100",
        }
        # Fields that differ from one expected candidate to the next.
        variations = [
            {'l_stem': 4, 'r_stem': 4, 'ut': 4, 'start': 27, 'end': 58,
             'length': 12, 'miss': 0},
            {'l_stem': 5, 'r_stem': 5, 'ut': 5, 'start': 26, 'end': 59,
             'length': 14, 'miss': 0},
            {'l_stem': 6, 'r_stem': 6, 'ut': 6, 'start': 25, 'end': 60,
             'length': 16, 'miss': 0},
            {'l_stem': 7, 'r_stem': 9, 'ut': 6, 'start': 24, 'end': 63,
             'length': 20, 'miss': 2},
        ]
        refs = [dict(shared, **varying) for varying in variations]
        # Without this check an empty or truncated result would skip the
        # per-candidate comparisons and the test would pass vacuously.
        self.assertEqual(len(cands), len(refs))
        for cand, ref in zip(cands, refs):
            self.assertDictEqual(cand, ref)

    def test_check_parent(self):
        """Check the value ``gpt.check_parent`` returns for each strand."""
        term = dict(strain="aaa", start=11, end=14)
        detects = dict(parent_p=False, parent_m=False)
        # (strand, key passed as last argument, expected return value)
        lookups = (
            ("+", "parent_p", "gene_0"),
            ("-", "parent_m", "gene_1"),
        )
        for strand, key, expected in lookups:
            found = gpt.check_parent(self.example.cdss, term, detects,
                                     strand, 3, 3, key)
            self.assertEqual(found, expected)

    def test_parents(self):
        """Check the two term dicts after ``gpt.parents`` has processed them."""
        gene_term = {
            "strain": "aaa", "start": 11, "end": 14,
            "parent_p": "gene_0", "parent_m": "gene_1",
            "p_pos": "3-5", "m_pos": "20-50",
        }
        tran_term = {
            "strain": "aaa", "start": 12, "end": 15,
            "parent_p": "tran0:1-11_+", "parent_m": "tran1:16-30_-",
            "p_pos": "1-11", "m_pos": "16-30",
        }
        terms = [gene_term, tran_term]
        args = self.mock_args.mock()
        for option in ("fuzzy_up_gene", "fuzzy_up_ta",
                       "fuzzy_down_gene", "fuzzy_down_ta"):
            setattr(args, option, 10)
        gpt.parents(terms, self.example.cdss, args)
        # Expected values are spelled out independently of the input dicts,
        # which the call may have modified in place.
        self.assertDictEqual(
            terms[0],
            dict(parent_p='gene_0', parent_m='gene_1', start=11,
                 strain='aaa', end=14, p_pos="3-5", m_pos="20-50"))
        self.assertDictEqual(
            terms[1],
            dict(parent_p='tran0:1-11_+,gene_0',
                 parent_m='tran1:16-30_-,gene_1', start=12,
                 strain='aaa', end=15, p_pos="1-11", m_pos="16-30"))

    def test_compare_anno(self):
        """Call ``gpt.compare_anno`` and check the first two input terms."""
        spans = [(11, 14, "+"), (9, 18, "-"), (209, 218, "-")]
        terms = [
            {"strain": "aaa", "start": start, "end": end, "strand": strand}
            for start, end, strand in spans
        ]
        # NOTE(review): the value returned by compare_anno is discarded and
        # only the input list is asserted on -- confirm whether the checks
        # below were meant to target the returned value instead.
        gpt.compare_anno(self.example.cdss, terms, 3, 3)
        self.assertDictEqual(
            terms[0],
            dict(strand='+', start=11, strain='aaa', end=14))
        self.assertDictEqual(
            terms[1],
            dict(strain="aaa", start=9, end=18, strand="-"))
Exemplo n.º 60
0
class TestTranscriptAssembly(unittest.TestCase):
    def setUp(self):
        """Build the fixture objects and make sure the scratch folder exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        folder_missing = not os.path.exists(self.test_folder)
        if folder_missing:
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch folder, if it is present, after each test."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_read_wig(self):
        """Compare ``ta.read_wig`` output with the example, entry by entry."""
        libs = [dict(name="test1", type="frag", cond="frag_1",
                     strand="+", rep="a")]
        wig_path = os.path.join(self.test_folder, "test_f.wig")
        gen_file(wig_path, self.example.wig_f)
        wigs = ta.read_wig(wig_path, "+", libs)
        track = "test1|+|frag"
        for pos, entry in enumerate(wigs["aaa"]['frag_1'][track]):
            self.assertEqual(
                entry, self.example.wigs_nf["aaa"]['frag_1'][track][pos])

    def test_detect_hight_toler(self):
        """Check ``tmp_covers`` after one ``ta.detect_hight_toler`` call."""
        tmp_covers = dict(best=10, toler=2)
        # Positional arguments: cover, height, tmp_covers, tracks, and the
        # track string.
        ta.detect_hight_toler(100, 5, tmp_covers, [], "test_1|+|frag")
        self.assertDictEqual(tmp_covers, dict(best=100, toler=2))

    def test_check_tex_conds(self):
        """Check the ``conds`` dict filled in by ``ta.check_tex_conds``."""

        def lib(name, kind, cond):
            # Both libraries in this test use strand "+" and replicate "a".
            return {"name": name, "type": kind, "cond": cond,
                    "strand": "+", "rep": "a"}

        libs = [lib("test1", "frag", "1"), lib("test2", "tex", "2")]
        conds = {}
        # Positional arguments: tracks, libs, texs, check_tex, conds, 1.
        ta.check_tex_conds(["test1", "test2"], libs,
                           {"test1": 2, "test2": 2}, [], conds, 1)
        self.assertDictEqual(conds, {'1': 1, '2': 1})

    def test_elongation(self):
        """Check ``trans`` after ``ta.elongation`` runs on two coverage lists."""
        nine_zeros = [0] * 9
        covers = {
            "texnotex_1": {
                "test1|+|texnotex_1": nine_zeros + [1, 20],
                "test2|+|texnotex_1": nine_zeros + [10, 100],
            }
        }
        libs = [
            dict(name="test1", type="tex", cond="texnotex_1",
                 strand="+", rep="a"),
            dict(name="test2", type="notex", cond="texnotex_1",
                 strand="+", rep="a"),
        ]
        tmp_texs = {"test1_test2": 2}
        trans = {"aaa": []}
        args = self.mock_args.mock()
        args.replicates = {"tex": "all_1", "frag": "all_1"}
        args.height = 5
        args.tex = 2
        ta.elongation(covers, tmp_texs, libs, "+", trans, args, "aaa", [])
        self.assertDictEqual(trans, {'aaa': [-1] * 9 + [10, 100]})

    def test_transfer_to_tran(self):
        """Check both dicts returned by ``ta.transfer_to_tran``."""
        frag_lib = dict(name="test1", type="frag", cond="frag_1",
                        strand="+", rep="a")
        args = self.mock_args.mock()
        args.height = 10
        args.tex = 1
        args.replicates = {"tex": "all_1", "frag": "all_1"}
        tolers, trans = ta.transfer_to_tran(
            self.example.wigs_f, [frag_lib], {"test1": 2}, "+", args)
        expected_tolers = {'aaa': [0.0, 2.0, 20, 20, 4.0, 20, 7.0, 20]}
        expected_trans = {'aaa': [-1, -1, 41.0, 47.0, -1, 47.0, -1, 47.0]}
        self.assertDictEqual(tolers, expected_tolers)
        self.assertDictEqual(trans, expected_trans)

    def test_fill_gap_and_print(self):
        """Check the text ``ta.fill_gap_and_print`` writes to the stream."""
        args = self.mock_args.mock()
        args.tolerance = 3
        args.low_cutoff = 5
        args.width = 1
        trans = {'aaa': [-1, -1, 41.0, 47.0, -1, 47.0, -1, 47.0]}
        tolers = {'aaa': [0.0, 2.0, 20, 20, 4.0, 20] + [7] * 7 + [20]}
        stream = StringIO()
        ta.fill_gap_and_print(trans, "+", stream, tolers, "TEX", args)
        written = stream.getvalue()
        self.assertEqual(written, self.example.out_tran + "\n")

    def test_print_transctipt(self):
        """Check the single line ``ta.print_transctipt`` writes to the stream."""
        stream = StringIO()
        ta.print_transctipt(100, 200, 20, 1, 40, "TEX", 20, stream, "aaa",
                            "+")
        # Adjacent literals are concatenated into the same one-line string.
        expected = ("aaa\tANNOgesic\ttranscript\t100\t200\t.\t+\t.\t"
                    "ID=tran_1;Name=transcript_00001;high_coverage=40;"
                    "low_coverage=20;detect_lib=TEX\n")
        self.assertEqual(stream.getvalue(), expected)

    def test_assembly(self):
        """Run ``ta.assembly`` on generated wig files and check the output."""
        folder = self.test_folder
        wig_f_file, wig_r_file, wig_f2_file, wig_r2_file = [
            os.path.join(folder, name)
            for name in ("aaa_forward.wig", "aaa_reverse.wig",
                         "aaa2_forward.wig", "aaa2_reverse.wig")
        ]
        # Forward files get the forward example text, reverse files the
        # reverse example text.
        for path, content in ((wig_f_file, self.example.wig_f),
                              (wig_r_file, self.example.wig_r),
                              (wig_f2_file, self.example.wig_f),
                              (wig_r2_file, self.example.wig_r)):
            gen_file(path, content)
        out_file = os.path.join(folder, "out")
        input_lib = [
            "aaa_forward.wig:frag:1:a:+",
            "aaa_reverse.wig:frag:1:a:-",
            "aaa2_forward.wig:tex:1:a:+",
            "aaa2_reverse.wig:tex:1:a:-",
        ]
        args = self.mock_args.mock()
        args.replicates = {"tex": "all_1", "frag": "all_1"}
        args.height = 10
        args.width = 1
        args.tolerance = 3
        args.tex = 2
        args.low_cutoff = 5
        ta.assembly(wig_f_file, wig_r_file, folder, input_lib, out_file,
                    "TEX", args)
        produced = "\n".join(import_data(out_file))
        self.assertEqual(produced,
                         "##gff-version 3\n" + self.example.out_tran)