def test_stat_ta_gff(self):
     """Check the statistics and GFF files written by stc.stat_ta_gff.

     Generates a gene GFF and a transcript GFF from the example fixtures,
     runs stc.stat_ta_gff for the "gene" feature, then compares the
     statistics text and the rows of both output files with expected values.
     """
     folder = self.test_folder
     gene_path = os.path.join(folder, "aaa.gff")
     tran_path = os.path.join(folder, "aaa_transcript.gff")
     gen_file(gene_path, self.example.gff)
     gen_file(tran_path, self.example.ta)
     stat_path = os.path.join(folder, "stat")
     tran_out = os.path.join(folder, "out_ta.gff")
     gene_out = os.path.join(folder, "out.gff")
     stc.stat_ta_gff(tran_path, gene_path, stat_path,
                     tran_out, gene_out, ["gene"])

     # Statistics file: fixed header followed by the fixture text.
     stat_text = "\n".join(import_data(stat_path))
     expected_text = ("For gene:\n\tAll genomes:\n\tThe transcript "
                      "information compares with gene:\n"
                      + self.example.print_tag)
     self.assertEqual(stat_text, expected_text)

     # Transcript output: one row plus the attribute checks below.
     rows, attributes = extract_info(tran_out, "file")
     expected_rows = [
         'aaa\tfragmented_and_normal\tTranscript\t313\t3344\t.\t+\t.']
     self.assertListEqual(rows, expected_rows)
     # NOTE(review): if each entry of `attributes` is a list of "key=value"
     # strings, these `in` checks are exact-membership tests and the
     # assertions below may never run -- confirm against extract_info.
     checks = (("type", "type=cover_CDS"),
               ("associated_cds=", "associated_cds=YP_498609.1"))
     for attribute in attributes:
         for marker, wanted in checks:
             if marker in attribute:
                 self.assertEqual(attribute, wanted)

     # Gene output: the gene row and its CDS row.
     rows, attributes = extract_info(gene_out, "file")
     self.assertListEqual(rows, [
         'aaa\tRefseq\tgene\t517\t1878\t.\t+\t.',
         'aaa\tRefseq\tCDS\t517\t1878\t.\t+\t.',
     ])
     for attribute in attributes:
         if "Parent_tran" not in attribute:
             continue
         self.assertEqual(attribute, "Parent_tran=tran0")
Пример #2
0
 def test_extract_blast(self):
     """Check the GFF and table written by esi.extract_blast for nr and sRNA.

     esi.read_gff is replaced by a mock, two BLAST tables are generated from
     the example fixtures, and esi.extract_blast is run once per database
     label ("nr", then "sRNA"). Each run's GFF output is compared with the
     matching reference string; the sRNA run also checks the CSV table.
     """
     # Module-level replacement; this test does not restore the original.
     esi.read_gff = Mock_func().mock_read_gff
     nr_blast = os.path.join(self.test_folder, "nr_table")
     gen_file(nr_blast, self.example.blast_nr_all)
     srna_blast = os.path.join(self.test_folder, "srna_table")
     gen_file(srna_blast, self.example.blast_srna_all)
     output_file = os.path.join(self.test_folder, "out.gff")
     output_table = os.path.join(self.test_folder, "out.csv")

     # --- nr database ---
     esi.extract_blast(nr_blast, "test.srna", output_file, output_table,
                       "nr", None, None)
     datas, attributes = extract_info(output_file, "file")
     refs, ref_attributes = extract_info(self.example.out_nr_gff, "string")
     self.assertEqual(set(datas), set(refs[1:]))
     # The original compared attributes[i] with itself, which can never
     # fail. Reference entry 0 is skipped (as in refs[1:]), so output row i
     # is paired with reference entry i + 1.
     # NOTE(review): assumes output and reference rows share the same order
     # -- confirm against the fixture.
     self.assertEqual(set(attributes[0]), set(ref_attributes[1]))
     self.assertEqual(set(attributes[1]), set(ref_attributes[2]))
     # The nr table is loaded but its content is not asserted here.
     import_data(output_table)

     # --- sRNA database (overwrites the same output files) ---
     esi.extract_blast(srna_blast, "test.srna", output_file, output_table,
                       "sRNA", None, None)
     datas, attributes = extract_info(output_file, "file")
     refs, ref_attributes = extract_info(self.example.out_srna_gff,
                                         "string")
     self.assertEqual(set(datas), set(refs[1:]))
     self.assertEqual(set(attributes[0]), set(ref_attributes[1]))
     self.assertEqual(set(attributes[1]), set(ref_attributes[2]))
     datas = import_data(output_table)
     self.assertEqual(set(datas),
                      set(self.example.out_srna_csv.split("\n")))
 def test_stat_ta_tss(self):
     """Check the statistics and GFF files written by stc.stat_ta_tss.

     Generates a TSS GFF and a transcript GFF from the example fixtures,
     runs stc.stat_ta_tss with 5 as its last argument, then compares the
     statistics text and the rows of both output files with expected values.
     """
     tss_file = os.path.join(self.test_folder, "aaa_TSS.gff")
     ta_file = os.path.join(self.test_folder, "aaa_transcript.gff")
     gen_file(tss_file, self.example.tss)
     gen_file(ta_file, self.example.ta)
     stat_file = os.path.join(self.test_folder, "stat")
     out_ta_file = os.path.join(self.test_folder, "out_ta.gff")
     out_tss_file = os.path.join(self.test_folder, "out_tss.gff")
     stc.stat_ta_tss(ta_file, tss_file, stat_file,
                     out_ta_file, out_tss_file, 5)

     datas = import_data(stat_file)
     self.assertEqual("\n".join(datas),
                      "All genomes:\n" + self.example.print_tas)

     datas, attributes = extract_info(out_ta_file, "file")
     self.assertListEqual(
         datas,
         ['aaa\tfragmented_and_normal\tTranscript\t313\t3344\t.\t+\t.'])
     # NOTE(review): if each entry of `attributes` is a list of "key=value"
     # strings, this `in` check is an exact-membership test and the
     # assertion below may never run -- confirm against extract_info.
     for attribute in attributes:
         if "associated_tss" in attribute:
             # assertEqual needs both operands; the original passed only
             # the expected string, which raises TypeError when reached.
             self.assertEqual(attribute, "associated_tss=TSS:2131_f")

     datas, attributes = extract_info(out_tss_file, "file")
     self.assertListEqual(datas,
                          ['aaa\tTSSpredator\tTSS\t2131\t2131\t.\t+\t.'])
     for attribute in attributes:
         if "Parent_tran" in attribute:
             self.assertEqual(attribute, "Parent_tran=tran0")
Пример #4
0
 def test_stat_ta_gff(self):
     """Check the statistics and GFF files written by stc.stat_ta_gff.

     Generates a gene GFF and a transcript GFF from the example fixtures,
     runs stc.stat_ta_gff for the "gene" feature, then compares the
     statistics text and the rows of both output files with expected values.
     """
     folder = self.test_folder
     gene_path = os.path.join(folder, "aaa.gff")
     tran_path = os.path.join(folder, "aaa_transcript.gff")
     gen_file(gene_path, self.example.gff)
     gen_file(tran_path, self.example.ta)
     stat_path = os.path.join(folder, "stat")
     tran_out = os.path.join(folder, "out_ta.gff")
     gene_out = os.path.join(folder, "out.gff")
     stc.stat_ta_gff(tran_path, gene_path, stat_path, tran_out,
                     gene_out, ["gene"])

     # Statistics file: fixed header followed by the fixture text.
     stat_text = "\n".join(import_data(stat_path))
     header = ("For gene:\n\tAll strains:\n\tThe transcriptome assembly "
               "information compares with gene:\n")
     self.assertEqual(stat_text, header + self.example.print_tag)

     # Transcript output: one row plus the attribute checks below.
     rows, attributes = extract_info(tran_out, "file")
     self.assertListEqual(
         rows,
         ['aaa\tfragmented_and_normal\tTranscript\t313\t3344\t.\t+\t.'])
     # NOTE(review): if each entry of `attributes` is a list of "key=value"
     # strings, these `in` checks are exact-membership tests and the
     # assertions below may never run -- confirm against extract_info.
     checks = (("type", "type=cover_CDS"),
               ("associated_cds=", "associated_cds=YP_498609.1"))
     for attribute in attributes:
         for marker, wanted in checks:
             if marker in attribute:
                 self.assertEqual(attribute, wanted)

     # Gene output: the gene row and its CDS row.
     rows, attributes = extract_info(gene_out, "file")
     expected_rows = ['aaa\tRefseq\tgene\t517\t1878\t.\t+\t.',
                      'aaa\tRefseq\tCDS\t517\t1878\t.\t+\t.']
     self.assertListEqual(rows, expected_rows)
     for attribute in attributes:
         if "Parent_tran" not in attribute:
             continue
         self.assertEqual(attribute, "Parent_tran=tran0")
Пример #5
0
 def test_overlap(self):
     """Compare the rows fg.overlap writes with the out_overlap fixture."""
     buffer = StringIO()
     # A fresh empty list is passed as the third argument.
     fg.overlap(self.example.tas, self.example.gffs, [], buffer)
     produced, _ = extract_info(buffer.getvalue(), "string")
     expected, _ = extract_info(self.example.out_overlap, "string")
     # Order-insensitive comparison of the extracted rows.
     self.assertEqual(set(produced), set(expected))
Пример #6
0
 def test_overlap(self):
     """Compare the rows fg.overlap writes with the out_overlap fixture.

     fg.overlap is called with the "merge_overlap" label and an in-memory
     output stream; the extracted rows are compared order-insensitively.
     """
     # The unused `print_list` local of the original was removed; it was
     # never passed to fg.overlap in this version of the test.
     out = StringIO()
     fg.overlap(self.example.tas, self.example.gffs, out, "merge_overlap")
     datas, _ = extract_info(out.getvalue(), "string")
     refs, _ = extract_info(self.example.out_overlap, "string")
     self.assertEqual(set(datas), set(refs))
Пример #7
0
 def test_stat_ta_tss(self):
     """Check the statistics and GFF files written by stc.stat_ta_tss.

     Generates a TSS GFF and a transcript GFF from the example fixtures,
     runs stc.stat_ta_tss with 5 as its last argument, then compares the
     statistics text and the rows of both output files with expected values.
     """
     tss_file = os.path.join(self.test_folder, "aaa_TSS.gff")
     ta_file = os.path.join(self.test_folder, "aaa_transcript.gff")
     gen_file(tss_file, self.example.tss)
     gen_file(ta_file, self.example.ta)
     stat_file = os.path.join(self.test_folder, "stat")
     out_ta_file = os.path.join(self.test_folder, "out_ta.gff")
     out_tss_file = os.path.join(self.test_folder, "out_tss.gff")
     stc.stat_ta_tss(ta_file, tss_file, stat_file, out_ta_file,
                     out_tss_file, 5)

     datas = import_data(stat_file)
     self.assertEqual("\n".join(datas),
                      "All strains:\n" + self.example.print_tas)

     datas, attributes = extract_info(out_ta_file, "file")
     self.assertListEqual(
         datas,
         ['aaa\tfragmented_and_normal\tTranscript\t313\t3344\t.\t+\t.'])
     # NOTE(review): if each entry of `attributes` is a list of "key=value"
     # strings, this `in` check is an exact-membership test and the
     # assertion below may never run -- confirm against extract_info.
     for attribute in attributes:
         if "associated_tss" in attribute:
             # assertEqual needs both operands; the original passed only
             # the expected string, which raises TypeError when reached.
             self.assertEqual(attribute, "associated_tss=TSS:2131_f")

     datas, attributes = extract_info(out_tss_file, "file")
     self.assertListEqual(datas,
                          ['aaa\tTSSpredator\tTSS\t2131\t2131\t.\t+\t.'])
     for attribute in attributes:
         if "Parent_tran" in attribute:
             self.assertEqual(attribute, "Parent_tran=tran0")
Пример #8
0
 def test_longer_ta(self):
     """Run fg.longer_ta with a replaced GFF parser and compare its rows.

     fg.Gff3Parser is replaced by Mock_gff_parser, and the input file only
     contains the literal text "ta". The output rows are compared with the
     out_long fixture, skipping the fixture's first entry.
     """
     # Module-level replacement; this test does not restore the original.
     fg.Gff3Parser = Mock_gff_parser
     folder = self.test_folder
     result_path = os.path.join(folder, "test.out")
     input_path = os.path.join(folder, "test.ta")
     with open(input_path, "w") as handle:
         handle.write("ta")
     fg.longer_ta(input_path, 30, result_path)
     produced, _ = extract_info(result_path, "file")
     expected, _ = extract_info(self.example.out_long, "string")
     self.assertEqual(set(produced), set(expected[1:]))
Пример #9
0
 def test_longer_ta(self):
     """Run fg.longer_ta with a replaced GFF parser and compare its rows.

     fg.Gff3Parser is replaced by Mock_gff_parser, and the input file only
     contains the literal text "ta". The output rows are compared with the
     out_long fixture, skipping the fixture's first entry.
     """
     # Module-level replacement; this test does not restore the original.
     fg.Gff3Parser = Mock_gff_parser
     folder = self.test_folder
     result_path = os.path.join(folder, "test.out")
     input_path = os.path.join(folder, "test.ta")
     with open(input_path, "w") as handle:
         handle.write("ta")
     fg.longer_ta(input_path, 30, result_path)
     produced, _ = extract_info(result_path, "file")
     expected, _ = extract_info(self.example.out_long, "string")
     self.assertEqual(set(produced), set(expected[1:]))
Пример #10
0
 def test_combine(self):
     """Run cft.combine with a replaced GFF parser and compare its rows.

     cft.Gff3Parser is replaced by Mock_Gff_parser; the two input files
     only contain the literal texts "frag" and "tex". The output rows are
     compared with the out_tran fixture, skipping the fixture's first entry.
     """
     # The unused `trans = []` local of the original was removed.
     # Module-level replacement; this test does not restore the original.
     cft.Gff3Parser = Mock_Gff_parser
     output_file = os.path.join(self.test_folder, "test.out")
     frag_file = os.path.join(self.test_folder, "frag.gff")
     tex_file = os.path.join(self.test_folder, "tex.gff")
     with open(frag_file, "w") as fh:
         fh.write("frag")
     with open(tex_file, "w") as th:
         th.write("tex")
     cft.combine(frag_file, tex_file, 5, output_file)
     outs, _ = extract_info(output_file, "file")
     refs, _ = extract_info(self.example.out_tran, "string")
     self.assertEqual(set(outs), set(refs[1:]))
Пример #11
0
 def test_combine(self):
     """Run cft.combine with a replaced GFF parser and compare its rows.

     cft.Gff3Parser is replaced by Mock_Gff_parser; the two input files
     only contain the literal texts "frag" and "tex". The output rows are
     compared with the out_tran fixture, skipping the fixture's first entry.
     """
     # The unused `trans = []` local of the original was removed.
     # Module-level replacement; this test does not restore the original.
     cft.Gff3Parser = Mock_Gff_parser
     output_file = os.path.join(self.test_folder, "test.out")
     frag_file = os.path.join(self.test_folder, "frag.gff")
     tex_file = os.path.join(self.test_folder, "tex.gff")
     with open(frag_file, "w") as fh:
         fh.write("frag")
     with open(tex_file, "w") as th:
         th.write("tex")
     cft.combine(frag_file, tex_file, 5, output_file)
     outs, _ = extract_info(output_file, "file")
     refs, _ = extract_info(self.example.out_tran, "string")
     self.assertEqual(set(outs), set(refs[1:]))
Пример #12
0
 def test_compare_tran(self):
     """Check the rows and Parent attributes written by c_gff.compare_tran.

     A single transcript entry is built with Create_generator and compared
     against the entries produced by read_dict from the example fixtures.
     """
     transcript_fields = dict(seq_id="aaa", source="Refseq",
                              feature="Transcript", start=100, end=500,
                              phase=".", strand="+", score=".")
     transcript_attrs = dict(ID="tran0", Name="Tran_0")
     out = StringIO()
     gffs = read_dict(3, self.example.gff_dict, self.example.attributes_gff)
     tran = Create_generator(transcript_fields, transcript_attrs, "gff")
     c_gff.compare_tran(gffs, tran, out)
     rows, attributes = extract_info(out.getvalue(), "string")
     # Collect every attribute element that mentions "Parent".
     parents = {element
                for attribute in attributes
                for element in attribute
                if "Parent" in element}
     self.assertEqual(set(rows), {"aaa\tRefseq\tCDS\t160\t300\t.\t+\t."})
     self.assertEqual(parents, {"Parent=tran0"})
     out.close()
Пример #13
0
 def test_merge_srna_gff(self):
     """Check the merged sRNA rows and attributes from ms.merge_srna_gff.

     ms.read_gff is replaced by a mock and an annotation file is generated
     from the gff_file fixture. The merged output is expected to hold two
     rows with identical coordinates but different attribute sets.
     """
     merged_path = os.path.join(self.test_folder, "test_out")
     annotation_path = os.path.join(self.test_folder, "aaa.gff")
     gen_file(annotation_path, self.example.gff_file)
     # Module-level replacement; this test does not restore the original.
     ms.read_gff = Mock_func().mock_read_gff
     gffs = {"merge": merged_path, "utr": "UTR", "normal": "inter"}
     ms.merge_srna_gff(gffs, False, 0.5, annotation_path)

     rows, attributes = extract_info(merged_path, "file")
     self.assertListEqual(
         rows, ['aaa\tANNOgesic\tncRNA\t54\t254\t.\t+\t.'] * 2)
     first_expected = {
         'overlap_percent=NA', 'end_cleavage=cleavage_40',
         'start_cleavage=cleavage_4', 'Name=sRNA_00000',
         'with_TSS=TSS_3', 'ID=srna0', 'sRNA_type=interCDS',
         'overlap_cds=NA',
     }
     second_expected = {
         'overlap_percent=NA', 'end_cleavage=NA', 'Name=sRNA_00001',
         'with_TSS=NA', 'ID=srna1', 'sRNA_type=intergenic',
         'overlap_cds=NA',
     }
     self.assertEqual(set(attributes[0]), first_expected)
     self.assertEqual(set(attributes[1]), second_expected)
Пример #14
0
 def test_print_intersection(self):
     """Check the statistics line and GFF file from sc.print_intersection.

     All five classes map to the same srnas fixture. The statistics stream
     must contain one intersection line and the GFF written to the "test"
     path must match the gff_info fixture.
     """
     # The unused `num_srna` dict of the original was removed; it was never
     # passed to sc.print_intersection.
     gff_name = os.path.join(self.test_folder, "test")
     out_stat = StringIO()
     keys = ["class_1", "class_4", "class_2", "class_3", "class_5"]
     datas = {
         "class_1": self.example.srnas,
         "class_2": self.example.srnas,
         "class_3": self.example.srnas,
         "class_4": self.example.srnas,
         "class_5": self.example.srnas,
     }
     sc.print_intersection(datas, keys, 3, gff_name, "total", out_stat)
     self.assertEqual(
         out_stat.getvalue(),
         "\tclass_1 and class_4 and class_2 and class_3 and class_5 = "
         "4(1.3333333333333333)\n")
     # The GFF is read back from the same path that was passed in.
     results, _ = extract_info(gff_name, "file")
     self.assertEqual("\n".join(results), self.example.gff_info)
Пример #15
0
 def test_print_file(self):
     """Check the CDS file and statistics file written by vg.print_file.

     Fixed overall and per-strain counts are passed in together with the
     gffs fixture; the CDS output row and the statistics text are compared
     with expected values.
     """
     overall_counts = dict(all_cds=600, all_tRNA=30, all_rRNA=30,
                           cds=250, tRNA=20, rRNA=20)
     strain_counts = {
         "test": dict(all_cds=300, all_tRNA=20, all_rRNA=20,
                      cds=100, tRNA=10, rRNA=10),
     }
     folder = self.test_folder
     cds_path = os.path.join(folder, "cds_file")
     stat_path = os.path.join(folder, "stat_file")
     vg.print_file(self.example.gffs, cds_path, stat_path, overall_counts,
                   strain_counts)
     rows, _ = extract_info(cds_path, "file")
     self.assertEqual("\n".join(rows),
                      "test\tRefSeq\tCDS\t200\t270\t.\t+\t.")
     stat_lines = import_data(stat_path)
     self.assertEqual("\n".join(stat_lines), self.example.out_stat_test)
 def test_srna_sorf_comparison(self):
     """Check both output files written by cro.srna_sorf_comparison.

     cro.Gff3Parser is replaced by Mock_Gff_parser and the two input files
     only contain the literal texts "srna" and "sorf". Each output is
     compared with its reference string: rows order-insensitively, plus the
     attributes of output entry 2 against reference entry 3.
     """
     # Module-level replacement; this test does not restore the original.
     cro.Gff3Parser = Mock_Gff_parser
     folder = self.test_folder
     srna_in = os.path.join(folder, "sRNA.gff")
     sorf_in = os.path.join(folder, "sORF.gff")
     gen_file(srna_in, "srna")
     gen_file(sorf_in, "sorf")
     srna_result = os.path.join(folder, "sRNA.out")
     sorf_result = os.path.join(folder, "sORF.out")
     cro.srna_sorf_comparison(srna_in, sorf_in, srna_result, sorf_result)

     comparisons = ((srna_result, self.example.srna_out),
                    (sorf_result, self.example.sorf_out))
     for produced_path, reference_text in comparisons:
         rows, row_attrs = extract_info(produced_path, "file")
         refs, ref_attrs = extract_info(reference_text, "string")
         # The first reference entry is skipped before comparing rows.
         self.assertEqual(set(rows), set(refs[1:]))
         self.assertEqual(set(row_attrs[2]), set(ref_attrs[3]))
Пример #17
0
 def test_compare_cds_tran(self):
     """Check the temporary CDS file written by self.sub._compare_cds_tran.

     Gene and transcript GFF files are generated from the example fixtures
     and passed to the method; the resulting tmp_cds.gff must hold one row.
     """
     folder = self.test_folder
     gene_path = os.path.join(folder, "aaa.gff")
     tran_path = os.path.join(folder, "aaa_transcript.gff")
     gen_file(gene_path, self.example.gff_file)
     gen_file(tran_path, self.example.tran_file)
     self.sub._compare_cds_tran(gene_path, tran_path)
     # NOTE(review): the output path is hard-coded rather than derived from
     # self.test_folder -- presumably both name the same directory; confirm.
     rows, _ = extract_info("test_folder/output/all_CDS/tmp_cds.gff", "file")
     self.assertEqual("\n".join(rows), 'aaa\tRefSeq\tCDS\t3\t17\t.\t+\t.')
Пример #18
0
 def test_srna_sorf_comparison(self):
     """Check both output files written by cro.srna_sorf_comparison.

     cro.Gff3Parser is replaced by Mock_Gff_parser and the two input files
     only contain the literal texts "srna" and "sorf". Each output is
     compared with its reference string: rows order-insensitively, plus the
     attributes of output entry 2 against reference entry 3.
     """
     # Module-level replacement; this test does not restore the original.
     cro.Gff3Parser = Mock_Gff_parser
     folder = self.test_folder
     srna_in = os.path.join(folder, "sRNA.gff")
     sorf_in = os.path.join(folder, "sORF.gff")
     gen_file(srna_in, "srna")
     gen_file(sorf_in, "sorf")
     srna_result = os.path.join(folder, "sRNA.out")
     sorf_result = os.path.join(folder, "sORF.out")
     cro.srna_sorf_comparison(srna_in, sorf_in, srna_result, sorf_result)

     comparisons = ((srna_result, self.example.srna_out),
                    (sorf_result, self.example.sorf_out))
     for produced_path, reference_text in comparisons:
         rows, row_attrs = extract_info(produced_path, "file")
         refs, ref_attrs = extract_info(reference_text, "string")
         # The first reference entry is skipped before comparing rows.
         self.assertEqual(set(rows), set(refs[1:]))
         self.assertEqual(set(row_attrs[2]), set(ref_attrs[3]))
Пример #19
0
 def test_compare_cds_tran(self):
     """Check the temporary CDS file written by self.sub._compare_cds_tran.

     Gene and transcript GFF files are generated from the example fixtures
     and passed to the method; the resulting tmp_cds.gff must hold one row.
     """
     folder = self.test_folder
     gene_path = os.path.join(folder, "aaa.gff")
     tran_path = os.path.join(folder, "aaa_transcript.gff")
     gen_file(gene_path, self.example.gff_file)
     gen_file(tran_path, self.example.tran_file)
     self.sub._compare_cds_tran(gene_path, tran_path)
     # NOTE(review): the output path is hard-coded rather than derived from
     # self.test_folder -- presumably both name the same directory; confirm.
     produced_path = "test_folder/output/all_CDSs/tmp_cds.gff"
     rows, _ = extract_info(produced_path, "file")
     self.assertEqual("\n".join(rows), 'aaa\tRefSeq\tCDS\t3\t17\t.\t+\t.')
Пример #20
0
 def test_print_gff(self):
     """Check the rows and attributes that ep.print_gff writes.

     One PSORTb-style record is passed with the gffs fixture. The rows are
     compared order-insensitively with out_data, and each entry's attributes
     are compared with the matching ";"-separated line of out_attributes.
     """
     out_m = StringIO()
     record = {
         'score': '10.0/10.0',
         'local': 'Cytoplasmic/CytoplasmicMembrane',
         'seq_id': 'aaa',
         'protein_id': 'YP_500332.1',
         'end': 140,
         'strand': '+',
         'start': 100,
     }
     ep.print_gff(self.example.gffs, [record], out_m)
     rows, attributes = extract_info(out_m.getvalue(), "string")
     self.assertEqual(set(rows), set(self.example.out_data.split("\n")))
     expected_lines = self.example.out_attributes.split("\n")
     for position, produced in enumerate(attributes):
         self.assertEqual(set(produced),
                          set(expected_lines[position].split(";")))
     out_m.close()
Пример #21
0
 def test_validate_gff(self):
     """Check the CDS file and statistics file written by vg.validate_gff.

     Annotation and TSS GFF files are generated from the example fixtures
     and passed to vg.validate_gff with 300 and "tss" as trailing arguments.
     """
     folder = self.test_folder
     annotation_path = os.path.join(folder, "test.gff")
     tss_path = os.path.join(folder, "test_TSS.gff")
     gen_file(annotation_path, self.example.gff_file)
     gen_file(tss_path, self.example.tss_file)
     cds_path = os.path.join(folder, "cds_file")
     stat_path = os.path.join(folder, "stat_file")
     vg.validate_gff(tss_path, annotation_path, stat_path, cds_path,
                     300, "tss")
     rows, _ = extract_info(cds_path, "file")
     self.assertEqual("\n".join(rows), "test\tRefSeq\tCDS\t5\t10\t.\t+\t.")
     stat_lines = import_data(stat_path)
     self.assertEqual("\n".join(stat_lines), self.example.out_stat)
Пример #22
0
 def test_combine_gff(self):
     """Run c_gff.combine_gff with a replaced parser and compare its rows.

     c_gff.Gff3Parser is replaced by Mock_Gff_parser. Six input files are
     generated whose content is just their own file name, and the expected
     output is written to a reference file from the out_file fixture.
     """
     # Module-level replacement; this test does not restore the original.
     c_gff.Gff3Parser = Mock_Gff_parser
     folder = self.test_folder
     names = ("gff", "tran", "tss", "term", "utr3", "utr5")
     paths = {name: os.path.join(folder, name) for name in names}
     reference_path = os.path.join(folder, "ref")
     for name in names:
         gen_file(paths[name], name)
     gen_file(reference_path, self.example.out_file)
     result_path = os.path.join(folder, "test.out")
     c_gff.combine_gff(paths["gff"], paths["tran"], paths["tss"],
                       paths["utr5"], paths["utr3"], paths["term"],
                       5, 5, result_path)
     produced, _ = extract_info(result_path, "file")
     expected, _ = extract_info(reference_path, "file")
     self.assertEqual(set(produced), set(expected))
Пример #23
0
 def test_validate_gff(self):
     """Check the CDS file and statistics file written by vg.validate_gff.

     Annotation and TSS GFF files are generated from the example fixtures
     and passed to vg.validate_gff with 300 and "tss" as trailing arguments.
     """
     folder = self.test_folder
     annotation_path = os.path.join(folder, "test.gff")
     tss_path = os.path.join(folder, "test_TSS.gff")
     gen_file(annotation_path, self.example.gff_file)
     gen_file(tss_path, self.example.tss_file)
     cds_path = os.path.join(folder, "cds_file")
     stat_path = os.path.join(folder, "stat_file")
     vg.validate_gff(tss_path, annotation_path, stat_path, cds_path,
                     300, "tss")
     rows, _ = extract_info(cds_path, "file")
     self.assertEqual("\n".join(rows), "test\tRefSeq\tCDS\t5\t10\t.\t+\t.")
     stat_lines = import_data(stat_path)
     self.assertEqual("\n".join(stat_lines), self.example.out_stat)
Пример #24
0
 def test_print_file(self):
     """Check the CDS file and statistics file written by vg.print_file.

     Fixed overall and per-strain counts are passed in together with the
     gffs fixture; the CDS output row and the statistics text are compared
     with expected values.
     """
     overall_counts = {
         "all_cds": 600,
         "all_tRNA": 30,
         "all_rRNA": 30,
         "cds": 250,
         "tRNA": 20,
         "rRNA": 20,
     }
     strain_counts = {
         "test": {
             "all_cds": 300,
             "all_tRNA": 20,
             "all_rRNA": 20,
             "cds": 100,
             "tRNA": 10,
             "rRNA": 10,
         },
     }
     folder = self.test_folder
     cds_path = os.path.join(folder, "cds_file")
     stat_path = os.path.join(folder, "stat_file")
     vg.print_file(self.example.gffs, cds_path, stat_path,
                   overall_counts, strain_counts)
     rows, _ = extract_info(cds_path, "file")
     self.assertEqual("\n".join(rows),
                      "test\tRefSeq\tCDS\t200\t270\t.\t+\t.")
     stat_lines = import_data(stat_path)
     self.assertEqual("\n".join(stat_lines), self.example.out_stat_test)
Пример #25
0
 def test_combine_gff(self):
     """Run c_gff.combine_gff with a replaced parser and compare its rows.

     c_gff.Gff3Parser is replaced by Mock_Gff_parser. Six input files are
     generated whose content is just their own file name, and the expected
     output is written to a reference file from the out_file fixture.
     """
     # Module-level replacement; this test does not restore the original.
     c_gff.Gff3Parser = Mock_Gff_parser
     folder = self.test_folder
     names = ("gff", "tran", "tss", "term", "utr3", "utr5")
     paths = {name: os.path.join(folder, name) for name in names}
     reference_path = os.path.join(folder, "ref")
     for name in names:
         gen_file(paths[name], name)
     gen_file(reference_path, self.example.out_file)
     result_path = os.path.join(folder, "test.out")
     c_gff.combine_gff(paths["gff"], paths["tran"], paths["tss"],
                       paths["utr5"], paths["utr3"], paths["term"],
                       5, 5, result_path)
     produced, _ = extract_info(result_path, "file")
     expected, _ = extract_info(reference_path, "file")
     self.assertEqual(set(produced), set(expected))
Пример #26
0
 def test_print_intersection(self):
     """Check the statistics line and GFF file from sc.print_intersection.

     All five classes map to the same srnas fixture. The statistics stream
     must contain one intersection line and the GFF written to the "test"
     path must match the gff_info fixture.
     """
     # The unused `num_srna` dict of the original was removed; it was never
     # passed to sc.print_intersection.
     gff_name = os.path.join(self.test_folder, "test")
     out_stat = StringIO()
     keys = ["class_1", "class_4", "class_2", "class_3", "class_5"]
     datas = {"class_1": self.example.srnas, "class_2": self.example.srnas,
              "class_3": self.example.srnas, "class_4": self.example.srnas,
              "class_5": self.example.srnas}
     sc.print_intersection(datas, keys, 3, gff_name, "total", out_stat)
     self.assertEqual(
         out_stat.getvalue(),
         "\tclass_1 and class_4 and class_2 and class_3 and class_5 = "
         "4(1.3333333333333333)\n")
     # The GFF is read back from the same path that was passed in.
     results, _ = extract_info(gff_name, "file")
     self.assertEqual("\n".join(results), self.example.gff_info)
    def test_extract_blast(self):
        """Check the GFF and table written by esi.extract_blast.

        esi.read_gff is replaced by a mock, two BLAST tables are generated
        from the example fixtures, and esi.extract_blast is run once per
        database label ("nr", then "sRNA"). Each run's GFF output is
        compared with the matching reference string; the sRNA run also
        checks the CSV table.
        """
        # Module-level replacement; this test does not restore the original.
        esi.read_gff = Mock_func().mock_read_gff
        nr_blast = os.path.join(self.test_folder, "nr_table")
        gen_file(nr_blast, self.example.blast_nr_all)
        srna_blast = os.path.join(self.test_folder, "srna_table")
        gen_file(srna_blast, self.example.blast_srna_all)
        output_file = os.path.join(self.test_folder, "out.gff")
        output_table = os.path.join(self.test_folder, "out.csv")

        # --- nr database ---
        esi.extract_blast(nr_blast, "test.srna", output_file, output_table,
                          "nr")
        datas, attributes = extract_info(output_file, "file")
        refs, ref_attributes = extract_info(self.example.out_nr_gff, "string")
        self.assertEqual(set(datas), set(refs[1:]))
        # The original compared attributes[i] with itself, which can never
        # fail. Reference entry 0 is skipped (as in refs[1:]), so output
        # row i is paired with reference entry i + 1.
        # NOTE(review): assumes output and reference rows share the same
        # order -- confirm against the fixture.
        self.assertEqual(set(attributes[0]), set(ref_attributes[1]))
        self.assertEqual(set(attributes[1]), set(ref_attributes[2]))
        # TODO(review): the nr table is loaded but not asserted; the
        # comparison against self.example.out_nr_csv was commented out in
        # the original -- re-enable once the fixture is confirmed.
        import_data(output_table)

        # --- sRNA database (overwrites the same output files) ---
        esi.extract_blast(srna_blast, "test.srna", output_file, output_table,
                          "sRNA")
        datas, attributes = extract_info(output_file, "file")
        refs, ref_attributes = extract_info(self.example.out_srna_gff,
                                            "string")
        self.assertEqual(set(datas), set(refs[1:]))
        self.assertEqual(set(attributes[0]), set(ref_attributes[1]))
        self.assertEqual(set(attributes[1]), set(ref_attributes[2]))
        datas = import_data(output_table)
        self.assertEqual(set(datas),
                         set(self.example.out_srna_csv.split("\n")))
Пример #28
0
 def test_merge_srna_gff(self):
     """Check the merged sRNA row and attributes from ms.merge_srna_gff.

     ms.read_gff is replaced by a mock and an annotation file is generated
     from the gff_file fixture. The merged output is expected to hold a
     single row whose attributes match a fixed set.
     """
     merged_path = os.path.join(self.test_folder, "test_out")
     annotation_path = os.path.join(self.test_folder, "aaa.gff")
     gen_file(annotation_path, self.example.gff_file)
     # Module-level replacement; this test does not restore the original.
     ms.read_gff = Mock_func().mock_read_gff
     gffs = {"merge": merged_path, "utr": "UTR", "normal": "inter"}
     ms.merge_srna_gff(gffs, False, 0.5, annotation_path, False)

     rows, attributes = extract_info(merged_path, "file")
     self.assertListEqual(rows,
                          ['aaa\tANNOgesic\tncRNA\t54\t254\t.\t+\t.'])
     expected_attributes = {
         'overlap_cds=NA', 'Name=sRNA_00000',
         'ID=aaa_srna0', 'sRNA_type=intergenic',
         'end_cleavage=cleavage_40', 'with_TSS=TSS_3',
         'overlap_percent=NA',
     }
     self.assertEqual(set(attributes[0]), expected_attributes)
Пример #29
0
 def test_compare_tran_term(self):
     """Check the terminator rows written by c_gff.compare_tran_term.

     Every transcript/terminator pair built from the example fixtures is
     passed to c_gff.compare_tran_term with 3 as the last argument; the
     written rows and their "Parent_tran" attributes are then compared.
     """
     trans = read_dict(3, self.example.tran_dict,
                       self.example.attributes_tran)
     terms = read_dict(3, self.example.term_dict,
                       self.example.attributes_term)
     out = StringIO()
     for tran in trans:
         for term in terms:
             c_gff.compare_tran_term(term, tran, out, 3)
     rows, attributes = extract_info(out.getvalue(), "string")
     # Collect every attribute element that mentions "Parent_tran".
     parents = {element
                for attribute in attributes
                for element in attribute
                if "Parent_tran" in element}
     expected_rows = {
         "aaa\tRefseq\tTerminator\t350\t367\t.\t+\t.",
         "bbb\tRefseq\tTerminator\t420\t429\t.\t-\t.",
     }
     self.assertEqual(set(rows), expected_rows)
     self.assertEqual(parents, {"Parent_tran=tran0", "Parent_tran=tran2"})
     out.close()
Пример #30
0
 def test_compare_tran(self):
     """Check the rows and Parent_tran attributes from c_gff.compare_tran.

     A single transcript entry is built with Create_generator and compared
     against the entries produced by read_dict from the example fixtures.
     """
     transcript_fields = {
         "seq_id": "aaa",
         "source": "Refseq",
         "feature": "Transcript",
         "start": 100,
         "end": 500,
         "phase": ".",
         "strand": "+",
         "score": ".",
     }
     transcript_attrs = {"ID": "tran0", "Name": "Tran_0"}
     out = StringIO()
     gffs = read_dict(3, self.example.gff_dict, self.example.attributes_gff)
     tran = Create_generator(transcript_fields, transcript_attrs, "gff")
     c_gff.compare_tran(gffs, tran, out)
     rows, attributes = extract_info(out.getvalue(), "string")
     # Collect every attribute element that mentions "Parent_tran".
     parents = {element
                for attribute in attributes
                for element in attribute
                if "Parent_tran" in element}
     self.assertEqual(set(rows), {"aaa\tRefseq\tCDS\t160\t300\t.\t+\t."})
     self.assertEqual(parents, {"Parent_tran=tran0"})
     out.close()
Пример #31
0
 def test_compare_tran_term(self):
     """Check the terminator rows written by c_gff.compare_tran_term.

     Every transcript/terminator pair built from the example fixtures is
     passed to c_gff.compare_tran_term with 3 as the last argument; the
     written rows and their "Parent" attributes are then compared.
     """
     trans = read_dict(3, self.example.tran_dict, self.example.attributes_tran)
     terms = read_dict(3, self.example.term_dict, self.example.attributes_term)
     out = StringIO()
     for tran in trans:
         for term in terms:
             c_gff.compare_tran_term(term, tran, out, 3)
     rows, attributes = extract_info(out.getvalue(), "string")
     # Collect every attribute element that mentions "Parent".
     parents = {element
                for attribute in attributes
                for element in attribute
                if "Parent" in element}
     expected_rows = {"aaa\tRefseq\tTerminator\t350\t367\t.\t+\t.",
                      "bbb\tRefseq\tTerminator\t420\t429\t.\t-\t."}
     self.assertEqual(set(rows), expected_rows)
     self.assertEqual(parents, {"Parent=tran0", "Parent=tran2"})
     out.close()
Пример #32
0
 def test_uni(self):
     """Compare the rows fg.uni writes with the out_uni fixture."""
     buffer = StringIO()
     fg.uni(self.example.tas, self.example.gffs, buffer)
     produced, _ = extract_info(buffer.getvalue(), "string")
     expected, _ = extract_info(self.example.out_uni, "string")
     # Order-insensitive comparison of the extracted rows.
     self.assertEqual(set(produced), set(expected))
Пример #33
0
 def test_uni(self):
     """Compare the rows fg.uni writes with the out_uni fixture."""
     buffer = StringIO()
     fg.uni(self.example.tas, self.example.gffs, buffer)
     produced, _ = extract_info(buffer.getvalue(), "string")
     expected, _ = extract_info(self.example.out_uni, "string")
     # Order-insensitive comparison of the extracted rows.
     self.assertEqual(set(produced), set(expected))