示例#1
0
class Example(object):
    """Class-level fixtures: two TSS sets and one gene set on sequence "aaa".

    Each ``*_dict`` list is paired by index with its ``attributes_*`` list,
    and every pair is passed to ``Create_generator`` with the "gff" tag
    while the class body executes.
    """

    # First TSS set: positions 3 (+), 16 (-) and 54 (+).
    tss_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 3, "end": 3, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 16, "end": 16, "phase": ".", "strand": "-", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 54, "end": 54, "phase": ".", "strand": "+", "score": "."},
    ]
    attributes_tss = [
        {"ID": "CDS0", "Name": "CDS_0", "type": "Primary",
         "associated_gene": "AAA_00001", "utr_length": "Primary_25"},
        {"ID": "CDS1", "Name": "CDS_1", "type": "Internal",
         "associated_gene": "AAA_00002", "utr_length": "Internal_NA"},
        {"ID": "CDS2", "Name": "CDS_2", "type": "Primary,Antisense",
         "associated_gene": "AAA_00004,AAA_00006",
         "utr_length": "Primary_25,Internal_NA"},
    ]

    # Second TSS set: positions 3 (+), 18 (-) and 23 (+).
    tss2_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 3, "end": 3, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 18, "end": 18, "phase": ".", "strand": "-", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 23, "end": 23, "phase": ".", "strand": "+", "score": "."},
    ]
    attributes_tss2 = [
        {"ID": "CDS0", "Name": "CDS_0", "type": "Primary",
         "associated_gene": "AAA_00001", "utr_length": "Primary_25"},
        {"ID": "CDS1", "Name": "CDS_1", "type": "Internal",
         "associated_gene": "AAA_00002", "utr_length": "Internal_NA"},
        {"ID": "CDS2", "Name": "CDS_2", "type": "Primary,Antisense",
         "associated_gene": "AAA_00004,AAA_00006",
         "utr_length": "Primary_25,Internal_NA"},
    ]

    # Gene records; the first two share the ID "gene0" in this fixture.
    gff_dict = [
        {"start": 6, "end": 15, "phase": ".", "strand": "+",
         "seq_id": "aaa", "score": ".", "source": "Refseq",
         "feature": "gene"},
        {"start": 1258, "end": 2234, "phase": ".", "strand": "+",
         "seq_id": "aaa", "score": ".", "source": "Refseq",
         "feature": "gene"},
        {"start": 3544, "end": 6517, "phase": ".", "strand": "-",
         "seq_id": "aaa", "score": ".", "source": "Refseq",
         "feature": "gene"},
    ]
    attributes_gff = [
        {"ID": "gene0", "Name": "gene_0", "locus_tag": "AAA_00001"},
        {"ID": "gene0", "Name": "gene_1", "locus_tag": "AAA_00002"},
        {"ID": "gene1", "Name": "gene_2", "locus_tag": "AAA_00003"},
    ]

    # Build the entry objects in lockstep, one of each kind per index.
    # An explicit loop is used because names defined in a class body are
    # not visible inside a comprehension's element expression.
    tsss, tsss2, genes = [], [], []
    for index in range(3):
        tsss.append(
            Create_generator(tss_dict[index], attributes_tss[index], "gff"))
        tsss2.append(
            Create_generator(tss2_dict[index], attributes_tss2[index], "gff"))
        genes.append(
            Create_generator(gff_dict[index], attributes_gff[index], "gff"))
示例#2
0
 def test_check_overlap(self):
     """Call mm.check_overlap twice and compare the returned tuples.

     Three "+" strand TSS entries on "aaa" are built (two at position 7,
     one at position 3). The first call passes True/False and the second
     False/100 as the first and fourth arguments.
     """

     def build_tss(position):
         # Fresh dicts on every call so no entry shares state with another.
         fields = {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
                   "start": position, "end": position, "phase": ".",
                   "strand": "+", "score": "."}
         attributes = {"ID": "TSS0", "Name": "TSS_0",
                       "type": "Primary,Internal",
                       "associated_gene": "AAA_00001,AAA_00004",
                       "UTR_length": "Primary_25,Internal_NA"}
         return Create_generator(fields, attributes, "gff")

     tss_m = build_tss(7)
     tss_p = build_tss(7)
     tss_pre = build_tss(3)

     nums = {"tss_p": 0, "tss_m": 0, "tss": 0}
     tsss = {"tsss_p": [], "tsss_m": [], "merge": []}
     num_strain = {"aaa": {"overlap": 0, "tsspredator": 0, "manual": 0}}
     overlap_num = 0

     first_result = mm.check_overlap(
         True, tss_pre, nums, False, num_strain, overlap_num,
         tss_m, tss_p, tsss, 1000,
         self.example.genes, self.example.genes)
     self.assertEqual(first_result, (False, 3, 1))

     second_result = mm.check_overlap(
         False, tss_pre, nums, 100, num_strain, overlap_num,
         tss_m, tss_p, tsss, 1000,
         self.example.genes, self.example.genes)
     self.assertEqual(second_result, (False, 1000, 0))
class Example(object):
    """Fixture data: one TSS and one CDS on sequence "test" plus sample text.

    The record dicts are turned into entry objects with ``Create_generator``
    (format tag "gff") while the class body executes.
    """

    # Single tab-separated record lines (CDS at 5-10 and TSS at 3).
    gff_file = """test	RefSeq	CDS	5	10	.	+	.	ID=cds0;Name=CDS_0"""
    tss_file = """test	RefSeq	TSS	3	3	.	+	.	ID=tss0;Name=TSS_0"""
    # One "+" strand TSS at position 170.
    tss_dict = [{
        "seq_id": "test",
        "source": "intergenic",
        "feature": "TSS",
        "start": 170,
        "end": 170,
        "phase": ".",
        "strand": "+",
        "score": "."
    }]
    attributes_tsss = [{"ID": "tss0", "Name": "TSS_0"}]
    tsss = []
    tsss.append(Create_generator(tss_dict[0], attributes_tsss[0], "gff"))
    # One "+" strand CDS spanning 200-270.
    gff_dict = [{
        "seq_id": "test",
        "source": "RefSeq",
        "feature": "CDS",
        "start": 200,
        "end": 270,
        "phase": ".",
        "strand": "+",
        "score": "."
    }]
    attributes_gff = [{"ID": "cds0", "Name": "CDS_0"}]
    gffs = []
    gffs.append(Create_generator(gff_dict[0], attributes_gff[0], "gff"))
    # Multi-line statistics text; presumably the expected report output of a
    # test elsewhere in the file -- TODO confirm against its user.
    out_stat_test = """All genomes:
The number of cds which is start from TSS: 250 (0.4166666666666667)
The number of tRNA which is start from TSS: 20 (0.6666666666666666)
The number of rRNA which is start from TSS: 20 (0.6666666666666666)"""
    out_stat = """All genomes:
示例#4
0
 def test_compare(self):
     """Run cft.compare on data1 against three other transcripts.

     data1 (140-367) is expected to span 140-400 after the call with data2
     (180-400), 50-400 after data3 (50-138) and to stay at 50-400 after
     data4 (5650-7100). The first two calls must return a true value and
     the last a false one.
     """
     # One attribute dict is shared by all four transcripts.
     attributes = {"ID": "tran0", "Name": "Tran_0", "locus_tag": "AAA_00001"}

     def transcript(start, end):
         # The transcripts differ only in their coordinates.
         fields = {"seq_id": "aaa", "source": "Refseq",
                   "feature": "transcript", "start": start, "end": end,
                   "phase": ".", "strand": "+", "score": "."}
         return Create_generator(fields, attributes, "gff")

     data1 = transcript(140, 367)
     data2 = transcript(180, 400)
     data3 = transcript(50, 138)
     data4 = transcript(5650, 7100)
     overlap = False

     # (other transcript, expected data1.start, expected data1.end)
     steps = ((data2, 140, 400), (data3, 50, 400), (data4, 50, 400))
     outcomes = []
     for other, want_start, want_end in steps:
         outcomes.append(cft.compare(data1, other, overlap, 5))
         self.assertEqual(data1.start, want_start)
         self.assertEqual(data1.end, want_end)

     overlap12, overlap13, overlap14 = outcomes
     self.assertTrue(overlap12)
     self.assertTrue(overlap13)
     self.assertFalse(overlap14)
 def test_sub_operon(self):
     """Check the first two entries returned by op.sub_operon.

     Two "+" strand TSS entries (140 and 200) and two further records
     (a CDS at 540-640 and a TSS-typed record at 166-198) are passed in.
     The expected result starts with 141-199 followed by 200-799.
     """
     tss_fields = [
         {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
          "start": 140, "end": 140, "phase": ".", "strand": "+",
          "score": "."},
         {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
          "start": 200, "end": 200, "phase": ".", "strand": "+",
          "score": "."},
     ]
     tss_attributes = [
         {"ID": "tss0", "Name": "TSS_0", "locus_tag": "AAA_00001"},
         {"ID": "tss1", "Name": "TSS_1", "locus_tag": "BBB_00001"},
     ]
     gene_fields = [
         {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
          "start": 540, "end": 640, "phase": ".", "strand": "+",
          "score": "."},
         {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
          "start": 166, "end": 198, "phase": ".", "strand": "+",
          "score": "."},
     ]
     gene_attributes = [
         {"ID": "tss0", "Name": "TSS_0", "locus_tag": "AAA_00001"},
         {"ID": "tss1", "Name": "TSS_1", "locus_tag": "BBB_00001"},
     ]

     tsss = {"with_feature": True, "num_feature": 2, "data_list": []}
     genes = {"data_list": []}
     # Gene entry first, then the TSS entry, for each index in turn.
     rows = zip(gene_fields, gene_attributes, tss_fields, tss_attributes)
     for g_fields, g_attrs, t_fields, t_attrs in rows:
         genes["data_list"].append(
             Create_generator(g_fields, g_attrs, "gff"))
         tsss["data_list"].append(
             Create_generator(t_fields, t_attrs, "gff"))

     operons = op.sub_operon("+", tsss, 141, 800, genes, 30)
     self.assertDictEqual(
         operons[0], {'end': 199, 'start': 141, 'strand': '+'})
     self.assertDictEqual(
         operons[1], {'end': 799, 'start': 200, 'strand': '+'})
class Example(object):
    """Fixture data on sequence "aaa": record lines, wiggle values, entries.

    The transcript, TSS and sRNA dicts are each wrapped into a one-element
    list of ``Create_generator`` objects (format tag "gff").
    """

    # Single tab-separated record lines.
    inter = """aaa	UTR_derived	sORF	2	6	.	+	.	ID=inter0;Name=inter_00000;UTR_type=3utr"""
    srna = """aaa	UTR_derived	sRNA	5	8	.	+	.	ID=aaa_srna0;Name=srna_00000;UTR_type=3utr"""
    tss = """aaa	tsspredator	TSS	1	1	.	+	.	ID=aaa_tss0;Name=TSS_00000"""

    # 24 values: the six-value pattern repeated four times.
    wigs = {
        "aaa": {
            "frag_1": {
                "track_1|+|frag": [100, 30, 23, 21, 21, 2] * 4,
            },
        },
    }

    ta_dict = [
        {"seq_id": "aaa", "source": "intergenic", "feature": "Transcript",
         "start": 1, "end": 23, "phase": ".", "strand": "+", "score": "."},
    ]
    attributes_tas = [{"ID": "tran0", "Name": "Transcript_0"}]
    tas = [Create_generator(ta_dict[0], attributes_tas[0], "gff")]

    tss_dict = [
        {"seq_id": "aaa", "source": "tsspredator", "feature": "TSS",
         "start": 1, "end": 1, "phase": ".", "strand": "+", "score": "."},
    ]
    attributes_tss = [{"ID": "tss0", "Name": "TSS_0"}]
    tsss = [Create_generator(tss_dict[0], attributes_tss[0], "gff")]

    srna_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "sRNA",
         "start": 5, "end": 8, "phase": ".", "strand": "+", "score": "."},
    ]
    attributes_srna = [{"ID": "srna0", "Name": "sRNA_0"}]
    srnas = [Create_generator(srna_dict[0], attributes_srna[0], "gff")]
示例#7
0
 def test_diff_strand_tss_gene(self):
     """Check mm.diff_strand_tss_gene for a "+" TSS and a "-" strand CDS.

     The first returned item is compared with an attribute string and the
     second with a dict. Both are expected to list AAA_00005 as an
     additional Antisense association.
     """
     tss = Create_generator(
         {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
          "start": 3, "end": 3, "phase": ".", "strand": "+", "score": "."},
         {"ID": "TSS0", "Name": "TSS_0", "type": "Primary,Internal",
          "associated_gene": "AAA_00001,AAA_00004",
          "utr_length": "Primary_25,Internal_NA"},
         "gff")
     tss_entry = [
         tss.attribute_string,
         {"utr_length": "Primary_25", "type": "Primary",
          "associated_gene": "AAA_00001"},
     ]
     anti_ends = {"forward": 1, "reverse": -1}
     gene_ends = {"forward": -1, "reverse": -1}
     gene = Create_generator(
         {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
          "start": 6, "end": 12, "phase": ".", "strand": "-", "score": "."},
         {"ID": "CDS0", "Name": "CDS_0", "locus_tag": "AAA_00005"},
         "gff")
     checks = {"orphan": False, "int_anti": False}

     result = mm.diff_strand_tss_gene(
         gene, tss, anti_ends, gene_ends, checks, tss_entry)

     expected_string = 'utr_length=Primary_25,Antisense_NA;associated_gene=AAA_00001,AAA_00005;type=Primary,Antisense;Name=TSS_3+'
     expected_attributes = {
         'Name': 'TSS_3+',
         'utr_length': 'Primary_25,Antisense_NA',
         'type': 'Primary,Antisense',
         'associated_gene': 'AAA_00001,AAA_00005',
     }
     self.assertEqual(result[0], expected_string)
     self.assertDictEqual(result[1], expected_attributes)
示例#8
0
 def test_get_circrna(self):
     """Check the "support", "circular" and "conflict" counts of get_circrna.

     Uses the first five circ fixtures and the first three gff fixtures of
     ``self.example`` with start/end ratio 0.3 and support 5.
     """
     circs = [
         Create_generator(self.example.circ_dict[position],
                          self.example.attributes_circ[position], "circ")
         for position in range(5)
     ]
     gffs = [
         Create_generator(self.example.gffs_dict[position],
                          self.example.attributes_gffs[position], "gff")
         for position in range(3)
     ]
     out = StringIO()
     out_best = StringIO()
     args = self.mock_args.mock()
     args.start_ratio = 0.3
     args.end_ratio = 0.3
     args.support = 5

     nums = circ.get_circrna(circs, gffs, 50, out, out_best, args)

     expected_support = {
         'aaa': {0: 2, 20: 1, 5: 2, 25: 1, 10: 2, 30: 1, 15: 1},
         'all': {0: 3, 20: 1, 5: 3, 25: 1, 10: 2, 30: 1, 15: 1},
         'bbb': {0: 1, 5: 1},
     }
     expected_circular = {'bbb': 1, 'aaa': 2, 'all': 3}
     expected_conflict = {
         'bbb': {0: 1, 5: 1},
         'aaa': {},
         'all': {0: 1, 5: 1},
     }
     self.assertDictEqual(nums["support"], expected_support)
     self.assertDictEqual(nums["circular"], expected_circular)
     self.assertDictEqual(nums["conflict"], expected_conflict)
示例#9
0
 def test_detect_coverage(self):
     """Check the two values returned by co.detect_coverage.

     A "+" strand TSS at position 2 and a reference TSS at position 3 are
     evaluated against ``self.example.wigs_f``. The expected results are
     100 for the TSS and 50 for the reference.
     """
     tss_entry = Create_generator(
         {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
          "start": 2, "end": 2, "phase": ".", "strand": "+", "score": "."},
         {"type": "Primary", "ID": "tss0", "Name": "TSS:2_+"},
         "gff")
     ref_entry = Create_generator(
         {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
          "start": 3, "end": 3, "phase": ".", "strand": "+", "score": "."},
         {"type": "Primary", "ID": "tss1", "Name": "TSS:3_+"},
         "gff")

     tss_diff, ref_diff = co.detect_coverage(
         self.example.wigs_f, tss_entry, ref_entry)

     self.assertEqual(tss_diff, 100)
     self.assertEqual(ref_diff, 50)
示例#10
0
 def test_detect_conflict(self):
     """Check the line circ.detect_conflict writes to ``out``.

     One circRNA record (100-467, support 30) is checked against the
     single CDS fixture of ``self.example`` with start/end ratio 0.3 and
     support 5.
     """
     circrna = Create_generator(
         {"seq_id": "aaa", "source": "Refseq", "feature": "circRNA",
          "start": 100, "end": 467, "phase": ".", "strand": "+",
          "score": ".", "support": 30, "start_site": 30, "end_site": 35,
          "situation": "P", "splice_type": "C"},
         {"ID": "circrna0", "Name": "circRNA_0"},
         "circ")
     cds_entry = Create_generator(
         self.example.cds_dict, self.example.attributes_cds, "gff")
     gffs = [cds_entry]

     args = self.mock_args.mock()
     args.start_ratio = 0.3
     args.end_ratio = 0.3
     args.support = 5
     out = StringIO()
     out_best = StringIO()

     circ.detect_conflict(gffs, circrna, 0, out, out_best, args)

     expected_line = "circRNA_0	aaa	+	100	467	AAA_00001	30	1.0	0.8571428571428571\n"
     self.assertEqual(out.getvalue(), expected_line)
     out.close()
示例#11
0
class Example(object):
    """Fixture data: CDS, transcript and TSS records on "aaa" and "bbb".

    Only the first three records of each list are turned into
    ``Create_generator`` objects (format tag "gff"). ``seq`` maps each
    sequence id to a nucleotide string.
    """

    gff_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
         "start": 3, "end": 30, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
         "start": 14, "end": 35, "phase": ".", "strand": "-", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
         "start": 37, "end": 55, "phase": ".", "strand": "-", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
         "start": 40, "end": 66, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "bbb", "source": "Refseq", "feature": "CDS",
         "start": 4, "end": 25, "phase": ".", "strand": "-", "score": "."},
    ]
    attributes_gff = [
        {"ID": "cds0", "Name": "CDS_0", "locus_tag": "AAA_00001"},
        {"ID": "cds1", "Name": "CDS_1", "locus_tag": "AAA_00002",
         "protein_id": "YP_500332.1"},
        {"ID": "cds2", "Name": "CDS_2"},
        {"ID": "cds3", "Name": "CDS_3", "locus_tag": "AAA_00003"},
        {"ID": "cds4", "Name": "CDS_4", "locus_tag": "BBB_00001"},
    ]

    ta_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "Transcript",
         "start": 1, "end": 367, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "Transcript",
         "start": 230, "end": 240, "phase": ".", "strand": "+",
         "score": "."},
        {"seq_id": "bbb", "source": "Refseq", "feature": "Transcript",
         "start": 430, "end": 5167, "phase": ".", "strand": "-",
         "score": "."},
    ]
    attributes_tas = [
        {"ID": "tran0", "Name": "Transcript_0", "locus_tag": "AAA_00001"},
        {"ID": "tran1", "Name": "Transcript_1", "locus_tag": "AAA_00002"},
        {"ID": "tran2", "Name": "Transcript_2", "locus_tag": "BBB_00001"},
    ]

    tss_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 2, "end": 2, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 230, "end": 230, "phase": ".", "strand": "+",
         "score": "."},
        {"seq_id": "bbb", "source": "Refseq", "feature": "TSS",
         "start": 5166, "end": 5166, "phase": ".", "strand": "-",
         "score": "."},
    ]
    attributes_tss = [
        {"ID": "tss0", "Name": "TSS_0", "type": "Primary",
         "associated_gene": "AAA_00001"},
        {"ID": "tss1", "Name": "TSS_1", "type": "Internal",
         "associated_gene": "AAA_00002"},
        {"ID": "tss2", "Name": "TSS_2", "type": "Orphan",
         "associated_gene": "orphan"},
    ]

    # Build the entry objects in lockstep, one of each kind per index.
    # An explicit loop is used because names defined in a class body are
    # not visible inside a comprehension's element expression.
    gffs, tas, tsss = [], [], []
    for index in range(3):
        gffs.append(
            Create_generator(gff_dict[index], attributes_gff[index], "gff"))
        tas.append(
            Create_generator(ta_dict[index], attributes_tas[index], "gff"))
        tsss.append(
            Create_generator(tss_dict[index], attributes_tss[index], "gff"))

    seq = {
        "aaa": "AAAATTATAGGCGTAGTAACCTCTTGATAGCGATGGATATAGACCCTTATAAGGCCTCTGATTAGAAAATAGGTAGGCCCCCGGGGGTGTGTAATAGATAGAT",
        "bbb": "ATATGTACCCCGCGCCGTATAGCTATAAATTCGCTGCTTATTTTATA",
    }
示例#12
0
class Example(object):
    """Fixture data: target and reference TSS sets on sequence "aaa".

    Both sets hold three "+" strand TSS records with a ``coverage``
    attribute. Every generated entry also gets ``attributes["print"]``
    set to False.
    """

    # Target TSS positions: 3, 24 and 1243.
    tar_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 3, "end": 3, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 24, "end": 24, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 1243, "end": 1243, "phase": ".", "strand": "+",
         "score": "."},
    ]
    attributes_tar = [
        {"coverage": "3", "ID": "tss1", "Name": "TSS:3_+"},
        {"coverage": "340", "ID": "tss2", "Name": "TSS:24_+"},
        {"coverage": "4440", "ID": "tss3", "Name": "TSS:1243_+"},
    ]

    # Reference TSS positions: 3, 333 and 1242.
    ref_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 3, "end": 3, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 333, "end": 333, "phase": ".", "strand": "+",
         "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 1242, "end": 1242, "phase": ".", "strand": "+",
         "score": "."},
    ]
    attributes_ref = [
        {"coverage": "3", "ID": "tss1", "Name": "TSS:3_+"},
        {"coverage": "330", "ID": "tss2", "Name": "TSS:333_+"},
        {"coverage": "1230", "ID": "tss3", "Name": "TSS:1242_+"},
    ]

    # An explicit loop is used because names defined in a class body are
    # not visible inside a comprehension's element expression.
    tars, refs = [], []
    for index in range(3):
        tars.append(
            Create_generator(tar_dict[index], attributes_tar[index], "gff"))
        # The entry just appended sits at position ``index``.
        tars[index].attributes["print"] = False
        refs.append(
            Create_generator(ref_dict[index], attributes_ref[index], "gff"))
        refs[index].attributes["print"] = False
示例#13
0
class Example(object):
    """Fixture data: sRNA/target interaction records, entries and text blobs.

    The sRNA and CDS dicts are turned into ``Create_generator`` objects
    (format tag "gff") while the class body executes.
    """

    # Interaction records keyed first by "RNAplex"/"RNAup", then by sRNA id.
    # Each record carries target, energy, rank, srna_pos and tar_pos.
    srnas = {"RNAplex": {"srna0": [{"target": "AAA_00001", "energy": -6.5, "rank": 1, "srna_pos": "2,10", "tar_pos": "10,15"},
                                   {"target": "AAA_00002|dnaA", "energy": -3.5, "rank": 2, "srna_pos": "2,7", "tar_pos": "3,15"}],
                         "srna1": [{"target": "AAA_00003", "energy": -10.5, "rank": 1, "srna_pos": "2,10", "tar_pos": "10,15"}],
                         "srna2": [{"target": "AAA_00001", "energy": -23.5, "rank": 1, "srna_pos": "2,10", "tar_pos": "10,15"},
                                   {"target": "AAA_00002|dnaA", "energy": -3.43, "rank": 3, "srna_pos": "2,10", "tar_pos": "10,15"},
                                   {"target": "AAA_00003", "energy": -6.5, "rank": 2, "srna_pos": "2,10", "tar_pos": "10,15"}]},
             "RNAup": {"srna0": [{"target": "AAA_00001", "energy": -6.5, "rank": 1, "srna_pos": "2,10", "tar_pos": "10,15"},
                                 {"target": "AAA_00002|dnaA", "energy": -3.5, "rank": 2, "srna_pos": "2,10", "tar_pos": "10,15"}],
                       "srna1": [{"target": "AAA_00003", "energy": -10.5, "rank": 1, "srna_pos": "2,10", "tar_pos": "10,15"}],
                       "srna2": [{"target": "AAA_00001", "energy": -23.5, "rank": 1, "srna_pos": "2,10", "tar_pos": "10,15"}]}}
    # Three sRNA records on "aaa" (two on "+", one on "-").
    srna_dict = [{"start": 6, "end": 15, "phase": ".",
                  "strand": "+", "seq_id": "aaa", "score": ".",
                  "source": "Refseq", "feature": "sRNA"},
                 {"start": 1258, "end": 2234, "phase": ".",
                  "strand": "+", "seq_id": "aaa", "score": ".",
                  "source": "Refseq", "feature": "sRNA"},
                 {"start": 3544, "end": 6517, "phase": ".",
                  "strand": "-", "seq_id": "aaa", "score": ".",
                  "source": "Refseq", "feature": "sRNA"}]
    attributes_srna = [{"ID": "srna0", "Name": "sRNA_0"},
                       {"ID": "srna1", "Name": "sRNA_1"},
                       {"ID": "srna2", "Name": "sRNA_2"}]
    # Three CDS records on "aaa"; the first two share the ID "cds0".
    gff_dict = [{"start": 100, "end": 150, "phase": ".",
                 "strand": "+", "seq_id": "aaa", "score": ".",
                 "source": "Refseq", "feature": "CDS"},
                {"start": 2348, "end": 2934, "phase": ".",
                 "strand": "+", "seq_id": "aaa", "score": ".",
                 "source": "Refseq", "feature": "CDS"},
                {"start": 5544, "end": 5597, "phase": ".",
                 "strand": "-", "seq_id": "aaa", "score": ".",
                 "source": "Refseq", "feature": "CDS"}]
    attributes_gff = [{"ID": "cds0", "Name": "CDS_0", "locus_tag": "AAA_00001"},
                      {"ID": "cds0", "Name": "CDS_1", "locus_tag": "AAA_00002"},
                      {"ID": "cds1", "Name": "CDS_2", "locus_tag": "AAA_00003"}]
    srna_gffs = []
    gffs = []
    # Pair each record dict with its attribute dict by index.
    for index in range(0, 3):
        srna_gffs.append(Create_generator(srna_dict[index], attributes_srna[index], "gff"))
        gffs.append(Create_generator(gff_dict[index], attributes_gff[index], "gff"))    
    # Text blobs below are whitespace-sensitive (tabs and double spaces).
    # NOTE(review): presumably sample tool output and expected report text
    # for tests elsewhere in the file -- confirm against their users.
    out_rna_txt = """>SAOUHSC_00001|dnaA
>srna1023
((((((&)))))) 571,576 :  20,25  (-5.30 = -7.89 +  0.18 +  2.41)"""
    # Tab-separated table: one header row followed by five data rows.
    out_print = """sRNA	strain	sRNA_position	sRNA_interacted_position_RNAplex	sRNA_strand	target	target_position	target_interacted_position_RNAplex	target_strand	energy_RNAplex	rank_RNAplex
sRNA_1	aaa	1258-2234	1259-1267	+	AAA_00003	5544-5597	5550-5545	-	-10.5	1
sRNA_2	aaa	3544-6517	6508-6516	-	AAA_00001	100-150	89-94	+	-23.5	1
sRNA_2	aaa	3544-6517	6508-6516	-	AAA_00003	5544-5597	5550-5545	-	-6.5	2
sRNA_0	aaa	6-15	7-15	+	AAA_00001	100-150	89-94	+	-6.5	1
sRNA_0	aaa	6-15	7-12	+	AAA_00002|dnaA	2348-2934	2330-2342	+	-3.5	2
"""
    rnaup = """>srna1023
>SAOUHSC_00001|dnaA
.(((((&))))). 571,576 :  20,25  (-4.87 = -8.00 + 0.31 + 2.81)
AACCUC&GGGGUU
>SAOUHSC_00002
(((..((((((((((((&)))))))))))).)))  14,30  :  11,26  (-5.91 = -13.15 + 4.20 + 3.05)
GAAGAUCCUAUUUUUAA&UUAAAAAUGGGGGUUC
"""
    rnaplex = """>SAOUHSC_00001|dnaA
示例#14
0
 def read_file(self, gff_file, input_file, hypo):
     """Stand-in reader that ignores its arguments and serves fixture data.

     Rebuilds ``self.circs`` from the first five circ fixtures and
     ``self.gffs`` from the first three gff fixtures of ``self.example``.

     Returns:
         The tuple ``(self.circs, self.gffs, 50)``.
     """
     self.circs = []
     self.gffs = []
     # ``extend`` consumes the generators item by item, so the lists grow
     # one entry at a time.
     self.circs.extend(
         Create_generator(self.example.circ_dict[position],
                          self.example.attributes_circ[position], "circ")
         for position in range(5))
     self.gffs.extend(
         Create_generator(self.example.gffs_dict[position],
                          self.example.attributes_gffs[position], "gff")
         for position in range(3))
     return self.circs, self.gffs, 50
示例#15
0
class Example(object):
    """Static fixture data (sequence, GFF text, gene/CDS entries, FASTA text).

    Everything is defined at class level, so the ``Create_generator`` calls
    below run once, when the class body is executed.
    """

    # Single FASTA record named "aaa"; the sequence matches ``fasta`` below.
    seq_file = """>aaa
AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC"""
    # GFF-style text with two gene/CDS pairs. The separators are written as
    # the two characters backslash + "t" inside a plain (non-raw) string, so
    # Python turns them into real tab characters.
    # NOTE(review): each protein_id value has an opening double quote but no
    # closing one -- confirm this is intentional.
    gff_file = """aaa\tRefseq\tgene\t1\t12\t.\t+\t.\tID=gene_0;Name=GENE_0;locus_tag=AAA_00001
aaa\tRefseq\tCDS\t1\t12\t.\t+\t.\tID=cds_0;Name=CDS_0;locus_tag=AAA_00001;protein_id="YP.00001
aaa\tRefseq\tgene\t14\t34\t.\t-\t.\tID=gene_1;Name=gene_1;locus_tag=AAA_00002
aaa\tRefseq\tCDS\t14\t34\t.\t-\t.\tID=cds_1;Name=CDS_1;locus_tag=AAA_00002;protein_id="YP.00002"""
    # Four gene records: two on "+" (1-10, 12-23) and two on "-" (25-30, 33-43).
    gene_dict = [{"seq_id": "aaa", "source": "Refseq", "feature": "gene", "start": 1,
                  "end": 10, "phase": ".", "strand": "+", "score": "."},
                 {"seq_id": "aaa", "source": "Refseq", "feature": "gene", "start": 12,
                  "end": 23, "phase": ".", "strand": "+", "score": "."},
                 {"seq_id": "aaa", "source": "Refseq", "feature": "gene", "start": 25,
                  "end": 30, "phase": ".", "strand": "-", "score": "."},
                 {"seq_id": "aaa", "source": "Refseq", "feature": "gene", "start": 33,
                  "end": 43, "phase": ".", "strand": "-", "score": "."}]
    # One CDS and one rRNA record sharing coordinates with the first two genes.
    cdsf_dict = [{"seq_id": "aaa", "source": "Refseq", "feature": "CDS", "start": 1,
                  "end": 10, "phase": ".", "strand": "+", "score": "."},
                 {"seq_id": "aaa", "source": "Refseq", "feature": "rRNA", "start": 12,
                  "end": 23, "phase": ".", "strand": "+", "score": "."}]
    # One CDS and one rRNA record sharing coordinates with the last two genes.
    # NOTE(review): these use strand "+" while the genes at the same
    # coordinates use "-" -- confirm this is intended.
    cdsr_dict = [{"seq_id": "aaa", "source": "Refseq", "feature": "CDS", "start": 25,
                  "end": 30, "phase": ".", "strand": "+", "score": "."},
                 {"seq_id": "aaa", "source": "Refseq", "feature": "rRNA", "start": 33,
                  "end": 43, "phase": ".", "strand": "+", "score": "."}]
    # Attribute dicts, index-aligned with gene_dict / cdsf_dict / cdsr_dict.
    attributes_gene = [{"ID": "gene0", "Name": "danA", "locus_tag": "AAA_00001"},
                       {"ID": "gene1", "Name": "AAA_00002", "locus_tag": "AAA_00002"},
                       {"ID": "gene2", "Name": "AAA_00003", "locus_tag": "AAA_00003"},
                       {"ID": "gene3", "Name": "hrcA", "locus_tag": "AAA_00004"}]
    attributes_cdsf = [{"ID": "cds0", "Name": "CDS_0", "locus_tag": "AAA_00001", "protein_id": "YP_000001", "Parent": "gene0"},
                      {"ID": "cds1", "Name": "CDS_1", "locus_tag": "AAA_00002"}]
    attributes_cdsr = [{"ID": "cds2", "Name": "CDS_2", "locus_tag": "AAA_00003", "protein_id": "YP_000004", "Parent": "gene2"},
                      {"ID": "cds3", "Name": "CDS_3", "locus_tag": "AAA_00004"}]
    # Bare sequence, identical to the sequence line of ``seq_file``.
    fasta = "AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC"
    # Entry objects built from the dict/attribute pairs above.
    genes = []
    cdss_f = []
    cdss_r = []
    # The loop variable ``index`` is left behind as a class attribute.
    for index in range(0, 2):
        cdss_f.append(Create_generator(cdsf_dict[index], attributes_cdsf[index], "gff"))
        cdss_r.append(Create_generator(cdsr_dict[index], attributes_cdsr[index], "gff"))
    for index in range(0, 4):
        genes.append(Create_generator(gene_dict[index], attributes_gene[index], "gff"))

    # FASTA-formatted text; every record carries the whole ``fasta`` sequence.
    # Presumably the expected output of the code under test -- verify
    # against the tests that read these attributes.
    cdsf_result = """>AAA_00001|danA
AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC
>AAA_00002|CDS_1
AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC"""
    cdsr_result = """>AAA_00003
AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC
>AAA_00004|CDS_3
AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC"""
    all_result = """>AAA_00001|CDS_0
示例#16
0
 def test_detect_overlap(self):
     """``ms.detect_overlap`` is truthy for an identical span, falsy for a distant one.

     The reference entry covers 3-33; it is compared in "UTR" mode with an
     entry on the same span (different ``sRNA_type``) and with one on 53-233.
     """
     def srna_record(begin, stop):
         return {"seq_id": "aaa", "source": "Refseq", "feature": "sRNA",
                 "start": begin, "end": stop, "phase": ".", "strand": "+",
                 "score": "."}

     def srna_attributes(kind):
         return {"ID": "sRNA0", "Name": "srna_0", "sRNA_type": kind}

     reference = Create_generator(srna_record(3, 33),
                                  srna_attributes("5utr"), "gff")
     same_span = Create_generator(srna_record(3, 33),
                                  srna_attributes("3utr"), "gff")
     distant = Create_generator(srna_record(53, 233),
                                srna_attributes("5utr"), "gff")
     # Each call starts from a fresh ``False`` flag.
     self.assertTrue(ms.detect_overlap(same_span, reference, "UTR", False))
     self.assertFalse(ms.detect_overlap(distant, reference, "UTR", False))
示例#17
0
 def test_get_feature(self):
     """``circ.get_feature`` result for three progressively sparser attribute sets.

     Expected values: the locus_tag when present, otherwise the protein_id,
     otherwise the string "cds0:122-267_f".
     """
     scenarios = (
         ({"ID": "cds0", "Name": "CDS_0", "locus_tag": "AAA_00001",
           "protein_id": "YP_918384.3"}, "AAA_00001"),
         ({"ID": "cds0", "Name": "CDS_0", "protein_id": "YP_918384.3"},
          "YP_918384.3"),
         ({"ID": "cds0", "Name": "CDS_0"}, "cds0:122-267_f"),
     )
     for cds_attributes, expected in scenarios:
         entry = Create_generator(self.example.cds_dict, cds_attributes, "gff")
         self.assertEqual(circ.get_feature(entry), expected)
示例#18
0
 def test_gen_batch(self):
     """Output written by ``gs.gen_batch`` must equal ``example.out_print_wig``.

     ``gs.import_wig`` is replaced by the mock before the call; this method
     does not put the original back.
     """
     gs.import_wig = Mock_func().mock_import_wig
     buffer = StringIO()
     cds = Create_generator(
         {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
          "start": 3, "end": 6, "phase": ".", "strand": "+", "score": "."},
         {"ID": "CDS0", "Name": "CDS_0", "locus_tag": "AAA_00001"},
         "gff")
     genome = {"aaa": "ATATGGCCGACGAGTTCGACGATACAACCCGTGGGG"}
     gs.gen_batch("wig1 wig2", "wig3 wig4", "wig5", "+", [cds], buffer,
                  genome)
     self.assertEqual(buffer.getvalue(), self.example.out_print_wig)
示例#19
0
 def test_set_cutoff(self):
     """``gea.set_cutoff`` returns the expected ``(diff, cutoff_percent)`` pairs.

     All three calls share one CDS entry spanning 3-102 and a ``detects``
     dict whose "express" count is 100.
     """
     detects = {"express": 100}
     cds = Create_generator(
         {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
          "start": 3, "end": 102, "phase": ".", "strand": "+", "score": "."},
         {"ID": "CDS0", "Name": "CDS_0", "locus_tag": "AAA_00001"},
         "gff")
     # (first three positional arguments, expected diff, expected percent)
     scenarios = (
         ("tex", "all", "all", 100, 0),
         ("frag", "all", "n_50", 100, 50),
         ("tex", "p_0.5", "n_50", 1.0, 0.5),
     )
     for first, second, third, want_diff, want_percent in scenarios:
         diff, cutoff_percent = gea.set_cutoff(first, second, third,
                                               detects, cds)
         self.assertEqual(diff, want_diff)
         self.assertEqual(cutoff_percent, want_percent)
示例#20
0
 def test_detect_inter_type(self):
     """Check ``sd.detect_inter_type`` on one transcript with ``UTR_type`` "3utr".

     ``sd.get_coverage`` is swapped for ``self.mock.mock_get_coverage`` while
     the function under test runs. The swap is undone in a ``finally``
     clause so that a failing assertion cannot leave the mock installed for
     the tests that run afterwards.
     """
     inter_dict = {
         "seq_id": "aaa",
         "source": "UTR_derived",
         "feature": "Transcript",
         "start": 1,
         "end": 23,
         "phase": ".",
         "strand": "+",
         "score": "."
     }
     attributes_inter = {
         "ID": "tran0",
         "Name": "Transcript_0",
         "UTR_type": "3utr"
     }
     inters = [Create_generator(inter_dict, attributes_inter, "gff")]
     wigs = {"forward": "wigs_f", "reverse": "wigs_r"}
     sd.get_coverage = self.mock.mock_get_coverage
     try:
         data = sd.detect_inter_type(inters, wigs, "test")
         self.assertDictEqual(
             data, {'aaa': {
                 'interCDS': [],
                 '5utr': [],
                 '3utr': ['2']
             }})
     finally:
         # Reinstall from the module-level ``get_coverage`` whether or not
         # the assertion passed.
         sd.get_coverage = copy.deepcopy(get_coverage)
示例#21
0
 def test_detect_express(self):
     """``gea.detect_express`` must update ``detects`` in place as expected.

     After the call on the "track_1" fragment coverage, "track" is 1 and
     "express" is 2; "cond" and "import" keep their initial values.
     """
     cds = Create_generator(
         {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
          "start": 3, "end": 5, "phase": ".", "strand": "+", "score": "."},
         {"ID": "CDS0", "Name": "CDS_0", "locus_tag": "AAA_00001"},
         "gff")
     tex_counts = {"tex1_tex2": 0}
     plot_data = {"frag": {}}
     detects = {"cond": 0, "track": 0, "import": False, "express": 0}
     coverage = self.example.wig_frags["aaa"]["frag"]["track_1"]
     gea.detect_express(coverage, cds, 5, detects, "all", "all", tex_counts,
                        "frag", 2, "track_1", plot_data, "high", "frag")
     expected = {'track': 1, 'import': False, 'cond': 0, 'express': 2}
     self.assertDictEqual(expected, detects)
示例#22
0
 def test_fix_primary_type(self):
     """``co.fix_primary_type`` yields the expected set of ``UTR_length`` values.

     Three "+" strand TSS entries at positions 2, 3 and 4 are passed in with
     the forward/reverse wig fixtures from ``self.example``.
     """
     def tss_record(position):
         return {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
                 "start": position, "end": position, "phase": ".",
                 "strand": "+", "score": "."}

     attribute_sets = (
         {"type": "Primary", "ID": "tss0", "Name": "TSS:2_+",
          "UTR_length": "Primary_10", "associated_gene": "AAA_00001"},
         {"type": "Primary&Internal", "ID": "tss1", "Name": "TSS:3_+",
          "UTR_length": "Primary_20&Internal_NA",
          "associated_gene": "AAA_00001&AAA_00005"},
         {"type": "Primary&Primary", "ID": "tss2", "Name": "TSS:4_+",
          "UTR_length": "Primary_40&Primary_60",
          "associated_gene": "AAA_00001&AAA_00004"},
     )
     tsss = [Create_generator(tss_record(position), attrs, "gff")
             for position, attrs in zip((2, 3, 4), attribute_sets)]
     new_tsss = co.fix_primary_type(tsss, self.example.wigs_f,
                                    self.example.wigs_r)
     observed = {tss.attributes["UTR_length"] for tss in new_tsss}
     self.assertEqual(
         observed,
         {"Internal_NA&Secondary_20", "Primary_60", "Primary_10"})
示例#23
0
 def mock_read_gff(self, srna_file, data_type):
     """Mock reader: ignore both arguments and return two sRNA fixture entries.

     The entries are built from the first two items of
     ``self.example.srna_dict`` / ``self.example.attributes_srna``.
     """
     return [
         Create_generator(self.example.srna_dict[pos],
                          self.example.attributes_srna[pos], "gff")
         for pos in range(2)
     ]
class Example(object):
    """Fixture data: one coverage list and one transcript entry."""

    # 24 coverage values (a six-value pattern repeated four times), nested
    # under the keys "aaa" -> "frag_1" -> "track_1|+|frag".
    wigs = {
        "aaa": {"frag_1": {"track_1|+|frag": [100, 30, 23, 21, 21, 2] * 4}}
    }
    ta_dict = [
        {"seq_id": "aaa", "source": "ANNOgesic", "feature": "Transcript",
         "start": 4, "end": 20, "phase": ".", "strand": "+", "score": "."},
    ]
    attributes_tas = [
        {"ID": "tran0", "Name": "Transcript_0",
         "detect_lib": "fragmented&tex_notex"},
    ]
    # Built with an explicit loop: class-level names are not visible inside
    # a comprehension body, and the loop also leaves ``index`` behind as a
    # class attribute.
    tas = []
    for index in range(1):
        tas.append(
            Create_generator(ta_dict[index], attributes_tas[index], "gff"))
示例#25
0
 def test_compare_tran(self):
     """``c_gff.compare_tran`` writes one CDS line parented to "tran0".

     The text written to the buffer is split by ``extract_info``; the
     distinct data columns and the distinct "Parent" attributes are then
     compared with the expected single values.
     """
     buffer = StringIO()
     gffs = read_dict(3, self.example.gff_dict, self.example.attributes_gff)
     tran = Create_generator(
         {"seq_id": "aaa", "source": "Refseq", "feature": "Transcript",
          "start": 100, "end": 500, "phase": ".", "strand": "+",
          "score": "."},
         {"ID": "tran0", "Name": "Tran_0"},
         "gff")
     c_gff.compare_tran(gffs, tran, buffer)
     datas, attributes = extract_info(buffer.getvalue(), "string")
     parents = {element
                for attribute in attributes
                for element in attribute
                if "Parent" in element}
     self.assertEqual(set(datas),
                      {"aaa\tRefseq\tCDS\t160\t300\t.\t+\t."})
     self.assertEqual(parents, {"Parent=tran0"})
     buffer.close()
示例#26
0
 def test_fix_primary_type(self):
     """``mm.fix_primary_type`` rewrites the ``type`` attribute in place.

     Two "+" strand TSS entries (positions 3 and 5) are passed in together
     with six coverage points on "track_1"; afterwards the first entry must
     have type "Primary" and the second "Antisense,Secondary".
     """
     coverages = (200, 300, 400, 600, 650, 655)
     wigs = {"aaa": {"track_1": [
         {"pos": position, "coverage": value}
         for position, value in enumerate(coverages, 1)]}}

     def tss_record(position):
         return {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
                 "start": position, "end": position, "phase": ".",
                 "strand": "+", "score": "."}

     attribute_sets = (
         {"ID": "CDS0", "Name": "CDS_0", "type": "Primary,Primary",
          "associated_gene": "AAA_00001,AAA_00002",
          "utr_length": "Primary_25,Primary_200"},
         {"ID": "CDS1", "Name": "CDS_1", "type": "Primary,Antisense",
          "associated_gene": "AAA_00001,AAA_00004",
          "utr_length": "Primary_27,Antisense_NA"},
     )
     tsss = [Create_generator(tss_record(position), attrs, "gff")
             for position, attrs in zip((3, 5), attribute_sets)]
     mm.fix_primary_type(tsss, wigs, "test")
     first, second = tsss[0], tsss[1]
     self.assertEqual(first.attributes["type"], "Primary")
     self.assertEqual(second.attributes["type"], "Antisense,Secondary")
 def entries(self, fh):
     """Mock parser: yield fixture entries selected by the lines of ``fh``.

     Every line of ``fh`` is scanned for one of the keywords "gff", "tran",
     "term", "tss", "utr5" or "utr3" (checked in that order; the first
     keyword found in a line wins). The last line that contains a keyword
     decides which ``self.example`` fixture lists are used, and the entries
     are yielded only after the whole of ``fh`` has been read.

     When ``fh`` is empty or no line contains a keyword, nothing is yielded
     (rather than failing on unbound local variables).

     Args:
         fh: iterable of strings.

     Yields:
         ``Create_generator(record, attributes, "gff")`` for each of the
         first ``num`` record/attribute pairs of the selected fixture.
     """
     # (keyword, number of entries to yield). The keyword doubles as the stem
     # of the fixture names ``<stem>_dict`` and ``attributes_<stem>``.
     fixtures = (("gff", 3), ("tran", 3), ("term", 3),
                 ("tss", 3), ("utr5", 2), ("utr3", 2))
     # Safe defaults so that input without any keyword yields nothing.
     lists, attributes, num = [], [], 0
     for line in fh:
         for stem, count in fixtures:
             if stem in line:
                 lists = getattr(self.example, stem + "_dict")
                 attributes = getattr(self.example, "attributes_" + stem)
                 num = count
                 break
     for index in range(num):
         yield Create_generator(lists[index], attributes[index], "gff")
示例#28
0
 def test_compare_term(self):
     """``du.compare_term`` returns an object whose ``start`` is 530.

     The query entry spans 138-540 on the "+" strand and is compared with
     ``self.example.terms`` using 5 as the third argument.
     """
     query = Create_generator(
         {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
          "start": 138, "end": 540, "phase": ".", "strand": "+",
          "score": "."},
         {"ID": "tran0", "Name": "Transcript_0"},
         "gff")
     matched = du.compare_term(query, self.example.terms, 5)
     self.assertEqual(matched.start, 530)
示例#29
0
 def test_compare_wigs(self):
     """``gea.compare_wigs`` must update the "CDS" statistics in place.

     Both the "total" and the "aaa" counters start at zero; after the call
     each must report 1 for "tex", "all" and "least_one" and 0 for "none"
     and "total".
     """
     def zeroed():
         return {"total": 0, "least_one": 0, "all": 0, "none": 0}

     def expected_counts():
         return {'tex': 1, 'none': 0, 'total': 0, 'all': 1, 'least_one': 1}

     cds = Create_generator(
         {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
          "start": 3, "end": 5, "phase": ".", "strand": "+", "score": "."},
         {"ID": "CDS0", "Name": "CDS_0", "locus_tag": "AAA_00001"},
         "gff")
     tex_counts = {"tex1_tex2": 0}
     replicates = {"tex": 1, "frag": 1}
     stats = {"CDS": {"total": zeroed(), "aaa": zeroed()}}
     outs = {"CDS": {"least_one": [], "all": [], "none": []}}
     plot_data = {}
     gea.compare_wigs(self.example.wig_texs, cds, 2, tex_counts, replicates,
                      stats["CDS"], outs["CDS"], plot_data, "high", 5,
                      "all", "all")
     self.assertDictEqual(
         stats,
         {'CDS': {'total': expected_counts(), 'aaa': expected_counts()}})
示例#30
0
 def test_del_repeat(self):
     """``co.del_repeat`` edits the ``UTR_length`` attributes in place.

     Three TSS entries (2 "+", 22 "+", 122 "-") go in; afterwards the set of
     their ``UTR_length`` values must match the expected set.
     """
     def tss_record(position, strand):
         return {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
                 "start": position, "end": position, "phase": ".",
                 "strand": strand, "score": "."}

     placements = ((2, "+"), (22, "+"), (122, "-"))
     attribute_sets = (
         {"type": "Primary", "ID": "tss0", "Name": "TSS:2_+",
          "UTR_length": "Primary_100", "associated_gene": "AAA_00001"},
         {"type": "Primary&Primary", "ID": "tss1", "Name": "TSS:22_+",
          "UTR_length": "Primary_20&Primary_50",
          "associated_gene": "AAA_00004&AAA_00005"},
         {"type": "Secondary&Internal", "ID": "tss2", "Name": "TSS:122_-",
          "UTR_length": "Secondary_220&Internal_NA",
          "associated_gene": "AAA_00008&AAA_00009"},
     )
     tsss = [Create_generator(tss_record(position, strand), attrs, "gff")
             for (position, strand), attrs in zip(placements, attribute_sets)]
     co.del_repeat(tsss)
     observed = {tss.attributes["UTR_length"] for tss in tsss}
     self.assertEqual(
         observed,
         {"Primary_100", "Primary_20", "Internal_NA&Secondary_220"})