示例#1
0
 def test_len(self):
     """Gene.len() counts both the start and the end position."""
     # Each case is (start, end, expected length); the coordinates are
     # handed to the constructor as strings.
     cases = (
         ("1", "10", 10),   # positions 1..10 -> ten bases
         ("10", "20", 11),  # positions 10..20 -> eleven bases
     )
     for start, end, expected_length in cases:
         gene = Gene("chr1", "a", "b", start, end, "+")
         self.assertEqual(gene.len(), expected_length)
示例#2
0
 def test_set_type_of_5_prime_tss(self):
     """Of two 5' TSS hits of one gene, the one at distance 10 is typed
     primary and the one at distance 20 secondary."""
     mapper = self.tss_gene_mapper
     mapper.genes_and_5_prime_tss = {}
     mapper.tss_and_hit_genes = {}
     tss_primary = TSS("genomeX", 40, "+")
     tss_secondary = TSS("genomeX", 30, "+")
     gene = Gene("genomeX", "g", "g", 50, 100, "+")
     # [distance, TSS] pairs registered for the gene.
     hits = [[10, tss_primary], [20, tss_secondary]]
     mapper.genes_and_5_prime_tss[gene] = hits
     for _, tss in hits:
         mapper.tss_and_hit_genes[tss] = {}
     # Both hits start out without a type; the method under test fills
     # it in.
     for distance, tss in hits:
         mapper.tss_and_hit_genes[tss][gene] = {
             "location": tssgenemapper.loc_5_prime_str,
             "orientation": tssgenemapper.sense_str,
             "distance": distance,
             "tss_type": None,
         }
     mapper._set_type_of_5_prime_tss()
     expected_types = (
         (tss_primary, tssgenemapper.primary_str),
         (tss_secondary, tssgenemapper.secondary_str),
     )
     for tss, expected_type in expected_types:
         self.assertEqual(
             mapper.tss_and_hit_genes[tss][gene]["tss_type"],
             expected_type)
示例#3
0
 def test_init_without_extra(self):
     """A Gene built without ``extra`` keeps its fields and gets no
     ``extra`` attribute."""
     gene = Gene("chr1", "gene_2342", "hacY", "3", "83", "+")
     # Start and end go in as strings and are expected back as numbers.
     expected_fields = (
         ("seq_id", "chr1"),
         ("gene_id", "gene_2342"),
         ("name", "hacY"),
         ("start", 3),
         ("end", 83),
         ("strand", "+"),
     )
     for attribute, expected in expected_fields:
         self.assertEqual(getattr(gene, attribute), expected)
     self.assertFalse(hasattr(gene, "extra"))
示例#4
0
 def test_init_with_extra(self):
     """Extra keyword arguments of the constructor become attributes."""
     gene = Gene("chr2", "gene_0005", "hacZ", "15", "30", "-", extra="mope")
     # Start and end go in as strings and are expected back as numbers.
     expected_fields = (
         ("seq_id", "chr2"),
         ("gene_id", "gene_0005"),
         ("name", "hacZ"),
         ("start", 15),
         ("end", 30),
         ("strand", "-"),
         ("extra", "mope"),
     )
     for attribute, expected in expected_fields:
         self.assertEqual(getattr(gene, attribute), expected)
示例#5
0
 def create_gene_list(self):
     """Fill ``self.gene_list`` with one Gene per "gene" entry of the GFF.

     Entries are read from ``self.gff_fh``; the gene id is taken from the
     ``locus_tag`` attribute and the name from the ``Name`` attribute.
     """
     self.gene_list = []
     parser = Gff3Parser()
     for record in parser.entries(self.gff_fh):
         # Only entries whose feature is "gene" are kept.
         if record.feature == "gene":
             locus_tag = record.attributes["locus_tag"]
             name = record.attributes["Name"]
             new_gene = Gene(
                 record.seq_id, locus_tag, name,
                 record.start, record.end, record.strand)
             self.gene_list.append(new_gene)
示例#6
0
 def _try_gene_merge(self, gene, row):
     """Replace the gene stored in ``row`` by its merge with ``gene``.

     Nothing happens unless the two genes overlap sufficiently and are not
     identical. Otherwise the row is removed and a gene spanning both is
     stored; its id (also used as its name) combines both gene ids.
     """
     stored_gene = self._sql_row_to_gene(row)
     # The identity check only runs once sufficient overlap is confirmed.
     if (self._have_sufficient_overlap(gene, stored_gene) is not True
             or self._genes_are_identical(gene, stored_gene) is True):
         return
     merged_start = min(gene.start, stored_gene.start)
     merged_end = max(gene.end, stored_gene.end)
     merged_id = "%s_merged_with_%s" % (stored_gene.gene_id, gene.gene_id)
     self._remove_row(row)
     merged_gene = Gene(
         gene.seq_id, merged_id, merged_id, merged_start, merged_end,
         gene.strand)
     self._store_gene_in_db(merged_gene)
示例#7
0
 def test_len(self):
     """The length of a gene includes both of its end coordinates."""
     # Positions 1..10 cover ten bases.
     first_gene = Gene("chr1", "a", "b", "1", "10", "+")
     first_length = first_gene.len()
     self.assertEqual(first_length, 10)
     # Positions 10..20 cover eleven bases.
     second_gene = Gene("chr1", "a", "b", "10", "20", "+")
     second_length = second_gene.len()
     self.assertEqual(second_length, 11)
示例#8
0
 def _try_multi_gene_merge(self, gene, rows):
     """Merge ``gene`` with all sufficiently overlapping genes in ``rows``.

     Every row whose gene overlaps ``gene`` sufficiently is removed, and a
     single gene spanning ``gene`` and all of those genes is stored. Its
     id (also used as its name) is the ids of the merged genes followed by
     the id of ``gene``, joined by ``"_merged_with_"``. Sequence id and
     strand come from ``gene``.

     If no row overlaps sufficiently, nothing is removed and a gene with
     the coordinates and id of ``gene`` is stored.
     """
     # Pair every row with its gene in one pass so that ``rows`` is only
     # iterated once (a one-shot iterable would otherwise be exhausted
     # before the pairing).
     candidates = [(self._sql_row_to_gene(row), row) for row in rows]
     merge_partners = []
     rows_to_remove = []
     for candidate, row in candidates:
         if self._have_sufficient_overlap(gene, candidate) is not True:
             continue
         merge_partners.append(candidate)
         rows_to_remove.append(row)
     for row in rows_to_remove:
         self._remove_row(row)
     # The loop variable is deliberately not called ``gene`` so that the
     # ``gene`` parameter is never shadowed or rebound.
     start = min(
         [gene.start] + [partner.start for partner in merge_partners])
     end = max([gene.end] + [partner.end for partner in merge_partners])
     gene_id = "_merged_with_".join(
         [partner.gene_id for partner in merge_partners] + [gene.gene_id])
     self._store_gene_in_db(
         Gene(gene.seq_id, gene_id, gene_id, start, end, gene.strand))
示例#9
0
 def test_init_start_end_sorting(self):
     """Coordinates passed in descending order come out ascending."""
     reversed_gene = Gene("chr1", "gene_2342", "hacY", "1000", "5", "+")
     lower, upper = reversed_gene.start, reversed_gene.end
     self.assertEqual(lower, 5)
     self.assertEqual(upper, 1000)
示例#10
0
 def test_has_5_prime_association_10(self):
     """Expect None: the TSS lies in the 3' region of the gene."""
     tss_in_3_prime = TSS("genomeX", 5, "-")
     minus_gene = Gene("genomeX", "g", "g", 10, 100, "-")
     outcome = self.tss_gene_mapper._has_5_prime_association(
         tss_in_3_prime, minus_gene)
     self.assertEqual(outcome, None)
示例#11
0
 def test_has_5_prime_association_9(self):
     """Expect True: the TSS lies in the 5' region of a minus-strand gene."""
     tss_in_5_prime = TSS("genomeX", 105, "-")
     minus_gene = Gene("genomeX", "g", "g", 10, 100, "-")
     outcome = self.tss_gene_mapper._has_5_prime_association(
         tss_in_5_prime, minus_gene)
     self.assertEqual(outcome, True)
示例#12
0
 def test_has_5_prime_association_8(self):
     """Expect None: the TSS is antisense to the gene."""
     plus_tss = TSS("genomeX", 600, "+")
     minus_gene = Gene("genomeX", "g", "g", 10, 100, "-")
     outcome = self.tss_gene_mapper._has_5_prime_association(
         plus_tss, minus_gene)
     self.assertEqual(outcome, None)
示例#13
0
 def test_has_antisense_association_8(self):
     """Expect True: a minus-strand TSS at 200 is antisense to the gene."""
     minus_tss = TSS("genomeX", 200, "-")
     plus_gene = Gene("genomeX", "g", "g", 10, 100, "+")
     outcome = self.tss_gene_mapper._has_antisense_association(
         minus_tss, plus_gene)
     self.assertEqual(outcome, True)
示例#14
0
 def _merge_genes(self, gene_1, gene_2):
     """Return a new Gene that spans both ``gene_1`` and ``gene_2``.

     The merged gene starts at the smaller of the two start positions and
     ends at the larger of the two end positions. Its name and id are the
     respective values of both genes joined by ``"_merged_with_"``;
     sequence id and strand are taken from ``gene_1``.
     """
     # Compare start with start and end with end. Mixing gene_1.start with
     # gene_2.end gives a truncated span when gene_2 lies inside gene_1 or
     # starts before it.
     start = min(gene_1.start, gene_2.start)
     end = max(gene_1.end, gene_2.end)
     name = "_merged_with_".join([gene_1.name, gene_2.name])
     gene_id = "_merged_with_".join([gene_1.gene_id, gene_2.gene_id])
     return Gene(gene_1.seq_id, gene_id, name, start, end, gene_1.strand)
示例#15
0
 def test_5_prime_dist_4(self):
     """A minus-strand TSS at 120 has a 5' distance of 20 to a
     minus-strand gene covering 10..100."""
     minus_tss = TSS("genomeX", 120, "-")
     minus_gene = Gene("genomeX", "g", "g", 10, 100, "-")
     distance = self.tss_gene_mapper._5_prime_dist(minus_tss, minus_gene)
     self.assertEqual(distance, 20)
示例#16
0
 def _sql_row_to_gene(self, row):
     """Build a Gene from the positional columns of a database row."""
     seq_id = row[2]
     # Column 1 serves as both the gene id and the gene name.
     gene_id = row[1]
     start, end = row[4], row[5]
     strand = row[3]
     return Gene(seq_id, gene_id, gene_id, start, end, strand)
示例#17
0
 def test_has_internal_association_8(self):
     """Expect None: a TSS on the first base counts as 5' leaderless."""
     tss_on_first_base = TSS("genomeX", 100, "-")
     minus_gene = Gene("genomeX", "g", "g", 10, 100, "-")
     outcome = self.tss_gene_mapper._has_internal_association(
         tss_on_first_base, minus_gene)
     self.assertEqual(outcome, None)
示例#18
0
 def test_has_internal_association_7(self):
     """Expect True: the TSS sits on the last base of the gene."""
     tss_on_last_base = TSS("genomeX", 10, "-")
     minus_gene = Gene("genomeX", "g", "g", 10, 100, "-")
     outcome = self.tss_gene_mapper._has_internal_association(
         tss_on_last_base, minus_gene)
     self.assertEqual(outcome, True)
示例#19
0
 def test_has_internal_association_3(self):
     """Expect None: the TSS lies in the 3' region of the gene."""
     tss_in_3_prime = TSS("genomeX", 150, "+")
     plus_gene = Gene("genomeX", "g", "g", 10, 100, "+")
     outcome = self.tss_gene_mapper._has_internal_association(
         tss_in_3_prime, plus_gene)
     self.assertEqual(outcome, None)
示例#20
0
 def test_has_internal_association_1(self):
     """Expect True: the TSS lies inside the gene."""
     internal_tss = TSS("genomeX", 20, "+")
     plus_gene = Gene("genomeX", "g", "g", 10, 100, "+")
     outcome = self.tss_gene_mapper._has_internal_association(
         internal_tss, plus_gene)
     self.assertEqual(outcome, True)
示例#21
0
 def test_has_antisense_association_10(self):
     """Expect None: the antisense TSS at 201 is out of range."""
     distant_tss = TSS("genomeX", 201, "-")
     plus_gene = Gene("genomeX", "g", "g", 10, 100, "+")
     outcome = self.tss_gene_mapper._has_antisense_association(
         distant_tss, plus_gene)
     self.assertEqual(outcome, None)
示例#22
0
 def test_has_5_prime_association_3(self):
     """Expect True: a leaderless TSS sits on the gene's start position."""
     leaderless_tss = TSS("genomeX", 10, "+")
     plus_gene = Gene("genomeX", "g", "g", 10, 100, "+")
     outcome = self.tss_gene_mapper._has_5_prime_association(
         leaderless_tss, plus_gene)
     self.assertEqual(outcome, True)
示例#23
0
 def test_has_5_prime_association_4(self):
     """Expect None: the TSS is in the 5' region but too far away."""
     far_tss = TSS("genomeX", 10, "+")
     plus_gene = Gene("genomeX", "g", "g", 1000, 1100, "+")
     outcome = self.tss_gene_mapper._has_5_prime_association(
         far_tss, plus_gene)
     self.assertEqual(outcome, None)
示例#24
0
# Command-line interface: an input GFF file, an output file, an optional
# integer margin and a flag restricting the output to the plus strand.
parser = argparse.ArgumentParser(description=__description__)
parser.add_argument("gff_file", type=argparse.FileType("r"))
parser.add_argument("output_file", type=argparse.FileType("w"))
parser.add_argument("--margin", type=int, default=0)
parser.add_argument("--plus_only", default=False, action="store_true")
args = parser.parse_args()
# Build gene list
gene_list = []
gff_parser = Gff3Parser()
# Most recently seen "region" entry; its end is passed to find_igrs below.
region_entry = None
for entry in gff_parser.entries(args.gff_file):
    if entry.feature == "region":
        region_entry = entry
        continue
    # Every non-region entry is recorded as a Gene with empty id and name.
    gene_list.append(Gene(
        entry.seq_id, "", "", entry.start, entry.end,
        entry.strand))
# Find IGRs and generate GFF file
igr_finder = IGRFinder()
args.output_file.write("##gff-version 3\n")

# Strands iterated for each interval below; only "+" with --plus_only.
strands = ["+", "-"]
if args.plus_only is True:
    strands = ["+"]

# NOTE(review): region_entry is still None when the GFF file contains no
# "region" feature, in which case region_entry.end raises AttributeError --
# confirm that inputs always contain one.
for start, end in igr_finder.find_igrs(gene_list, region_entry.end):
    # Shrink the interval by the margin on both sides and skip it when
    # nothing is left.
    start = start + args.margin
    end = end - args.margin
    if end <= start:
        continue
    for strand in strands:
示例#25
0
 def test_has_antisense_association_7(self):
     """Expect True: the TSS is antisense and inside the range."""
     minus_tss = TSS("genomeX", 900, "-")
     plus_gene = Gene("genomeX", "g", "g", 1000, 1100, "+")
     outcome = self.tss_gene_mapper._has_antisense_association(
         minus_tss, plus_gene)
     self.assertEqual(outcome, True)