示例#1
0
class TestMaize(unittest.TestCase):
    """Run ``parse_blast`` on one rice/maize gene pair.

    All fixtures are read from hard-coded absolute paths, so these tests
    only work on a machine where those files exist.
    """

    def setUp(self):
        """Load the BLAST fixture, the unmasked FASTA and both BED files."""
        # Read inside a context manager so the fixture handle is closed as
        # soon as its lines have been consumed.
        with open(
                '/Users/gturco/code/freeling_lab/find_cns_gturco/pipeline/tests/blast_3.txt'
        ) as handle:
            fh = handle.readlines()
        # parse_blast receives the fixture lines joined into a single string.
        self.blast_str = ' , '.join(fh)
        self.unmasked_fasta = Fasta('/Users/gturco/find_cns/maize_v2_UM.fasta')

        self.qbed = Bed('/Users/gturco/rice_maize/rice_v6.bed')
        self.qbed.fill_dict()
        self.sbed = Bed('/Users/gturco/maize/maize_v2.bed',
                        '/Users/gturco/maize/maize_v2.fasta')
        self.sbed.fill_dict()
        self.sfeat = self.sbed.accn('GRMZM2G086714')
        self.qfeat = self.qbed.accn('Os09g27050')

    def test_get_cmd(self):
        # TODO: placeholder -- only names the two split FASTA files and
        # asserts nothing yet.
        sfasta = 'data/rice_v6_maize_v2/maize_v2_split/2.fasta'
        qfasta = 'data/rice_v6_maize_v2/rice_v6_split/4.fasta'

    def test_parse_balse(self):
        # Smoke test: runs parse_blast on the fixture and prints the result
        # for manual inspection; there is no assertion.
        orientation = -1
        cns = parse_blast(self.blast_str, orientation, self.qfeat, self.sfeat,
                          self.qbed, self.sbed, 12000, 26000,
                          self.unmasked_fasta)
        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(cns)
示例#2
0
class TestPseudo(unittest.TestCase):
    """Tests for ``group_cds``, ``append_to_included_groups``,
    ``remove_crossing_hits`` and ``find_orf`` on rice/setaria fixtures.

    Fixture paths are relative, so the tests must be run from the directory
    that contains ``data/``, ``blast_res`` and ``blast_2``.
    """

    def setUp(self):
        """Load both BED files and group the ``blast_res`` hits by CDS."""
        self.qallbed = Bed("data/rice_v6_setaria64/rice_v6.all.bed", "data/rice_v6_setaria64/rice_v6.fasta")
        self.qallbed.fill_dict()
        self.sallbed = Bed("data/rice_v6_setaria64/setaria64.all.bed", "data/rice_v6_setaria64/setaria64.fasta")
        self.sallbed.fill_dict()
        self.saccn = self.sallbed.accn("Si000834m")
        # Context manager closes the fixture file once it has been read.
        with open("blast_res") as blastfh:
            self.blast = blastfh.read()
        self.d, self.pseudo = group_cds(self.blast, self.saccn)

    def test_group_cds_1(self):
        # Four groups are expected, holding 38 hits between them.
        self.assertEqual(len(self.d.keys()), 4)
        total_hits = sum(len(self.d[key]) for key in self.d.keys())
        self.assertEqual(total_hits, 38)

    def test_group_cds_2(self):
        with open("blast_2") as blast_2fh:
            blast_2 = blast_2fh.read()

        d, pseudo = group_cds(blast_2, self.sallbed.accn("Si002524m"))

        # Five groups are expected, each with exactly one hit.
        self.assertEqual(len(d.keys()), 5)
        for key in d.keys():
            self.assertEqual(1, len(d[key]))

    def test_append_to_included_groups(self):
        locs = [1, 2, 3, 4]
        group_dict = {(2, 5): [], (3, 6): [], (9, 8): []}

        result_dict = append_to_included_groups(locs, group_dict)
        expected = {(2, 5): [(1, 2, 3, 4)], (3, 6): [(1, 2, 3, 4)], (9, 8): []}

        # assertEqual rather than the deprecated assertEquals alias.
        self.assertEqual(expected, result_dict)

    def test_remove_crossing_hit(self):
        # Smoke test: only checks that the calls succeed and that bites()
        # returns three values; no expected values are asserted.
        qaccn = self.qallbed.accn("Os01g01890")
        for group_key in self.d.keys():
            exon_hits = self.d[group_key]
            non_crossing = remove_crossing_hits(exon_hits, qaccn, self.saccn)
            if len(non_crossing) > 1:
                mid, start, stop = bites(non_crossing)

    def test_find_orf(self):
        qaccn = self.qallbed.accn("Os01g01295")
        orf = find_orf(self.qallbed, qaccn)
        self.assertEqual(orf + 1, 141084)

    def test_find_orf_neg(self):
        saccn = self.sallbed.accn("Si001539m")
        orf = find_orf(self.sallbed, saccn)
        self.assertEqual(orf, 7662777)
示例#3
0
def main(cns_path, fmt, query_bed_path, subject_bed_path):
    """Annotate every CNS record with a position type and return the records.

    First pass: the four coordinates of each record are cast to int, the
    query and subject features are looked up, and ``cns_type`` and
    ``create_utr_list`` are called for the record (presumably setting
    ``cns['type']`` and filling ``utr_dict``; both helpers are defined
    outside this function).

    Second pass: records typed "5-prox_dist" or "3-prox_dist" are resolved
    to "...-proximal" (gap of at most 1000) or "...-distal" (gap above
    1000).  Which gap is measured depends on ``cns["qstrand"]``; records
    whose strand is neither "+" nor "-" keep their provisional type.
    """
    records = cns_to_dic(cns_path, fmt)
    query_bed = Bed(query_bed_path)
    subject_bed = Bed(subject_bed_path)
    utr_dict = {}

    for cns in records:
        for coord in ('qstop', 'qstart', 'sstop', 'sstart'):
            cns[coord] = int(cns[coord])

        qfeat = query_bed.accn(cns['qaccn'])
        sfeat = subject_bed.accn(cns['saccn'])
        space_start = min(qfeat['locs'])[0]
        space_end = max(qfeat['locs'])[1]

        # Every interval is modelled as a vertical segment at x == 0.
        space_line = LineString([(0.0, space_start), (0.0, space_end)])
        qgene_line = LineString([(0.0, qfeat['start']), (0.0, qfeat['end'])])
        sgene_line = LineString([(0.0, sfeat['start']), (0.0, sfeat['end'])])
        # if intron of one dont need to check other
        qcns_line = LineString([(0, cns['qstart']), (0, cns['qstop'])])
        scns_line = LineString([(0, cns['sstart']), (0, cns['sstop'])])

        cns_type(cns, space_line, qgene_line, sgene_line, scns_line,
                 qcns_line, space_start, qfeat)
        create_utr_list(utr_dict, qfeat, cns, "q")
        create_utr_list(utr_dict, sfeat, cns, "s")

    limit = 1000
    for cns in records:
        provisional = cns['type']
        if provisional == "5-prox_dist":
            five_prime = True
            near_label, far_label = "5-proximal", "5-distal"
        elif provisional == "3-prox_dist":
            five_prime = False
            near_label, far_label = "3-proximal", "3-distal"
        else:
            continue

        qgene_start = min(utr_dict[cns['qaccn']])
        qgene_stop = max(utr_dict[cns['qaccn']])
        # Gap between the CNS and the low / high end of the query gene.
        low_gap = abs(qgene_start - cns["qstop"])
        high_gap = abs(qgene_stop - cns["qstart"])

        # 5' on "+" and 3' on "-" are measured from the low end; the other
        # two combinations from the high end.
        strand = cns["qstrand"]
        if strand == "+":
            gap = low_gap if five_prime else high_gap
        elif strand == "-":
            gap = high_gap if five_prime else low_gap
        else:
            continue

        if gap <= limit:
            cns["type"] = near_label
        elif gap > limit:
            cns["type"] = far_label
    return records
示例#4
0
def utr_present(cns_pck,query_bed_path, UTR):
  """Flag query genes whose boundary matches their outermost ``locs`` as 'ND'.

  For every CNS record in the pickle, the query gene is looked up in the
  BED file.  With ``UTR == 3`` the gene's end (``start`` on a non-"+"
  strand) is compared against ``min(locs)[0]`` and ``max(locs)[1]``; with
  ``UTR == 5`` the gene's start (``end`` on a non-"+" strand) is compared
  instead.  On a match, the ``3_UTR`` / ``5_UTR`` column of
  ``MUSA_GENE_LIST_copy`` is set to 'ND' for that accession.  Any other
  ``UTR`` value performs no updates.

  cns_pck: path to a pickled iterable of dicts that carry a 'qaccn' key.
  query_bed_path: path passed to ``Bed``.
  UTR: 3 or 5.
  """
  db = MySQLdb.connect(host="127.0.0.1", user="******", db = "rice_gene_table")
  try:
    cursor = db.cursor()
    # NOTE(review): pickle.load can execute arbitrary code; only feed this
    # function pickles written by a trusted step of the pipeline.
    with open(cns_pck) as cns_handle:
      cns_pickle = pickle.load(cns_handle)
    query_bed = Bed(query_bed_path)
    for cns in cns_pickle:
      qaccn = cns['qaccn']
      qfeat = query_bed.accn(qaccn)
      if qfeat['strand'] == "+":
        end = qfeat['end']
        start = qfeat["start"]
      else:
        end = qfeat['start']
        start = qfeat["end"]
      if UTR == 3:
        boundary, column = end, "3_UTR"
      elif UTR == 5:
        boundary, column = start, "5_UTR"
      else:
        continue
      locs = qfeat['locs']
      if boundary == min(locs)[0] or boundary == max(locs)[1]:
        # `column` comes from the fixed pair above, never from input data.
        stmt = "update MUSA_GENE_LIST_copy set MUSA_GENE_LIST_copy.{0} = 'ND' where MUSA_GENE_LIST_copy.Rice_MSU6_genes = ".format(column)
        # Echo the statement with the accession inlined (log output only).
        print(stmt + "'{0}'".format(qaccn))
        # The accession goes in as a bound parameter so the driver does the
        # quoting and escaping.
        cursor.execute(stmt + "%s", (qaccn,))
    # MySQLdb connections start with autocommit off; without an explicit
    # commit the updates are rolled back when the connection goes away.
    db.commit()
  finally:
    db.close()
示例#5
0
def utr_present(cns_pck, query_bed_path, UTR):
    """Flag query genes whose boundary matches their outermost ``locs`` as 'ND'.

    For every CNS record in the pickle, the query gene is looked up in the
    BED file.  With ``UTR == 3`` the gene's end (``start`` on a non-"+"
    strand) is compared against ``min(locs)[0]`` and ``max(locs)[1]``; with
    ``UTR == 5`` the gene's start (``end`` on a non-"+" strand) is compared
    instead.  On a match, the ``3_UTR`` / ``5_UTR`` column of
    ``MUSA_GENE_LIST_copy`` is set to 'ND' for that accession.  Any other
    ``UTR`` value performs no updates.

    cns_pck: path to a pickled iterable of dicts that carry a 'qaccn' key.
    query_bed_path: path passed to ``Bed``.
    UTR: 3 or 5.
    """
    db = MySQLdb.connect(host="127.0.0.1", user="******", db="rice_gene_table")
    try:
        cursor = db.cursor()
        # NOTE(review): pickle.load can execute arbitrary code; only feed
        # this function pickles written by a trusted step of the pipeline.
        with open(cns_pck) as cns_handle:
            cns_pickle = pickle.load(cns_handle)
        query_bed = Bed(query_bed_path)
        for cns in cns_pickle:
            qaccn = cns['qaccn']
            qfeat = query_bed.accn(qaccn)
            if qfeat['strand'] == "+":
                end = qfeat['end']
                start = qfeat["start"]
            else:
                end = qfeat['start']
                start = qfeat["end"]
            if UTR == 3:
                boundary, column = end, "3_UTR"
            elif UTR == 5:
                boundary, column = start, "5_UTR"
            else:
                continue
            locs = qfeat['locs']
            if boundary == min(locs)[0] or boundary == max(locs)[1]:
                # `column` comes from the fixed pair above, never from
                # input data.
                stmt = "update MUSA_GENE_LIST_copy set MUSA_GENE_LIST_copy.{0} = 'ND' where MUSA_GENE_LIST_copy.Rice_MSU6_genes = ".format(
                    column)
                # Echo the statement with the accession inlined (log only).
                print(stmt + "'{0}'".format(qaccn))
                # The accession goes in as a bound parameter so the driver
                # does the quoting and escaping.
                cursor.execute(stmt + "%s", (qaccn,))
        # MySQLdb connections start with autocommit off; without an
        # explicit commit the updates are rolled back on disconnect.
        db.commit()
    finally:
        db.close()
示例#6
0
def main(cns_path, fmt, query_bed_path, subject_bed_path):
  """Annotate every CNS record with a position type and return the records.

  First pass: the four coordinates of each record are cast to int, the
  query and subject features are looked up, and ``cns_type`` and
  ``create_utr_list`` are called for the record (presumably setting
  ``cns['type']`` and filling ``utr_dict``; both helpers are defined
  outside this function).

  Second pass: records typed "5-prox_dist" or "3-prox_dist" are resolved
  to "...-proximal" (gap of at most 1000) or "...-distal" (gap above 1000).
  Which gap is measured depends on ``cns["qstrand"]``; records whose strand
  is neither "+" nor "-" keep their provisional type.
  """
  records = cns_to_dic(cns_path,fmt)
  query_bed = Bed(query_bed_path)
  subject_bed = Bed(subject_bed_path)
  utr_dict = {}

  for cns in records:
    for coord in ('qstop', 'qstart', 'sstop', 'sstart'):
      cns[coord] = int(cns[coord])

    qfeat = query_bed.accn(cns['qaccn'])
    sfeat = subject_bed.accn(cns['saccn'])
    space_start = min(qfeat['locs'])[0]
    space_end = max(qfeat['locs'])[1]

    # Every interval is modelled as a vertical segment at x == 0.
    space_line = LineString([(0.0, space_start), (0.0, space_end)])
    qgene_line = LineString([(0.0, qfeat['start']), (0.0, qfeat['end'])])
    sgene_line = LineString([(0.0, sfeat['start']), (0.0, sfeat['end'])])
    # if intron of one dont need to check other
    qcns_line = LineString([(0,cns['qstart']),(0,cns['qstop'])])
    scns_line = LineString([(0,cns['sstart']),(0,cns['sstop'])])

    cns_type(cns, space_line, qgene_line, sgene_line, scns_line, qcns_line,
             space_start, qfeat)
    create_utr_list(utr_dict, qfeat, cns, "q")
    create_utr_list(utr_dict, sfeat, cns, "s")

  limit = 1000
  for cns in records:
    provisional = cns['type']
    if provisional == "5-prox_dist":
      five_prime = True
      near_label, far_label = "5-proximal", "5-distal"
    elif provisional == "3-prox_dist":
      five_prime = False
      near_label, far_label = "3-proximal", "3-distal"
    else:
      continue

    qgene_start = min(utr_dict[cns['qaccn']])
    qgene_stop = max(utr_dict[cns['qaccn']])
    # Gap between the CNS and the low / high end of the query gene.
    low_gap = abs(qgene_start - cns["qstop"])
    high_gap = abs(qgene_stop - cns["qstart"])

    # 5' on "+" and 3' on "-" are measured from the low end; the other two
    # combinations from the high end.
    strand = cns["qstrand"]
    if strand == "+":
      gap = low_gap if five_prime else high_gap
    elif strand == "-":
      gap = high_gap if five_prime else low_gap
    else:
      continue

    if gap <= limit:
      cns["type"] = near_label
    elif gap > limit:
      cns["type"] = far_label
  return records
示例#7
0
def main(bedfile, seqfile, gene_list):
    """Print a CSV header, then graph the promoter matches of each gene.

    For every name in ``gene_list`` the feature is looked up in the BED
    file, ``get_prom`` returns a pair of promoter sequences for it, the gene
    name is printed, ``find_seq`` is run on both sequences and the two
    results are passed to ``make_graph`` together with the gene name.
    """
    print("position,gene,element")
    bed = Bed(bedfile)
    fasta = Fasta(seqfile)
    for gene_name in gene_list:
        feature = bed.accn(gene_name)
        # Presumably the forward- and reverse-strand promoters (the original
        # locals were named promf / promr) -- confirm against get_prom.
        prom_f, prom_r = get_prom(fasta, feature)
        print(gene_name)
        hits_f = find_seq(prom_f)
        hits_r = find_seq(prom_r)
        make_graph(hits_f, hits_r, gene_name)
示例#8
0
class TestMaize(unittest.TestCase):
    """Run ``parse_blast`` on one rice/maize gene pair.

    All fixtures are read from hard-coded absolute paths, so these tests
    only work on a machine where those files exist.
    """

    def setUp(self):
        """Load the BLAST fixture, the unmasked FASTA and both BED files."""
        # Read inside a context manager so the fixture handle is closed as
        # soon as its lines have been consumed.
        with open("/Users/gturco/code/freeling_lab/find_cns_gturco/pipeline/tests/blast_3.txt") as handle:
            fh = handle.readlines()
        # parse_blast receives the fixture lines joined into a single string.
        self.blast_str = " , ".join(fh)
        self.unmasked_fasta = Fasta("/Users/gturco/find_cns/maize_v2_UM.fasta")

        self.qbed = Bed("/Users/gturco/rice_maize/rice_v6.bed")
        self.qbed.fill_dict()
        self.sbed = Bed("/Users/gturco/maize/maize_v2.bed", "/Users/gturco/maize/maize_v2.fasta")
        self.sbed.fill_dict()
        self.sfeat = self.sbed.accn("GRMZM2G086714")
        self.qfeat = self.qbed.accn("Os09g27050")

    def test_get_cmd(self):
        # TODO: placeholder -- only names the two split FASTA files and
        # asserts nothing yet.
        sfasta = "data/rice_v6_maize_v2/maize_v2_split/2.fasta"
        qfasta = "data/rice_v6_maize_v2/rice_v6_split/4.fasta"

    def test_parse_balse(self):
        # Smoke test: runs parse_blast on the fixture and prints the result
        # for manual inspection; there is no assertion.
        orientation = -1
        cns = parse_blast(
            self.blast_str, orientation, self.qfeat, self.sfeat, self.qbed, self.sbed, 12000, 26000, self.unmasked_fasta
        )
        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(cns)
def main(cns_file, bedpath, fastapath):
    """Call ``write_to_pos_fasta`` for every gene of the CNS file found in the BED.

    Each gene gets a single window reaching 12000 positions either side of
    the feature (clamped at 0).  The feature's ``locs`` list is sorted in
    place before the call.  Genes for which ``bed.accn`` raises ``KeyError``
    are skipped.
    """
    genespace = get_genespace(cns_file)
    bed = Bed(bedpath)
    fasta = Fasta(fastapath)
    region_names = ['3_utr','5_utr','intronic','5_prox','5_distal','3_prox','3_distal']
    fhs = open_files(region_names)
    flank = 12000
    for gene in genespace.keys():
        try:
            accn = bed.accn(gene)
        except KeyError:
            continue
        window_start = max(0, accn['start'] - flank)
        window_end = accn['end'] + flank
        # One window per gene, so the list is trivially ordered.
        cnsspace = [(window_start, window_end)]
        locs = accn['locs']
        locs.sort()
        write_to_pos_fasta(bed, accn, locs, cnsspace, fhs, fasta)
示例#10
0
def main(cns_file, bedpath, fastapath):
    """Call ``write_to_pos_fasta`` for every gene of the CNS file found in the BED.

    Each gene gets a single window reaching 12000 positions either side of
    the feature (clamped at 0).  The feature's ``locs`` list is sorted in
    place before the call.  Genes for which ``bed.accn`` raises ``KeyError``
    are skipped.
    """
    genespace = get_genespace(cns_file)
    bed = Bed(bedpath)
    fasta = Fasta(fastapath)
    region_names = [
        '3_utr', '5_utr', 'intronic', '5_prox', '5_distal', '3_prox',
        '3_distal'
    ]
    fhs = open_files(region_names)
    flank = 12000
    for gene in genespace.keys():
        try:
            accn = bed.accn(gene)
        except KeyError:
            pass
        else:
            lower = max(0, accn['start'] - flank)
            upper = accn['end'] + flank
            # One window per gene, so the list is trivially ordered.
            cnsspace = [(lower, upper)]
            locs = accn['locs']
            locs.sort()
            write_to_pos_fasta(bed, accn, locs, cnsspace, fhs, fasta)
class TestPerfectTargetRegion(unittest.TestCase):
    def setUp(self):
        """Load the rice fixtures and cache the test gene and its ``locs``."""
        self.gene_name = "Os01g02110"
        self.bed, self.fasta = Bed("ricetest.bed"), Fasta("ricetest.fasta")
        gene = self.bed.accn(self.gene_name)
        self.gene = gene
        # The tests treat the entries of the gene's 'locs' as its exons.
        self.exons = gene['locs']


    def test_rel_pos(self):
        # rel_pos is checked on the first and the last entry of the locs.
        first_exon, last_exon = self.exons[0], self.exons[-1]
        self.assertEqual((376, 486), rel_pos(self.gene, first_exon))
        self.assertEqual((1289, 1789), rel_pos(self.gene, last_exon))

    def test_fasta(self):
        # The gene's sequence must be at least 1789 long, the largest
        # coordinate asserted in test_rel_pos.
        seq = self.fasta[self.gene_name][:]
        self.assertTrue(1789 <= len(seq))

    def test_pattern(self):
        e = exons[-1]
        start, stop = rel_pos(self.gene,e)
        for exon in self.exons: