Пример #1
0
def main(assembly, filename):
    """Convert a simulation result table into a tab-separated count/RPKM table.

    Each line of *filename* must split into exactly 13 tab-separated fields.
    Only the location (``chrom:start-end`` followed by one strand character,
    where 'W' gives strand '1' and anything else '-1'), the transcript ID, the
    length and the sequencing fraction/number are used.  One row is written to
    'simulation/count_simulation.txt' for every transcript ID found in the
    assembly's transcript mapping; lines with an unknown ID are skipped.

    :param assembly: identifier passed to ``Assembly``.
    :param filename: path of the input table.
    :raises ValueError: if a line does not split into the expected fields or a
        numeric field cannot be parsed.
    """
    tmap = Assembly(assembly).get_transcript_mapping()
    header = ['ID', 'Count', 'RPKM', 'Chrom', 'Start', 'End', 'Strand',
              'GeneName', 'Length', 'Type', 'Sense', 'Synonyms']

    # Context managers close both files even when a malformed line raises.
    # 'wb' is kept from the original so line endings are written untranslated.
    with open(filename) as f, \
            open('simulation/count_simulation.txt', 'wb') as g:
        g.write('\t'.join(header) + '\n')

        for line in f:
            # Unpacking doubles as a check that the line has 13 fields.
            (loc, tid, coding, length,
             expr_fraction, expr_number, lib_fraction, lib_number,
             seq_fraction, seq_number,
             cov_fraction, chisq, var_coeff) = line.split('\t')
            chrom, coord = loc.split(':')
            start, end = coord[:-1].split('-')
            strand = '1' if coord[-1] == 'W' else '-1'

            nreads = float(seq_number)
            if nreads != 0:
                # Total number of reads, recovered from count / fraction.
                ntotal = nreads / float(seq_fraction)
                rpkm = 1e9 * nreads / (float(length) * ntotal)
            else:
                rpkm = 0.0

            t = tmap.get(tid)
            if t is None:
                continue
            newline = [tid, seq_number, str(rpkm), chrom, start, end, strand,
                       t.gene_name, length, 'transcript', '.', '.']
            g.write('\t'.join(newline) + '\n')
Пример #2
0
 def setUp(self):
     """Build the 'ce6' assembly fixture and the chromosome table.

     The assembly gets an explicit ``GenRep`` instance and ``intype`` '0'.
     ``self.chromosomes`` maps a 3-tuple key to a dict holding the
     chromosome's 'length' and 'name'.
     """
     assembly = self.assembly = Assembly('ce6')
     assembly.genrep = GenRep(url='http://bbcftools.epfl.ch/genrep/',
                              root='/db/genrep')
     assembly.intype = '0'

     # One row per chromosome: (key tuple, name, length).
     chromosome_rows = (
         ((3066, u'NC_003279', 6), u'chrI', 15072421),
         ((3067, u'NC_003280', 7), u'chrII', 15279323),
         ((3068, u'NC_003281', 8), u'chrIII', 13783681),
         ((3069, u'NC_003282', 5), u'chrIV', 17493785),
         ((3070, u'NC_003283', 8), u'chrV', 20919568),
         ((3071, u'NC_003284', 7), u'chrX', 17718854),
         ((2948, u'NC_001328', 1), u'chrM', 13794),
     )
     self.chromosomes = dict(
         (key, {'length': length, 'name': name})
         for key, name, length in chromosome_rows)
Пример #3
0
def main(assembly, filename):
    """Convert a simulation result table into a tab-separated count/RPKM table.

    Each line of *filename* must split into exactly 13 tab-separated fields.
    Only the location (``chrom:start-end`` followed by one strand character,
    where 'W' gives strand '1' and anything else '-1'), the transcript ID, the
    length and the sequencing fraction/number are used.  One row is written to
    'simulation/count_simulation.txt' for every transcript ID found in the
    assembly's transcript mapping; lines with an unknown ID are skipped.

    :param assembly: identifier passed to ``Assembly``.
    :param filename: path of the input table.
    :raises ValueError: if a line does not split into the expected fields or a
        numeric field cannot be parsed.
    """
    tmap = Assembly(assembly).get_transcript_mapping()
    header = [
        'ID', 'Count', 'RPKM', 'Chrom', 'Start', 'End', 'Strand', 'GeneName',
        'Length', 'Type', 'Sense', 'Synonyms'
    ]

    # Context managers close both files even when a malformed line raises.
    # 'wb' is kept from the original so line endings are written untranslated.
    with open(filename) as f, \
            open('simulation/count_simulation.txt', 'wb') as g:
        g.write('\t'.join(header) + '\n')

        for line in f:
            # Unpacking doubles as a check that the line has 13 fields.
            (loc, tid, coding, length,
             expr_fraction, expr_number, lib_fraction, lib_number,
             seq_fraction, seq_number,
             cov_fraction, chisq, var_coeff) = line.split('\t')
            chrom, coord = loc.split(':')
            start, end = coord[:-1].split('-')
            strand = '1' if coord[-1] == 'W' else '-1'

            nreads = float(seq_number)
            if nreads != 0:
                # Total number of reads, recovered from count / fraction.
                ntotal = nreads / float(seq_fraction)
                rpkm = 1e9 * nreads / (float(length) * ntotal)
            else:
                rpkm = 0.0

            t = tmap.get(tid)
            if t is None:
                continue
            newline = [
                tid, seq_number,
                str(rpkm), chrom, start, end, strand, t.gene_name, length,
                'transcript', '.', '.'
            ]
            g.write('\t'.join(newline) + '\n')
Пример #4
0
 def adn(self, ass, chr, id, **kw):
     """Return the sequence of chunk number *id* of chromosome *chr*.

     The chunk spans ``[id * chunk, id * chunk + chunk]`` where ``chunk`` is a
     name defined outside this method.  The chromosome is looked up by name in
     the chromosomes of assembly *ass*; an empty string is returned when no
     chromosome has that name.  Extra keyword arguments are ignored.
     """
     chunk_index = int(id)
     genrep = GenRep()
     # NOTE(review): the result of this query is never read; the call is kept
     # only because it may have side effects -- confirm and drop if not.
     genrep.get_genrep_objects('chromosomes',
                               'chromosome',
                               filters={'name': chr},
                               params={'assembly_id': ass})
     assembly = Assembly(ass)
     for chrom_key, chrom_info in assembly.chromosomes.iteritems():
         if chrom_info['name'] != chr:
             continue
         first = chunk_index * chunk
         last = first + chunk
         # The first element of the chromosome key is what get_sequence takes.
         return genrep.get_sequence(chrom_key[0], [[first, last]])
     return ''
Пример #5
0
def merge_junc_files(trackList, assembly):
    """Merge several junction tracks into a single 'all.junc' text track.

    For each chromosome key of *assembly*, the matching selection is read from
    every track in *trackList*; the streams are concatenated with rows grouped
    by (chr, start, end) and their scores summed, then appended to the output.

    :param trackList: iterable of track sources accepted by ``track``.
    :param assembly: identifier passed to ``Assembly``.
    """
    junc_fields = ['chr', 'start', 'end', 'strand', 'score']
    out = track('all.junc', format='txt', fields=list(junc_fields))
    from bbcflib.genrep import Assembly
    for chrom_key in Assembly(assembly).chromosomes:
        # Selection string has the form "<key[0]>_<key[1]>.<key[2]>".
        selection = str(chrom_key[0]) + '_' + chrom_key[1] + '.' + str(
            chrom_key[2])
        streams = []
        for source in trackList:
            opened = track(source, fields=list(junc_fields), format='txt')
            streams.append(opened.read(selection))
        # Rows sharing coordinates are collapsed and their scores added up.
        merged = concatenate(streams,
                             group_by=['chr', 'start', 'end'],
                             aggregate={'score': lambda x: sum(x)})
        out.write(merged, mode='append')
Пример #6
0
def add_new_sequence(sequence):
    '''
    Create the default 'Genes' track for a newly created sequence.

    The SQLite URL obtained from ``Assembly(sequence)`` is wrapped in a
    ``FileInfo`` targeting 'Genes.sql' in the temporary directory, a ``Track``
    owned by the admin user is stored, an asynchronous ``new_input`` task is
    started for it, and the track is appended to the sequence's default
    tracks.

    :param sequence: object exposing ``id``, ``name`` and ``default_tracks``.
    '''
    # Parenthesised single-argument prints behave the same on Python 2 and 3.
    print('add new sequence')
    file_url = Assembly(sequence).get_sqlite_url()
    print(file_url)
    out = os.path.join(filemanager.temporary_directory(), 'Genes.sql')
    fileinfo = filemanager.FileInfo(inputtype='url',
                                    inpath=file_url,
                                    trackname='Genes',
                                    extension='sql',
                                    outpath=out,
                                    admin=True)
    print(fileinfo)
    # NOTE(review): .first() presumably returns None when no admin user
    # exists, which would make the attribute accesses below fail -- confirm.
    user = DBSession.query(User).filter(
        User.key == constants.admin_user_key()).first()
    user_info = {'id': user.id, 'name': user.name, 'email': user.email}
    sequence_info = {'id': sequence.id, 'name': sequence.name}

    # track
    t = Track()
    t.name = fileinfo.trackname
    t.sequence_id = sequence.id
    t.user_id = user.id
    DBSession.add(t)
    # Flush before t.id is handed to the task.
    DBSession.flush()
    # send task ('async' is a reserved word from Python 3.7, hence the name)
    async_result = tasks.new_input.delay(user_info, fileinfo, sequence_info,
                                         t.id)
    t.task_id = async_result.task_id
    DBSession.add(t)

    sequence.default_tracks.append(t)
    DBSession.add(sequence)
    DBSession.flush()
Пример #7
0
 def setUp(self):
     self.assembly = Assembly('ce6')
     self.root = self.assembly.genrep.root
     self.intype = 0
     """
Пример #8
0
class Test_Assembly(unittest.TestCase):
    """Tests for ``Assembly`` on the 'ce6' assembly.

    Covers sequence extraction (``fasta_from_regions``), GTF feature queries,
    the gene/exon/transcript annotation tracks and the gene/transcript/exon
    mappings.  Tests decorated with ``with_without_genrep`` run twice: once
    with ``genrep.root`` as configured and once with it set to ''.
    """

    def setUp(self):
        """Create a fresh 'ce6' assembly and remember its initial GenRep root."""
        self.assembly = Assembly('ce6')
        self.root = self.assembly.genrep.root
        self.intype = 0
        # Reference layout behind the Y54E2A.11 / eif-3.B expectations:
        #
        # Gene Y54E2A.11 (-1 to each Ensembl start by convention)
        # 4 transcripts, Y54E2A.11a.1, Y54E2A.11a.2, Y54E2A.11b.1, Y54E2A.11b.2
        #
        #        5327    5434      5503  5697     5742   6075      6128      6836      6906   8367
        # a.1.1   |--------|   b.2.2 |----|   a.1.3 |------|   a.1.4 |---------|   a.1.5 |------|
        # b.1.1     |------|                  b.1.3 |----|     b.1.4    |------|   a.2.5 |----|
        # b.2.4       |----|                                   b.2.4    |----|
        #
        #         |========|         |====|         |======|         |=========|         |======|
        # 2803   =   107        +     194       +     333        +       708       +       1461

    def with_without_genrep(test):
        """Decorator. Runs *test* with genrep.root successively activated (via /db/)
        and disabled (via URL to GenRep).

        The original root is restored even when one of the two runs fails.
        """
        @wraps(test)  # gives to the wrapper the original function name
        def wrapper(self):
            root = self.assembly.genrep.root
            try:
                test(self)
                self.assembly.genrep.root = ''
                test(self)
            finally:
                self.assembly.genrep.root = root

        return wrapper

    def test_fasta_from_regions(self):
        """Sequences come back as (dict of lists per chromosome, total size)."""
        expected = ({
            'chrI': [
                'G', 'C', 'AAGCCTAAGCCTAAGCCTAA', 'CTAAGCCTAAGCCTAAGCCT',
                'TTTTTGAAAT'
            ]
        }, 52)
        # GenRep request, list form
        regions = [('chrI', 0, 1), ('chrI', 1, 2), ('chrI', 10, 30),
                   ('chrI', 20, 40), ('chrI', 1010, 1020)]
        url_seq = self.assembly.fasta_from_regions(regions=regions, out={})
        self.assertEqual(url_seq, expected)
        # Custom fasta file, dict form
        regions = {'chrI': [(0, 1), (1, 2), (10, 30), (20, 40), (1010, 1020)]}
        custom_seq = self.assembly.fasta_from_regions(
            regions=regions,
            out={},
            path_to_ref=os.path.join(path, "chrI_ce6_30lines.fa"))
        self.assertEqual(custom_seq, expected)
        # Fasta from cDNA (intype=2)
        # Region covers F53G12.5a.1, F53G12.5b, F53G12.4, F53G12.5a.2
        regions = {'chrI': [(126947, 137740)]}
        seq = self.assembly.fasta_from_regions(regions=regions,
                                               out={},
                                               intype=2)
        self.assertEqual(
            seq[0]['chrI'][1][:40],
            "ATGCCAGTCGTGAGCGTTAGACCTTTTTCTATGAGAAATG")  # F53G12.5b.1
        self.assertEqual(seq[1], 5870)

    def test_get_features_from_gtf(self):
        """Exons of gene Y54E2A.11 are returned keyed by gene name."""
        expected = {
            'eif-3.B': [(14795327, 14795434, 1, 'chrII'),
                        (14795331, 14795434, 1, 'chrII'),
                        (14795333, 14795434, 1, 'chrII'),
                        (14795503, 14795697, 1, 'chrII'),
                        (14795742, 14795907, 1, 'chrII'),
                        (14795742, 14796075, 1, 'chrII'),
                        (14796128, 14796836, 1, 'chrII'),
                        (14796213, 14796354, 1, 'chrII'),
                        (14796213, 14796836, 1, 'chrII'),
                        (14796906, 14797767, 1, 'chrII'),
                        (14796906, 14798367, 1, 'chrII')]
        }
        h = {
            'keys': 'gene_name',
            'values': 'start,end,strand',
            'conditions': 'gene_id:Y54E2A.11,type:exon',
            'uniq': '1'
        }
        # Test with local database request
        zc = self.assembly.get_features_from_gtf(h, chr='chrII')
        # NOTE(review): iterating a dict yields its keys, so this assertion
        # only compares the key sets, not the exon tuples.
        self.assertItemsEqual(zc, expected)
        # Test with url request via GenRep
        self.assembly.genrep.root = ''
        try:
            zc = self.assembly.get_features_from_gtf(h, chr='chrII')
            self.assertItemsEqual(zc['eif-3.B'], expected['eif-3.B'])
        finally:
            # Restore the root even if the request or assertion fails.
            self.assembly.genrep.root = self.root

    ################
    # Annot tracks #
    ################

    @with_without_genrep
    def test_gene_track(self):
        """First item of the gene track."""
        expected = ('chrI', 4118, 10232, 'Y74C9A.3|Y74C9A.3', -1)
        track = self.assembly.gene_track()
        self.assertEqual(next(track), expected)

    @with_without_genrep
    def test_exon_track(self):
        """First item of the exon track."""
        expected = ('chrI', 4118, 4358, 'Y74C9A.3.1.5|Y74C9A.3|Y74C9A.3', -1,
                    '.')
        track = self.assembly.exon_track()
        self.assertEqual(next(track), expected)

    @with_without_genrep
    def test_transcript_track(self):
        """First item of the transcript track."""
        expected = ('chrI', 4118, 10232, 'Y74C9A.3.1|Y74C9A.3', -1)
        track = self.assembly.transcript_track()
        self.assertEqual(next(track), expected)

    ############
    # Mappings #
    ############

    @unittest.skip('slow')
    @with_without_genrep
    def test_get_gene_mapping(self):
        """Gene mapping entry for Y54E2A.11."""
        expected = ('eif-3.B', 14795327, 14798367, 2803, 1, 'chrII')
        gmap = self.assembly.get_gene_mapping()
        g = gmap['Y54E2A.11']
        self.assertTupleEqual(
            (g.name, g.start, g.end, g.length, g.strand, g.chrom), expected)

    @unittest.skip('slow')
    @with_without_genrep
    def test_get_transcript_mapping(self):
        """Transcript mapping entry for Y54E2A.11a.1."""
        expected = ('Y54E2A.11', 14795327, 14798367, 2803, 1, 'chrII')
        tmap = self.assembly.get_transcript_mapping()
        t = tmap['Y54E2A.11a.1']
        self.assertTupleEqual(
            (t.gene_id, t.start, t.end, t.length, t.strand, t.chrom), expected)

    @unittest.skip('slow')
    @with_without_genrep
    def test_get_exon_mapping(self):
        """Exon mapping entry for Y54E2A.11a.1.1."""
        expected = (['Y54E2A.11a.1'], 'Y54E2A.11', 'eif-3.B', 14795327,
                    14795434, 1, 'chrII')
        emap = self.assembly.get_exon_mapping()
        e = emap['Y54E2A.11a.1.1']
        self.assertTupleEqual((e.transcripts, e.gene_id, e.gene_name, e.start,
                               e.end, e.strand, e.chrom), expected)
Пример #9
0
#!/usr/bin/env python
"""Rewrite the sequence names of ``header.sam`` into ``reheader.sam``.

For every @SQ line, the value of the second tab-separated field (the text
after its first ':') is looked up among the 'ac' values of the assembly's
``chrmeta`` and replaced by the corresponding chrmeta key.  Other header
lines are copied unchanged.
"""

import sys
if len(sys.argv) < 2:
    print("Usage: header_translation <assembly_name>")
    sys.exit(1)

from bbcflib.genrep import Assembly

assembly = sys.argv[1]
a = Assembly(assembly)

# Reverse lookup: 'ac' value -> chrmeta key.
ac2name = {}
for k, v in a.chrmeta.items():
    ac2name[v['ac']] = k

# The original closed an undefined handle `g` (NameError) and never closed the
# output; context managers close both files, also on error.
with open("header.sam") as f, open("reheader.sam", "wb") as h:
    for line in f:
        if not line.startswith('@SQ'):
            # Only @SQ lines carry a sequence name; keep the rest verbatim.
            h.write(line)
            continue
        L = line.split('\t')
        chrom = L[1].split(':')[1]
        # An unknown accession still raises KeyError rather than being
        # silently left untranslated.
        newchrom = ac2name[chrom]
        h.write(line.replace(chrom, newchrom))
Пример #10
0
# Rewrite the first column of "<md5>_ENSEMBL.gtf" into "<md5>_REFSEQ.gtf":
# each value is looked up in the hg38 assembly's chrmeta and replaced by that
# entry's 'ac' value.  Lines starting with '#' are copied unchanged.
from bbcflib.genrep import Assembly
a = Assembly('hg38')
chrmeta = a.chrmeta

md5 = "cbcc5aeeb39d29065c6641aafd5ccaa430706008"

filename = "%s_ENSEMBL.gtf" % md5
to = "%s_REFSEQ.gtf" % md5
# Context managers close both files even if a lookup fails part-way through.
with open(filename) as f, open(to, "wb") as g:
    for line in f:
        if line.startswith('#'):
            # Comment/pragma lines have no chromosome column to translate.
            g.write(line)
            continue
        # Only the first field changes, so split it off and keep the rest
        # of the line (including its newline) untouched.
        ensembl, tab, rest = line.partition('\t')
        refseq = chrmeta[ensembl]['ac']
        g.write(refseq + tab + rest)
Пример #11
0
 def setUp(self):
     self.assembly = Assembly('ce6')
     self.root = self.assembly.genrep.root
     self.intype = 0
     """
Пример #12
0
class Test_Assembly(unittest.TestCase):
    """Tests for ``Assembly`` on the 'ce6' assembly.

    Covers sequence extraction (``fasta_from_regions``), GTF feature queries,
    the gene/exon/transcript annotation tracks and the gene/transcript/exon
    mappings.  Tests decorated with ``with_without_genrep`` run twice: once
    with ``genrep.root`` as configured and once with it set to ''.
    """

    def setUp(self):
        """Create a fresh 'ce6' assembly and remember its initial GenRep root."""
        self.assembly = Assembly('ce6')
        self.root = self.assembly.genrep.root
        self.intype = 0
        # Reference layout behind the Y54E2A.11 / eif-3.B expectations:
        #
        # Gene Y54E2A.11 (-1 to each Ensembl start by convention)
        # 4 transcripts, Y54E2A.11a.1, Y54E2A.11a.2, Y54E2A.11b.1, Y54E2A.11b.2
        #
        #        5327    5434      5503  5697     5742   6075      6128      6836      6906   8367
        # a.1.1   |--------|   b.2.2 |----|   a.1.3 |------|   a.1.4 |---------|   a.1.5 |------|
        # b.1.1     |------|                  b.1.3 |----|     b.1.4    |------|   a.2.5 |----|
        # b.2.4       |----|                                   b.2.4    |----|
        #
        #         |========|         |====|         |======|         |=========|         |======|
        # 2803   =   107        +     194       +     333        +       708       +       1461

    def with_without_genrep(test):
        """Decorator. Runs *test* with genrep.root successively activated (via /db/)
        and disabled (via URL to GenRep).

        The original root is restored even when one of the two runs fails.
        """
        @wraps(test)  # gives to the wrapper the original function name
        def wrapper(self):
            root = self.assembly.genrep.root
            try:
                test(self)
                self.assembly.genrep.root = ''
                test(self)
            finally:
                self.assembly.genrep.root = root

        return wrapper

    def test_fasta_from_regions(self):
        """Sequences come back as (dict of lists per chromosome, total size)."""
        expected = ({
            'chrI': [
                'G', 'C', 'AAGCCTAAGCCTAAGCCTAA', 'CTAAGCCTAAGCCTAAGCCT',
                'TTTTTGAAAT'
            ]
        }, 52)
        # GenRep request, list form
        regions = [('chrI', 0, 1), ('chrI', 1, 2), ('chrI', 10, 30),
                   ('chrI', 20, 40), ('chrI', 1010, 1020)]
        url_seq = self.assembly.fasta_from_regions(regions=regions, out={})
        self.assertEqual(url_seq, expected)
        # Custom fasta file, dict form
        regions = {'chrI': [(0, 1), (1, 2), (10, 30), (20, 40), (1010, 1020)]}
        custom_seq = self.assembly.fasta_from_regions(
            regions=regions,
            out={},
            path_to_ref=os.path.join(path, "chrI_ce6_30lines.fa"))
        self.assertEqual(custom_seq, expected)
        # Fasta from cDNA (intype=2)
        # Region covers F53G12.5a.1, F53G12.5b, F53G12.4, F53G12.5a.2
        regions = {'chrI': [(126947, 137740)]}
        seq = self.assembly.fasta_from_regions(regions=regions,
                                               out={},
                                               intype=2)
        self.assertEqual(
            seq[0]['chrI'][1][:40],
            "ATGCCAGTCGTGAGCGTTAGACCTTTTTCTATGAGAAATG")  # F53G12.5b.1
        self.assertEqual(seq[1], 5870)

    def test_get_features_from_gtf(self):
        """Exons of gene Y54E2A.11 are returned keyed by gene name."""
        expected = {
            'eif-3.B': [(14795327, 14795434, 1, 'chrII'),
                        (14795331, 14795434, 1, 'chrII'),
                        (14795333, 14795434, 1, 'chrII'),
                        (14795503, 14795697, 1, 'chrII'),
                        (14795742, 14795907, 1, 'chrII'),
                        (14795742, 14796075, 1, 'chrII'),
                        (14796128, 14796836, 1, 'chrII'),
                        (14796213, 14796354, 1, 'chrII'),
                        (14796213, 14796836, 1, 'chrII'),
                        (14796906, 14797767, 1, 'chrII'),
                        (14796906, 14798367, 1, 'chrII')]
        }
        h = {
            'keys': 'gene_name',
            'values': 'start,end,strand',
            'conditions': 'gene_id:Y54E2A.11,type:exon',
            'uniq': '1'
        }
        # Test with local database request
        zc = self.assembly.get_features_from_gtf(h, chr='chrII')
        # NOTE(review): iterating a dict yields its keys, so this assertion
        # only compares the key sets, not the exon tuples.
        self.assertItemsEqual(zc, expected)
        # Test with url request via GenRep
        self.assembly.genrep.root = ''
        try:
            zc = self.assembly.get_features_from_gtf(h, chr='chrII')
            self.assertItemsEqual(zc['eif-3.B'], expected['eif-3.B'])
        finally:
            # Restore the root even if the request or assertion fails.
            self.assembly.genrep.root = self.root

    ################
    # Annot tracks #
    ################

    @with_without_genrep
    def test_gene_track(self):
        """First item of the gene track."""
        expected = ('chrI', 4118, 10232, 'Y74C9A.3|Y74C9A.3', -1)
        track = self.assembly.gene_track()
        self.assertEqual(next(track), expected)

    @with_without_genrep
    def test_exon_track(self):
        """First item of the exon track."""
        expected = ('chrI', 4118, 4358, 'Y74C9A.3.1.5|Y74C9A.3|Y74C9A.3', -1,
                    '.')
        track = self.assembly.exon_track()
        self.assertEqual(next(track), expected)

    @with_without_genrep
    def test_transcript_track(self):
        """First item of the transcript track."""
        expected = ('chrI', 4118, 10232, 'Y74C9A.3.1|Y74C9A.3', -1)
        track = self.assembly.transcript_track()
        self.assertEqual(next(track), expected)

    ############
    # Mappings #
    ############

    @unittest.skip('slow')
    @with_without_genrep
    def test_get_gene_mapping(self):
        """Gene mapping entry for Y54E2A.11."""
        expected = ('eif-3.B', 14795327, 14798367, 2803, 1, 'chrII')
        gmap = self.assembly.get_gene_mapping()
        g = gmap['Y54E2A.11']
        self.assertTupleEqual(
            (g.name, g.start, g.end, g.length, g.strand, g.chrom), expected)

    @unittest.skip('slow')
    @with_without_genrep
    def test_get_transcript_mapping(self):
        """Transcript mapping entry for Y54E2A.11a.1."""
        expected = ('Y54E2A.11', 14795327, 14798367, 2803, 1, 'chrII')
        tmap = self.assembly.get_transcript_mapping()
        t = tmap['Y54E2A.11a.1']
        self.assertTupleEqual(
            (t.gene_id, t.start, t.end, t.length, t.strand, t.chrom), expected)

    @unittest.skip('slow')
    @with_without_genrep
    def test_get_exon_mapping(self):
        """Exon mapping entry for Y54E2A.11a.1.1."""
        expected = (['Y54E2A.11a.1'], 'Y54E2A.11', 'eif-3.B', 14795327,
                    14795434, 1, 'chrII')
        emap = self.assembly.get_exon_mapping()
        e = emap['Y54E2A.11a.1.1']
        self.assertTupleEqual((e.transcripts, e.gene_id, e.gene_name, e.start,
                               e.end, e.strand, e.chrom), expected)