def setUp(self):
    """Build the ``ce6`` assembly fixture and a chromosome lookup table.

    Stores on the test instance:

    * ``assembly`` -- ``Assembly('ce6')`` with its ``genrep`` replaced by an
      explicitly configured ``GenRep`` client and ``intype`` set to ``'0'``.
    * ``chromosomes`` -- a dict keyed by 3-tuples whose values hold the
      ``'length'`` and ``'name'`` of each chromosome.
    """
    ce6 = self.assembly = Assembly('ce6')
    ce6.genrep = GenRep(url='http://bbcftools.epfl.ch/genrep/',
                        root='/db/genrep')
    ce6.intype = '0'

    # One row per chromosome: the three key components, then length and name.
    # NOTE(review): the key looks like (GenRep id, accession, version) --
    # confirm against Assembly.chromosomes.
    rows = (
        (3066, u'NC_003279', 6, 15072421, u'chrI'),
        (3067, u'NC_003280', 7, 15279323, u'chrII'),
        (3068, u'NC_003281', 8, 13783681, u'chrIII'),
        (3069, u'NC_003282', 5, 17493785, u'chrIV'),
        (3070, u'NC_003283', 8, 20919568, u'chrV'),
        (3071, u'NC_003284', 7, 17718854, u'chrX'),
        (2948, u'NC_001328', 1, 13794, u'chrM'),
    )
    self.chromosomes = dict(
        ((key_id, accession, version), {'length': length, 'name': name})
        for key_id, accession, version, length, name in rows
    )
def adn(self, ass, chr, id, **kw):
    """Return the sequence of one fixed-size chunk of a chromosome.

    :param ass: assembly identifier, passed to ``Assembly``.
    :param chr: chromosome name, compared with each chromosome's ``'name'``.
    :param id: zero-based chunk index (converted with ``int``). The requested
        range is ``[id * chunk, id * chunk + chunk]``, where ``chunk`` is the
        module-level chunk size.
    :param kw: extra keyword arguments; accepted and ignored.
    :returns: the result of ``GenRep.get_sequence`` for the chunk, or ``''``
        when no chromosome of the assembly carries the requested name.
    """
    index = int(id)
    genrep = GenRep()
    start = index * chunk
    end = start + chunk
    for key, meta in Assembly(ass).chromosomes.iteritems():
        if meta['name'] == chr:
            # The first component of the chromosome key is what
            # get_sequence takes as its chromosome argument.
            return genrep.get_sequence(key[0], [[start, end]])
    return ''
def merge_junc_files(trackList, assembly):
    """Merge several junction tracks into a single ``all.junc`` text track.

    For every chromosome key of the assembly, the matching selection is read
    from each track in *trackList*. The streams are concatenated, grouped on
    ``chr``/``start``/``end`` with their ``score`` values summed, and the
    result is appended to ``all.junc``.

    :param trackList: iterable of inputs accepted by ``track``.
    :param assembly: assembly identifier passed to ``Assembly``.
    """
    columns = ('chr', 'start', 'end', 'strand', 'score')
    merged = track('all.junc', format='txt', fields=list(columns))
    from bbcflib.genrep import Assembly
    genome = Assembly(assembly)
    for chrom_key in genome.chromosomes:
        # Selection string has the form "<key[0]>_<key[1]>.<key[2]>".
        selector = ''.join(
            [str(chrom_key[0]), '_', chrom_key[1], '.', str(chrom_key[2])])
        streams = []
        for source in trackList:
            handle = track(source, fields=list(columns), format='txt')
            streams.append(handle.read(selector))
        combined = concatenate(
            streams,
            group_by=['chr', 'start', 'end'],
            aggregate={'score': lambda scores: sum(scores)},
        )
        merged.write(combined, mode='append')
def main(assembly, filename):
    """Convert a simulation profile into a tab-separated transcript count table.

    Each line of *filename* must hold exactly 13 tab-separated columns. For
    every line whose transcript id is present in the assembly's transcript
    mapping, one row is written to ``simulation/count_simulation.txt``
    (after a header row) with the read count, an RPKM value, the parsed
    location and the gene name.

    :param assembly: assembly identifier passed to ``Assembly``.
    :param filename: path of the profile file to read.
    :raises ValueError: if a line does not split into the expected columns.
    """
    tmap = Assembly(assembly).get_transcript_mapping()
    header = [
        'ID', 'Count', 'RPKM', 'Chrom', 'Start', 'End', 'Strand',
        'GeneName', 'Length', 'Type', 'Sense', 'Synonyms'
    ]
    # Context managers guarantee both files are closed even when a malformed
    # line raises part-way through.
    with open(filename) as profile:
        # 'wb' keeps the '\n' line endings untouched (Python 2 str writes).
        with open('simulation/count_simulation.txt', 'wb') as out:
            out.write('\t'.join(header) + '\n')
            for line in profile:
                (loc, tid, coding, length,
                 expr_fraction, expr_number, lib_fraction, lib_number,
                 seq_fraction, seq_number,
                 cov_fraction, chisq, var_coeff) = line.split('\t')

                # Location looks like "<chrom>:<start>-<end><W|C>"; the last
                # character selects the strand.
                chrom, coord = loc.split(':')
                start, end = coord[:-1].split('-')
                strand = '1' if coord[-1] == 'W' else '-1'

                nreads = float(seq_number)
                if nreads != 0:
                    # Estimate the total number of reads from this
                    # transcript's count and its fraction of the library.
                    ntotal = nreads / float(seq_fraction)
                    rpkm = 1e9 * nreads / (float(length) * ntotal)
                else:
                    rpkm = 0.0

                t = tmap.get(tid)
                if t is None:
                    # Transcripts unknown to the assembly are skipped.
                    continue
                row = [
                    tid, seq_number, str(rpkm), chrom, start, end, strand,
                    t.gene_name, length, 'transcript', '.', '.'
                ]
                out.write('\t'.join(row) + '\n')
def add_new_sequence(sequence):
    """Create the default ``Genes`` track for a newly created GDV sequence.

    Called when a new sequence is created on GDV. The function:

    1. resolves the sqlite URL through ``Assembly(sequence).get_sqlite_url()``;
    2. describes the download as a ``FileInfo`` targeting ``Genes.sql`` in the
       file manager's temporary directory;
    3. creates a ``Track`` owned by the admin user and flushes it so that it
       gets an id;
    4. submits ``tasks.new_input`` through ``.delay`` and records the returned
       task id on the track;
    5. appends the track to ``sequence.default_tracks``.

    :param sequence: the sequence object; its ``id``, ``name`` and
        ``default_tracks`` attributes are used.
    """
    print('add new sequence')
    # NOTE(review): Assembly receives the sequence object itself rather than
    # sequence.id or sequence.name -- confirm this is intended.
    file_url = Assembly(sequence).get_sqlite_url()
    print(file_url)

    out = os.path.join(filemanager.temporary_directory(), 'Genes.sql')
    fileinfo = filemanager.FileInfo(inputtype='url',
                                    inpath=file_url,
                                    trackname='Genes',
                                    extension='sql',
                                    outpath=out,
                                    admin=True)
    print(fileinfo)

    user = DBSession.query(User).filter(
        User.key == constants.admin_user_key()).first()
    user_info = {'id': user.id, 'name': user.name, 'email': user.email}
    sequence_info = {'id': sequence.id, 'name': sequence.name}

    # track
    t = Track()
    t.name = fileinfo.trackname
    t.sequence_id = sequence.id
    t.user_id = user.id
    DBSession.add(t)
    # Flush so that t.id is available for the task below.
    DBSession.flush()

    # send task
    # The handle is not named ``async``: that is a reserved keyword from
    # Python 3.7 onwards.
    async_result = tasks.new_input.delay(user_info, fileinfo, sequence_info,
                                         t.id)
    t.task_id = async_result.task_id
    DBSession.add(t)

    sequence.default_tracks.append(t)
    DBSession.add(sequence)
    DBSession.flush()
#!/usr/bin/env python
# Rewrite the chromosome identifiers of a SAM header.
#
# Usage: header_translation <assembly_name>
#
# Reads "header.sam" from the current directory and writes "reheader.sam",
# replacing in every line the identifier found in the second tab-separated
# field (the text after its first ':') with the assembly's chrmeta key whose
# 'ac' entry equals that identifier.
import sys

if len(sys.argv) < 2:
    print("Usage: header_translation <assembly_name>")
    sys.exit(1)

# Imported after the argument check so the usage message does not depend on
# bbcflib being importable.
from bbcflib.genrep import Assembly

assembly = sys.argv[1]
a = Assembly(assembly)

# Reverse lookup: 'ac' value -> chrmeta key.
ac2name = dict((meta['ac'], name) for name, meta in a.chrmeta.items())

# The context managers close both files; the previous trailing g.close()
# referred to a handle that was never opened and left the output unclosed.
with open("header.sam") as f:
    with open("reheader.sam", "wb") as h:
        for line in f:
            L = line.split('\t')
            chrom = L[1].split(':')[1]
            newchrom = ac2name[chrom]
            h.write(line.replace(chrom, newchrom))
# Copy "<md5>_ENSEMBL.gtf" to "<md5>_REFSEQ.gtf", replacing the first
# tab-separated column of every line by the 'ac' entry that the hg38
# assembly's chrmeta holds for that column's value.
from bbcflib.genrep import Assembly

a = Assembly('hg38')
chrmeta = a.chrmeta

md5 = "cbcc5aeeb39d29065c6641aafd5ccaa430706008"
filename = "%s_ENSEMBL.gtf" % md5
to = "%s_REFSEQ.gtf" % md5

f = open(filename)
g = open(to, "wb")
for line in f:
    # Split off only the first column; the rest of the line (including its
    # line ending) is carried over untouched.
    ensembl, tab, remainder = line.partition('\t')
    refseq = chrmeta[ensembl]['ac']
    g.write(''.join((refseq, tab, remainder)))
f.close()
g.close()
def setUp(self): self.assembly = Assembly('ce6') self.root = self.assembly.genrep.root self.intype = 0 """