Example #1
 def __call__(self, **kw):
     assembly_id = kw.get('assembly') or None
     assembly = genrep.Assembly(assembly_id)
     tinput = track(kw.get('track'), chrmeta=assembly.chrmeta)
     try:
         thPromot = int(kw.get("promoter"))
     except (ValueError, TypeError):
         thPromot = prom_def
     try:
         thInter = int(kw.get("intergenic"))
     except (ValueError, TypeError):
         thInter = inter_def
     try:
         thUTR = int(kw.get("UTR"))
     except (ValueError, TypeError):
         thUTR = utr_def
     output = self.temporary_path(fname=tinput.name+'_annotated.txt')
     _fields = tinput.fields+['gene', 'location_type', 'distance']
     tout = track(output, format='txt', fields=_fields)
     tout.make_header("#"+"\t".join(tout.fields))
     for chrom in assembly.chrnames:
         tout.write(getNearestFeature(
                 tinput.read(selection=chrom),
                 assembly.gene_track(chrom),
                 thPromot, thInter, thUTR), mode='append')
     tout.close()
     self.new_file(output, 'table')
     return self.display_time()
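Most of the plugin examples in this collection follow the same bbcflib track idiom: open the input with track(), stream it one chromosome at a time, transform the stream, and write it to an output track, switching to append mode after the first chromosome. A minimal sketch of that loop, using only calls that appear in these examples (the import path is an assumption):

    from bbcflib.track import track  # assumed import path

    def copy_by_chrom(infile, outfile):
        # Copy a track chromosome by chromosome; a stream transform such as
        # getNearestFeature or window_smoothing would be applied to tin.read()
        # before writing, as the surrounding examples do.
        tin = track(infile)
        tout = track(outfile, fields=tin.fields, chrmeta=tin.chrmeta)
        mode = 'write'
        for chrom in tin.chrmeta:
            tout.write(tin.read(selection=chrom), chrom=chrom, mode=mode, clip=True)
            mode = 'append'
        tout.close()
        tin.close()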
Example #2
 def __call__(self, **kw):
     func = kw.get('function', "log2")
     l_track = kw.get('SigMulti', {}).get('track', [])
     if not isinstance(l_track, list): l_track = [l_track]
     outall = []
     for tname in l_track:
         tinput = track(tname)
         if 'score' not in tinput.fields: continue
         format = kw.get('format', tinput.format)
         out_name = tinput.name + '_' + func + '.' + format
         outtemp = self.temporary_path(out_name)
         out_track = track(outtemp, chrmeta=tinput.chrmeta)
         filtered = score_threshold(tinput, strict=(func[:3] == "log"))
         out_track.write(apply(filtered, 'score', eval(func)), mode='write')
         out_track.close()
         outall.append(outtemp)
         tinput.close()
     if len(outall) == 1:
         self.new_file(outall[0], 'converted_track')
     elif len(outall) > 1:
         tar_name = self.temporary_path(fname="numeric_operation_out.tgz")
         tar = tarfile.open(tar_name, "w:gz")
         [tar.add(f, arcname=os.path.basename(f)) for f in outall]
         tar.close()
         self.new_file(tar_name, 'converted_track_tar')
     return self.display_time()
Example #3
 def __call__(self, **kw):
     tinput = track(kw.get('track'), chrmeta=kw.get('assembly') or None)
     outformat = kw.get('format',tinput.format)
     wsize = int(kw.get('window_size', size_def) or 10)
     wstep = int(kw.get('window_step', step_def) or 1)
     featurewise = kw.get('by_feature', False)
     if isinstance(featurewise, basestring):
         featurewise = (featurewise.lower() in ['1', 'true', 't','on'])
     output = self.temporary_path(fname=tinput.name+'_smoothed', ext=outformat)
     if featurewise:
         outfields = tinput.fields
         datatype = "qualitative"
     else:
         outfields = ["chr","start", "end", "score"]
         datatype = "quantitative"
     tout = track(output, format=outformat, fields=outfields, chrmeta=tinput.chrmeta, info={'datatype': datatype})
     for chrom in tout.chrmeta.keys():
         s = window_smoothing(
             tinput.read(selection=chrom, fields=outfields),
             window_size=wsize, step_size=wstep,
             featurewise=featurewise)
         tout.write(s, chrom=chrom, clip=True)
     tout.close()
     self.new_file(output, 'smoothed_track')
     return self.display_time()
Example #4
    def __call__(self, **kw):
        # Create a track with the whole chromosome
        chrmeta = _get_chrmeta(**kw)
        sig0 = track(kw['TrackMulti']['tracks'][0])
        fields = sig0.fields
        format = sig0.format
        is_chr = 'chr' in fields
        _f0 = ('chr','start','end') if is_chr else ('start','end')
        _f1 = [f for f in fields if f not in _f0]
        whole_chr = []
        if is_chr:
            for chr in chrmeta:
                whole_chr.append( (chr,0,chrmeta[chr]['length'])+('0',)*len(_f1) )
        else:
            fields = [f for f in fields if f not in ['start','end']]
            fields = ['start','end']+fields
            for chr in chrmeta:
                whole_chr.append( (0,chrmeta[chr]['length'])+('0',)*len(_f1) )
        whole_chr = FeatureStream(whole_chr,fields=fields)
        temp = self.temporary_path()+'.'+format
        with track(temp,fields=fields) as wc:
            wc.write(whole_chr)

        kw['TrackMulti']['tracks'] = [temp] + kw['TrackMulti']['tracks']
        output = self.temporary_path(fname='combined.')
        output = _combine(self._func,output,**kw)
        self.new_file(output, 'combined')
        return self.display_time()
Example #5
 def test_chr_loop(self):
     tempfile = os.path.join(path, 'temp6.txt')
     t = track(self.bed)
     out = track(tempfile, fields=t.fields)
     for chr in ['chrII', 'chrIII', 'chrIV']:
         s = t.read(chr)
         out.write(s)
Example #6
    def __call__(self, **kw):
        # Create a track with the whole chromosome
        chrmeta = _get_chrmeta(**kw)
        sig0 = track(kw['TrackMulti']['tracks'][0])
        fields = sig0.fields
        format = sig0.format
        is_chr = 'chr' in fields
        _f0 = ('chr', 'start', 'end') if is_chr else ('start', 'end')
        _f1 = [f for f in fields if f not in _f0]
        whole_chr = []
        if is_chr:
            for chr in chrmeta:
                whole_chr.append((chr, 0, chrmeta[chr]['length']) +
                                 ('0', ) * len(_f1))
        else:
            fields = [f for f in fields if f not in ['start', 'end']]
            fields = ['start', 'end'] + fields
            for chr in chrmeta:
                whole_chr.append((0, chrmeta[chr]['length']) +
                                 ('0', ) * len(_f1))
        whole_chr = FeatureStream(whole_chr, fields=fields)
        temp = self.temporary_path() + '.' + format
        with track(temp, fields=fields) as wc:
            wc.write(whole_chr)

        kw['TrackMulti']['tracks'] = [temp] + kw['TrackMulti']['tracks']
        output = self.temporary_path(fname='combined.')
        output = _combine(self._func, output, **kw)
        self.new_file(output, 'combined')
        return self.display_time()
Example #7
def run_wellington( ex, tests, names, assembly, via, logfile ):
    futures = {}
    logfile.write("Running Wellington:\n");logfile.flush()
    wellout = {}
    for nbam,bed_bam in enumerate(tests):
        name = names['tests'][nbam]
        wellout[name] = []
        tbed = track(bed_bam[0])
        for chrom in assembly.chrnames:
            _chrombed = unique_filename_in()
            with track(_chrombed,format="bed",fields=tbed.fields) as _tt:
                if len(bed_bam) > 2:
                    _neighb = neighborhood( tbed.read(chrom), before_start=bed_bam[2], after_end=bed_bam[2] )
                else:
                    _neighb = tbed.read(chrom)
                _tt.write(fusion(_neighb),clip=True)
            if os.path.getsize(_chrombed) > 0:
                futures[(chrom,name)] = wellington.nonblocking(ex, _chrombed, bed_bam[1], via=via, memory=8)
    for chro_name, _fut in futures.iteritems():
        chrom, name = chro_name
        logfile.write(name[1]+" "+chrom+", ");logfile.flush()
        wellout[name].append(_fut.wait())
    logfile.write("\n");logfile.flush()
    bedlist = save_wellington(ex, wellout, assembly.chrmeta)
    return bedlist
Example #8
 def test_chr_loop(self):
     tempfile = os.path.join(path,'temp6.txt')
     t = track(self.bed)
     out = track(tempfile, fields=t.fields)
     for chr in ['chrII','chrIII','chrIV']:
         s = t.read(chr)
         out.write(s)
Example #9
 def __call__(self, **kw):
     func = kw.get('function',"log2")
     #l_track = kw.get('SigMulti', {}).get('track',[])
     l_track = kw.get('track',[])
     if not isinstance(l_track, list): l_track = [l_track]
     outall = []
     for tname in l_track :
         tinput = track(tname)
         if 'score' not in tinput.fields: continue
         format = kw.get('output',tinput.format)
         out_name = tinput.name+'_'+func+'.'+format
         outtemp = self.temporary_path(out_name)
         out_track = track(outtemp,chrmeta=tinput.chrmeta)
         filtered = score_threshold(tinput, strict=(func[:3] == "log"))
         out_track.write(apply(filtered,'score',eval(func)), mode='write')
         out_track.close()
         outall.append(outtemp)
         tinput.close()
     if len(outall) == 1:
         self.new_file(outall[0], 'converted_track')
     elif len(outall) > 1:
         tar_name = self.temporary_path(fname="numeric_operation_out.tgz")
         tar = tarfile.open(tar_name, "w:gz")
         [tar.add(f,arcname=os.path.basename(f)) for f in outall]
         tar.close()
         self.new_file(tar_name, 'converted_track_tar')
     return self.display_time()
Example #10
 def __call__(self, **kw):
     # Set assembly
     assembly_id = kw.get('assembly')
     chrmeta = "guess"
     if assembly_id:
         assembly = genrep.Assembly(assembly_id)
         chrmeta = assembly.chrmeta
     # Set features track
     features = track(kw['features'], chrmeta=chrmeta or None)
     chrmeta = features.chrmeta
     # Set filter track
     filter = track(kw.get('filter'), chrmeta=chrmeta or None)
     # Main
     format = kw.get('format', features.format)
     output = self.temporary_path(fname=features.name + '_filtered.' +
                                  format)
     tout = track(output,
                  format,
                  fields=filter.fields,
                  chrmeta=chrmeta,
                  info={'datatype': 'qualitative'})
     for chrom in chrmeta:
         tout.write(overlap(features.read(chrom), filter.read(chrom)),
                    chrom=chrom,
                    clip=True)
     tout.close()
     self.new_file(output, 'filtered')
     return self.display_time()
Example #11
 def test_get_chrmeta(self):
     t = track(self.bed,chrmeta=self.assembly)
     self.assertEqual(t.chrmeta['chrV'],{'length':576869, 'ac':'2508_NC_001137.2'})
     # "guess"
     t = track(self.bed,chrmeta="guess")
     self.assertEqual(t.chrmeta, {'chrII':{'length':607135}, 'chrIII':{'length':178216},
                                  'chrIV':{'length':1402556}} )
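The chrmeta argument used throughout these examples can be given in three forms: a GenRep assembly name, the string "guess" (metadata inferred from the file itself), or an explicit dictionary of per-chromosome metadata. A small illustration with a made-up file name; the 'mm9' assembly and the chr1 length are taken from other examples in this collection:

    t1 = track('regions.bed', chrmeta='mm9')    # assembly name, resolved through GenRep
    t2 = track('regions.bed', chrmeta='guess')  # chromosome lengths inferred from the file
    t3 = track('regions.bed', chrmeta={'chr1': {'length': 197195432}})  # explicit dict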
Example #12
def create_tracks(ex, outall, sample_names, assembly):
    """Write BED tracks showing SNPs found in each sample."""
    infields = ['chromosome','position','reference']+sample_names+['gene','location_type','distance']
    intrack = track(outall, format='text', fields=infields, chrmeta=assembly.chrmeta,
                    intypes={'position':int})
    instream = intrack.read(fields=infields[:-3])
    outtracks = {}
    for sample_name in sample_names:
        out = unique_filename_in()+'.bed.gz'
        t = track(out,fields=['name'])
        t.make_header(name=sample_name+"_SNPs")
        outtracks[sample_name] = (t,out)

    def _row_to_annot(x,ref,n):
        if x[3+n][0] == ref: return None
        else: return "%s>%s"%(ref,x[3+n][0])

    for x in instream:
        coord = (x[0],x[1]-1,x[1])
        ref = x[2]
        snp = dict((name, _row_to_annot(x,ref,n)) for n,name in enumerate(sample_names))
        for name, tr in outtracks.iteritems():
            if snp[name]: tr[0].write([coord+(snp[name],)],mode='append')
    for name, tr in outtracks.iteritems():
        tr[0].close()
        description = set_file_descr(name+"_SNPs.bed.gz",type='bed',step='tracks',gdv='1',ucsc='1')
        ex.add(tr[1], description=description)
Example #13
def run_wellington(ex, tests, names, assembly, via, logfile):
    futures = {}
    logfile.write("Running Wellington:\n")
    logfile.flush()
    wellout = {}
    for nbam, bed_bam in enumerate(tests):
        name = names['tests'][nbam]
        wellout[name] = []
        tbed = track(bed_bam[0])
        for chrom in assembly.chrnames:
            _chrombed = unique_filename_in()
            with track(_chrombed, format="bed", fields=tbed.fields) as _tt:
                if len(bed_bam) > 2:
                    _neighb = neighborhood(tbed.read(chrom),
                                           before_start=bed_bam[2],
                                           after_end=bed_bam[2])
                else:
                    _neighb = tbed.read(chrom)
                _tt.write(fusion(_neighb), clip=True)
            if os.path.getsize(_chrombed) > 0:
                futures[(chrom, name)] = wellington.nonblocking(ex,
                                                                _chrombed,
                                                                bed_bam[1],
                                                                via=via,
                                                                memory=8)
    for chro_name, _fut in futures.iteritems():
        chrom, name = chro_name
        logfile.write(name[1] + " " + chrom + ", ")
        logfile.flush()
        wellout[name].append(_fut.wait())
    logfile.write("\n")
    logfile.flush()
    bedlist = save_wellington(ex, wellout, assembly.chrmeta)
    return bedlist
Example #14
 def __call__(self, **kw):
     tinput = track(kw.get("track"), chrmeta=kw.get("assembly") or None)
     outformat = kw.get("output", tinput.format)
     wsize = int(kw.get("window_size", size_def) or 10)
     wstep = int(kw.get("window_step", step_def) or 1)
     featurewise = kw.get("by_feature", False)
     if isinstance(featurewise, basestring):
         featurewise = featurewise.lower() in ["1", "true", "t", "on"]
     output = self.temporary_path(fname=tinput.name + "_smoothed", ext=outformat)
     if featurewise:
         outfields = tinput.fields
         datatype = "qualitative"
     else:
         outfields = ["chr", "start", "end", "score"]
         datatype = "quantitative"
     tout = track(output, format=outformat, fields=outfields, chrmeta=tinput.chrmeta, info={"datatype": datatype})
     for chrom in tout.chrmeta.keys():
         s = window_smoothing(
             tinput.read(selection=chrom, fields=outfields),
             window_size=wsize,
             step_size=wstep,
             featurewise=featurewise,
         )
         tout.write(s, chrom=chrom, clip=True)
     tout.close()
     self.new_file(output, "smoothed_track")
     return self.display_time()
Example #15
def fimo(motifs,fasta,qval=True):
    # Run Fimo
    if qval:
        options = "--max-stored-scores 1000000 --verbosity 1 --thresh 0.01 --qv-thresh"
    else:
        options = "--max-stored-scores 1000000 --verbosity 1 --thresh 0.000001"
    cmd = "fimo " + options + " %s %s" % (motifs, fasta)
    print "Running >>",cmd
    os.system(cmd)
    os.system("sort -k2,2n -k3,3n -k4,4n fimo_out/fimo.txt > fimo.txt")

    # Bed output
    t = track('fimo.txt', fields=["name","chr","start","end","strand","score","p-value","q-value","sequence"])
    t.fields = ["name","chr","start","end","strand","a","score","q","sequence"]
    s = t.read()
    s = select(s,['chr','start','end','name','score','strand'])
    s = apply(s,'chr',lambda x:x.split('|')[1])
    s = sorted_stream(s)
    s = cobble(s)
    s = apply(s,'name',lambda x:'|'.join(list(set(x.split('|')))))
    outname = 'fimo.bed'
    bed = track(outname,fields=s.fields)
    bed.make_header(name="TSS_motifs", description="Motifs +-XKb around TSS", mode='overwrite')
    bed.write(s)
    if os.path.exists("fimo_out"): shutil.rmtree("fimo_out")
Example #16
def fimo(motifs, fasta, qval=True):
    # Run Fimo
    if qval:
        options = "--max-stored-scores 1000000 --verbosity 1 --thresh 0.01 --qv-thresh"
    else:
        options = "--max-stored-scores 1000000 --verbosity 1 --thresh 0.000001"
    cmd = "fimo " + options + " %s %s" % (motifs, fasta)
    print "Running >>", cmd
    os.system(cmd)
    os.system("sort -k2,2n -k3,3n -k4,4n fimo_out/fimo.txt > fimo.txt")

    # Bed output
    t = track('fimo.txt',
              fields=[
                  "name", "chr", "start", "end", "strand", "score", "p-value",
                  "q-value", "sequence"
              ])
    t.fields = [
        "name", "chr", "start", "end", "strand", "a", "score", "q", "sequence"
    ]
    s = t.read()
    s = select(s, ['chr', 'start', 'end', 'name', 'score', 'strand'])
    s = apply(s, 'chr', lambda x: x.split('|')[1])
    s = sorted_stream(s)
    s = cobble(s)
    s = apply(s, 'name', lambda x: '|'.join(list(set(x.split('|')))))
    outname = 'fimo.bed'
    bed = track(outname, fields=s.fields)
    bed.make_header(name="TSS_motifs",
                    description="Motifs +-XKb around TSS",
                    mode='overwrite')
    bed.write(s)
    if os.path.exists("fimo_out"): shutil.rmtree("fimo_out")
Example #17
 def __call__(self, **kw):
     assembly = kw.get('assembly') or 'guess'
     signals_plus = kw.get('SigMultiP',{}).get('signals_plus', [])
     if not isinstance(signals_plus, list): signals_plus = [signals_plus]
     signals_minus = kw.get('SigMultiM',{}).get('signals_minus', [])
     if not isinstance(signals_minus, list): signals_minus = [signals_minus]
     features = kw.get('FeatMulti',{}).get('features', [])
     if not isinstance(features, list): features = [features]
     sptracks = [track(sig,chrmeta=assembly) for sig in signals_plus if os.path.exists(sig)]
     smtracks = [track(sig,chrmeta=assembly) for sig in signals_minus if os.path.exists(sig)]
     ftracks = [track(feat,chrmeta=assembly) for feat in features if os.path.exists(feat)]
     snames = [t.name for t in sptracks+smtracks+ftracks]
     if len(sptracks) > 0:
         chrmeta = sptracks[0].chrmeta
     elif len(smtracks) > 0:
         chrmeta = smtracks[0].chrmeta
      elif len(ftracks) > 0:
         chrmeta = ftracks[0].chrmeta
     else:
         raise ValueError("No data provided")
     if assembly in [x[0] for x in genrep.GenRep().assemblies_available()]:
         chrnames = genrep.Assembly(assembly).chrnames
     else:
         chrnames = [x[1] for x in sorted([(v['length'],c) for c,v in chrmeta.iteritems()],reverse=True)]
     pdf = self.temporary_path(fname='genome_graph.pdf')
     _fs = ['chr','start','end','score']
     _ff = ['chr','start','end','name']
     genomeGraph([(c,chrmeta[c]['length']) for c in chrnames],
                 [sig.read(fields=_fs) for sig in sptracks],
                 [sig.read(fields=_fs) for sig in smtracks],
                 [feat.read(fields=_ff) for feat in ftracks],
                 output=pdf, new=True, last=True, legend=snames)
     self.new_file(pdf, 'genome_graph')
     return self.display_time()
Example #18
 def test_make_header(self):
     t = track(self.bed)
     o = track("temp.bed",fields=t.fields)
     t.open()
     o.make_header(t.filehandle.readline()) # copy the header
     t.filehandle.seek(0)
     o.write(t.read())
     o.close()
     self.assertEqual(list(t.read()),list(o.read()))
Example #19
    def __call__(self, **kw):
        _f = ['start', 'end', 'score']
        format = kw.get("format") or "sql"
        bamfiles = kw.get('BamMulti', {}).get('bamfiles', [])
        if not isinstance(bamfiles, (tuple, list)): bamfiles = [bamfiles]
        bamfiles = [track(bam) for bam in bamfiles]
        all_tracks = []
        pdf = self.temporary_path(fname='Paired_end_plots.pdf')
        robjects.r('pdf("%s",paper="a4",height=11,width=8)' % pdf)
        midpoint = kw.get("midpoint", False)
        if isinstance(midpoint, basestring):
            midpoint = (midpoint.lower() in ['1', 'true', 't', 'on'])
        plot_only = kw.get("plot_only", False)
        if isinstance(plot_only, basestring):
            plot_only = (plot_only.lower() in ['1', 'true', 't', 'on'])

        for bam in bamfiles:
            if not plot_only:
                tname = "%s_frags.%s" % (bam.name, format)
                outname = self.temporary_path(fname=tname)
                all_tracks.append(outname)
                trout = track(outname,
                              fields=_f,
                              chrmeta=bam.chrmeta,
                              info={
                                  'datatype': 'quantitative',
                                  'PE_midpoint': midpoint
                              })
            self.frag_rep = {}
            self.frag_size = {}
            self.nb_frag = 0
            for chrom, cval in bam.chrmeta.iteritems():
                self._compute_stats(bam.fetch(chrom, 0, cval['length']))
                if not plot_only:
                    trout.write(bam.PE_fragment_size(chrom, midpoint=midpoint),
                                fields=_f,
                                chrom=chrom)
            if not plot_only: trout.close()
            if self.nb_frag > 1:
                self._plot_stats(bam.name)
            else:
                raise ValueError("No paired-end found in %s" % bam.name)
        robjects.r('dev.off()')
        if not plot_only:
            if len(all_tracks) > 1:
                tarname = self.temporary_path(fname='PE_fragment_tracks.tgz')
                tar_tracks = tarfile.open(tarname, "w:gz")
                [
                    tar_tracks.add(f, arcname=os.path.basename(f))
                    for f in all_tracks
                ]
                tar_tracks.close()
                self.new_file(tarname, 'fragment_track_tar')
            else:
                self.new_file(all_tracks[0], 'fragment_track')
        self.new_file(pdf, 'statistics_plot')
        return self.display_time()
Example #20
 def test_make_header(self):
     t = track(self.bed)
     o = track("temp.bed", fields=t.fields)
     t.open()
     o.make_header(t.filehandle.readline())  # copy the header
     t.filehandle.seek(0)
     o.write(t.read())
     o.close()
     self.assertEqual(list(t.read()), list(o.read()))
Example #21
File: track.py Project: SilasK/bbcfutils
def read(*args, **kw):
    if len(args) < 1: raise Usage("No input file provided")
    selection = None
    if kw['selection']:
        if kw['selection'].count("{"):
            jsonargs = json.loads(kw['selection'])
            for k, v in jsonargs.iteritems():
                if isinstance(v, basestring): jsonargs[k] = str(v)
            selection = dict((str(k), v) for k, v in jsonargs.iteritems())
        elif kw['selection'].count(":"):
            chr, coord = kw['selection'].split(':')
            start, end = coord.split('-')
            selection = {
                'chr': chr,
                'start': (int(start), int(end)),
                'end': (int(start), int(end))
            }
        else:
            selection = str(kw['selection']).split(",")
    fields = None
    outformat = None
    if kw['fields']:
        fields = str(kw['fields']).split(",")
    if kw['output'] is None:
        output = sys.stdout
        outformat = "txt"
    else:
        output = open(kw['output'], 'w')
    chrmeta = _get_chrmeta(**kw)
    for infile in args:
        intrack = track.track(infile, format=kw['format'], chrmeta=chrmeta)
        if kw['description']:
            if intrack.info:
                fileinfo = ",".join(
                    ["%s=%s" % (k, v) for k, v in intrack.info.iteritems()])
            else:
                fileinfo = 'None'
            chromlist = ",".join(sorted(intrack.chrmeta.keys())) or "None"
            fields = ",".join(intrack.fields)
            output.write(\
"""# *****************************************
# File '%s' (%s):
# Infos: %s
# Chromosomes: %s
# Fields: %s
# *****************************************
""" %(os.path.basename(infile), intrack.format, fileinfo, chromlist, fields))
            continue
        with track.track(output, format=outformat, fields=fields) as _tout:
            _tout.write(intrack.read(selection=selection, fields=fields))
        intrack.close()
    try:
        output.close()
    except IOError:
        pass  # if stdout
    return 0
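For reference, the selection parsing above maps the command-line string to the selection passed to intrack.read() as follows (the region values are illustrative):

    # 'chr1:100-200'    -> {'chr': 'chr1', 'start': (100, 200), 'end': (100, 200)}
    # '{"chr": "chr1"}' -> {'chr': 'chr1'}    (any JSON object is passed through)
    # 'chr1,chr2'       -> ['chr1', 'chr2']   (comma-separated chromosome names)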
Example #22
def merge_junc_files(trackList,assembly):
    out = track('all.junc',format='txt',fields=['chr','start','end','strand','score'])
    from bbcflib.genrep import Assembly
    a = Assembly(assembly)
    for c in a.chromosomes:
        tl = [track(t,fields=['chr','start','end','strand','score'],format='txt').read(str(c[0])+'_'+c[1]+'.'+str(c[2]))
              for t in trackList]
        #all = concatenate(tl,remove_duplicates=True)
        all = concatenate(tl,group_by=['chr','start','end'],aggregate={'score':lambda x:sum(x)})
        out.write(all,mode='append')
Example #23
File: track.py Project: SilasK/bbcfutils
def sort(*args,**kw):
    if len(args) < 1: raise Usage("No input file provided")
    chrmeta = _get_chrmeta(**kw)
    for infile in args:
        intrack = track.track(infile,format=kw['format'],chrmeta=chrmeta)
        outname = kw['output'] or intrack.name+'_sorted.'+intrack.format
        outtrack = track.track(outname, chrmeta=intrack.chrmeta)
        instream = intrack.read()
        s = sorted_stream(instream, chrnames=json.loads(kw['chromosomes']))
        outtrack.write(s)
        intrack.close()
    return 0
Example #24
File: track.py Project: SilasK/bbcfutils
def sort(*args, **kw):
    if len(args) < 1: raise Usage("No input file provided")
    chrmeta = _get_chrmeta(**kw)
    for infile in args:
        intrack = track.track(infile, format=kw['format'], chrmeta=chrmeta)
        outname = kw['output'] or intrack.name + '_sorted.' + intrack.format
        outtrack = track.track(outname, chrmeta=intrack.chrmeta)
        instream = intrack.read()
        s = sorted_stream(instream, chrnames=json.loads(kw['chromosomes']))
        outtrack.write(s)
        intrack.close()
    return 0
Example #25
    def test_read(self):
        t = track(self.bed)
        s = t.read()
        self.assertIsInstance(s, FeatureStream)
        x = s.next()
        y = t.readline()
        self.assertEqual(x,y)

        # zipped file
        t = track(self.bed+'.gz')
        s = t.read(); s.next()
        self.assertIsInstance(s, FeatureStream)
Example #26
File: Maplot.py Project: bbcf/bsPlugins
    def __call__(self, **kw):

        if kw.get('input_type') == 'Table':
            table = kw.get('table')
            assert os.path.exists(str(table)), "File not found: '%s'" % table
            with open(table) as t:
                colnames = t.readline()
                _f = colnames.strip().split()
                nscores = len(_f)-1
            groups = sorted(set([x.split('.')[0] for x in _f]))
            if nscores == 2: # 3 columns, cols 2 and 3 contain the scores
                sample1 = [2]
                sample2 = [3]
            elif len(groups) == 2: # more columns, look if there are two groups of prefixes
                sample1 = [_f.index(x) for x in _f if x.split('.')[0]==groups[0]]
                sample2 = [_f.index(x) for x in _f if x.split('.')[0]==groups[1]]
            else: # not implemented yet, ask the user to choose the columns he wants? Checkboxes...
                raise ValueError("For the moment, either have only 2 columns of scores, \
                                 or use names of the form <group_name>.<run_id>")
        else:
            # Use QuantifyTablePlugin to build a table from score tracks
            from QuantifyTable import QuantifyTablePlugin
            # Set QuantifyTablePlugin options
            kw['score_op'] = 'sum'
            kw['format'] = 'txt'
            #signals1 = kw['Group1']['signals1']
            signals1 = kw['signals1']
            #signals2 = kw['Group2']['signals2']
            signals2 = kw['signals2']
            if not isinstance(signals1,(list,tuple)): signals1 = [signals1]
            if not isinstance(signals2,(list,tuple)): signals2 = [signals2]
            kw['signals'] = signals1 + signals2
            signals = kw['signals']
            nscores = len(signals)
            qtable = QuantifyTablePlugin().quantify(**kw)
            # Remove useless fields and add header based on file names
            qtable = track(qtable, format='txt', fields=['chr','start','end','name']+ \
                                                        ['score'+str(i) for i in range(nscores)])
            table = self.temporary_path('scores_table.txt')
            _f = ['score'+str(i) for i in range(nscores)]
            strack = track(table, fields=['name']+_f)
            signal_tracks = [track(s) for s in signals]
            signames = [s.name for s in signal_tracks]
            strack.write([('Name',signames[0],signames[1])])
            strack.write(qtable.read(fields=strack.fields))
            sample1 = range(len(signals1))
            sample2 = range(nscores-len(signals1))

        output_filename = MAplot(table, cols={1:sample1, 2:sample2})
        output = self.temporary_path(fname='maplot.png')
        shutil.move(output_filename,output)
        self.new_file(output, 'MA-plot')
        return self.display_time()
Example #27
File: track.py Project: SilasK/bbcfutils
def merge(*args,**kw):
    if not(kw['forward'] and os.path.exists(kw['forward'])):
        raise Usage("Specify a valid forward strand density file with -f.")
    if not(kw['reverse'] and os.path.exists(kw['reverse'])):
        raise Usage("Specify a valid reverse strand density file with -r.")
    if not(kw['output']):
        raise Usage("Specify the output file name.")

    def _shift(stream,shift):
        istart = stream.fields.index('start')
        iend   = stream.fields.index('end')
        i1 = min(istart,iend)
        i2 = max(istart,iend)
        def _apply_shift(x):
            return x[:i1]+(x[i1]+shift,)+x[i1+1:i2]+(x[i2]+shift,)+x[i2+1:]
        return track.FeatureStream((_apply_shift(x) for x in stream),
                                    fields=stream.fields)

    fields = ['chr','start','end','score']
    chrmeta = _get_chrmeta(**kw)
    tfwd = track.track(kw['forward'],format=kw['formatf'],chrmeta=chrmeta)
    trev = track.track(kw['reverse'],format=kw['formatr'],chrmeta=chrmeta)
    if tfwd.chrmeta:
        chrmeta = tfwd.chrmeta
    elif trev.chrmeta:
        chrmeta = trev.chrmeta
    else:
        raise Usage("Specify an assembly with -a.")

    shiftval = int(kw['shift'])
    if shiftval < 0:
        slim = 300
        chrsize,chrom = sorted([(v['length'],k)
                                for k,v in chrmeta.iteritems()],reverse=True)[0]
        xcor = correlation([tfwd.read(chrom),trev.read(chrom)],
                           (1,chrsize),limits=(-slim,slim))
        shiftval = (xcor.argmax()-slim-1)/2
        print "Autocorrelation shift=%i, correlation is %f." %(shiftval,xcor.max())

    tout = track.track(kw['output'],fields=fields,
                       chrmeta=chrmeta,info={'datatype':'quantitative'})
    mode = 'write'
    method = kw.get("method","mean")
    for chrom in chrmeta.keys():
        tout.write(merge_scores([_shift(tfwd.read(chrom), shiftval),
                                 _shift(trev.read(chrom),-shiftval)],
                                method=method),
                   chrom=chrom,mode=mode,clip=True)
        mode = 'append'
    tout.close()
    trev.close()
    tfwd.close()
    return 0
Example #28
File: Ratios.py Project: bbcf/bsPlugins
    def __call__(self,**kw):
        assembly = kw.get('assembly') or 'guess'
        t1 = track(kw['numerator'],chrmeta=assembly)
        t2 = track(kw['denominator'],chrmeta=assembly)
        format = kw.get('output') or t1.format
        wsize = int(kw.get('window_size') or size_def)
        self.log = kw.get('log',False)
        if isinstance(self.log, basestring):
            self.log = (self.log.lower() in ['1', 'true', 't','on'])
        try:
            self.pseudo = float(kw.get('pseudo'))
        except:
            self.pseudo = pseudo_def
        self.baseline = -log(self.pseudo,2)
        try:
            self.threshold = float(kw.get('threshold'))
        except:
            self.threshold = threshold_def
        distribution = kw.get('distribution',False)
        if isinstance(distribution, basestring):
            distribution = (distribution.lower() in ['1', 'true', 't','on'])
        if distribution:
            genome_length = sum((v['length'] for v in t1.chrmeta.values()))
            self.shifts = list(poisson(float(genome_length)/float(self.sample_num),self.sample_num))
            self.ratios = []

        output = self.temporary_path(fname='ratios_%s-%s.%s'%(t1.name,t2.name,format))
        with track(output, chrmeta=t1.chrmeta, fields=t1.fields,
                   info={'datatype': 'quantitative',
                         'log': self.log,
                         'pseudocounts': self.pseudo,
                         'threshold': self.threshold,
                         'window_size': wsize}) as tout:
            for chrom,vchr in t1.chrmeta.iteritems():
                if wsize > 1:
                    s1 = window_smoothing(t1.read(chrom),window_size=wsize,step_size=1,featurewise=False)
                    s2 = window_smoothing(t2.read(chrom),window_size=wsize,step_size=1,featurewise=False)
                else:
                    s1 = t1.read(chrom)
                    s2 = t2.read(chrom)
                s3 = merge_scores([s1,s2],method=self._divide)
                if distribution:
                    s3 = FeatureStream(self._sample_stream(s3,vchr['length']),fields=s3.fields)
                tout.write(s3, chrom=chrom, clip=True)
        self.new_file(output, 'ratios')

        if distribution:
            pdf = self.temporary_path(fname='%s-%s_ratios_distribution.pdf'%(t1.name,t2.name))
            density_boxplot(self.ratios,output=pdf,
                            name="%s/%s (median=%.2f)" %(t1.name,t2.name,median(self.ratios)))
            self.new_file(pdf, 'boxplot')
        return self.display_time()
Example #29
    def test_read(self):
        t = track(self.bed)
        s = t.read()
        self.assertIsInstance(s, FeatureStream)
        x = s.next()
        y = t.readline()
        self.assertEqual(x, y)

        # zipped file
        t = track(self.bed + '.gz')
        s = t.read()
        s.next()
        self.assertIsInstance(s, FeatureStream)
Example #30
    def __call__(self, **kw):
        def _shift(stream, shift):
            istart = stream.fields.index('start')
            iend = stream.fields.index('end')
            i1 = min(istart, iend)
            i2 = max(istart, iend)

            def _apply_shift(x):
                return x[:i1] + (x[i1] + shift,) + x[i1 + 1:i2] + (x[i2] + shift,) + x[i2 + 1:]
            return FeatureStream((_apply_shift(x) for x in stream),
                                       fields=stream.fields)

        assembly = kw.get('assembly') or 'guess'
        tfwd = track(kw.get('forward'), chrmeta=assembly)
        trev = track(kw.get('reverse'), chrmeta=assembly)
        chrmeta = tfwd.chrmeta

        shiftval = int(kw.get('shift', 0))
        if shiftval < 0:  # Determine shift automatically
            shiftval = None
            xcor_lim = 300
            for chrom, v in chrmeta.iteritems():
                chrsize = v['length']
                xcor_lim = min(xcor_lim, 0.01 * chrsize)
                xcor = correlation([tfwd.read(chrom), trev.read(chrom)], regions=(1, chrsize),
                                   limits=(-xcor_lim, xcor_lim))
                max_xcor_idx = xcor.argmax()
                if xcor[max_xcor_idx] > 0.2:
                    shiftval = (max_xcor_idx - xcor_lim - 1)/2
                    break
            if not shiftval:
                raise ValueError("Unable to detect shift automatically. Must specify a shift value.")

        output = self.temporary_path(fname=tfwd.name+'-'+trev.name+'_merged', 
                                     ext=kw.get('format',tfwd.format))
        outfields = [f for f in tfwd.fields if f in trev.fields]
        tout = track(output, chrmeta=chrmeta, fields=outfields,
                     info={'datatype': 'quantitative', 'shift': shiftval})
        mode = 'write'
        method = kw.get("method","mean")
        for chrom in chrmeta.keys():
            tout.write(merge_scores([_shift(tfwd.read(selection=chrom),  shiftval),
                                     _shift(trev.read(selection=chrom), -shiftval)],
                                    method=method),
                       chrom=chrom, mode=mode, clip=True)
            mode = 'append'
        tout.close()
        trev.close()
        tfwd.close()
        self.new_file(output, 'density_merged')
        return self.display_time()
Example #31
    def __call__(self, **kw):

        if kw.get('input_type') == 'Table':
            table = kw.get('table')
            assert os.path.exists(str(table)), "File not found: '%s'" % table
            with open(table) as t:
                colnames = t.readline()
                _f = colnames.strip().split()
                nscores = len(_f)-1
            groups = sorted(set([x.split('.')[0] for x in _f]))
            if nscores == 2: # 3 columns, cols 2 and 3 contain the scores
                sample1 = [2]
                sample2 = [3]
            elif len(groups) == 2: # more columns, look if there are two groups of prefixes
                sample1 = [_f.index(x) for x in _f if x.split('.')[0]==groups[0]]
                sample2 = [_f.index(x) for x in _f if x.split('.')[0]==groups[1]]
            else: # not implemented yet, ask the user to choose the columns he wants? Checkboxes...
                raise ValueError("For the moment, either have only 2 columns of scores, \
                                 or use names of the form <group_name>.<run_id>")
        else:
            # Use QuantifyTablePlugin to build a table from score tracks
            from QuantifyTable import QuantifyTablePlugin
            # Set QuantifyTablePlugin options
            kw['score_op'] = 'sum'
            kw['format'] = 'txt'
            signals1 = kw['Group1']['signals1']
            signals2 = kw['Group2']['signals2']
            if not isinstance(signals1,(list,tuple)): signals1 = [signals1]
            if not isinstance(signals2,(list,tuple)): signals2 = [signals2]
            kw['signals'] = signals1 + signals2
            signals = kw['signals']
            nscores = len(signals)
            qtable = QuantifyTablePlugin().quantify(**kw)
            # Remove useless fields and add header based on file names
            qtable = track(qtable, format='txt', fields=['chr','start','end','name']+ \
                                                        ['score'+str(i) for i in range(nscores)])
            table = self.temporary_path('scores_table.txt')
            _f = ['score'+str(i) for i in range(nscores)]
            strack = track(table, fields=['name']+_f)
            signal_tracks = [track(s) for s in signals]
            signames = [s.name for s in signal_tracks]
            strack.write([('Name',signames[0],signames[1])])
            strack.write(qtable.read(fields=strack.fields))
            sample1 = range(len(signals1))
            sample2 = range(nscores-len(signals1))

        output_filename = MAplot(table, cols={1:sample1, 2:sample2})
        output = self.temporary_path(fname='maplot.png')
        shutil.move(output_filename,output)
        self.new_file(output, 'MA-plot')
        return self.display_time()
Example #32
    def __call__(self, **kw):
        def _shift(stream, shift):
            istart = stream.fields.index('start')
            iend = stream.fields.index('end')
            i1 = min(istart, iend)
            i2 = max(istart, iend)

            def _apply_shift(x):
                return x[:i1] + (x[i1] + shift,) + x[i1 + 1:i2] + (x[i2] + shift,) + x[i2 + 1:]
            return FeatureStream((_apply_shift(x) for x in stream),
                                       fields=stream.fields)

        assembly = kw.get('assembly') or 'guess'
        tfwd = track(kw.get('forward'), chrmeta=assembly)
        trev = track(kw.get('reverse'), chrmeta=assembly)
        chrmeta = tfwd.chrmeta

        shiftval = int(kw.get('shift', 0))
        if shiftval < 0:  # Determine shift automatically
            shiftval = None
            xcor_lim = 300
            for chrom, v in chrmeta.iteritems():
                chrsize = v['length']
                xcor_lim = min(xcor_lim, 0.01 * chrsize)
                xcor = correlation([tfwd.read(chrom), trev.read(chrom)], regions=(1, chrsize),
                                   limits=(-xcor_lim, xcor_lim))
                max_xcor_idx = xcor.argmax()
                if xcor[max_xcor_idx] > 0.2:
                    shiftval = (max_xcor_idx - xcor_lim - 1)/2
                    break
            if not shiftval:
                raise ValueError("Unable to detect shift automatically. Must specify a shift value.")

        output = self.temporary_path(fname=tfwd.name+'-'+trev.name+'_merged', 
                                     ext=kw.get('format',tfwd.format))
        tout = track(output, chrmeta=chrmeta,
                     info={'datatype': 'quantitative', 'shift': shiftval})
        mode = 'write'
        method = kw.get("method","mean")
        for chrom in chrmeta.keys():
            tout.write(merge_scores([_shift(tfwd.read(selection=chrom), shiftval),
                                     _shift(trev.read(selection=chrom), -shiftval)],
                                    method=method),
                       chrom=chrom, mode=mode, clip=True)
            mode = 'append'
        tout.close()
        trev.close()
        tfwd.close()
        self.new_file(output, 'density_merged')
        return self.display_time()
Example #33
 def __call__(self, **kw):
     assembly = kw.get('assembly') or 'guess'
     signals_plus = kw.get('SigMultiP', {}).get('signals_plus', [])
     if not isinstance(signals_plus, list): signals_plus = [signals_plus]
     signals_minus = kw.get('SigMultiM', {}).get('signals_minus', [])
     if not isinstance(signals_minus, list): signals_minus = [signals_minus]
     features = kw.get('FeatMulti', {}).get('features', [])
     if not isinstance(features, list): features = [features]
     sptracks = [
         track(sig, chrmeta=assembly) for sig in signals_plus
         if os.path.exists(sig)
     ]
     smtracks = [
         track(sig, chrmeta=assembly) for sig in signals_minus
         if os.path.exists(sig)
     ]
     ftracks = [
         track(feat, chrmeta=assembly) for feat in features
         if os.path.exists(feat)
     ]
     snames = [t.name for t in sptracks + smtracks + ftracks]
     if len(sptracks) > 0:
         chrmeta = sptracks[0].chrmeta
     elif len(smtracks) > 0:
         chrmeta = smtracks[0].chrmeta
      elif len(ftracks) > 0:
         chrmeta = ftracks[0].chrmeta
     else:
         raise ValueError("No data provided")
     if assembly in [x[0] for x in genrep.GenRep().assemblies_available()]:
         chrnames = genrep.Assembly(assembly).chrnames
     else:
         chrnames = [
             x[1] for x in sorted([(v['length'], c)
                                   for c, v in chrmeta.iteritems()],
                                  reverse=True)
         ]
     pdf = self.temporary_path(fname='genome_graph.pdf')
     _fs = ['chr', 'start', 'end', 'score']
     _ff = ['chr', 'start', 'end', 'name']
     genomeGraph([(c, chrmeta[c]['length']) for c in chrnames],
                 [sig.read(fields=_fs) for sig in sptracks],
                 [sig.read(fields=_fs) for sig in smtracks],
                 [feat.read(fields=_ff) for feat in ftracks],
                 output=pdf,
                 new=True,
                 last=True,
                 legend=snames)
     self.new_file(pdf, 'genome_graph')
     return self.display_time()
Example #34
 def quantify(self,**kw):
     feature_type = kw.get('feature_type', 0)
     if str(feature_type) in [str(x[0]) for x in ftypes]:
         feature_type = int(feature_type)
     func = str(kw.get('score_op', 'mean'))
     assembly_id = kw.get('assembly')
     format = kw.get('output') or 'txt'
     chrmeta = "guess"
     if assembly_id:
         assembly = genrep.Assembly(assembly_id)
         chrmeta = assembly.chrmeta
         genes = assembly.gene_track
         exons = assembly.exon_track
     elif not(feature_type in ftypes[3]):
         raise ValueError("Please specify an assembly")
     #signals = kw['SigMulti'].get('signals',[])
     signals = kw.get('signals',[])
     if not isinstance(signals, list): signals = [signals]
     signals = [track(sig, chrmeta=chrmeta) for sig in signals]
     if feature_type in ftypes[0]:
         features = genes
     elif feature_type in ftypes[1]:
         prom_pars = {'before_start': int(kw.get('upstream') or prom_up_def),
                      'after_start': int(kw.get('downstream') or prom_down_def),
                      'on_strand': True}
         features = lambda c: neighborhood(genes(c), **prom_pars)
     elif feature_type in ftypes[2]:
         features = exons
     elif feature_type in ftypes[3]:
         assert os.path.exists(str(kw.get('features'))), "Features file not found: '%s'" % kw.get("features")
         _t = track(kw['features'], chrmeta=chrmeta)
         chrmeta = _t.chrmeta
         features = _t.read
     else:
         raise ValueError("Take feature_type in %s." %ftypes)
     output = self.temporary_path(fname='quantification.'+format)
     if len(signals) > 1:
         _f = ["score%i"%i for i in range(len(signals))]
     else:
         _f = ["score"]
     tout = track(output, format, fields=['chr','start','end','name']+_f,
                  chrmeta=chrmeta, info={'datatype':'qualitative'})
     if format == 'txt': 
         header = ['#chr','start','end','name']+[s.name for s in signals]
         tout.make_header("\t".join(header))
     for chrom in chrmeta:
         sread = [sig.read(chrom) for sig in signals]
         tout.write(score_by_feature(sread, features(chrom), method=func),
                    chrom=chrom, clip=True, mode="append")
     return output
Example #35
    def __call__(self, **kw):
        _f = ['start','end','score']
        format = kw.get('output') or "sql"
        #bamfiles = kw.get('BamMulti',{}).get('bamfiles',[])
        bamfiles = kw.get('bamfiles',[])
        if not isinstance(bamfiles, (tuple,list)): bamfiles = [bamfiles]
        bamfiles = [track(bam) for bam in bamfiles]
        all_tracks = []
        pdf = self.temporary_path(fname='Paired_end_plots.pdf')
        robjects.r('pdf("%s",paper="a4",height=11,width=8)' %pdf)
        midpoint = kw.get("midpoint",False)
        if isinstance(midpoint, basestring):
            midpoint = (midpoint.lower() in ['1', 'true', 't','on'])
        plot_only = kw.get("plot_only",False)
        if isinstance(plot_only, basestring):
            plot_only = (plot_only.lower() in ['1', 'true', 't','on'])

        for bam in bamfiles:
            if not plot_only:
                tname = "%s_frags.%s" %(bam.name.split("/")[-1], format)
                outname = self.temporary_path(fname=tname)
                all_tracks.append(outname)
                trout = track(outname, fields=_f, chrmeta=bam.chrmeta,
                              info={'datatype': 'quantitative', 'PE_midpoint': midpoint})
            self.frag_rep = {}
            self.frag_size = {}
            self.nb_frag = 0
            for chrom,cval in bam.chrmeta.iteritems():
                self._compute_stats(bam.fetch(chrom, 0, cval['length']))
                if not plot_only:
                    trout.write( bam.PE_fragment_size(chrom,midpoint=midpoint), 
                                 fields=_f, chrom=chrom )
            if not plot_only: trout.close()
            if self.nb_frag > 1:
                self._plot_stats(bam.name)
            else:
                raise ValueError("No paired-end found in %s" %bam.name)
        robjects.r('dev.off()')
        if not plot_only:
            if len(all_tracks)>1:
                tarname = self.temporary_path(fname='PE_fragment_tracks.tgz')
                tar_tracks = tarfile.open(tarname, "w:gz")
                [tar_tracks.add(f,arcname=os.path.basename(f)) for f in all_tracks]
                tar_tracks.close()
                self.new_file(tarname, 'fragment_track_tar')
            else:
                self.new_file(all_tracks[0], 'fragment_track')
        self.new_file(pdf,'statistics_plot')
        return self.display_time()
Example #36
File: track.py Project: SilasK/bbcfutils
def read(*args,**kw):
    if len(args) < 1: raise Usage("No input file provided")
    selection = None
    if kw['selection']:
        if kw['selection'].count("{"):
            jsonargs = json.loads(kw['selection'])
            for k,v in jsonargs.iteritems():
                if isinstance(v,basestring): jsonargs[k] = str(v)
            selection = dict((str(k),v) for k,v in jsonargs.iteritems())
        elif kw['selection'].count(":"):
            chr,coord = kw['selection'].split(':')
            start,end = coord.split('-')
            selection = {'chr':chr,'start':(int(start),int(end)),'end':(int(start),int(end))}
        else:
            selection = str(kw['selection']).split(",")
    fields = None
    outformat = None
    if kw['fields']:
        fields = str(kw['fields']).split(",")
    if kw['output'] is None:
        output = sys.stdout
        outformat = "txt"
    else:
        output = open(kw['output'],'w')
    chrmeta = _get_chrmeta(**kw)
    for infile in args:
        intrack = track.track(infile,format=kw['format'],chrmeta=chrmeta)
        if kw['description']:
            if intrack.info:
                fileinfo = ",".join(["%s=%s" %(k,v) for k,v in intrack.info.iteritems()])
            else: fileinfo = 'None'
            chromlist = ",".join(sorted(intrack.chrmeta.keys())) or "None"
            fields = ",".join(intrack.fields)
            output.write(\
"""# *****************************************
# File '%s' (%s):
# Infos: %s
# Chromosomes: %s
# Fields: %s
# *****************************************
""" %(os.path.basename(infile), intrack.format, fileinfo, chromlist, fields))
            continue
        with track.track(output,format=outformat,fields=fields) as _tout:
            _tout.write(intrack.read(selection=selection,fields=fields))
        intrack.close()
    try: output.close()
    except IOError: pass # if stdout
    return 0
Example #37
 def test_intersect(self):
     self.intersect(**self.kw)
     with track(self.intersect.output_files[0][0]) as t:
         s = t.read(fields=self.fields)
         content = list(s)
         expected = [('chr1', 10, 15, 17.0), ('chr1', 24, 35, 107.0)]
         self.assertListEqual(content, expected)
Example #38
 def test_bed(self): # as general TextTrack
     shutil.copy(self.bed, os.path.join(path,'test')) # guess extension from header
     t = track(os.path.join(path,'test'), format='bed', fields=self.fields)
     s = t.read(); s.next()
     self.assertIsInstance(t, BedTrack)
     self.assertEqual(t.format,'bed')
     self.assertListEqual(t.fields, self.fields)
Example #39
 def __call__(self, **kw):
     assembly = genrep.Assembly(kw.get('assembly'))
     format = kw['format']
     if kw['feature_type'] == 'genes':
         map = assembly.get_gene_mapping()
         get_info = self.genes_annot
     elif kw['feature_type'] == 'exons':
         map = assembly.get_exon_mapping()
         get_info = self.exons_annot
     elif kw['feature_type'] == 'transcripts':
         map = assembly.get_transcript_mapping()
         get_info = self.trans_annot
     def _annotate(ids_list):
         with open(ids_list) as ids_file:
             for id in ids_file:
                 id = id.strip()
                 if map.get(id):
                     yield get_info(id,map.get(id))
                 else:
                     yield ('NA','0','0',id,0.0,'0')
     ids_list = kw.get('ids_list')
     fields = ['chr','start','end','name','score','strand']
     if ids_list:
         assert os.path.exists(str(ids_list)), "File not found: '%s'" % ids_list
         fulltrack = FeatureStream(_annotate(ids_list),fields=fields)
         fname = os.path.splitext(os.path.basename(ids_list))[0]
     else:
         fulltrack = FeatureStream((get_info(g,map[g]) for g in map),fields=fields)
         fname = kw['feature_type']
     output = self.temporary_path(fname=fname+'.'+format)
     out = track(output,chrmeta=assembly)
     out.write(fulltrack)
     self.new_file(output, 'fulltrack')
     return self.display_time()
Example #40
 def __call__(self, **kw):
     sample = track(kw['sample'],chrmeta="guess")
     by_chrom = kw.get('by_chrom',False)
     if isinstance(by_chrom, basestring):
         by_chrom = (by_chrom.lower() in ['1', 'true', 't','on'])
     outf = kw.get('output')
     if outf not in output_list:
         outf = output_list[0]
     output = self.temporary_path(fname=sample.name+'_stats.'+outf)
     if outf == 'txt':
         out = open(output,"w")
     else:
         out = {}
     if by_chrom:
         chromlist = sample.chrmeta.keys()
     else:
         chromlist = [None]
     for chrom in chromlist:
         if outf == 'txt':
             if chrom:
                 out.write("Chromosome %s\n--------------------\n"%chrom)
             stats(sample,out=out,selection=chrom)
         else:
             out[chrom] = {}
             stats(sample,out=out[chrom],selection=chrom)
         if outf == 'txt' and chrom:
             out.write("\n--------------------\n")
     if outf == 'txt':
         out.close()
         self.new_file(output, 'stats')
     else:
         self._plot_pdf(output,out,sample.name)
         self.new_file(output, 'pdf')
     return self.display_time()
Example #41
 def test_subtract(self):
     self.subtract(**self.kw)
     with track(self.subtract.output_files[0][0]) as t:
         s = t.read(fields=self.fields)
         content = list(s)
         expected = [('chr1', 21, 24, 17.0)]
         self.assertListEqual(content, expected)
Example #42
def to_bed(filename,assembly):
    t = track(filename,fields=['chr','start','end','strand','score'],chrmeta=assembly,format='txt')
    # Translate chr names
    s = t.read()
    s1 = map_chromosomes(s, t.assembly.chromosomes)
    # Prepare output bed file
    out = track(filename.rstrip('junc')+'bed', fields=['chr','start','end','name','score','strand'])
    out.make_header({'name':filename,'description':filename})
    mode='append'
    # Add junction names
    c = itertools.count()
    s2 = duplicate(s1,'chr','name')
    s3 = apply(s2,'name',lambda x: 'junction'+str(c.next()))
    # Write
    out.write(s3,mode=mode)
    out.close()
Example #43
File: gless.py Project: bbcf/gless
 def get_type(self,filename):
     """Return whether it is a track with 'intervals' or a 'density'."""
     with track(filename) as t:
         if t.format.lower() in ['bed','sam','bam']:
             return 'intervals'
          elif t.format.lower() in ['bedgraph','wig','bigwig','sga']:
             return 'density'
Example #44
 def test_overlap(self):
     self.plugin(**{'input_type':'Signal','filter':path+'peaks.bedGraph',
                    'features':path+'features.bed','feature_type':3,'assembly':'mm9','format':'bed'})
     with track(self.plugin.output_files[0][0]) as t:
         s = t.read()
         content = list(s)
         self.assertEqual(len(content),3)
Example #45
 def test_subtract(self):
     self.subtract(**self.kw)
     with track(self.subtract.output_files[0][0]) as t:
         s = t.read(fields=self.fields)
         content = list(s)
         expected = [("chr1", 21, 24, 17.0)]
         self.assertListEqual(content, expected)
Example #46
 def __call__(self, **kw):
     sample = track(kw['sample'],chrmeta="guess")
     by_chrom = kw.get('by_chrom',False)
     if isinstance(by_chrom, basestring):
         by_chrom = (by_chrom.lower() in ['1', 'true', 't','on'])
     outf = kw.get('output')
     if outf not in output_list:
         outf = output_list[0]
     output = self.temporary_path(fname=sample.name+'_stats.'+outf)
     if outf == 'txt':
         out = open(output,"w")
     else:
         out = {}
     if by_chrom:
         chromlist = sample.chrmeta.keys()
     else:
         chromlist = [None]
     for chrom in chromlist:
         if outf == 'txt':
             if chrom:
                 out.write("Chromosome %s\n--------------------\n"%chrom)
             stats(sample,out=out,selection=chrom)
         else:
             out[chrom] = {}
             stats(sample,out=out[chrom],selection=chrom)
         if outf == 'txt' and chrom:
             out.write("\n--------------------\n")
     if outf == 'txt':
         out.close()
         self.new_file(output, 'stats')
     else:
         self._plot_pdf(output,out,sample.name)
         self.new_file(output, 'pdf')
     return self.display_time()
Example #47
 def test_quantify_table_text(self):
     self.plugin(**{'input_type':'Signal', 'SigMulti':{'signals':[path+'KO50.bedGraph', path+'WT50.bedGraph']},
                    'features':path+'features.bed', 'feature_type':3, 'assembly':'mm9', 'format':'txt'})
     with track(self.plugin.output_files[0][0], fields=["chr","start","end","name","score0","score1"]) as t:
         s = t.read()
         content = list(s)
         self.assertEqual(len(content),9)
Example #48
def plot_footprint_profile(ex, bedlist, signals, chrnames, groups, logfile):
    files = dict((gid, {'pdf': "", 'mat': []}) for gid in bedlist.keys())
    logfile.write("Plotting footprints:\n")
    logfile.flush()
    for gid, motifbed in bedlist.iteritems():
        #        signals = [track(sig) for sig in siglist[gid]]
        snames = [sig.name for sig in signals[gid]]
        tmotif = track(motifbed, format='bed')
        data = {}
        numregs = {}
        for chrom in chrnames:
            fread = {}
            # Group motif hits by (motif name, hit length); the bed 'name' field
            # is expected to look like 'motifName:sequence'.
            for r in tmotif.read(chrom):
                r2 = r[3].split(":")
                key = (r2[0], len(r2[1]))
                if key in fread: fread[key].append(r[1:3])
                else: fread[key] = [r[1:3]]
            for motif, regs in fread.iteritems():
                if motif not in data:
                    # One row per bin (motif length plus flanking bins on each
                    # side), one column per signal track.
                    data[motif] = zeros(shape=(motif[1] + 2 * _plot_flank[1],
                                               len(signals[gid])))
                    numregs[motif] = 0
                numregs[motif] += len(regs)
                # Cut each region into per-base bins plus up/downstream flank bins.
                tFeat = sorted_stream(
                    segment_features(FeatureStream(regs,
                                                   fields=['start', 'end']),
                                     nbins=motif[1],
                                     upstream=_plot_flank,
                                     downstream=_plot_flank))
                # Accumulate, per bin index t[2], the scores of every signal (t[3:]).
                for t in score_by_feature(
                    [s.read(chrom) for s in signals[gid]], tFeat):
                    data[motif][t[2]] += t[3:]
        files[gid]['pdf'] = unique_filename_in()
        new = True
        last = len(data)
        for motif, dat in data.iteritems():
            last -= 1
            mname, nbins = motif
            # Average over the number of regions and label the motif positions 1..nbins.
            dat /= float(numregs[motif])
            X = range(-_plot_flank[1], _plot_flank[1] + nbins)
            for k in range(nbins):
                X[k + _plot_flank[1]] = str(k + 1)
            ####### Could do a heatmap (sort by intensity)...
            lineplot(X, [dat[:, n] for n in range(dat.shape[-1])],
                     mfrow=[4, 2],
                     output=files[gid]['pdf'],
                     new=new,
                     last=(last == 0),
                     legend=snames,
                     main=mname)
            new = False
            _datf = unique_filename_in()
            with open(_datf, "w") as dff:
                dff.write("\t".join([""] + [str(x) for x in X]) + "\n")
                for n, sn in enumerate(snames):
                    dff.write("\t".join([sn] + [str(x)
                                                for x in dat[:, n]]) + "\n")
            files[gid]['mat'].append((mname, _datf))
    return files
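`plot_footprint_profile` returns, for every group id, the name of a PDF holding the plots and one tab-separated matrix file per motif. A minimal sketch of how the result could be consumed (Python 2, matching the snippet above):

    # 'files' is the dict returned by plot_footprint_profile().
    for gid, res in files.iteritems():
        print("group %s: plots written to %s" % (gid, res['pdf']))
        for mname, matfile in res['mat']:
            print("  motif %s: per-bin score matrix in %s" % (mname, matfile))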
Example #49
0
 def test_complement(self):
     self.complement(**self.kw)
     with track(self.complement.output_files[0][0]) as t:
         s = t.read('chr1', fields=self.fields)
         content = list(s)
         expected = [('chr1', 0, 8, 0.0), ('chr1', 19, 21, 0.0),
                     ('chr1', 39, 197195432, 0.0)]
         self.assertListEqual(content, expected)
Example #50
0
def _combine(func, output, **kw):
    chrmeta = _get_chrmeta(**kw)
    format = kw.get('format') or 'sql'
    output += format
    tracks = kw['TrackMulti']['tracks']
    if not isinstance(tracks, list):
        tracks = [tracks]
    tracks = [track(sig, chrmeta=chrmeta) for sig in tracks]
    chrmeta = tracks[0].chrmeta
    tout = track(output, chrmeta=chrmeta, info={'datatype': 'qualitative'})
    for chrom in chrmeta:
        trackList = [sig.read(chrom) for sig in tracks]
        res = combine(trackList, fn=func)
        tout.fields = res.fields
        tout.write(res, chrom=chrom, clip=True)
    tout.close()
    return output
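A hedged usage sketch for `_combine`: the output prefix is assumed to end just before the extension (the format string is appended to it), the input tracks come in under `TrackMulti`, and `_get_chrmeta` is assumed here to accept an `assembly` keyword. The combining function is assumed to receive, per sub-interval, the coverage flags of the inputs, so `any` would give a union-like result; all names and values below are placeholders:

    # Hedged sketch; file names, 'mm9' and the 'assembly' keyword are assumptions.
    out = _combine(lambda flags: any(flags),
                   'combined.',                      # extension appended inside _combine
                   format='bedGraph',
                   assembly='mm9',
                   TrackMulti={'tracks': ['KO50.bedGraph', 'WT50.bedGraph']})
    print("combined track written to %s" % out)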
Example #51
0
 def test_skip_header(self):
     t = track(self.bed)  # skips the first line by default (header=None)
     L1 = len([line for line in t.read()])
     t = track(self.bed, header=None)  # same
     L11 = len([line for line in t.read()])
     t = track(self.bed, header=True)  # skips the first line
     L111 = len([line for line in t.read()])
     t = track(self.bed, header=5)  # skips 5 lines
     L2 = len([line for line in t.read()])
     t = track(self.bed, header='track')  # skips lines starting with 'track'
     L3 = len([line for line in t.read()])
     t = track(self.bed, header=['track', 'chr'])  # skips lines starting with 'track' or 'chr'
     L4 = len([line for line in t.read()])
     t = track(self.bed, header=['track', 'chrII'])  # skips lines starting with 'track' or 'chrII'
     L5 = len([line for line in t.read()])
     self.assertEqual(L1, L11)
     self.assertEqual(L1, L111)
     self.assertEqual(L1 - 4, L2)
     self.assertEqual(L1, L3)
     self.assertEqual(L4, 0)
     self.assertEqual(L1 - 11, L5)
     t.close()
Example #52
0
 def test_bed(self):  # as general TextTrack
     shutil.copy(self.bed,
                 os.path.join(path, 'test'))  # guess extension from header
     t = track(os.path.join(path, 'test'), format='bed', fields=self.fields)
     s = t.read()
     s.next()
     self.assertIsInstance(t, BedTrack)
     self.assertEqual(t.format, 'bed')
     self.assertListEqual(t.fields, self.fields)
Example #53
0
 def test_union(self):
     self.union(**self.kw)
     with track(self.union.output_files[0][0]) as t:
         s = t.read(fields=self.fields)
         content = list(s)
         expected = [('chr1', 8, 10, 12.0), ('chr1', 10, 15, 17.0),
                     ('chr1', 15, 19, 12.0), ('chr1', 21, 24, 17.0),
                     ('chr1', 24, 35, 107.0), ('chr1', 35, 39, 90.0)]
         self.assertListEqual(content, expected)
Example #54
0
def merge_junc_files(trackList, assembly):
    out = track('all.junc',
                format='txt',
                fields=['chr', 'start', 'end', 'strand', 'score'])
    from bbcflib.genrep import Assembly
    a = Assembly(assembly)
    for c in a.chromosomes:
        tl = [
            track(t,
                  fields=['chr', 'start', 'end', 'strand', 'score'],
                  format='txt').read(str(c[0]) + '_' + c[1] + '.' + str(c[2]))
            for t in trackList
        ]
        #all = concatenate(tl,remove_duplicates=True)
        all = concatenate(tl,
                          group_by=['chr', 'start', 'end'],
                          aggregate={'score': lambda x: sum(x)})
        out.write(all, mode='append')
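A minimal usage sketch, assuming the '.junc' files follow the chr/start/end/strand/score layout read above; the file names and assembly are placeholders:

    # Writes 'all.junc' in the current directory, summing the scores of
    # junctions that appear in more than one input file.
    merge_junc_files(['sample1.junc', 'sample2.junc'], 'mm9')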
Example #55
0
 def test_ratios(self):
     self.plugin(
         **{
             'numerator': path + 'KO50.bedGraph',
             'denominator': path + 'WT50.bedGraph',
             'format': 'bedGraph'
         })
     with track(self.plugin.output_files[0][0]) as t:
         s = t.read()
         content = list(s)
Example #56
0
    def __call__(self, **kw):
        tracks = kw['TrackMulti']['tracks']
        if not isinstance(tracks, list): tracks = [tracks]
        minscore = kw.get('minscore')
        maxscore = kw.get('maxscore')
        minlength = kw.get('minlength')
        maxlength = kw.get('maxlength')
        selection = [{'chr': c} for c in kw.get('chrom', '').split(',')]
        if minscore or maxscore:
            if not minscore:
                minscore = -sys.maxint
            if not maxscore:
                maxscore = sys.maxint
            if minscore > maxscore:
                raise ValueError("Empty range: %f:%f" % (minscore, maxscore))
            for s in selection:
                s['score'] = (float(minscore), float(maxscore))
        if minlength or maxlength:
            minlength = int(minlength or 0)
            maxlength = int(maxlength or sys.maxint)
            if minlength > maxlength:
                raise ValueError("Empty range: %i:%i" % (minlength, maxlength))
            for s in selection:
                s['length'] = (minlength, maxlength)
        outtracks = []
        for tin in [track(t) for t in tracks]:
            outname = self.temporary_path(tin.name + "_filtered." + tin.format)
            tout = track(outname)
            outtracks.append(outname)
            outstream = tin.read(selection=selection)
            tout.write(outstream)
            tout.close()

        if len(outtracks) > 1:
            tar_name = self.temporary_path('Filtered_tracks.tgz')
            tar = tarfile.open(tar_name, "w:gz")
            [tar.add(f) for f in outtracks]
            tar.close()
            self.new_file(tar_name, 'archive')
        else:
            self.new_file(outtracks[0], 'output')
        return self.display_time()
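The filtering above is driven entirely by the `selection` list of dicts handed to `tin.read()`: one dict per requested chromosome, each optionally carrying a `(min, max)` score window and a `(min, max)` length window. A sketch of that structure, with placeholder values:

    # Features matching any of these dicts are kept by tin.read(selection=...).
    selection = [
        {'chr': 'chr1', 'score': (10.0, 1000.0), 'length': (50, 500)},
        {'chr': 'chr2', 'score': (10.0, 1000.0), 'length': (50, 500)},
    ]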
Example #57
0
 def test_wig(self):
     wig = os.path.join(path, 'test.wig')
     t = convert(self.bed, wig)
     self.assertIsInstance(t, WigTrack)
     s = t.read()
     s.next()
     t.close()
     t = track(wig, format="wig", chrmeta={}, info=None)
     s = t.read()
     s.next()
     self.assertListEqual(t.fields, ['chr', 'start', 'end', 'score'])
Example #58
0
def to_bed(filename, assembly):
    t = track(filename,
              fields=['chr', 'start', 'end', 'strand', 'score'],
              chrmeta=assembly,
              format='txt')
    # Translate chr names
    s = t.read()
    s1 = map_chromosomes(s, t.assembly.chromosomes)
    # Prepare output bed file
    out = track(filename.rstrip('junc') + 'bed',
                fields=['chr', 'start', 'end', 'name', 'score', 'strand'])
    out.make_header({'name': filename, 'description': filename})
    mode = 'append'
    # Add junction names
    c = itertools.count()
    s2 = duplicate(s1, 'chr', 'name')
    s3 = apply(s2, 'name', lambda x: 'junction' + str(c.next()))
    # Write
    out.write(s3, mode=mode)
    out.close()
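A hedged usage sketch for `to_bed`: the path is a placeholder, and `assembly` is passed straight through to `track(..., chrmeta=...)`, so an assembly name is assumed to be acceptable:

    # Reads 'junctions.junc' and writes the named junctions to 'junctions.bed'.
    to_bed('junctions.junc', 'mm9')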