def histone_track(name, color="200,0,0"):
    """Build a composite track holding the signal and region tracks of one sample.

    The composite gets two views: a ``signal`` view filled with three bigWig
    tracks (``qvalues``, ``treat_pileup``, ``control_lambda``) and a
    ``regions`` view filled with two bigBed tracks (``peaks``, ``domains``).
    Track URLs are relative file names of the form ``<name>_<kind>.bw`` and
    ``<name>_<kind>.bb``.

    :param name: prefix used for every track name, label and URL.
    :param color: colour string handed to the composite track.
    :returns: the populated ``trackhub.CompositeTrack``.
    """
    bigwig_kinds = ("qvalues", "treat_pileup", "control_lambda")
    bigbed_kinds = ("peaks", "domains")

    composite = trackhub.CompositeTrack(
        name=name + "_composite",
        short_label=name,
        tracktype='bigWig',
        visibility='full',
        color=color,
    )
    signal_view = trackhub.ViewTrack(
        name=name + "_signal",
        view='signal',
        visibility='full',
        tracktype='bigWig',
        short_label=name + '_signal',
        autoScale="on",
    )
    regions_view = trackhub.ViewTrack(
        name=name + '_region',
        view='regions',
        visibility='dense',
        tracktype='bigWig',
        short_label=name + '_regions',
    )
    for view in (signal_view, regions_view):
        composite.add_view(view)

    # One bigWig per signal flavour.
    for kind in bigwig_kinds:
        signal_view.add_tracks(
            trackhub.Track(
                tracktype='bigWig',
                name="_".join((name, kind)),
                url="{}_{}.bw".format(name, kind),
                short_label=kind,
                autoScale="on",
            )
        )

    # One bigBed per region flavour.
    for kind in bigbed_kinds:
        regions_view.add_tracks(
            trackhub.Track(
                name="_".join((name, kind)),
                url="{}_{}.bb".format(name, kind),
                short_label=kind,
                tracktype='bigBed',
            )
        )

    return composite
def make_bam_trackhub(libraries, trackdb):
    """Add a ``reads`` composite of BAM tracks to ``trackdb``.

    One track is created per row of ``libraries``. For every row, symlinks to
    the BAM file and its ``.bai`` index are created in the current working
    directory (named after the BAM's base name) unless a path with that name
    already exists.

    :param libraries: table iterated with ``iterrows()``; each row is read for
        ``bam``, ``analysis_name`` and ``cluster_assignment``.
    :param trackdb: object whose ``add_tracks`` receives the composite.
    """
    cluster_group = trackhub.SubGroupDefinition(
        name='cluster',
        label='cluster',
        mapping=cluster_mapping,
    )

    bam_composite = trackhub.CompositeTrack(
        name='reads',
        short_label='reads',
        dimensions='dimX=cluster',
        tracktype='bam',
        visibility='dense',
    )
    bam_composite.add_subgroups([cluster_group])
    trackdb.add_tracks(bam_composite)

    bam_view = trackhub.ViewTrack(
        name='readview',
        view='reads',
        visibility='dense',
        tracktype='bam',
        short_label='Reads',
    )
    bam_composite.add_view(bam_view)

    start_dir = os.getcwd()
    #os.chdir(public_dir)
    try:
        for index, (_library_id, row) in enumerate(libraries.iterrows()):
            cluster_label = human_cluster_mapping[row.cluster_assignment]
            prefix = "{:03d}".format(index)
            link_name = os.path.basename(row.bam)

            # Link the BAM first, then its index, skipping existing paths.
            link_plan = (
                (row.bam, link_name),
                (row.bam + '.bai', link_name + '.bai'),
            )
            for target, link in link_plan:
                if not os.path.exists(link):
                    os.symlink(target, link)

            bam_view.add_tracks(
                trackhub.Track(
                    url=make_home_url(row['bam']),
                    name=prefix + '_' + row.analysis_name + '_reads',
                    visibility='full',
                    tracktype='bam',
                    subgroups={'cluster': cluster_label},
                    color=hex_to_ucsc_color(colors[row.cluster_assignment]),
                    priority=index,
                )
            )
    finally:
        # Always return to the directory we started in.
        os.chdir(start_dir)
def make_bigwig_trackhub(libraries, trackdb):
    """Add a ``composite`` of bigWig signal tracks to ``trackdb``.

    Every row of ``libraries`` contributes two tracks, one for its ``uniq``
    column and one for its ``all`` column. Tracks are grouped by ``cluster``
    (dimension X) and ``multi`` (dimension Y), and receive the priority
    ``row_index * 10 + track_index``.

    :param libraries: table iterated with ``iterrows()``; each row is read for
        ``uniq``, ``all``, ``analysis_name`` and ``cluster_assignment``.
    :param trackdb: object whose ``add_tracks`` receives the composite.
    """
    read_kinds = ['uniq', 'all']

    cluster_group = trackhub.SubGroupDefinition(
        name='cluster',
        label='cluster',
        mapping=cluster_mapping,
    )
    multi_group = trackhub.SubGroupDefinition(
        name='multi',
        label='multi',
        mapping={
            'uniq': 'uniq',
            'all': 'all',
        },
    )

    composite = trackhub.CompositeTrack(
        name='composite',
        short_label='signal',
        dimensions='dimX=cluster dimY=multi',
        tracktype='bigWig',
        visibility='dense',
    )
    composite.add_subgroups([cluster_group, multi_group])
    trackdb.add_tracks(composite)

    signal_view = trackhub.ViewTrack(
        name='signalviewtrack',
        view='signal',
        visibility='dense',
        tracktype='bigWig',
        short_label='Signal',
    )
    composite.add_view(signal_view)

    for row_number, (_library_id, row) in enumerate(libraries.iterrows()):
        for kind_number, kind in enumerate(read_kinds):
            cluster_label = human_cluster_mapping[row.cluster_assignment]
            order = row_number * 10 + kind_number
            track_name = '_'.join(
                ("{:04d}".format(order), row.analysis_name, kind))
            signal_view.add_tracks(
                trackhub.Track(
                    url=make_home_url(row[kind]),
                    name=track_name,
                    visibility='full',
                    tracktype='bigWig',
                    subgroups={'cluster': cluster_label, 'multi': kind},
                    color=hex_to_ucsc_color(colors[row.cluster_assignment]),
                    priority=order,
                )
            )
def create_trackhub(OutFolder,ListFile,Genome,ChrSize, EMAIL,DesignFile):
    """Render a track hub for the files in ``ListFile`` into ``OutFolder``.

    Steps:

    1. Read ``DesignFile`` (comma separated). Its first four header columns
       must be ``group,replicate,fastq_1,fastq_2``; otherwise an error is
       printed and the process exits with status 1. Columns whose header
       starts with ``track_`` become per-sample track parameters, keyed by
       the sanitized ``<group><Postfix[1]>`` and ``<group>_R<replicate>``.
    2. Read ``ListFile`` (tab separated: track id, file path). A ``color``
       design parameter (hex, optional leading ``#``) is converted to an
       ``R,G,B`` string; without one the colour defaults to ``0,0,178``.
    3. Convert ``bed``/``broadpeak``/``narrowpeak`` files to bigBed by
       shelling out to ``sort``, ``awk`` and ``bedToBigBed``.
    4. Create one track per file whose extension is a key of ``TrackType``,
       symlink the data file under ``OutFolder/Genome`` and render the hub.

    :param OutFolder: staging directory for the rendered hub.
    :param ListFile: tab-separated list of ``<id>\\t<file path>`` lines.
    :param Genome: genome assembly name passed to ``trackhub.default_hub``.
    :param ChrSize: chromosome sizes file passed to ``bedToBigBed``.
    :param EMAIL: contact e-mail passed to ``trackhub.default_hub``.
    :param DesignFile: comma-separated sample design file.
    """
    # Local import keeps this block self-contained.
    import shlex

    ERROR_STR = 'ERROR: Please check design file'
    HEADER = ['group', 'replicate', 'fastq_1', 'fastq_2']

    makedir(OutFolder)

    sampleDesignDict = {}
    designDict = {}
    with open(DesignFile, 'r') as dIn:
        header = dIn.readline().strip().split(',')
        if header[:4] != HEADER:
            print("{} header: {} != {}".format(ERROR_STR,','.join(header),','.join(HEADER)))
            sys.exit(1)

        # Map "<param>" -> column index for every "track_<param>" header.
        paramColn = {
            column[6:]: index
            for index, column in enumerate(header)
            if column[:6] == "track_"
        }

        if paramColn:
            for line in dIn:
                if not line.strip():
                    # Blank (typically trailing) lines carry no sample.
                    continue
                lspl = [x.strip() for x in line.strip().split(',')]
                # NOTE(review): Postfix is not a parameter of this function;
                # it has to be resolvable from the enclosing module scope.
                sample_keys = [
                    trackhub.helpers.sanitize(raw.replace(".", "_"), strict=False)
                    for raw in (lspl[0] + Postfix[1], lspl[0] + '_R' + lspl[1])
                ]
                for key in sample_keys:
                    sampleDesignDict[key] = {}
                for param, column in paramColn.items():
                    value = lspl[column]
                    for key in sample_keys:
                        sampleDesignDict[key][param] = value
                    designDict.setdefault(param, {})[value] = value

    fileList = []
    with open(ListFile, 'r') as fin:
        for line in fin:
            if not line.strip():
                continue
            ifile = [x.strip() for x in line.strip().split('\t')]
            colour = ""
            if sampleDesignDict:
                kfile = trackhub.helpers.sanitize(ifile[0].replace(".", "_"), strict=False)
                h = sampleDesignDict.get(kfile, {}).get("color", "").lstrip('#')
                # An empty colour cell falls through to the default below
                # instead of failing in int('', 16).
                if h:
                    colour = ','.join(str(int(h[i:i+2], 16)) for i in (0, 2, 4))
            if not colour.strip():
                colour = '0,0,178'
            fileList.append((ifile[1], colour, ifile[0]))

    fileList.sort(key=lambda entry: entry[2])

    # Initialize the components of a track hub, already connected together
    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name="RNISRS_hub",
        short_label='Regeneromics Shared Resource hub',
        long_label='Regeneration Next Initiative Regeneromics Shared Resource hub',
        genome=Genome,
        email=EMAIL)

    # create compositeTracks
    if sampleDesignDict:
        composite = trackhub.CompositeTrack(
            name='composite',
            short_label='signal'
        )
        # Add one subgroup per design parameter; colour is styling only.
        subgroups = [
            trackhub.SubGroupDefinition(name=k, label=k, mapping=designDict[k])
            for k in designDict
            if k != 'color'
        ]
        composite.add_subgroups(subgroups)

        # Add the composite track to the trackDb
        trackdb.add_tracks(composite)
        signal_view = trackhub.ViewTrack(
            name='signalviewtrack',
            view='signal',
            short_label='Signal')
        composite.add_view(signal_view)
        regions_view = trackhub.ViewTrack(
            name='regionsviewtrack',
            view='regions',
            short_label='Regions')
        composite.add_view(regions_view)

    # Extra bedToBigBed options per BED flavour.
    bed_options = {
        "bed": "",
        "broadpeak": "-type=bed6+3 -as=broadPeak.as ",
        "narrowpeak": "-type=bed6+4 -as=narrowPeak.as ",
    }

    for ifile, color, track_id in fileList:
        extension = os.path.splitext(ifile)[1].replace(".", "").lower()
        filename = trackhub.helpers.sanitize(os.path.splitext(os.path.basename(ifile))[0].replace(".", "_"), strict=False)

        if extension in bed_options:
            # convert bed to bigbed; paths are quoted so that spaces or shell
            # metacharacters in them cannot break the command line.
            sorted_bed = ifile + "_sorted.bed"
            clipped_bed = ifile + "_srt.bed"
            bigbed = ifile + ".bb"
            # sort bed file
            os.system("sort -k1,1 -k2,2n " + shlex.quote(ifile) + " >" + shlex.quote(sorted_bed))
            # drop "track" lines and cap the score column at 1000
            os.system("awk '$1 != \"track\" {$5=($5>1000)?1000:$5; print ;}' " + shlex.quote(sorted_bed) + " > " + shlex.quote(clipped_bed))
            # bedToBigBed
            os.system("bedToBigBed " + bed_options[extension] + shlex.quote(clipped_bed) + " " + shlex.quote(ChrSize) + " " + shlex.quote(bigbed))
            if extension == "bed":
                extension = "bb"
            # change ifile to new bigbed file
            ifile = bigbed

        if extension not in TrackType:
            continue

        if filename in sampleDesignDict:
            track = trackhub.Track(
                name=filename,
                source=filename,
                short_label=track_id,
                long_label=filename,
                color=color,
                visibility=Visibility[extension],
                tracktype=TrackType[extension],
                subgroups=sampleDesignDict[filename],
                autoScale='on')
            signal_view.add_tracks(track)
        else:
            # Files without a design entry become plain top-level tracks
            # rather than raising KeyError.
            track = trackhub.Track(
                name=filename,
                source=filename,
                short_label=track_id,
                long_label=filename,
                color=color,
                visibility=Visibility[extension],
                tracktype=TrackType[extension],
                autoScale='on')
            trackdb.add_tracks(track)

        linkname = os.path.join(OutFolder, Genome, filename+"."+TrackType[extension].split()[0])
        makedir(os.path.join(OutFolder, Genome))
        os.symlink("../../"+ifile, linkname)

    hub.render(staging=OutFolder)
def make_trackhub():
    """Build and upload the ``C1_peng_20180710_cluster`` bigWig hub.

    Cluster memberships are loaded with
    ``read_peng_20180710_cluster_memberships()``, reduced to one row per
    distinct (cluster_name, value, color, rgb) combination, and turned into
    two bigWig tracks each: one for the ``all`` file and one for the ``uniq``
    file. The trackDb is printed and the hub is uploaded via
    ``trackhub.upload.upload_hub``.
    """
    import trackhub

    base_url = 'http://woldlab.caltech.edu/~diane/C1_peng_20180710_cluster_bigwigs/'
    # (file suffix, multiread subgroup) in the order tracks are emitted.
    suffix_groups = (
        ('-mm10-M4-male_all.bw', 'all'),
        ('-mm10-M4-male_uniq.bw', 'uniq'),
    )

    memberships = pandas.DataFrame(
        read_peng_20180710_cluster_memberships(),
        columns=['cell_id', 'cluster_name', 'value', 'color'])
    # Scale each colour channel by 255 and join into an "R,G,B" string.
    memberships['rgb'] = memberships['color'].apply(
        lambda channel: ','.join(str(int(x * 255)) for x in channel))
    colors = memberships[
        ['cluster_name', 'value', 'color', 'rgb']].drop_duplicates()

    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name="C1_peng_20180710_cluster",
        short_label="C1_peng_20180710_cluster",
        genome="mm10",
        email="*****@*****.**")

    multiread = trackhub.SubGroupDefinition(
        name='multiread',
        label='multiread',
        mapping={
            'all': 'all_reads',
            'uniq': 'unique_only',
        })

    composite = trackhub.CompositeTrack(
        name='composite',
        short_label='bigwigs',
        dimensions='dimX=multiread',
        sortOrder='multiread',
        visibility='full',
        tracktype='bigWig',
    )
    composite.add_subgroups([multiread])
    trackdb.add_tracks(composite)

    signal_view = trackhub.ViewTrack(
        name='signal',
        view='signal',
        visibility='full',
        tracktype='bigWig')
    composite.add_view(signal_view)

    for suffix, group in suffix_groups:
        for _, row in colors.iterrows():
            label = row.value + group
            signal_view.add_tracks(
                trackhub.Track(
                    name=trackhub.helpers.sanitize(label),
                    long_label=row.cluster_name + ' ' + group,
                    url=base_url + row.value + suffix,
                    visibility='full',
                    tracktype='bigWig',
                    subgroups={'multiread': group},
                    color=row.rgb)
            )

    print(trackdb)

    trackhub.upload.upload_hub(
        hub=hub,
        host='localhost',
        remote_dir='/woldlab/loxcyc/home/diane/public_html/C1_peng_20180710_cluster_bigwigs/'
    )
sortOrder='num=+ kind=-', tracktype='bigWig', visibility='full', ) # Add those subgroups to the composite track composite.add_subgroups(subgroups) # Add the composite track to the trackDb trackdb.add_tracks(composite) # CompositeTracks compose different ViewTracks. We'll make one ViewTrack # for signal, and one for bigBed regions. signal_view = trackhub.ViewTrack(name='signalviewtrack', view='signal', visibility='full', tracktype='bigWig', short_label='Signal') regions_view = trackhub.ViewTrack(name='regionsviewtrack', view='regions', visibility='dense', tracktype='bigWig', short_label='Regions') # These need to be added to the composite. composite.add_tracks(signal_view) composite.add_tracks(regions_view) # Next we will build a multiWig overlay track which will show an example of # the signal as multiple bigWigs overlaying each other.
See [1]_ for more. .. [1] http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html """), ) # Add the composite track to the trackDb trackdb.add_tracks(composite) # A ViewTrack for signal. signal_view = trackhub.ViewTrack(name='signalviewtrack', view='signal', visibility='full', tracktype='bigWig', short_label='Signal', html_string=dedent(''' Signal view docs ---------------- While a file for this HTML documentation is created and uploaded, it will be overridden by the CompositeView's documentation. ''')) composite.add_tracks(signal_view) # This multiWig overlay track will shows multiple bigWigs overlaying each # other in the same track overlay = trackhub.AggregateTrack( aggregate='transparentOverlay', visibility='full', tracktype='bigWig', viewLimits='-2:2', maxHeightPixels='8:80:128',
def create_trackhub(OutFolder,ListFile,Genome,EMAIL,DesignFile,Postfix,PathPrefix=''):
    """Render a track hub for the files in ``ListFile`` into ``OutFolder``.

    Steps:

    1. Read ``DesignFile`` (comma separated). Its first four header columns
       must be ``group,replicate,fastq_1,fastq_2``; otherwise an error is
       printed and the process exits with status 1. Columns whose header
       starts with ``track_`` become per-sample track parameters, keyed by
       the sanitized ``<group><Postfix[1]>`` and
       ``<group>_R<replicate><Postfix[0]>``.
    2. Read ``ListFile`` (tab separated: file path, colour). A ``color``
       design parameter (hex, optional leading ``#``) overrides the listed
       colour; an empty colour defaults to ``0,0,178``.
    3. Create one track per file whose extension is a key of ``TrackType``
       (``bed``/``broadpeak``/``narrowpeak`` files are skipped), symlink the
       file under ``OutFolder/Genome`` and render the hub.

    :param OutFolder: staging directory for the rendered hub.
    :param ListFile: tab-separated list of ``<file path>\\t<colour>`` lines.
    :param Genome: genome assembly name passed to ``trackhub.default_hub``.
    :param EMAIL: contact e-mail passed to ``trackhub.default_hub``.
    :param DesignFile: comma-separated sample design file.
    :param Postfix: indexable pair of name suffixes; element 1 is appended to
        the group name, element 0 to the replicate-level name.
    :param PathPrefix: string (stripped) prepended to every listed file path.
    """
    ERROR_STR = 'ERROR: Please check design file'
    HEADER = ['group', 'replicate', 'fastq_1', 'fastq_2']

    makedir(OutFolder)

    sampleDesignDict = {}
    designDict = {}
    with open(DesignFile, 'r') as dIn:
        header = dIn.readline().strip().split(',')
        if header[:4] != HEADER:
            print("{} header: {} != {}".format(ERROR_STR,','.join(header),','.join(HEADER)))
            sys.exit(1)

        # Map "<param>" -> column index for every "track_<param>" header.
        paramColn = {
            column[6:]: index
            for index, column in enumerate(header)
            if column[:6] == "track_"
        }

        if paramColn:
            for line in dIn:
                if not line.strip():
                    # Blank (typically trailing) lines carry no sample.
                    continue
                lspl = [x.strip() for x in line.strip().split(',')]
                raw_keys = (
                    lspl[0] + Postfix[1],
                    lspl[0] + '_R' + lspl[1] + Postfix[0],
                )
                sample_keys = [
                    trackhub.helpers.sanitize(raw.replace(".", "_"), strict=False)
                    for raw in raw_keys
                ]
                for key in sample_keys:
                    sampleDesignDict[key] = {}
                for param, column in paramColn.items():
                    value = lspl[column]
                    for key in sample_keys:
                        sampleDesignDict[key][param] = value
                    designDict.setdefault(param, {})[value] = value

    fileList = []
    with open(ListFile, 'r') as fin:
        for line in fin:
            if not line.strip():
                continue
            # Split before stripping: stripping the whole line first would
            # eat the tab in front of an empty colour column and break the
            # two-field layout.
            fields = line.rstrip('\r\n').split('\t')
            ifile = fields[0].strip()
            colour = fields[1].strip() if len(fields) > 1 else ''
            if sampleDesignDict:
                kfile = trackhub.helpers.sanitize(os.path.splitext(os.path.basename(ifile))[0].replace(".", "_"), strict=False)
                h = sampleDesignDict.get(kfile, {}).get("color", "").lstrip('#')
                # An empty design colour keeps the listed colour instead of
                # failing in int('', 16).
                if h:
                    colour = ','.join(str(int(h[i:i+2], 16)) for i in (0, 2, 4))
            if not colour:
                colour = '0,0,178'
            fileList.append((PathPrefix.strip()+ifile, colour))

    # Initialize the components of a track hub, already connected together
    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name="RNISRS_hub",
        short_label='Regeneromics Shared Resource hub',
        long_label='Regeneration Next Initiative Regeneromics Shared Resource hub',
        genome=Genome,
        email=EMAIL)

    # create compositeTracks
    if sampleDesignDict:
        composite = trackhub.CompositeTrack(
            name='composite',
            short_label='signal'
        )
        # Add one subgroup per design parameter; colour is styling only.
        subgroups = [
            trackhub.SubGroupDefinition(name=k, label=k, mapping=designDict[k])
            for k in designDict
            if k != 'color'
        ]
        composite.add_subgroups(subgroups)

        # Add the composite track to the trackDb
        trackdb.add_tracks(composite)
        signal_view = trackhub.ViewTrack(
            name='signalviewtrack',
            view='signal',
            short_label='Signal')
        composite.add_view(signal_view)
        regions_view = trackhub.ViewTrack(
            name='regionsviewtrack',
            view='regions',
            short_label='Regions')
        composite.add_view(regions_view)

    skipped_extensions = ('bed', 'broadpeak', 'narrowpeak')
    for ifile, color in fileList:
        extension = os.path.splitext(ifile)[1].replace(".", "").lower()
        filename = trackhub.helpers.sanitize(os.path.splitext(os.path.basename(ifile))[0].replace(".", "_"), strict=False)

        # Plain-text BED flavours and unknown extensions get no track.
        if extension in skipped_extensions or extension not in TrackType:
            continue

        if filename in sampleDesignDict:
            track = trackhub.Track(
                name=filename,
                source=ifile,
                color=color,
                visibility=Visibility[extension],
                tracktype=TrackType[extension],
                subgroups=sampleDesignDict[filename],
                autoScale='on')
            signal_view.add_tracks(track)
        else:
            track = trackhub.Track(
                name=filename,
                source=ifile,
                color=color,
                visibility=Visibility[extension],
                tracktype=TrackType[extension],
                autoScale='on')
            trackdb.add_tracks(track)

        linkname = os.path.join(OutFolder, Genome, filename+"."+TrackType[extension])
        makedir(os.path.join(OutFolder, Genome))
        os.symlink(ifile, linkname)

    hub.render(staging=OutFolder)