def make_bigwig_trackhub(libraries, trackdb):
    """Add a composite of bigWig signal tracks to ``trackdb``.

    A composite track named ``composite`` is created with two subgroups
    (``cluster`` and ``multi``) and a single ``signal`` view.  Every row of
    ``libraries`` contributes two tracks to that view, one for the ``uniq``
    column and one for the ``all`` column.

    ``libraries`` is iterated with ``iterrows()``; each row is read for
    ``cluster_assignment``, ``analysis_name``, ``uniq`` and ``all``.
    ``cluster_mapping``, ``human_cluster_mapping``, ``colors``,
    ``make_home_url`` and ``hex_to_ucsc_color`` are resolved from the
    enclosing module.
    """
    multi_kinds = ['uniq', 'all']

    cluster_group = trackhub.SubGroupDefinition(
        name='cluster',
        label='cluster',
        mapping=cluster_mapping)
    multi_group = trackhub.SubGroupDefinition(
        name='multi',
        label='multi',
        mapping={kind: kind for kind in multi_kinds})

    composite = trackhub.CompositeTrack(
        name='composite',
        short_label='signal',
        dimensions='dimX=cluster dimY=multi',
        tracktype='bigWig',
        visibility='dense',
    )
    composite.add_subgroups([cluster_group, multi_group])
    trackdb.add_tracks(composite)

    signal_view = trackhub.ViewTrack(
        name='signalviewtrack',
        view='signal',
        visibility='dense',
        tracktype='bigWig',
        short_label='Signal')
    composite.add_view(signal_view)

    for lib_index, (_library_id, row) in enumerate(libraries.iterrows()):
        # The cluster label is the same for both tracks of a library.
        cluster_label = human_cluster_mapping[row.cluster_assignment]
        for offset, kind in enumerate(multi_kinds):
            # Leaves room for up to ten tracks per library while keeping
            # the tracks of one library adjacent in priority order.
            priority = lib_index * 10 + offset
            track_name = '_'.join(
                ["{:04d}".format(priority), row.analysis_name, kind])
            signal_view.add_tracks(trackhub.Track(
                url=make_home_url(row[kind]),
                name=track_name,
                visibility='full',
                tracktype='bigWig',
                subgroups={'cluster': cluster_label, 'multi': kind},
                color=hex_to_ucsc_color(colors[row.cluster_assignment]),
                priority=priority,
            ))
def make_bam_trackhub(libraries, trackdb):
    """Add a composite of BAM read tracks to ``trackdb``.

    A composite track named ``reads`` with a single ``cluster`` subgroup and
    a ``reads`` view is created.  For every row of ``libraries`` a symlink to
    the row's BAM file and to its ``.bai`` index is created in the current
    working directory (unless a path of that name already exists), and one
    BAM track is added to the view.

    ``libraries`` is iterated with ``iterrows()``; each row is read for
    ``cluster_assignment``, ``analysis_name`` and ``bam``.
    ``cluster_mapping``, ``human_cluster_mapping``, ``colors``,
    ``make_home_url`` and ``hex_to_ucsc_color`` are resolved from the
    enclosing module.
    """
    cluster_group = trackhub.SubGroupDefinition(
        name='cluster',
        label='cluster',
        mapping=cluster_mapping)

    bam_composite = trackhub.CompositeTrack(
        name='reads',
        short_label='reads',
        dimensions='dimX=cluster',
        tracktype='bam',
        visibility='dense',
    )
    bam_composite.add_subgroups([cluster_group])
    trackdb.add_tracks(bam_composite)

    bam_view = trackhub.ViewTrack(
        name='readview',
        view='reads',
        visibility='dense',
        tracktype='bam',
        short_label='Reads')
    bam_composite.add_view(bam_view)

    # NOTE: changing into a public directory is currently disabled, so the
    # links below land in whatever directory the caller is in.  The working
    # directory is still restored on exit.
    starting_dir = os.getcwd()
    try:
        for index, (_library_id, row) in enumerate(libraries.iterrows()):
            cluster_label = human_cluster_mapping[row.cluster_assignment]
            name_prefix = "{:03d}".format(index)

            link_name = os.path.basename(row.bam)
            wanted_links = (
                (row.bam, link_name),
                (row.bam + '.bai', link_name + '.bai'),
            )
            for target, link in wanted_links:
                if not os.path.exists(link):
                    os.symlink(target, link)

            bam_view.add_tracks(trackhub.Track(
                url=make_home_url(row['bam']),
                name=name_prefix + '_' + row.analysis_name + '_reads',
                visibility='full',
                tracktype='bam',
                subgroups={'cluster': cluster_label},
                color=hex_to_ucsc_color(colors[row.cluster_assignment]),
                priority=index,
            ))
    finally:
        os.chdir(starting_dir)
def create_trackhub(OutFolder,ListFile,Genome,ChrSize, EMAIL,DesignFile):
    """Build and render a track hub from a design file and a list of tracks.

    BED-like inputs (``.bed``, ``.broadPeak``, ``.narrowPeak``) are sorted,
    score-clipped to 1000 and converted with ``bedToBigBed`` before being
    added.  Every track whose (possibly converted) extension is a key of the
    module-level ``TrackType`` mapping is added to the hub and symlinked into
    ``OutFolder/Genome``.

    Args:
        OutFolder: Staging directory; created with ``makedir`` and handed to
            ``hub.render``.
        ListFile: Tab-separated file, one track per line.  Column 0 is the
            sample id (used as the track's short label and to look up a
            design colour), column 1 is the track file path.
        Genome: Assembly name passed to ``trackhub.default_hub``; also the
            sub-directory of ``OutFolder`` that receives the symlinks.
        ChrSize: Chromosome sizes file passed to ``bedToBigBed``.
        EMAIL: Contact address passed to ``trackhub.default_hub``.
        DesignFile: Comma-separated design file.  Its first four header
            columns must be ``group,replicate,fastq_1,fastq_2``.  Every
            column named ``track_<x>`` defines a subgroup ``<x>``, except
            ``track_color`` which supplies a hex colour instead.

    Exits the process with status 1 if the design header is not as expected.
    """
    import shlex

    ERROR_STR = 'ERROR: Please check design file'
    HEADER = ['group', 'replicate', 'fastq_1', 'fastq_2']

    makedir(OutFolder)

    # sampleDesignDict: sanitized sample name -> {subgroup name: value}
    # designDict:       subgroup name -> {value: value} (SubGroupDefinition mapping)
    sampleDesignDict = {}
    designDict = {}
    with open(DesignFile, 'r') as dIn:
        header = dIn.readline().strip().split(',')
        if header[:4] != HEADER:
            print("{} header: {} != {}".format(ERROR_STR,','.join(header),','.join(HEADER)))
            sys.exit(1)

        # Map "<x>" -> column index for every header named "track_<x>".
        paramColn = {
            column[6:]: index
            for index, column in enumerate(header)
            if column[:6] == "track_"
        }

        if paramColn:
            for line in dIn:
                if not line.strip():
                    # Tolerate blank/trailing lines instead of failing on a
                    # missing replicate column.
                    continue
                lspl = [x.strip() for x in line.strip().split(',')]
                # Each design row is registered under two names: the
                # group-level name and the replicate-level name.
                # NOTE(review): ``Postfix`` is not a parameter of this
                # function; it must be defined at module level - confirm.
                sample_keys = [lspl[0]+Postfix[1], lspl[0]+'_R'+lspl[1]]
                sample_keys = [
                    trackhub.helpers.sanitize(key.replace(".", "_"), strict=False)
                    for key in sample_keys
                ]
                for key in sample_keys:
                    sampleDesignDict[key] = {}
                for k, column in paramColn.items():
                    value = lspl[column]
                    for key in sample_keys:
                        sampleDesignDict[key][k] = value
                    designDict.setdefault(k, {})[value] = value

    # fileList entries are (file path, "R,G,B" colour, sample id).
    fileList = []
    with open(ListFile,'r') as fin:
        for line in fin:
            if not line.strip():
                continue
            ifile = [x.strip() for x in line.strip().split('\t')]
            colour = ""
            kfile = trackhub.helpers.sanitize(ifile[0].replace(".", "_"), strict=False)
            design = sampleDesignDict.get(kfile, {})
            if "color" in design:
                h = design["color"].lstrip('#')
                if h:
                    # "#RRGGBB" -> "R,G,B"; an empty cell keeps the default.
                    colour = ','.join(str(int(h[i:i+2], 16)) for i in (0, 2, 4))
            if len(colour.strip()) == 0:
                colour = '0,0,178'
            fileList.append((ifile[1],colour,ifile[0]))
    fileList = sorted(fileList, key=lambda x: x[2])

    # Initialize the components of a track hub, already connected together
    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name="RNISRS_hub",
        short_label='Regeneromics Shared Resource hub',
        long_label='Regeneration Next Initiative Regeneromics Shared Resource hub',
        genome=Genome,
        email=EMAIL)

    # create compositeTracks
    if sampleDesignDict:
        # NOTE(review): 'singlal' looks like a typo for 'signal'; left
        # unchanged because it is emitted into the hub.
        composite = trackhub.CompositeTrack(
            name = 'composite',
            short_label='singlal'
        )
        # Add those subgroups to the composite track ("color" is a display
        # attribute, not a subgroup).
        subgroups = [
            trackhub.SubGroupDefinition(name=k, label=k, mapping=mapping)
            for k, mapping in designDict.items()
            if k != 'color'
        ]
        composite.add_subgroups(subgroups)

        # Add the composite track to the trackDb
        trackdb.add_tracks(composite)
        signal_view = trackhub.ViewTrack(
            name='signalviewtrack',
            view='signal',
            short_label='Signal')
        composite.add_view(signal_view)
        # No track is added to this view in this function.
        regions_view = trackhub.ViewTrack(
            name='regionsviewtrack',
            view='regions',
            short_label='Regions')
        composite.add_view(regions_view)

    for ifile, color, sample_id in fileList:
        extension = os.path.splitext(ifile)[1].replace(".", "").lower()
        filename = trackhub.helpers.sanitize(
            os.path.splitext(os.path.basename(ifile))[0].replace(".", "_"),
            strict=False)

        if extension in ['bed','broadpeak','narrowpeak']:
            # Convert BED-like input to bigBed.  Paths are shell-quoted so
            # that spaces or shell metacharacters do not break the commands.
            sorted_bed = ifile + "_sorted.bed"
            clipped_bed = ifile + "_srt.bed"
            bigbed = ifile + ".bb"

            # sort bed file
            os.system("sort -k1,1 -k2,2n " + shlex.quote(ifile) + " >" + shlex.quote(sorted_bed))
            # drop "track" lines and cap the score column at 1000
            os.system("awk '$1 != \"track\" {$5=($5>1000)?1000:$5; print ;}' "
                      + shlex.quote(sorted_bed) + " > " + shlex.quote(clipped_bed))

            # bedToBigBed
            if extension == "bed":
                cmd = "bedToBigBed "
                # Plain BED is looked up as "bb" from here on; the peak
                # formats keep their own extension.
                extension = "bb"
            elif extension == "broadpeak":
                cmd = "bedToBigBed -type=bed6+3 -as=broadPeak.as "
            elif extension == "narrowpeak":
                cmd = "bedToBigBed -type=bed6+4 -as=narrowPeak.as "
            cmd += shlex.quote(clipped_bed) + " " + shlex.quote(ChrSize) + " " + shlex.quote(bigbed)
            os.system(cmd)
            # change ifile to new bigbed file
            ifile = bigbed

        if extension not in TrackType:
            continue

        track_args = dict(
            name=filename,
            source=filename,
            short_label=sample_id,
            long_label=filename,
            color=color,
            visibility=Visibility[extension],
            tracktype=TrackType[extension],
            autoScale='on')
        if filename in sampleDesignDict:
            # Known to the design: goes under the composite's signal view.
            track = trackhub.Track(subgroups=sampleDesignDict[filename], **track_args)
            signal_view.add_tracks(track)
        else:
            # Not described by the design (or no design at all): add it as
            # a standalone track rather than failing on the lookup.
            track = trackhub.Track(**track_args)
            trackdb.add_tracks(track)

        # Link name uses the first word of the track type as its extension.
        linkname=os.path.join(OutFolder, Genome, filename+"."+TrackType[extension].split()[0])
        makedir(os.path.join(OutFolder, Genome))
        # The link target is relative: two levels above the link's directory.
        os.symlink("../../"+ifile, linkname)

    hub.render(staging=OutFolder)
def make_trackhub():
    """Build the ``C1_peng_20180710_cluster`` bigWig hub and upload it.

    Cluster memberships are loaded with
    ``read_peng_20180710_cluster_memberships`` and reduced to the distinct
    (cluster_name, value, color, rgb) rows.  For each of the two bigWig
    suffixes (all reads, unique reads) one track per distinct row is added
    to a single ``signal`` view of a composite track.  The trackDb is
    printed and the hub is uploaded with ``trackhub.upload.upload_hub``.
    """
    import trackhub

    base_url = 'http://woldlab.caltech.edu/~diane/C1_peng_20180710_cluster_bigwigs/'
    # bigWig file suffix -> key of the 'multiread' subgroup
    subgroup_map = {
        '-mm10-M4-male_all.bw': 'all',
        '-mm10-M4-male_uniq.bw': 'uniq'
    }

    def to_rgb(channel):
        # Scale each colour component by 255 and join as "R,G,B".
        return ','.join([str(int(x * 255)) for x in channel])

    memberships = read_peng_20180710_cluster_memberships()
    df = pandas.DataFrame(
        memberships,
        columns=['cell_id', 'cluster_name', 'value', 'color'])
    df['rgb'] = df['color'].apply(to_rgb)
    colors = df[['cluster_name', 'value', 'color', 'rgb']].drop_duplicates()

    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name="C1_peng_20180710_cluster",
        short_label="C1_peng_20180710_cluster",
        genome="mm10",
        email="*****@*****.**")

    multiread_group = trackhub.SubGroupDefinition(
        name='multiread',
        label='multiread',
        mapping={
            'all': 'all_reads',
            'uniq': 'unique_only',
        })

    composite = trackhub.CompositeTrack(
        name='composite',
        short_label='bigwigs',
        dimensions='dimX=multiread',
        sortOrder='multiread',
        visibility='full',
        tracktype='bigWig',
    )
    composite.add_subgroups([multiread_group])
    trackdb.add_tracks(composite)

    signal_view = trackhub.ViewTrack(
        name='signal',
        view='signal',
        visibility='full',
        tracktype='bigWig')
    composite.add_view(signal_view)

    # All "all reads" tracks are added first, then all "unique" tracks.
    for suffix, multiread in subgroup_map.items():
        for _, row in colors.iterrows():
            name = row.value + multiread
            url = base_url + row.value + suffix
            signal_view.add_tracks(trackhub.Track(
                name=trackhub.helpers.sanitize(name),
                long_label=row.cluster_name + ' ' + multiread,
                url=url,
                visibility='full',
                tracktype='bigWig',
                subgroups={'multiread': multiread},
                color=row.rgb))

    print(trackdb)

    trackhub.upload.upload_hub(
        hub=hub,
        host='localhost',
        remote_dir='/woldlab/loxcyc/home/diane/public_html/C1_peng_20180710_cluster_bigwigs/'
    )
email="*****@*****.**") # Subgroups provide a way of tagging tracks. If you give the same tag to # a bigwig and a bigbed, you can use that tag to select them both. This is # useful for, e.g., ChIP-seq tracks where the tag is "sample" or # "antibody", and you can select both the signal and the called peaks at # the same time. # # Here we're making some contrived subgroups just to illustrate their use. subgroups = [ # A subgroup to select the replicate number trackhub.SubGroupDefinition(name='num', label='Number', mapping={ '0': 'zero', '1': 'one', '2': 'two', }), # A contrived subgroup that will only tag it as "yes" if it's sample 1. trackhub.SubGroupDefinition(name='s1', label='is_sample_1', mapping={ 'y': 'Yes', 'n': 'No', }), # While the two different views we create below are a good way of # turning on/off the signal or regions in bulk, this subgroup allows us # to sort the tracks by "name" and then by "kind". This is helpful for
def create_trackhub(OutFolder,ListFile,Genome,EMAIL,DesignFile,Postfix,PathPrefix=''):
    """Build and render a track hub from a design file and a list of tracks.

    Every listed file whose extension is a key of the module-level
    ``TrackType`` mapping is added to the hub and symlinked into
    ``OutFolder/Genome``.  Files with a ``bed``, ``broadpeak`` or
    ``narrowpeak`` extension are skipped.

    Args:
        OutFolder: Staging directory; created with ``makedir`` and handed to
            ``hub.render``.
        ListFile: Tab-separated file, one track per line: the track file
            path, optionally followed by a colour.  A missing or empty
            colour falls back to ``'0,0,178'``.
        Genome: Assembly name passed to ``trackhub.default_hub``; also the
            sub-directory of ``OutFolder`` that receives the symlinks.
        EMAIL: Contact address passed to ``trackhub.default_hub``.
        DesignFile: Comma-separated design file.  Its first four header
            columns must be ``group,replicate,fastq_1,fastq_2``.  Every
            column named ``track_<x>`` defines a subgroup ``<x>``, except
            ``track_color`` which supplies a hex colour that overrides the
            one from ``ListFile``.
        Postfix: Indexable pair of suffixes; ``Postfix[1]`` is appended to
            the group-level sample name and ``Postfix[0]`` to the
            replicate-level sample name.
        PathPrefix: Stripped of surrounding whitespace and prepended to
            every path read from ``ListFile``.

    Exits the process with status 1 if the design header is not as expected.
    """
    ERROR_STR = 'ERROR: Please check design file'
    HEADER = ['group', 'replicate', 'fastq_1', 'fastq_2']

    makedir(OutFolder)

    # sampleDesignDict: sanitized sample name -> {subgroup name: value}
    # designDict:       subgroup name -> {value: value} (SubGroupDefinition mapping)
    sampleDesignDict = {}
    designDict = {}
    with open(DesignFile, 'r') as dIn:
        header = dIn.readline().strip().split(',')
        if header[:4] != HEADER:
            print("{} header: {} != {}".format(ERROR_STR,','.join(header),','.join(HEADER)))
            sys.exit(1)

        # Map "<x>" -> column index for every header named "track_<x>".
        paramColn = {
            column[6:]: index
            for index, column in enumerate(header)
            if column[:6] == "track_"
        }

        if paramColn:
            for line in dIn:
                if not line.strip():
                    # Tolerate blank/trailing lines instead of failing on a
                    # missing replicate column.
                    continue
                lspl = [x.strip() for x in line.strip().split(',')]
                # Each design row is registered under two names: the
                # group-level name and the replicate-level name.
                sample_keys = [lspl[0]+Postfix[1], lspl[0]+'_R'+lspl[1]+Postfix[0]]
                sample_keys = [
                    trackhub.helpers.sanitize(key.replace(".", "_"), strict=False)
                    for key in sample_keys
                ]
                for key in sample_keys:
                    sampleDesignDict[key] = {}
                for k, column in paramColn.items():
                    value = lspl[column]
                    for key in sample_keys:
                        sampleDesignDict[key][k] = value
                    designDict.setdefault(k, {})[value] = value

    # fileList entries are (file path, colour).
    fileList = []
    with open(ListFile,'r') as fin:
        for line in fin:
            if not line.strip():
                continue
            # Split before stripping: stripping the whole line first would
            # eat the tab of an empty colour column and lose the field.
            fields = line.rstrip('\r\n').split('\t')
            ifile = fields[0].strip()
            colour = fields[1].strip() if len(fields) > 1 else ''

            kfile = trackhub.helpers.sanitize(
                os.path.splitext(os.path.basename(ifile))[0].replace(".", "_"),
                strict=False)
            design = sampleDesignDict.get(kfile, {})
            if "color" in design:
                h = design["color"].lstrip('#')
                if h:
                    # "#RRGGBB" -> "R,G,B"; an empty cell keeps the colour
                    # from the list file (or the default below).
                    colour = ','.join(str(int(h[i:i+2], 16)) for i in (0, 2, 4))
            if len(colour.strip()) == 0:
                colour = '0,0,178'
            fileList.append((PathPrefix.strip()+ifile,colour))

    # Initialize the components of a track hub, already connected together
    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name="RNISRS_hub",
        short_label='Regeneromics Shared Resource hub',
        long_label='Regeneration Next Initiative Regeneromics Shared Resource hub',
        genome=Genome,
        email=EMAIL)

    # create compositeTracks
    if sampleDesignDict:
        # NOTE(review): 'singlal' looks like a typo for 'signal'; left
        # unchanged because it is emitted into the hub.
        composite = trackhub.CompositeTrack(
            name = 'composite',
            short_label='singlal'
        )
        # Add those subgroups to the composite track ("color" is a display
        # attribute, not a subgroup).
        subgroups = [
            trackhub.SubGroupDefinition(name=k, label=k, mapping=mapping)
            for k, mapping in designDict.items()
            if k != 'color'
        ]
        composite.add_subgroups(subgroups)

        # Add the composite track to the trackDb
        trackdb.add_tracks(composite)
        signal_view = trackhub.ViewTrack(
            name='signalviewtrack',
            view='signal',
            short_label='Signal')
        composite.add_view(signal_view)
        # No track is added to this view in this function.
        regions_view = trackhub.ViewTrack(
            name='regionsviewtrack',
            view='regions',
            short_label='Regions')
        composite.add_view(regions_view)

    for ifile, color in fileList:
        extension = os.path.splitext(ifile)[1].replace(".", "").lower()
        filename = trackhub.helpers.sanitize(
            os.path.splitext(os.path.basename(ifile))[0].replace(".", "_"),
            strict=False)

        # BED-like files are deliberately not added by this function.
        if extension in ['bed','broadpeak','narrowpeak']:
            continue
        if extension not in TrackType:
            continue

        track_args = dict(
            name=filename,
            source=ifile,
            color=color,
            visibility=Visibility[extension],
            tracktype=TrackType[extension],
            autoScale='on')
        if filename in sampleDesignDict:
            # Known to the design: goes under the composite's signal view.
            track = trackhub.Track(subgroups=sampleDesignDict[filename], **track_args)
            signal_view.add_tracks(track)
        else:
            # Not described by the design: add as a standalone track.
            track = trackhub.Track(**track_args)
            trackdb.add_tracks(track)

        linkname=os.path.join(OutFolder, Genome, filename+"."+TrackType[extension])
        makedir(os.path.join(OutFolder, Genome))
        os.symlink(ifile, linkname)

    hub.render(staging=OutFolder)