def histone_track(name, color="200,0,0"):
    """Build the composite track for one histone mark.

    The composite contains two views:

    * ``signal`` (full visibility) with one bigWig sub-track for each of
      ``qvalues``, ``treat_pileup`` and ``control_lambda``, served from
      ``<name>_<signal_type>.bw``;
    * ``regions`` (dense visibility) with one bigBed sub-track for each of
      ``peaks`` and ``domains``, served from ``<name>_<region_type>.bb``.

    :param name: prefix used for every track name, label and URL.
    :param color: colour string handed to the composite track.
    :return: the populated ``trackhub.CompositeTrack``.
    """
    composite = trackhub.CompositeTrack(
        name=name + "_composite",
        short_label=name,
        tracktype='bigWig',
        visibility='full',
        color=color,
    )

    signal_view = trackhub.ViewTrack(
        name=f"{name}_signal",
        view='signal',
        visibility='full',
        tracktype='bigWig',
        short_label=f"{name}_signal",
        autoScale="on",
    )
    regions_view = trackhub.ViewTrack(
        name=f"{name}_region",
        view='regions',
        visibility='dense',
        tracktype='bigWig',
        short_label=f"{name}_regions",
    )
    for view in (signal_view, regions_view):
        composite.add_view(view)

    # bigWig signal sub-tracks
    for kind in ("qvalues", "treat_pileup", "control_lambda"):
        signal_view.add_tracks(
            trackhub.Track(
                tracktype='bigWig',
                name=f"{name}_{kind}",
                url=f"{name}_{kind}.bw",
                short_label=kind,
                autoScale="on",
            )
        )

    # bigBed region sub-tracks
    for kind in ("peaks", "domains"):
        regions_view.add_tracks(
            trackhub.Track(
                name=f"{name}_{kind}",
                url=f"{name}_{kind}.bb",
                short_label=kind,
                tracktype='bigBed',
            )
        )

    return composite
def make_ucsc_hub(infile, outfile, *args):
    """Assemble a UCSC track hub from bigWig/bigBed files and install it.

    Every path in *infile* containing ``.bigWig`` becomes a full-visibility,
    auto-scaled bigWig track coloured from a seaborn ``hls`` palette; every
    path containing ``.bigBed`` becomes a black bigBed track.  Hub metadata
    (name, labels, e-mail, genome) is read from ``P.PARAMS``.

    The hub is staged in ``hub_tmp_dir`` (relative to the current working
    directory), copied into ``P.PARAMS["hub_dir"]`` and the staging
    directory is removed afterwards, even when staging or copying fails.

    :param infile: iterable of file paths to publish in the hub.
    :param outfile: not used by this function.
    :param args: not used by this function.
    """
    import trackhub
    import shutil
    import seaborn as sns

    staging_dir = "hub_tmp_dir"

    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name=P.PARAMS["hub_name"],
        short_label=P.PARAMS.get("hub_short"),
        long_label=P.PARAMS.get("hub_long"),
        email=P.PARAMS["hub_email"],
        genome=P.PARAMS["genome_name"],
    )

    bigwigs = [fn for fn in infile if ".bigWig" in fn]
    bigbeds = [fn for fn in infile if ".bigBed" in fn]

    # One palette entry per *distinct* bigWig.  Pairing the palette with the
    # de-duplicated list guarantees every file has a colour even when the
    # same path is listed more than once.
    unique_bigwigs = list(dict.fromkeys(bigwigs))
    colours = dict(
        zip(unique_bigwigs, sns.color_palette("hls", len(unique_bigwigs)))
    )

    for bw in bigwigs:
        track = trackhub.Track(
            name=os.path.basename(bw).replace(".bigWig", ""),
            source=bw,  # filename to build this track from
            visibility="full",  # shows the full signal
            # palette entries are 0-1 floats; UCSC wants "R,G,B" in 0-255
            color=",".join([str(int(x * 255)) for x in colours[bw]]),
            autoScale="on",  # allow the track to autoscale
            tracktype="bigWig",  # required when making a track
        )
        trackdb.add_tracks(track)

    for bb in bigbeds:
        track = trackhub.Track(
            name=os.path.basename(bb).replace(".bigBed", ""),
            source=bb,  # filename to build this track from
            color="0,0,0",  # black
            tracktype="bigBed",  # required when making a track
        )
        trackdb.add_tracks(track)

    try:
        # Stage the hub
        trackhub.upload.stage_hub(hub=hub, staging=staging_dir)

        # Copy to the new location
        shutil.copytree(
            staging_dir,
            P.PARAMS["hub_dir"],
            dirs_exist_ok=True,
            symlinks=P.PARAMS.get("hub_symlink", False),
        )
    finally:
        # Delete the staged hub; guarded so a failure before the directory
        # was created does not mask the original exception.
        if os.path.isdir(staging_dir):
            shutil.rmtree(staging_dir)
def handler(event, context):
    """Build a track hub from a JSON request and upload it to S3.

    ``event['body']`` must be a JSON string with the keys ``s3BucketName``,
    ``hubName``, ``shortLabel``, ``longLabel``, ``genome``, ``email`` and
    ``samples``.  Each sample is a mapping with ``URL``, ``color``,
    ``trackType`` and either ``name`` or ``shortLabel``.

    The hub is staged under ``/tmp/<hubName>`` and every staged file is
    uploaded to the bucket under the key prefix ``<hubName>/`` with a
    public-read ACL.

    :param event: request event; only ``event['body']`` is read.
    :param context: not used by this function.
    :return: dict with ``isBase64Encoded``, ``statusCode`` (200) and a JSON
        ``body`` holding the ``hubPath`` URL of the hub file.
    """
    print("received event: " + json.dumps(event, indent=2))
    print("body is" + json.dumps(event['body'], indent=2))
    body = json.loads(event['body'])
    bucket = body['s3BucketName']
    hub_name = body['hubName']

    # /tmp is the only writeable system
    tmp_dir = '/tmp/' + hub_name

    # First we initialize the components of a track hub
    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name=hub_name,
        short_label=body['shortLabel'],
        long_label=body['longLabel'],
        genome=body['genome'],
        email=body['email'])

    # Next, we add a track for every sample.
    for file_info in body['samples']:
        file_URL = file_info['URL']
        name = file_info['name'] if 'name' in file_info else file_info['shortLabel']
        # track names can't have any spaces or special chars.
        name = trackhub.helpers.sanitize(name.replace(' ', '_'))

        track = trackhub.Track(
            name=name,
            url=file_URL,  # remote location of the track data
            visibility='full',  # shows the full signal
            color=file_info['color'],
            autoScale='on',  # allow the track to autoscale
            tracktype=file_info['trackType'],  # required when making a track
        )

        # Each track is added to the trackdb
        trackdb.add_tracks(track)

    # Write the hub files into the staging directory.
    trackhub.upload.stage_hub(hub=hub, staging=tmp_dir)

    # sync the directory to s3
    for root, _dirs, files in os.walk(tmp_dir):
        for file_name in files:
            full_path = os.path.join(root, file_name)
            # os.walk() roots always start with tmp_dir, so swap that prefix
            # for the hub name.  A plain slice is used instead of re.sub()
            # because the hub name is user input and must not be interpreted
            # as a regular expression or as a replacement template.
            s3_path = hub_name + full_path[len(tmp_dir):]
            print("writing {} to {}".format(full_path, s3_path))
            s3_client.upload_file(full_path, bucket, s3_path,
                                  ExtraArgs={'ACL': 'public-read'})

    # NOTE(review): the returned URL hard-codes the "dm-trackhubs" bucket and
    # the us-west-2 endpoint, while files are uploaded to `bucket` from the
    # request -- confirm these are always the same bucket.
    hubPath = "https://s3-us-west-2.amazonaws.com/dm-trackhubs/{}/{}".format(
        hub_name, hub_name + '.hub.txt')

    return {
        "isBase64Encoded": False,
        "statusCode": 200,
        "body": json.dumps({'hubPath': hubPath})
    }
def coverage_track(name, color):
    """Return a full-visibility, auto-scaled bigWig track served from ``<name>.bw``.

    :param name: used as the track name, its short label and the URL stem.
    :param color: colour string passed to the track.
    """
    settings = dict(
        tracktype='bigWig',
        name=name,
        url=f"{name}.bw",
        short_label=name,
        autoScale="on",
        visibility='full',
        color=color,
    )
    return trackhub.Track(**settings)
def make_bam_trackhub(libraries, trackdb):
    """Add a ``reads`` composite with one BAM track per library to *trackdb*.

    For every row of *libraries* a symlink to the BAM file and to its
    ``.bai`` index is created in the current working directory (unless a
    file of that name already exists), and a BAM track grouped by the row's
    cluster assignment is added to the composite's ``reads`` view.

    :param libraries: DataFrame-like object whose ``iterrows()`` yields rows
        with ``bam``, ``cluster_assignment`` and ``analysis_name``.
    :param trackdb: the trackDb that receives the composite track.
    """
    cluster_group = trackhub.SubGroupDefinition(
        name='cluster',
        label='cluster',
        mapping=cluster_mapping)

    reads_composite = trackhub.CompositeTrack(
        name='reads',
        short_label='reads',
        dimensions='dimX=cluster',
        tracktype='bam',
        visibility='dense',
    )
    reads_composite.add_subgroups([cluster_group])
    trackdb.add_tracks(reads_composite)

    reads_view = trackhub.ViewTrack(
        name='readview',
        view='reads',
        visibility='dense',
        tracktype='bam',
        short_label='Reads')
    reads_composite.add_view(reads_view)

    # The chdir into the public directory is currently disabled, so the
    # links below land in the current working directory; the working
    # directory is still restored on the way out.
    starting_dir = os.getcwd()
    try:
        for position, (library_id, row) in enumerate(libraries.iterrows()):
            cluster_label = human_cluster_mapping[row.cluster_assignment]
            ordering_prefix = f"{position:03d}"

            link_name = os.path.basename(row.bam)
            index_source = row.bam + '.bai'
            index_link = link_name + '.bai'
            if not os.path.exists(link_name):
                os.symlink(row.bam, link_name)
            if not os.path.exists(index_link):
                os.symlink(index_source, index_link)

            reads_view.add_tracks(
                trackhub.Track(
                    url=make_home_url(row['bam']),
                    name=ordering_prefix + '_' + row.analysis_name + '_reads',
                    visibility='full',
                    tracktype='bam',
                    subgroups={'cluster': cluster_label},
                    color=hex_to_ucsc_color(colors[row.cluster_assignment]),
                    priority=position,
                )
            )
    finally:
        os.chdir(starting_dir)
def make_bigwig_trackhub(libraries, trackdb):
    """Add a ``signal`` composite of bigWig tracks to *trackdb*.

    Each row of *libraries* contributes two tracks, one from its ``uniq``
    column and one from its ``all`` column.  Tracks are arranged in a
    cluster (X) by multi (Y) matrix and ordered by ``row_index * 10 +
    track_index``.

    :param libraries: DataFrame-like object whose ``iterrows()`` yields rows
        with ``uniq``, ``all``, ``cluster_assignment`` and ``analysis_name``.
    :param trackdb: the trackDb that receives the composite track.
    """
    multi_kinds = ['uniq', 'all']

    cluster_group = trackhub.SubGroupDefinition(
        name='cluster',
        label='cluster',
        mapping=cluster_mapping)
    multi_group = trackhub.SubGroupDefinition(
        name='multi',
        label='multi',
        mapping={
            'uniq': 'uniq',
            'all': 'all',
        })

    composite = trackhub.CompositeTrack(
        name='composite',
        short_label='signal',
        dimensions='dimX=cluster dimY=multi',
        tracktype='bigWig',
        visibility='dense',
    )
    composite.add_subgroups([cluster_group, multi_group])
    trackdb.add_tracks(composite)

    signal_view = trackhub.ViewTrack(
        name='signalviewtrack',
        view='signal',
        visibility='dense',
        tracktype='bigWig',
        short_label='Signal')
    composite.add_view(signal_view)

    for row_index, (library_id, row) in enumerate(libraries.iterrows()):
        cluster_label = human_cluster_mapping[row.cluster_assignment]
        for kind_index, kind in enumerate(multi_kinds):
            priority = row_index * 10 + kind_index
            track_name = f"{priority:04d}" + '_' + row.analysis_name + '_' + kind
            signal_view.add_tracks(
                trackhub.Track(
                    url=make_home_url(row[kind]),
                    name=track_name,
                    visibility='full',
                    tracktype='bigWig',
                    subgroups={'cluster': cluster_label, 'multi': kind},
                    color=hex_to_ucsc_color(colors[row.cluster_assignment]),
                    priority=priority,
                )
            )
'''), ) trackdb.add_tracks(overlay) # As in the README example, we grab all the example bigwigs for bigwig in glob.glob(os.path.join(trackhub.helpers.data_dir(), "*hg38*.bw")): track = trackhub.Track( name=trackhub.helpers.sanitize(os.path.basename(bigwig)), source=bigwig, visibility='full', tracktype='bigWig', viewLimits='-2:2', maxHeightPixels='8:50:128', html_string=dedent(''' Track as part of composite -------------------------- While the HTML file is uploaded for this track, UCSC currently displays the composite track's HTML instead. This track was from {}'''.format(os.path.basename(bigwig))), ) signal_view.add_tracks(track) # For the multiWig overlay track, we need to add the track there as # well. However it needs a different name. We have all the pieces, # might as well just make another track object: track2 = trackhub.Track( name=trackhub.helpers.sanitize(os.path.basename(bigwig)) + 'agg', source=bigwig,
import trackhub

# Example hub with a single bigPsl track served from UCSC's example data.
#
# default_hub() returns its components in the order
# (hub, genomes_file, genome, trackdb); the names below follow that order so
# that `genome` and `genomes_file` refer to the objects their names suggest.
hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name="bigPsl",
    short_label="bigPsl",
    long_label="bigPsl",
    genome="hg38",
    email="*****@*****.**",
)

track = trackhub.Track(
    name="bigPsl",
    tracktype="bigPsl",
    bigDataUrl="http://genome.ucsc.edu/goldenPath/help/examples/bigPsl.bb",
)
trackdb.add_tracks(track)

# "Upload" to localhost: the hub is written under example_hubs/.
trackhub.upload.upload_hub(hub=hub, host="localhost",
                           remote_dir="example_hubs/example_bigPsl_hub")
def populate_local_track_hub(overarching_path, rna_info, local_stage,
                             rbp_no_dict, rbp_peaks, rbp=""):
    """
    Populates a local directory with files conforming to structure required
    by the UCSC specification for displaying trackhubs on their servers.

    It is assumed that all the .bb files have already been generated and are
    stored under a directory in the following structure:

        /<source 1>/<rbp1>.bb
        /<source 1>/<rbp2>.bb
        ...
        /<source 1>/<rbpn1>.bb
        /<source 2>/<rbp1>.bb
        ...
        /<source m>/<rbpnm>.bb

    Every ``.bb`` file found becomes a bigBed track and every ``.bw`` file a
    bigWig density track.  The hub is then staged into *local_stage*.  The
    competitive / cooperative thresholds quoted in the hub's long label are
    read from ``<overarching_path>threshold_config.txt`` (last token of the
    first and second line respectively).

    :param overarching_path: Directory under which the .bb files are stored
        in the structure specified above.  It is used as a string prefix, so
        it is expected to end with a path separator.
    :param rna_info: a dictionary used to represent RNA of interest. It
        should include the 'official_name' and 'chr_n' (chromosome number).
    :param local_stage: the directory to which the local trackhub structured
        files should be saved.
    :param rbp_no_dict: A dictionary containing data sources as keys and the
        number of RBPs discovered (to bind to RNA of interest) by each source
        as value.
    :param rbp_peaks: A dictionary containing data sources as keys and the
        highest number of RBPs discovered to bind to the RNA at one
        (nucleotide) point (just from that data source) as value.
    :param rbp: Name of one RBP of interest, among those binding to the RNA
        of interest (useful only for taking its perspective in competitive /
        cooperative relationship investigation.)
    :return: the name given to the hub.
    """
    rna = rna_info['official_name']
    rna_chr_no = rna_info['chr_n']

    # Read the thresholds and close the file straight away.
    with open(overarching_path + "threshold_config.txt") as threshold_config_file:
        config_lines = threshold_config_file.read().split("\n")
    competitive_threshold_bp = config_lines[0].split()[-1]
    cooperative_threshold_bp = config_lines[1].split()[-1]

    print(OVERLAP_CONFLICT)

    rnas = rna
    proteins = rbp

    hub_name = "RBPs on " + rnas + " " + OVERLAP_CONFLICT
    hub, _, _, trackdb = trackhub.default_hub(
        hub_name=hub_name,
        short_label=("RBPs on " + rnas + " w.r.t. " + proteins + ": "
                     + OVERLAP_CONFLICT),
        long_label=(
            "RNA binding proteins on long non-coding RNAs " + rnas +
            " with respect to the binding sites of " + proteins +
            ". The red sites are the places where proteins have competitive"
            " binding with " + proteins + ", whereas green sites are places"
            " where cooperative binding occurs. The thresholds used for"
            " competitive binding was 0bp to " + str(competitive_threshold_bp)
            + "bp and " + str(competitive_threshold_bp) + "bp to " +
            str(cooperative_threshold_bp) + "bp for cooperative binding"),
        genome=GENOME_VERSION,
        email="*****@*****.**")

    print("Hub set up")

    for filename in glob.iglob(overarching_path + "**/*.bb", recursive=True):
        # The data source is the file's parent directory.  Only the last two
        # path components are used, so this works at any directory depth.
        category, name = filename.split("/")[-2:]

        # Derive a track-name-safe protein name from the file name.  A
        # separate local is used so the `rbp` argument is not overwritten.
        rbp_name = name.split("_")[0]
        rbp_name = rbp_name.replace(",", "_")
        rbp_name = rbp_name.replace("*", "_mut_")
        rbp_name = rbp_name.replace("(", "")
        rbp_name = rbp_name.replace(")", "")

        # TODO: consider options for this for the user
        # (add to Config at least)
        visibility = UCSC_TRACK_VISIBILITY

        track = trackhub.Track(
            name=rbp_name + "_" + category + "binding_sites",
            short_label=rbp_name + "_" + category,
            long_label=(
                "Binding sites of " + rbp_name + " derived from " +
                data_load_sources_supported[
                    data_load_sources_supported_short_form.index(category)]),
            source=filename,
            tracktype='bigBed 9 +',
            itemRgb="on",
            spectrum="on",
            visibility=visibility,
            chromosomes="chr" + str(rna_chr_no),
            # TODO: Add options for mouse hover views of information
            # (labelFields, defaultLabelFields, mouseOverField built from
            # column_data[category], and maxItems).
        )
        trackdb.add_tracks(track)

    for filename in glob.iglob(overarching_path + "**/*.bw", recursive=True):
        data_load_source, name = filename.split("/")[-2:]

        rbp_no = rbp_no_dict[data_load_source]
        # Round the overall peak up to the next multiple of ten to get the
        # upper view limit.
        rbp_peak = max(rbp_peaks.values())
        rbp_peak = (rbp_peak // 10 + 1) * 10

        # TODO: consider options for this for the user
        # (add to Config at least)
        visibility = "full"

        track = trackhub.Track(
            name=rna + "_" + data_load_source + "_density_plot",
            short_label="00 " + rna + "_density",
            long_label=("Density plot of " + str(rbp_no) + " RBPs on " + rna +
                        " using data from " + data_load_source.upper()),
            source=filename,
            tracktype='bigWig',
            # TODO: what color ought bigWig density plots be?
            color="128,0,0",
            visibility=visibility,
            chromosomes="chr" + str(rna_chr_no),
            viewLimits="0:" + str(rbp_peak),
            maxHeightPixels="128:50:8",
            autoScale="on")
        trackdb.add_tracks(track)

    print(hub)
    print(local_stage)
    trackhub.upload.stage_hub(hub, staging=local_stage)
    return hub_name
import trackhub

# Example hub with a single vcfTabix track served from UCSC's example data.
#
# default_hub() returns its components in the order
# (hub, genomes_file, genome, trackdb); the names below follow that order so
# that `genome` and `genomes_file` refer to the objects their names suggest.
hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name="vcfTabix",
    short_label="vcfTabix",
    long_label="vcfTabix",
    genome="hg19",
    defaultPos="chr21:33034804-33037719",
    email="*****@*****.**",
)

track = trackhub.Track(
    tracktype="vcfTabix",
    name="VCF_Example_One",
    chromosomes="chr21",
    visibility="pack",
    url="http://genome.ucsc.edu/goldenPath/help/examples/vcfExample.vcf.gz",
)
trackdb.add_tracks(track)

# "Upload" to localhost: the hub is written under example_hubs/.
trackhub.upload.upload_hub(hub=hub, host="localhost",
                           remote_dir="example_hubs/example_vcfTabix_hub")
import trackhub

# Hub skeleton for hg19, opening on a 3 Mb window of chr21.
hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name='hic',
    defaultPos='chr21:32000000-35000000',
    short_label='hic',
    long_label='hic',
    genome='hg19',
    email='*****@*****.**',
)

# A single Hi-C track pointing at a remotely hosted .hic file.
track_settings = dict(
    name='examplehicTrack',
    tracktype='hic',
    visibility='dense',
    url=('http://hgdownload.soe.ucsc.edu/gbdb/hg19/bbi/hic/'
         'GSE63525_GM12878_insitu_primary+replicate_combined.hic'),
    longLabel=('This hic file shows in situ Hi-C data from Rao et al. (2014) '
               'on the GM12878 cell line'),
)
track = trackhub.Track(**track_settings)
trackdb.add_tracks(track)

# "Upload" to localhost: the hub is written under example_hubs/.
trackhub.upload.upload_hub(
    hub=hub,
    host='localhost',
    remote_dir='example_hubs/example_hic_hub',
)
genomes_file = trackhub.GenomesFile() hub.add_genomes_file(genomes_file) # we also need to create a trackDb and add it to the genome trackdb = trackhub.TrackDb() genome.add_trackdb(trackdb) # add the genome to the genomes file here: genomes_file.add_genome(genome) # Find all bigwigs for this genome in the example data directory, and make # tracks for them for bw in glob.glob(os.path.join(trackhub.helpers.data_dir(), "*no1*.bw")): name, _, _ = os.path.basename(bw).split(".") track = trackhub.Track(name=trackhub.helpers.sanitize(name), source=bw, tracktype='bigWig', autoScale='on') trackdb.add_tracks(track) # Same with bigBeds for bb in glob.glob(os.path.join(trackhub.helpers.data_dir(), "*no1*.bigBed")): name, _ = os.path.basename(bb).split(".") track = trackhub.Track(name=trackhub.helpers.sanitize(name), source=bb, tracktype='bigBed') trackdb.add_tracks(track) # Assembly hubs also need to have a Group specified. Here's how to do that: example_group = trackhub.groups.GroupDefinition("example_tracks", label="Example Tracks", priority=1,
import trackhub

# Hub skeleton for hg18, opening on a short chr21 window.
hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name="bam",
    short_label="bam",
    long_label="bam",
    genome="hg18",
    defaultPos="chr21:33038946-33039092",
    email="*****@*****.**",
)

# A single BAM track pointing at UCSC's example alignment file.
track_settings = dict(
    tracktype="bam",
    name="bam",
    description="bam ex. 1: 1000 genomes read alignments (individual na12878)",
    pairEndsByName=".",
    pairSearchRange="10000",
    chromosomes="chr21",
    maxWindowToDraw="200000",
    visibility="pack",
    bigDataUrl="http://genome.ucsc.edu/goldenPath/help/examples/bamExample.bam",
)
track = trackhub.Track(**track_settings)
trackdb.add_tracks(track)

# "Upload" to localhost: the hub is written under example_hubs/.
trackhub.upload.upload_hub(
    hub=hub,
    host="localhost",
    remote_dir="example_hubs/example_bam_hub",
)
# One bigWig track per example file shipped with the trackhub test data.
for bigwig in glob.glob('trackhub/test/data/sine-hg38-*.bw'):

    # File names double as track names here.  Track names may not contain
    # spaces or other special characters, so sanitize() strips whatever the
    # file name contains beyond plain alphanumerics.
    name = trackhub.helpers.sanitize(os.path.basename(bigwig))

    # The settings are fixed to keep the example short; they could just as
    # well be computed per file (colour by sample, scale by some criterion,
    # and so on).
    track_settings = dict(
        name=name,            # sanitized, see above
        source=bigwig,        # local file this track is built from
        visibility='full',    # show the full signal
        color='128,0,5',      # brick red
        autoScale='on',       # let the browser pick the y-range
        tracktype='bigWig',   # a track type is mandatory
    )
    track = trackhub.Track(**track_settings)

    # Register the track with the trackDb.
    trackdb.add_tracks(track)

# "Upload" the hub to localhost.  The hub files, together with symlinks to
# the tracks' data files, are created under "example_hubs/example_hub"; that
# directory can afterwards be pushed to GitHub or rsynced to a server.
trackhub.upload.upload_hub(
    hub=hub,
    host='localhost',
    remote_dir='example_hubs/example_hub',
)
colors = { '0': '#8C2B45', '1': '#2E3440', '2': '#6DBFA7', } return trackhub.helpers.hex2rgb(colors[number]) # As in the README example, we grab all the example bigwigs for bigwig in glob.glob(os.path.join(trackhub.helpers.data_dir(), "*hg38*.bw")): track = trackhub.Track( name=trackhub.helpers.sanitize(os.path.basename(bigwig)), source=bigwig, visibility='full', tracktype='bigWig', viewLimits='-2:2', maxHeightPixels='8:50:128', subgroups=subgroups_from_filename(bigwig), color=color_from_filename(bigwig), ) # Note that we add the track to the *view* rather than the trackDb as # we did in the README example. signal_view.add_tracks(track) # For the multiWig overlay track, we need to add the track there as # well. However it needs a different name. We have all the pieces, # might as well just make another track object: track2 = trackhub.Track( name=trackhub.helpers.sanitize(os.path.basename(bigwig)) + 'agg', source=bigwig,
import trackhub

# Hub skeleton for hg19, opening on a chr21 window.
hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name="bigWig",
    defaultPos="chr21:33031597-33041570",
    short_label="bigWig",
    long_label="bigWig",
    genome="hg19",
    email="*****@*****.**",
)

# A single bigWig track pointing at UCSC's example file.
track_settings = dict(
    name="bigWig",
    tracktype="bigWig",
    bigDataUrl="http://genome.ucsc.edu/goldenPath/help/examples/bigWigExample.bw",
    shortLabel="Example Hub",
    longLabel="Example Hub",
    visibility="full",
)
track = trackhub.Track(**track_settings)
trackdb.add_tracks(track)

# "Upload" to localhost: the hub is written under example_hubs/.
trackhub.upload.upload_hub(
    hub=hub,
    host="localhost",
    remote_dir="example_hubs/example_bigWig_hub",
)
def create_trackhub(OutFolder,ListFile,Genome,EMAIL,DesignFile,Postfix,PathPrefix=''):
    """Render a UCSC track hub for the files listed in *ListFile*.

    *DesignFile* is a comma-separated table whose first four columns must be
    ``group,replicate,fastq_1,fastq_2``.  Any further column named
    ``track_<key>`` supplies a per-sample setting: ``track_color`` (a
    ``#RRGGBB`` value) overrides the track colour, every other key becomes a
    subgroup of the composite track.  Each design row describes two sample
    names, ``<group><Postfix[1]>`` and ``<group>_R<replicate><Postfix[0]>``.

    *ListFile* holds one ``<path><TAB><colour>`` entry per line; the colour
    may be empty, in which case ``0,0,178`` is used.  Blank lines in either
    file are ignored.

    Files whose extension is a key of ``TrackType`` (except bed, broadPeak
    and narrowPeak, which are skipped) get a track and a symlink
    ``<OutFolder>/<Genome>/<name>.<tracktype>``.  Tracks whose name appears
    in the design go into the composite's signal view, all others directly
    into the trackDb.  Finally the hub text files are rendered to
    *OutFolder*.

    :param OutFolder: directory the hub is written to (created if needed).
    :param ListFile: tab-separated list of data files and colours.
    :param Genome: genome assembly name of the hub.
    :param EMAIL: contact e-mail stored in the hub.
    :param DesignFile: comma-separated sample design table.
    :param Postfix: two-element sequence of sample-name suffixes (see above).
    :param PathPrefix: string prepended to every path from *ListFile*.
    :raises SystemExit: when the design file header is not as expected.
    """
    ERROR_STR = 'ERROR: Please check design file'
    HEADER = ['group', 'replicate', 'fastq_1', 'fastq_2']

    def _clean(text):
        # Track names must not contain dots or other special characters.
        return trackhub.helpers.sanitize(text.replace(".", "_"), strict=False)

    makedir(OutFolder)

    # sampleDesignDict: sample name -> {setting: value}
    # designDict:       setting     -> {value: value} (subgroup mappings)
    sampleDesignDict = {}
    designDict = {}
    with open(DesignFile, 'r') as dIn:
        header = dIn.readline().strip().split(',')
        if header[:4] != HEADER:
            print("{} header: {} != {}".format(ERROR_STR,','.join(header),','.join(HEADER)))
            sys.exit(1)

        # Column index of every "track_<key>" column, keyed by <key>.
        paramColn = {col[6:]: idx for idx, col in enumerate(header) if col[:6] == "track_"}

        if paramColn:
            for line in dIn:
                if not line.strip():
                    continue  # tolerate blank / trailing empty lines
                lspl = [x.strip() for x in line.strip().split(',')]
                sample_names = [
                    _clean(lspl[0] + Postfix[1]),
                    _clean(lspl[0] + '_R' + lspl[1] + Postfix[0]),
                ]
                for sample in sample_names:
                    sampleDesignDict[sample] = {}
                for key, column in paramColn.items():
                    value = lspl[column]
                    for sample in sample_names:
                        sampleDesignDict[sample][key] = value
                    designDict.setdefault(key, {})[value] = value

    fileList = []
    with open(ListFile, 'r') as fin:
        for line in fin:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            # Split before stripping so that an empty colour column (a line
            # ending in a tab) still yields a path instead of failing.
            fields = [x.strip() for x in line.rstrip('\r\n').split('\t')]
            ifile = fields[0]
            colour = fields[1] if len(fields) > 1 else ''
            if sampleDesignDict:
                kfile = _clean(os.path.splitext(os.path.basename(ifile))[0])
                h = sampleDesignDict.get(kfile, {}).get("color", "").lstrip('#')
                if h:
                    # "#RRGGBB" -> "R,G,B"; an empty design colour keeps the
                    # colour from the list file.
                    colour = ','.join(str(int(h[i:i+2], 16)) for i in (0, 2, 4))
            if len(colour.strip()) == 0:
                colour = '0,0,178'
            fileList.append((PathPrefix.strip()+ifile,colour))

    # Initialize the components of a track hub, already connected together
    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name="RNISRS_hub",
        short_label='Regeneromics Shared Resource hub',
        long_label='Regeneration Next Initiative Regeneromics Shared Resource hub',
        genome=Genome,
        email=EMAIL)

    # create compositeTracks
    if sampleDesignDict:
        composite = trackhub.CompositeTrack(
            name = 'composite',
            short_label='signal'
        )
        # Every design key except the colour becomes a subgroup.
        subgroups = [
            trackhub.SubGroupDefinition(name=k, label=k, mapping=designDict[k])
            for k in designDict
            if k != 'color'
        ]
        composite.add_subgroups(subgroups)

        # Add the composite track to the trackDb
        trackdb.add_tracks(composite)
        signal_view = trackhub.ViewTrack(
            name='signalviewtrack',
            view='signal',
            short_label='Signal')
        composite.add_view(signal_view)
        regions_view = trackhub.ViewTrack(
            name='regionsviewtrack',
            view='regions',
            short_label='Regions')
        composite.add_view(regions_view)

    genome_dir = os.path.join(OutFolder, Genome)
    for ifile,color in fileList:
        extension = os.path.splitext(ifile)[1].replace(".", "").lower()
        filename = _clean(os.path.splitext(os.path.basename(ifile))[0])

        # Plain-text interval files are not published by this function, and
        # unknown extensions are ignored.
        if extension in ['bed','broadpeak','narrowpeak'] or extension not in TrackType:
            continue

        track_settings = dict(
            name=filename,
            source=ifile,
            color=color,
            visibility=Visibility[extension],
            tracktype=TrackType[extension],
        )
        if filename in sampleDesignDict:
            track_settings['subgroups'] = sampleDesignDict[filename]
            track_settings['autoScale'] = 'on'
            signal_view.add_tracks(trackhub.Track(**track_settings))
        else:
            track_settings['autoScale'] = 'on'
            trackdb.add_tracks(trackhub.Track(**track_settings))

        linkname=os.path.join(genome_dir, filename+"."+TrackType[extension])
        makedir(genome_dir)
        # Replace a link left over from a previous run instead of failing;
        # anything that is not a symlink is left untouched.
        if os.path.islink(linkname):
            os.unlink(linkname)
        os.symlink(ifile, linkname)

    hub.render(staging=OutFolder)
orderKey=4800 ) genomes_file = trackhub.GenomesFile() trackdb = trackhub.TrackDb() hub.add_genomes_file(genomes_file) genomes_file.add_genome(genome) genome.add_trackdb(trackdb) str(genome) for bw in glob.glob("data/*.bw"): name, _, _ = os.path.basename(bw).split(".") track = trackhub.Track( name=name, local_fn=bw, tracktype='bigWig', autoScale=True) trackdb.add_tracks(track) for bb in glob.glob("data/*.bigBed"): name, _ = os.path.basename(bb).split(".") track = trackhub.Track( name=name, local_fn=bb, tracktype='bigBed') trackdb.add_tracks(track) example_group = trackhub.groups.GroupDefinition( "example_tracks", label="Example Tracks",
def make_trackhub():
    """Build and upload the C1_peng_20180710 cluster bigWig hub.

    The cluster memberships are reduced to their distinct
    (cluster_name, value, color, rgb) rows.  For each of the two bigWig
    flavours (all reads / unique reads) one track per such row is added to
    a composite track grouped by the ``multiread`` subgroup.  The trackDb is
    printed and the hub is uploaded to localhost.
    """
    import trackhub

    base_url = 'http://woldlab.caltech.edu/~diane/C1_peng_20180710_cluster_bigwigs/'
    # (file suffix, multiread subgroup) in the order tracks are emitted
    flavours = (
        ('-mm10-M4-male_all.bw', 'all'),
        ('-mm10-M4-male_uniq.bw', 'uniq'),
    )

    def _as_ucsc_rgb(channel):
        # 0-1 float channels -> "R,G,B" with 0-255 integers
        return ','.join([str(int(x * 255)) for x in channel])

    memberships = read_peng_20180710_cluster_memberships()
    df = pandas.DataFrame(
        memberships, columns=['cell_id', 'cluster_name', 'value', 'color'])
    df['rgb'] = df['color'].apply(_as_ucsc_rgb)
    colors = df[['cluster_name', 'value', 'color', 'rgb']].drop_duplicates()

    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name="C1_peng_20180710_cluster",
        short_label="C1_peng_20180710_cluster",
        genome="mm10",
        email="*****@*****.**")

    multiread_group = trackhub.SubGroupDefinition(
        name='multiread',
        label='multiread',
        mapping={
            'all': 'all_reads',
            'uniq': 'unique_only',
        })

    composite = trackhub.CompositeTrack(
        name='composite',
        short_label='bigwigs',
        dimensions='dimX=multiread',
        sortOrder='multiread',
        visibility='full',
        tracktype='bigWig',
    )
    composite.add_subgroups([multiread_group])
    trackdb.add_tracks(composite)

    signal_view = trackhub.ViewTrack(
        name='signal',
        view='signal',
        visibility='full',
        tracktype='bigWig')
    composite.add_view(signal_view)

    for suffix, multiread in flavours:
        for _, row in colors.iterrows():
            # Tracks point at the remotely served bigWigs rather than at
            # local source files.
            signal_view.add_tracks(
                trackhub.Track(
                    name=trackhub.helpers.sanitize(row['value'] + multiread),
                    long_label=row['cluster_name'] + ' ' + multiread,
                    url=base_url + row['value'] + suffix,
                    visibility='full',
                    tracktype='bigWig',
                    subgroups={'multiread': multiread},
                    color=row['rgb'],
                )
            )

    print(trackdb)

    trackhub.upload.upload_hub(
        hub=hub,
        host='localhost',
        remote_dir='/woldlab/loxcyc/home/diane/public_html/C1_peng_20180710_cluster_bigwigs/',
    )
hub_name="bigBarChart", short_label="bigBarChart", long_label="bigBarChart", genome="hg38", defaultPos="chr14:95081796-95436280", email="*****@*****.**", ) track = trackhub.Track( name="bigBarChart", tracktype="bigBarChart", visibility="full", shortLabel="bigBarChart", longLabel="Simple example bar chart track", barChartBars= "adiposeSubcut breastMamTissue colonTransverse muscleSkeletal wholeBlood", barChartColors="#FF6600 #33CCCC #CC9955 #AAAAFF #FF00BB", barChartLabel="Tissues", barChartMetric="median", barChartUnit="RPKM", bigDataUrl= "http://genome.ucsc.edu/goldenPath/help/examples/barChart/hg38.gtexTranscripts.bb", barChartMatrixUrl= "http://genome.ucsc.edu/goldenPath/help/examples/barChart/exampleMatrix.txt", barChartSampleUrl= "http://genome.ucsc.edu/goldenPath/help/examples/barChart/exampleSampleData.txt", ) trackdb.add_tracks(track) trackhub.upload.upload_hub(hub=hub, host="localhost", remote_dir="example_hubs/example_bigBarChart_hub")
def make_group_track(bigwigs: list, key: Union[callable, str, int], overlay=True) -> dict:
    '''Group bigWigs by *key* and build one UCSC super track per group.

    Each super track holds one hidden bigWig track per file.  With
    *overlay* enabled it also holds a transparent-overlay aggregate track
    containing a second, ``_subtrack``-suffixed copy of every file's track.

    Returns a dict mapping each group name (as produced by *key*) to its
    super track.
    '''
    import trackhub

    grouped_tracks = {}

    for group_name, members in groupby(sorted(bigwigs, key=key), key=key):
        members = list(members)
        safe_group = trackhub.helpers.sanitize(group_name)

        super_track = trackhub.SuperTrack(name=safe_group)

        aggregate = None
        if overlay:
            aggregate = trackhub.AggregateTrack(
                name=f"{safe_group}_overlay",
                aggregate="transparentOverlay",
                visibility="full",
                tracktype="bigWig",
                maxHeightPixels="8:80:128",
                showSubtrackColorOnUi="on",
                windowingFunction="maximum",
            )

        # The palette is requested for the full input list; each group uses
        # as many leading entries as it has members.
        for path, colour in zip(members, get_colors(bigwigs)):
            stem = os.path.basename(path)
            stem = stem.replace(".bigWig", "")
            stem = stem.replace(".normalised.", "")
            safe_stem = trackhub.helpers.sanitize(stem)

            # Settings shared by the plain track and its overlay twin.
            shared = dict(
                source=path,
                visibility="hide",
                color=colour,
                autoScale="off",
                tracktype="bigWig",
                windowingFunction="maximum",
            )
            plain = trackhub.Track(name=f"{safe_stem}_{group_name}", **shared)
            twin = trackhub.Track(
                name=f"{safe_stem}_{group_name}_subtrack", **shared)

            super_track.add_tracks(plain)
            if overlay:
                aggregate.add_subtrack(twin)

        if overlay:
            super_track.add_tracks(aggregate)

        grouped_tracks[group_name] = super_track

    return grouped_tracks
import trackhub

# Hub skeleton for hg38, opening at the start of chr22.
hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name="bigChain",
    short_label="bigChain",
    long_label="bigChain",
    genome="hg38",
    defaultPos="chr22:1-150754",
    email="*****@*****.**",
)

# A single bigChain track pointing at UCSC's example file.
track_settings = dict(
    name="bigChain",
    bigDataUrl="http://genome.ucsc.edu/goldenPath/help/examples/bigChain.bb",
    shortLabel="bigChain",
    longLabel="bigChain Example Hub",
    tracktype="bigChain",
    visibility="pack",
)
track = trackhub.Track(**track_settings)
trackdb.add_tracks(track)

# "Upload" to localhost: the hub is written under example_hubs/.
trackhub.upload.upload_hub(
    hub=hub,
    host="localhost",
    remote_dir="example_hubs/example_bigChain_hub",
)
def create_trackhub(OutFolder,ListFile,Genome,ChrSize, EMAIL,DesignFile):
    """Render a UCSC track hub, converting BED-like files to bigBed first.

    *DesignFile* is a comma-separated table whose first four columns must be
    ``group,replicate,fastq_1,fastq_2``.  Any further column named
    ``track_<key>`` supplies a per-sample setting: ``track_color`` (a
    ``#RRGGBB`` value) sets the track colour, every other key becomes a
    subgroup of the composite track.  Each design row describes two sample
    names, ``<group><Postfix[1]>`` and ``<group>_R<replicate>`` (``Postfix``
    is read from module scope).

    *ListFile* holds one ``<sample id><TAB><path>`` entry per line; blank
    lines in either file are ignored.  Tracks are created in sample-id
    order.

    bed / broadPeak / narrowPeak inputs are sorted, have their score column
    capped at 1000 and are converted with ``bedToBigBed`` into
    ``<path>.bb``.  The external tools are run without a shell; a failing
    tool is reported on stderr and processing continues.

    Every file whose extension is a key of ``TrackType`` gets a track and a
    symlink ``<OutFolder>/<Genome>/<name>.<tracktype>`` pointing at
    ``../../<path>``.  Tracks with a design entry go into the composite's
    signal view, all others directly into the trackDb.  Finally the hub
    text files are rendered to *OutFolder*.

    :param OutFolder: directory the hub is written to (created if needed).
    :param ListFile: tab-separated list of sample ids and data files.
    :param Genome: genome assembly name of the hub.
    :param ChrSize: chromosome sizes file handed to ``bedToBigBed``.
    :param EMAIL: contact e-mail stored in the hub.
    :param DesignFile: comma-separated sample design table.
    :raises SystemExit: when the design file header is not as expected.
    """
    # Local import: the shell-free command runner below needs it.
    import subprocess

    ERROR_STR = 'ERROR: Please check design file'
    HEADER = ['group', 'replicate', 'fastq_1', 'fastq_2']

    def _clean(text):
        # Track names must not contain dots or other special characters.
        return trackhub.helpers.sanitize(text.replace(".", "_"), strict=False)

    def _run(argv, stdout_path=None):
        # Run an external tool without a shell so that paths containing
        # spaces or shell metacharacters are passed through verbatim.
        # Failures are reported but do not abort hub creation.
        try:
            if stdout_path is None:
                status = subprocess.call(argv)
            else:
                with open(stdout_path, 'w') as out:
                    status = subprocess.call(argv, stdout=out)
        except OSError as err:
            sys.stderr.write("WARNING: could not run {}: {}\n".format(argv[0], err))
            return
        if status != 0:
            sys.stderr.write("WARNING: '{}' exited with status {}\n".format(' '.join(argv), status))

    makedir(OutFolder)

    # sampleDesignDict: sample name -> {setting: value}
    # designDict:       setting     -> {value: value} (subgroup mappings)
    sampleDesignDict = {}
    designDict = {}
    with open(DesignFile, 'r') as dIn:
        header = dIn.readline().strip().split(',')
        if header[:4] != HEADER:
            print("{} header: {} != {}".format(ERROR_STR,','.join(header),','.join(HEADER)))
            sys.exit(1)

        # Column index of every header starting with "track_", keyed by the
        # remainder of the header name.
        paramColn = {col[6:]: idx for idx, col in enumerate(header) if col[:6] == "track_"}

        if paramColn:
            for line in dIn:
                if not line.strip():
                    continue  # tolerate blank / trailing empty lines
                lspl = [x.strip() for x in line.strip().split(',')]
                sample_names = [
                    _clean(lspl[0] + Postfix[1]),
                    _clean(lspl[0] + '_R' + lspl[1]),
                ]
                for sample in sample_names:
                    sampleDesignDict[sample] = {}
                for key, column in paramColn.items():
                    value = lspl[column]
                    for sample in sample_names:
                        sampleDesignDict[sample][key] = value
                    designDict.setdefault(key, {})[value] = value

    fileList = []
    with open(ListFile, 'r') as fin:
        for line in fin:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            fields = [x.strip() for x in line.strip().split('\t')]
            sample_id, path = fields[0], fields[1]
            colour = ""
            if sampleDesignDict:
                h = sampleDesignDict.get(_clean(sample_id), {}).get("color", "").lstrip('#')
                if h:
                    # "#RRGGBB" -> "R,G,B"; an empty design colour falls
                    # through to the default below.
                    colour = ','.join(str(int(h[i:i+2], 16)) for i in (0, 2, 4))
            if len(colour.strip()) == 0:
                colour = '0,0,178'
            fileList.append((path,colour,sample_id))

    fileList = sorted(fileList, key=lambda x: x[2])

    # Initialize the components of a track hub, already connected together
    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name="RNISRS_hub",
        short_label='Regeneromics Shared Resource hub',
        long_label='Regeneration Next Initiative Regeneromics Shared Resource hub',
        genome=Genome,
        email=EMAIL)

    # create compositeTracks
    if sampleDesignDict:
        composite = trackhub.CompositeTrack(
            name = 'composite',
            short_label='signal'
        )
        # Every design key except the colour becomes a subgroup.
        subgroups = [
            trackhub.SubGroupDefinition(name=k, label=k, mapping=designDict[k])
            for k in designDict
            if k != 'color'
        ]
        composite.add_subgroups(subgroups)

        # Add the composite track to the trackDb
        trackdb.add_tracks(composite)
        signal_view = trackhub.ViewTrack(
            name='signalviewtrack',
            view='signal',
            short_label='Signal')
        composite.add_view(signal_view)
        regions_view = trackhub.ViewTrack(
            name='regionsviewtrack',
            view='regions',
            short_label='Regions')
        composite.add_view(regions_view)

    genome_dir = os.path.join(OutFolder, Genome)
    for ifile,color,sample_id in fileList:
        extension = os.path.splitext(ifile)[1].replace(".", "").lower()
        filename = _clean(os.path.splitext(os.path.basename(ifile))[0])

        if extension in ['bed','broadpeak','narrowpeak']:
            # convert bed to bigbed
            sorted_bed = ifile + "_sorted.bed"
            capped_bed = ifile + "_srt.bed"
            bigbed = ifile + ".bb"

            # sort bed file
            _run(["sort", "-k1,1", "-k2,2n", ifile], stdout_path=sorted_bed)
            # drop "track" header lines and cap the score column at 1000
            _run(["awk", '$1 != "track" {$5=($5>1000)?1000:$5; print ;}', sorted_bed],
                 stdout_path=capped_bed)

            # bedToBigBed
            convert = ["bedToBigBed"]
            if extension == "broadpeak":
                convert += ["-type=bed6+3", "-as=broadPeak.as"]
            elif extension == "narrowpeak":
                convert += ["-type=bed6+4", "-as=narrowPeak.as"]
            else:
                # plain BED is published under the "bb" track settings
                extension = "bb"
            convert += [capped_bed, ChrSize, bigbed]
            _run(convert)

            # change ifile to new bigbed file
            ifile = bigbed

        if extension not in TrackType:
            continue

        # Prefer the design entry matching the file name; fall back to the
        # entry of the sample id this file was listed under.  Files without
        # any design entry become plain top-level tracks instead of raising
        # KeyError.
        design = sampleDesignDict.get(filename)
        if design is None:
            design = sampleDesignDict.get(_clean(sample_id))

        track_settings = dict(
            name=filename,
            source=filename,
            short_label=sample_id,
            long_label=filename,
            color=color,
            visibility=Visibility[extension],
            tracktype=TrackType[extension],
        )
        if design is not None:
            track_settings['subgroups'] = design
            track_settings['autoScale'] = 'on'
            signal_view.add_tracks(trackhub.Track(**track_settings))
        else:
            track_settings['autoScale'] = 'on'
            trackdb.add_tracks(trackhub.Track(**track_settings))

        # The first word of the track type is the link's file extension.
        linkname=os.path.join(genome_dir, filename+"."+TrackType[extension].split()[0])
        makedir(genome_dir)
        # Replace a link left over from a previous run instead of failing;
        # anything that is not a symlink is left untouched.
        if os.path.islink(linkname):
            os.unlink(linkname)
        os.symlink("../../"+ifile, linkname)

    hub.render(staging=OutFolder)
import trackhub

# Example hub containing a single bigMaf track that points at remote example
# files hosted by UCSC.
#
# default_hub() returns its objects in the order
# (hub, genomes_file, genome, trackdb); the names below follow that order so
# each variable is bound to the object its name describes.
hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name="bigMaf",
    short_label="bigMaf",
    long_label="bigMaf",
    genome="hg38",
    email="*****@*****.**",
)

# Both the alignment data (bigDataUrl) and the frames file are given as remote
# URLs rather than as local source files.
track = trackhub.Track(
    name="bigMaf",
    frames="http://genome.ucsc.edu/goldenPath/help/examples/bigMafFrames.bb",
    bigDataUrl="http://genome.ucsc.edu/goldenPath/help/examples/bigMaf.bb",
    tracktype="bigMaf",
)
trackdb.add_tracks(track)

# Upload the hub to the "localhost" host under the given directory.
trackhub.upload.upload_hub(
    hub=hub,
    host="localhost",
    remote_dir="example_hubs/example_bigMaf_hub",
)
# Example hub containing a single bigLolly track that points at a remote
# example file hosted by UCSC.
#
# Parameter validation is switched off for this example -- presumably because
# the bigLolly-specific settings below are not known to trackhub's validator
# (TODO confirm).
trackhub.settings.VALIDATE = False

# default_hub() returns (hub, genomes_file, genome, trackdb), in that order.
hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name="bigLolly",
    short_label="bigLolly",
    long_label="bigLolly",
    genome="hg38",
    defaultPos="chr21:25891755-25891870",
    email="*****@*****.**",
)

# These setting names contain a dot, so they cannot be written as ordinary
# keyword arguments and are passed via ** unpacking instead.  Each axis label
# needs its own index: repeating the same key in a dict literal silently keeps
# only the last value, which would drop the first label.
add_kwargs = {
    "yAxisLabel.0": "0 on 30,30,190 0",
    "yAxisLabel.1": "5 on 30,30,190 5",
}

track = trackhub.Track(
    tracktype="bigLolly",
    bigDataUrl="http://genome.ucsc.edu/goldenPath/help/examples/bigLollyExample3.bb",
    name="bigLolly",
    lollySizeField="lollySize",
    visibility="full",
    noStems="on",
    lollyMaxSize=10,
    **add_kwargs
)
trackdb.add_tracks(track)

# Upload the hub to the "localhost" host under the given directory.
trackhub.upload.upload_hub(
    hub=hub,
    host="localhost",
    remote_dir="example_hubs/example_bigLolly_hub",
)
# point to your own files. In this example we use the path to the data # included with trackhub. for file_info in data['samples']: fileURL = file_info['URL'] if 'name' in file_info: name = file_info['name'] else: name = file_info['shortLabel'] name = re.sub(' ', '_', name) name = trackhub.helpers.sanitize(name) track = trackhub.Track( name=name, # track names can't have any spaces or special chars. url=fileURL, # filename to build this track from visibility='full', # shows the full signal color=file_info['color'], autoScale='on', # allow the track to autoscale tracktype=file_info['trackType'], # required when making a track ) # Each track is added to the trackdb trackdb.add_tracks(track) # In this example we "upload" the hub locally. Files are created in the # "example_hub" directory, along with symlinks to the tracks' data files. # This directory can then be pushed to GitHub or rsynced to a server. trackhub.upload.stage_hub(hub=hub, staging=tmp_dir) for r, d, f in os.walk(tmp_dir): for file in f: full_path = os.path.join(r, file)
def _build_track(config, fmt, name):
    """Create the ``trackhub.Track`` for one configured file.

    Args:
        config: The parsed configuration mapping.
        fmt: Format suffix ('Bed' or 'Wig'). It selects the ``big<fmt>Files``,
            ``big<fmt>GlobalSettings`` and (optional) ``big<fmt>FileSettings``
            sections of ``config``.
        name: Key of the file in ``big<fmt>Files``; also used as track name.

    Returns:
        The new track, carrying the global settings of its format overridden
        by any file-specific settings.

    Raises:
        KeyError: If the global settings do not define 'trackType'.
    """
    source = config['big{}Files'.format(fmt)][name]
    # Work on a copy so that popping 'trackType' leaves the config untouched.
    settings = dict(config['big{}GlobalSettings'.format(fmt)])
    track_type = settings.pop('trackType')
    # File-specific settings are optional and take precedence.
    file_settings = config.get('big{}FileSettings'.format(fmt)) or {}
    settings.update(file_settings.get(name) or {})

    # Both the local source and the URL are the bare file name.
    new_track = trackhub.Track(
        name=name,
        source=os.path.basename(source),
        url=os.path.basename(source),
        tracktype=track_type)
    for setting, value in settings.items():
        # Values are passed on as strings.  Unpacking a dict (instead of
        # building source code for exec) also copes with quotes in the value
        # and with setting names that are not valid identifiers.
        new_track.add_params(**{setting: str(value)})
    return new_track


def main():
    """Create and stage the components of a track hub from a YAML config.

    Command line arguments are the YAML configuration file and an existing
    local staging directory.  Tracks listed under 'superTracks' are grouped
    in superTrack containers; all remaining tracks are added to the trackDb
    directly.  The hub is then staged (not uploaded) to the staging directory.

    Raises:
        OSError: If the staging directory does not exist.
        KeyError: If the configuration lists no ``big*Files`` section.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Wrapper to facilitate the creation of hub components "
                    "using the package 'trackhub'.")

    parser.add_argument('config', help="""The yaml configuration file.""")
    parser.add_argument('stagingDir', help="""The local output directory.""")

    # TODO: staging/upload, copy/link source files

    # args for trackhub
    defaultPos = None

    utils.add_file_options(parser)
    utils.add_logging_options(parser)
    args = parser.parse_args()
    utils.update_logging(args)

    msg = "[get-hub]: {}".format(' '.join(sys.argv))
    logger.info(msg)

    # check output path
    if os.path.exists(args.stagingDir):
        # make sure the path ends with a separator
        args.stagingDir = os.path.join(args.stagingDir, '')
    else:
        msg = "Invalid output path or wrong permission: {}. Terminating.".format(
            args.stagingDir)
        raise OSError(msg)

    # read configuration file
    # NOTE(review): FullLoader should only be used on trusted configuration
    # files; consider yaml.safe_load if plain YAML is sufficient.
    with open(args.config) as config_file:
        # An empty file parses to None; treat it as an empty configuration so
        # the informative error below is raised.
        config = yaml.load(config_file, Loader=yaml.FullLoader) or {}

    # first check if we have at least one file format defined
    if not any(re.search(r'^big.+?Files$', key) for key in config):
        msg = ("There are no files listed in the configuration file and/or "
               "the wrong key was used. See the example configuration file "
               "provided with the package.")
        raise KeyError(msg)

    required_keys = ['hub', 'genomesFile', 'email']
    # Only the formats present in the configuration are processed below.
    formats = []
    for fmt in ['Bed', 'Wig']:  # supported file format so far...
        files = 'big{}Files'.format(fmt)
        if files in config:
            formats.append(fmt)
            required_keys.extend([files, 'big{}GlobalSettings'.format(fmt)])
    utils.check_keys_exist(config, required_keys)

    # default hub settings: labels fall back longLabel -> shortLabel -> hub
    short_label = config.get('shortLabel', config['hub'])
    long_label = config.get('longLabel', short_label)
    hub = trackhub.Hub(hub=config['hub'],
                       short_label=short_label,
                       long_label=long_label,
                       email=config['email'],
                       filename='hub.txt')

    genome_kwargs = {}
    if defaultPos:
        genome_kwargs['defaultPos'] = defaultPos
    genome = trackhub.Genome(config['genomesFile'], **genome_kwargs)

    genomesFile = trackhub.GenomesFile(filename='genomes.txt')
    trackDb = trackhub.TrackDb()

    hub.add_genomes_file(genomesFile)
    genomesFile.add_genome(genome)
    genome.add_trackdb(trackDb)

    # standard settings - individual tracks and/or tracks grouped with
    # superTrack container: first add the tracks associated with superTracks,
    # then the remaining tracks, if any
    super_settings = config.get('superTrackSettings') or {}
    processed = set()
    for super_name, members in (config.get('superTracks') or {}).items():
        # Labels default to the name of the superTrack.
        settings = super_settings.get(super_name) or {}
        container = trackhub.SuperTrack(
            name=super_name,
            short_label=settings.get('shortLabel', super_name),
            long_label=settings.get('longLabel', super_name),
        )
        trackDb.add_tracks(container)

        for name in members or []:
            # Listed members never become stand-alone tracks.
            processed.add(name)
            for fmt in formats:
                if name in config['big{}Files'.format(fmt)]:
                    container.add_tracks(_build_track(config, fmt, name))

    # now process tracks not attached to a superTrack
    for fmt in formats:
        for name in config['big{}Files'.format(fmt)]:
            if name not in processed:
                trackDb.add_tracks(_build_track(config, fmt, name))

    # TODO: files need to exist to be linked to the staging dir, currently only
    # the hub files/structure is created, so this ends with
    # ValueError: target {} not found.

    # stage the hub (no upload)
    trackhub.upload.stage_hub(hub, staging=args.stagingDir)
import trackhub

# Example hub containing a single bigInteract track that points at a remote
# example file hosted by UCSC.

# Settings of the hub itself.
hub_settings = dict(
    hub_name="bigInteract",
    defaultPos="chr3:63820967-63880091",
    short_label="bigInteract",
    long_label="bigInteract",
    genome="hg19",
    email="*****@*****.**",
)
hub, genomes_file, genome, trackdb = trackhub.default_hub(**hub_settings)

# Settings of the one track shown in the hub.
track_settings = dict(
    name="bigInteract",
    tracktype="bigInteract",
    bigDataUrl="http://genome.ucsc.edu/goldenPath/help/examples/interact/interactExample3.inter.bb",
    shortLabel="bigInteract",
    longLabel="bigInteract",
    visibility="pack",
    spectrum="on",
    scoreMin="175",
    maxHeightPixels="300:150:20",
)
track = trackhub.Track(**track_settings)
trackdb.add_tracks(track)

# Upload the hub to the "localhost" host under the given directory.
trackhub.upload.upload_hub(
    hub=hub,
    host="localhost",
    remote_dir="example_hubs/example_bigInteract_hub",
)