示例#1
0
def histone_track(name, color="200,0,0"):
    """Build a composite track holding signal and region sub-tracks.

    The composite is named ``<name>_composite`` and gets two views:

    * a ``signal`` view with one bigWig track per signal type, each
      pointing at ``<name>_<signal_type>.bw``;
    * a ``regions`` view with one bigBed track per region type, each
      pointing at ``<name>_<region_type>.bb``.

    :param name: Base name used for track names, labels and data URLs.
    :param color: Colour string passed to the composite track.
    :return: The populated ``trackhub.CompositeTrack``.
    """
    signal_types = ("qvalues", "treat_pileup", "control_lambda")
    region_types = ("peaks", "domains")

    composite = trackhub.CompositeTrack(
        name=f"{name}_composite",
        short_label=name,
        tracktype="bigWig",
        visibility="full",
        color=color,
    )

    signal_view = trackhub.ViewTrack(
        name=f"{name}_signal",
        view="signal",
        visibility="full",
        tracktype="bigWig",
        short_label=f"{name}_signal",
        autoScale="on",
    )
    composite.add_view(signal_view)

    # NOTE(review): this view is declared as bigWig although the tracks added
    # to it below are bigBed, and its name ends in "_region" while its label
    # ends in "_regions" -- confirm both are intended.
    regions_view = trackhub.ViewTrack(
        name=f"{name}_region",
        view="regions",
        visibility="dense",
        tracktype="bigWig",
        short_label=f"{name}_regions",
    )
    composite.add_view(regions_view)

    for signal_type in signal_types:
        signal_view.add_tracks(
            trackhub.Track(
                tracktype="bigWig",
                name=f"{name}_{signal_type}",
                url=f"{name}_{signal_type}.bw",
                short_label=signal_type,
                autoScale="on",
            )
        )

    for region_type in region_types:
        regions_view.add_tracks(
            trackhub.Track(
                name=f"{name}_{region_type}",
                url=f"{name}_{region_type}.bb",
                short_label=region_type,
                tracktype="bigBed",
            )
        )

    return composite
示例#2
0
def make_ucsc_hub(infile, outfile, *args):
    """Build a UCSC track hub from bigWig/bigBed files and copy it into place.

    Hub metadata (name, labels, e-mail, genome, destination directory and the
    optional ``hub_symlink`` flag) is read from ``P.PARAMS``.  The hub is
    first staged into a temporary ``hub_tmp_dir`` directory, copied to
    ``P.PARAMS["hub_dir"]`` and the staging directory is then removed, also
    when staging or copying fails.

    :param infile: Iterable of file names; names containing ``.bigWig``
        become signal tracks and names containing ``.bigBed`` become
        annotation tracks.
    :param outfile: Unused; kept for the caller's calling convention.
    :param args: Unused extra positional arguments.
    """
    import trackhub
    import shutil
    import seaborn as sns

    staging_dir = "hub_tmp_dir"

    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name=P.PARAMS["hub_name"],
        short_label=P.PARAMS.get("hub_short"),
        long_label=P.PARAMS.get("hub_long"),
        email=P.PARAMS["hub_email"],
        genome=P.PARAMS["genome_name"],
    )

    bigwigs = [fn for fn in infile if ".bigWig" in fn]
    bigbeds = [fn for fn in infile if ".bigBed" in fn]

    # One palette entry per *distinct* bigWig.  Zipping the raw list against
    # a palette sized by the number of unique names would leave some files
    # without a colour whenever the input contains duplicates.
    unique_bigwigs = list(dict.fromkeys(bigwigs))
    palette = sns.color_palette("hls", len(unique_bigwigs))
    colours = dict(zip(unique_bigwigs, palette))

    for bw in bigwigs:
        # The palette yields floats; the track colour is written as an
        # "R,G,B" string of integers scaled by 255.
        rgb = ",".join(str(int(channel * 255)) for channel in colours[bw])

        track = trackhub.Track(
            name=os.path.basename(bw).replace(".bigWig", ""),
            source=bw,  # filename to build this track from
            visibility="full",  # shows the full signal
            color=rgb,  # per-file colour taken from the "hls" palette
            autoScale="on",  # allow the track to autoscale
            tracktype="bigWig",  # required when making a track
        )
        trackdb.add_tracks(track)

    for bb in bigbeds:
        track = trackhub.Track(
            name=os.path.basename(bb).replace(".bigBed", ""),
            source=bb,  # filename to build this track from
            color="0,0,0",  # black
            tracktype="bigBed",  # required when making a track
        )
        trackdb.add_tracks(track)

    try:
        # Stage the hub
        trackhub.upload.stage_hub(hub=hub, staging=staging_dir)

        # Copy to the new location
        shutil.copytree(
            staging_dir,
            P.PARAMS["hub_dir"],
            dirs_exist_ok=True,
            symlinks=P.PARAMS.get("hub_symlink", False),
        )
    finally:
        # Delete the staged hub, even if staging or copying raised, so a
        # failed run does not leave a stale staging directory behind.
        if os.path.isdir(staging_dir):
            shutil.rmtree(staging_dir)
示例#3
0
def handler(event, context):
    """Build a track hub from a JSON request and upload it to S3.

    ``event['body']`` must be a JSON string with the keys ``s3BucketName``,
    ``hubName``, ``shortLabel``, ``longLabel``, ``genome``, ``email`` and
    ``samples``.  Each sample needs ``URL``, ``color``, ``trackType`` and
    either ``name`` or ``shortLabel``.

    :param event: Request event; only ``event['body']`` is read.
    :param context: Unused.
    :return: A response dict with ``statusCode`` 200 whose JSON body holds
        the ``hubPath`` URL of the uploaded ``<hubName>.hub.txt``.
    """
    print("received event: " + json.dumps(event, indent=2))
    print("body is" + json.dumps(event['body'], indent=2))
    body = json.loads(event['body'])
    bucket = body['s3BucketName']
    hub_name = body['hubName']

    # /tmp is the only writeable system
    # NOTE(review): hubName comes straight from the request body and is used
    # unvalidated as a path component and S3 key prefix -- consider
    # restricting it to a safe character set.
    tmp_dir = '/tmp/' + hub_name

    # First we initialize the components of a track hub
    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name=hub_name,
        short_label=body['shortLabel'],
        long_label=body['longLabel'],
        genome=body['genome'],
        email=body['email'])

    # Next, we add a track for every sample.
    for file_info in body['samples']:
        raw_name = (file_info['name']
                    if 'name' in file_info else file_info['shortLabel'])
        # track names can't have any spaces or special chars.
        name = trackhub.helpers.sanitize(raw_name.replace(' ', '_'))

        track = trackhub.Track(
            name=name,
            url=file_info['URL'],  # remote file this track points at
            visibility='full',  # shows the full signal
            color=file_info['color'],
            autoScale='on',  # allow the track to autoscale
            tracktype=file_info['trackType'],  # required when making a track
        )

        # Each track is added to the trackdb
        trackdb.add_tracks(track)

    # Stage the hub files under tmp_dir before uploading them.
    trackhub.upload.stage_hub(hub=hub, staging=tmp_dir)

    # sync the directory to s3
    for root, _dirs, files in os.walk(tmp_dir):
        for file_name in files:
            full_path = os.path.join(root, file_name)
            # Swap the leading staging directory for the hub name.  Every
            # path yielded by os.walk starts with tmp_dir, so slicing is
            # exact; the previous re.sub() interpreted tmp_dir as a regular
            # expression and misbehaved when the hub name contained regex
            # metacharacters.
            s3_path = hub_name + full_path[len(tmp_dir):]
            print("writing {} to {}".format(full_path, s3_path))
            s3_client.upload_file(full_path,
                                  bucket,
                                  s3_path,
                                  ExtraArgs={'ACL': 'public-read'})

    # NOTE(review): the URL hard-codes the "dm-trackhubs" bucket and the
    # us-west-2 endpoint, while files are uploaded to `bucket` -- confirm
    # the two always match.
    hubPath = "https://s3-us-west-2.amazonaws.com/dm-trackhubs/{}/{}".format(
        hub_name, hub_name + '.hub.txt')
    return {
        "isBase64Encoded": False,
        "statusCode": 200,
        "body": json.dumps({'hubPath': hubPath})
    }
示例#4
0
def coverage_track(name, color):
    """Return an auto-scaled, fully visible bigWig track for ``<name>.bw``.

    :param name: Track name; also used as the short label and as the stem
        of the data URL.
    :param color: Colour string passed through to the track.
    :return: The configured ``trackhub.Track``.
    """
    return trackhub.Track(
        tracktype="bigWig",
        name=name,
        url=f"{name}.bw",
        short_label=name,
        autoScale="on",
        visibility="full",
        color=color,
    )
def make_bam_trackhub(libraries, trackdb):
    """Add a 'reads' composite of BAM tracks, one per library, to ``trackdb``.

    For every row of ``libraries`` a symlink to the BAM file and to its
    ``.bai`` index is created in the current working directory (unless a
    path of that name already exists), and a BAM track grouped by cluster is
    added to the composite's read view.

    :param libraries: Table iterated with ``iterrows()``; each row provides
        ``bam``, ``cluster_assignment`` and ``analysis_name``.
    :param trackdb: Track database that receives the composite track.
    """
    cluster_subgroup = trackhub.SubGroupDefinition(
        name='cluster',
        label='cluster',
        mapping=cluster_mapping,
    )

    reads_composite = trackhub.CompositeTrack(
        name='reads',
        short_label='reads',
        dimensions='dimX=cluster',
        tracktype='bam',
        visibility='dense',
    )
    reads_composite.add_subgroups([cluster_subgroup])
    trackdb.add_tracks(reads_composite)

    reads_view = trackhub.ViewTrack(
        name='readview',
        view='reads',
        visibility='dense',
        tracktype='bam',
        short_label='Reads',
    )
    reads_composite.add_view(reads_view)

    starting_dir = os.getcwd()
    #os.chdir(public_dir)

    try:
        for index, (_library_id, row) in enumerate(libraries.iterrows()):
            cluster_label = human_cluster_mapping[row.cluster_assignment]

            # Link the BAM and its index into the current directory under
            # their base names.
            link_name = os.path.basename(row.bam)
            index_link_name = link_name + '.bai'
            if not os.path.exists(link_name):
                os.symlink(row.bam, link_name)
            if not os.path.exists(index_link_name):
                os.symlink(row.bam + '.bai', index_link_name)

            reads_view.add_tracks(
                trackhub.Track(
                    url=make_home_url(row['bam']),
                    # Zero-padded position prefix in the track name.
                    name=f"{index:03d}" + '_' + row.analysis_name + '_reads',
                    visibility='full',
                    tracktype='bam',
                    subgroups={'cluster': cluster_label},
                    color=hex_to_ucsc_color(colors[row.cluster_assignment]),
                    priority=index,
                )
            )
    finally:
        # Return to the directory we started in.
        os.chdir(starting_dir)
def make_bigwig_trackhub(libraries, trackdb):
    """Add a composite of bigWig signal tracks to ``trackdb``.

    Every row of ``libraries`` contributes two tracks, one for its ``uniq``
    file and one for its ``all`` file.  Tracks are grouped along two
    dimensions: ``cluster`` (X) and ``multi`` (Y).

    :param libraries: Table iterated with ``iterrows()``; each row provides
        ``uniq``, ``all``, ``cluster_assignment`` and ``analysis_name``.
    :param trackdb: Track database that receives the composite track.
    """
    read_types = ['uniq', 'all']

    cluster_subgroup = trackhub.SubGroupDefinition(
        name='cluster',
        label='cluster',
        mapping=cluster_mapping,
    )
    multi_subgroup = trackhub.SubGroupDefinition(
        name='multi',
        label='multi',
        mapping={
            'uniq': 'uniq',
            'all': 'all',
        },
    )

    signal_composite = trackhub.CompositeTrack(
        name='composite',
        short_label='signal',
        dimensions='dimX=cluster dimY=multi',
        tracktype='bigWig',
        visibility='dense',
    )
    signal_composite.add_subgroups([cluster_subgroup, multi_subgroup])
    trackdb.add_tracks(signal_composite)

    signal_view = trackhub.ViewTrack(
        name='signalviewtrack',
        view='signal',
        visibility='dense',
        tracktype='bigWig',
        short_label='Signal',
    )
    signal_composite.add_view(signal_view)

    for row_index, (_library_id, row) in enumerate(libraries.iterrows()):
        cluster_label = human_cluster_mapping[row.cluster_assignment]
        for type_index, read_type in enumerate(read_types):
            # Ten priority slots per library keep its tracks adjacent.
            priority = row_index * 10 + type_index
            signal_view.add_tracks(
                trackhub.Track(
                    url=make_home_url(row[read_type]),
                    name=f"{priority:04d}" + '_' + row.analysis_name + '_' + read_type,
                    visibility='full',
                    tracktype='bigWig',
                    subgroups={'cluster': cluster_label, 'multi': read_type},
                    color=hex_to_ucsc_color(colors[row.cluster_assignment]),
                    priority=priority,
                )
            )
示例#7
0
    '''),
)

trackdb.add_tracks(overlay)

# As in the README example, we grab all the example bigwigs
for bigwig in glob.glob(os.path.join(trackhub.helpers.data_dir(),
                                     "*hg38*.bw")):
    track = trackhub.Track(
        name=trackhub.helpers.sanitize(os.path.basename(bigwig)),
        source=bigwig,
        visibility='full',
        tracktype='bigWig',
        viewLimits='-2:2',
        maxHeightPixels='8:50:128',
        html_string=dedent('''
        Track as part of composite
        --------------------------
        While the HTML file is uploaded for this track, UCSC currently
        displays the composite track's HTML instead.

        This track was from {}'''.format(os.path.basename(bigwig))),
    )
    signal_view.add_tracks(track)

    # For the multiWig overlay track, we need to add the track there as
    # well. However it needs a different name. We have all the pieces,
    # might as well just make another track object:
    track2 = trackhub.Track(
        name=trackhub.helpers.sanitize(os.path.basename(bigwig)) + 'agg',
        source=bigwig,
示例#8
0
import trackhub

# Build a minimal hub with a single bigPsl track and upload it locally.
#
# default_hub() returns (hub, genomes_file, genome, trackdb) in that order;
# the names below follow that order so each variable holds the object its
# name says it does.
hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name="bigPsl",
    short_label="bigPsl",
    long_label="bigPsl",
    genome="hg38",
    email="*****@*****.**",
)

# The data file is referenced by URL.
track = trackhub.Track(
    name="bigPsl",
    tracktype="bigPsl",
    bigDataUrl="http://genome.ucsc.edu/goldenPath/help/examples/bigPsl.bb",
)
trackdb.add_tracks(track)

# "Upload" to localhost, writing the hub under example_hubs/.
trackhub.upload.upload_hub(hub=hub,
                           host="localhost",
                           remote_dir="example_hubs/example_bigPsl_hub")
示例#9
0
def populate_local_track_hub(overarching_path,
                             rna_info,
                             local_stage,
                             rbp_no_dict,
                             rbp_peaks,
                             rbp=""):
    """
    Populates a local directory with files conforming to structure required by
    the UCSC specification for displaying trackhubs on their servers.

    It is assumed that all the .bb files have already been generated and are
    stored under a directory in the following structure:
        /<source 1>/<rbp1>.bb
        /<source 1>/<rbp2>.bb
        ...
        /<source 1>/<rbpn1>.bb
        /<source 2>/<rbp1>.bb
        ...
        ...
        /<source m>/<rbpnm>.bb

    Every ``.bb`` file found becomes a ``bigBed 9 +`` track and every ``.bw``
    file becomes a bigWig density track.  The data source of a file is taken
    from the name of its parent directory.  The hub is then staged into
    ``local_stage``.

    The file ``threshold_config.txt`` directly under ``overarching_path``
    is also read: the last whitespace-separated token of its first line is
    used as the competitive threshold and that of its second line as the
    cooperative threshold (both only appear in the hub's long label).

    :param overarching_path: Directory under which the .bb files are stored in
        the structure specified above. It is concatenated with file names as
        a plain string, so it must end with a path separator.
    :param rna_info: a dictionary used to represent RNA of interest. It should
        include the 'official_name' and 'chr_n' (chromosome number).
    :param local_stage: the directory to which the local trackhub structured
        files should be saved.
    :param rbp_no_dict: A dictionary containing data sources as keys and the
        number of RBPs discovered (to bind to RNA of interest) by each source as
        value.
    :param rbp_peaks: A dictionary containing data sources as keys and the
        highest number of RBPs discovered to bind to the RNA at one (nucleotide)
        point (just from that data source) as value.
    :param rbp: Name of one RBP of interest, among those binding to the RNA of
        interest (useful only for taking its perspective in competitive /
        cooperative relationship investigation.)
    :return: The name given to the hub.

    """

    rna = rna_info['official_name']
    rna_chr_no = rna_info['chr_n']
    chromosome = "chr" + str(rna_chr_no)

    # The context manager closes the config file as soon as it is read.
    with open(overarching_path +
              "threshold_config.txt") as threshold_config_file:
        config_lines = threshold_config_file.read().split("\n")
    competitive_threshold_bp = config_lines[0].split()[-1]
    cooperative_threshold_bp = config_lines[1].split()[-1]

    print(OVERLAP_CONFLICT)

    hub_name = "RBPs on " + rna + " " + OVERLAP_CONFLICT
    hub, _, _, trackdb = trackhub.default_hub(
        hub_name=hub_name,
        short_label=("RBPs on " + rna + " w.r.t. " + rbp + ": " +
                     OVERLAP_CONFLICT),
        long_label=(
            "RNA binding proteins on long non-coding RNAs " + rna +
            " with respect to the binding sites of " + rbp +
            ". The red sites are the places where proteins have competitive"
            " binding with " + rbp + ", whereas green sites are places"
            " where cooperative binding occurs. The thresholds used for"
            " competitive binding was 0bp to " +
            str(competitive_threshold_bp) + "bp and " +
            str(competitive_threshold_bp) + "bp to " +
            str(cooperative_threshold_bp) + "bp for cooperative binding"),
        genome=GENOME_VERSION,
        email="*****@*****.**")

    print("Hub set up")

    # Characters that are rewritten or dropped when turning a file name
    # into a track name: "," -> "_", "*" -> "_mut_", parentheses removed.
    rbp_name_cleanup = str.maketrans({
        ",": "_",
        "*": "_mut_",
        "(": None,
        ")": None,
    })

    for filename in glob.iglob(overarching_path + "**/*.bb", recursive=True):
        # Only the last two path components matter (<source>/<file>), so
        # take them from the end instead of assuming a fixed path depth.
        category, name = filename.split("/")[-2:]
        # A separate local keeps the `rbp` argument from being overwritten.
        rbp_name = name.split("_")[0].translate(rbp_name_cleanup)

        visibility = UCSC_TRACK_VISIBILITY
        # TODO: consider options for this for the user (add to Config at least)

        track = trackhub.Track(
            # NOTE(review): there is no separator between the category and
            # "binding_sites" in the track name -- confirm this is intended.
            name=rbp_name + "_" + category + "binding_sites",
            short_label=rbp_name + "_" + category,
            long_label=(
                "Binding sites of " + rbp_name + " derived from " +
                data_load_sources_supported[
                    data_load_sources_supported_short_form.index(category)]),
            source=filename,
            tracktype='bigBed 9 +',
            itemRgb="on",
            spectrum="on",
            visibility=visibility,
            chromosomes=chromosome,
            # TODO: set labelFields, defaultLabelFields, mouseOverField and
            # maxItems from per-category column metadata.
        )

        # TODO: Add options for mouse hover views of information

        trackdb.add_tracks(track)

    for filename in glob.iglob(overarching_path + "**/*.bw", recursive=True):
        data_load_source = filename.split("/")[-2]
        rbp_no = rbp_no_dict[data_load_source]
        # Upper view limit: the largest peak over all sources, rounded up
        # to the next multiple of ten.
        rbp_peak = (max(rbp_peaks.values()) // 10 + 1) * 10
        visibility = "full"

        # TODO: consider options for this for the user (add to Config at least)

        track = trackhub.Track(
            name=rna + "_" + data_load_source + "_density_plot",
            short_label="00 " + rna + "_density",
            long_label=("Density plot of " + str(rbp_no) + " RBPs on " + rna +
                        " using data from " + data_load_source.upper()),
            source=filename,
            tracktype='bigWig',
            color="128,0,0",  # TODO: what color ought bigWig density plots be?
            visibility=visibility,
            chromosomes=chromosome,
            viewLimits="0:" + str(rbp_peak),
            maxHeightPixels="128:50:8",
            autoScale="on")

        trackdb.add_tracks(track)

    print(hub)
    print(local_stage)

    trackhub.upload.stage_hub(hub, staging=local_stage)

    return hub_name
示例#10
0
import trackhub

# Build a minimal hub with a single vcfTabix track and upload it locally.
#
# default_hub() returns (hub, genomes_file, genome, trackdb) in that order;
# the names below follow that order so each variable holds the object its
# name says it does.
hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name="vcfTabix",
    short_label="vcfTabix",
    long_label="vcfTabix",
    genome="hg19",
    defaultPos="chr21:33034804-33037719",
    email="*****@*****.**",
)

# The data file is referenced by URL; the track is limited to chr21.
track = trackhub.Track(
    tracktype="vcfTabix",
    name="VCF_Example_One",
    chromosomes="chr21",
    visibility="pack",
    url="http://genome.ucsc.edu/goldenPath/help/examples/vcfExample.vcf.gz",
)
trackdb.add_tracks(track)

# "Upload" to localhost, writing the hub under example_hubs/.
trackhub.upload.upload_hub(hub=hub,
                           host="localhost",
                           remote_dir="example_hubs/example_vcfTabix_hub")
示例#11
0
import trackhub
# Minimal hub with a single Hi-C track, "uploaded" to localhost.
HIC_DATA_URL = (
    "http://hgdownload.soe.ucsc.edu/gbdb/hg19/bbi/hic/"
    "GSE63525_GM12878_insitu_primary+replicate_combined.hic"
)
HIC_LONG_LABEL = (
    "This hic file shows in situ Hi-C data from Rao et al. (2014) "
    "on the GM12878 cell line"
)

hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name="hic",
    defaultPos="chr21:32000000-35000000",
    short_label="hic",
    long_label="hic",
    genome="hg19",
    email="*****@*****.**",
)

# The track points at a remote .hic file by URL.
track = trackhub.Track(
    name="examplehicTrack",
    tracktype="hic",
    visibility="dense",
    url=HIC_DATA_URL,
    longLabel=HIC_LONG_LABEL,
)
trackdb.add_tracks(track)

trackhub.upload.upload_hub(
    hub=hub,
    host="localhost",
    remote_dir="example_hubs/example_hic_hub",
)
示例#12
0
# Wire up the hub components by hand. `hub` and `genome` are created earlier
# in the script, outside this excerpt.
genomes_file = trackhub.GenomesFile()
hub.add_genomes_file(genomes_file)

# we also need to create a trackDb and add it to the genome
trackdb = trackhub.TrackDb()
genome.add_trackdb(trackdb)

# add the genome to the genomes file here:
genomes_file.add_genome(genome)

# Find all bigwigs for this genome in the example data directory, and make
# tracks for them
for bw in glob.glob(os.path.join(trackhub.helpers.data_dir(), "*no1*.bw")):
    # The three-way unpack requires the file name to contain exactly two
    # dots; any other shape raises ValueError. Only the first part is used.
    name, _, _ = os.path.basename(bw).split(".")
    track = trackhub.Track(name=trackhub.helpers.sanitize(name),
                           source=bw,
                           tracktype='bigWig',
                           autoScale='on')
    trackdb.add_tracks(track)

# Same with bigBeds
for bb in glob.glob(os.path.join(trackhub.helpers.data_dir(), "*no1*.bigBed")):
    # Here the file name must contain exactly one dot (stem + extension).
    name, _ = os.path.basename(bb).split(".")
    track = trackhub.Track(name=trackhub.helpers.sanitize(name),
                           source=bb,
                           tracktype='bigBed')
    trackdb.add_tracks(track)

# Assembly hubs also need to have a Group specified. Here's how to do that:
example_group = trackhub.groups.GroupDefinition("example_tracks",
                                                label="Example Tracks",
                                                priority=1,
示例#13
0
import trackhub

# Minimal hub with a single BAM track, "uploaded" to localhost.
BAM_DATA_URL = "http://genome.ucsc.edu/goldenPath/help/examples/bamExample.bam"
BAM_DESCRIPTION = (
    "bam ex. 1: 1000 genomes read alignments (individual na12878)"
)

hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name="bam",
    short_label="bam",
    long_label="bam",
    genome="hg18",
    defaultPos="chr21:33038946-33039092",
    email="*****@*****.**",
)

# The track is limited to chr21 and references its data by URL.
track = trackhub.Track(
    tracktype="bam",
    name="bam",
    description=BAM_DESCRIPTION,
    pairEndsByName=".",
    pairSearchRange="10000",
    chromosomes="chr21",
    maxWindowToDraw="200000",
    visibility="pack",
    bigDataUrl=BAM_DATA_URL,
)
trackdb.add_tracks(track)

trackhub.upload.upload_hub(
    hub=hub,
    host="localhost",
    remote_dir="example_hubs/example_bam_hub",
)
示例#14
0
# Create one bigWig track per example file and add it to `trackdb`.
# `hub` and `trackdb` are created earlier in the script, outside this
# excerpt.
for bigwig in glob.glob('trackhub/test/data/sine-hg38-*.bw'):

    # track names can't have any spaces or special characters. Since we'll
    # be using filenames as names, and filenames have non-alphanumeric
    # characters, we use the sanitize() function to remove them.

    name = trackhub.helpers.sanitize(os.path.basename(bigwig))

    # We're keeping this relatively simple, but arguments can be
    # programmatically determined (color tracks based on sample; change scale
    # based on criteria, etc).

    track = trackhub.Track(
        name=name,  # track names can't have any spaces or special chars.
        source=bigwig,  # filename to build this track from
        visibility='full',  # shows the full signal
        color='128,0,5',  # brick red
        autoScale='on',  # allow the track to autoscale
        tracktype='bigWig',  # required when making a track
    )

    # Each track is added to the trackdb

    trackdb.add_tracks(track)

# In this example we "upload" the hub locally (host='localhost'). The
# destination is the "example_hubs/example_hub" directory.
# This directory can then be pushed to GitHub or rsynced to a server.

trackhub.upload.upload_hub(hub=hub,
                           host='localhost',
                           remote_dir='example_hubs/example_hub')
示例#15
0
    colors = {
        '0': '#8C2B45',
        '1': '#2E3440',
        '2': '#6DBFA7',
    }
    return trackhub.helpers.hex2rgb(colors[number])


# As in the README example, we grab all the example bigwigs
for bigwig in glob.glob(os.path.join(trackhub.helpers.data_dir(),
                                     "*hg38*.bw")):
    track = trackhub.Track(
        name=trackhub.helpers.sanitize(os.path.basename(bigwig)),
        source=bigwig,
        visibility='full',
        tracktype='bigWig',
        viewLimits='-2:2',
        maxHeightPixels='8:50:128',
        subgroups=subgroups_from_filename(bigwig),
        color=color_from_filename(bigwig),
    )

    # Note that we add the track to the *view* rather than the trackDb as
    # we did in the README example.
    signal_view.add_tracks(track)

    # For the multiWig overlay track, we need to add the track there as
    # well. However it needs a different name. We have all the pieces,
    # might as well just make another track object:
    track2 = trackhub.Track(
        name=trackhub.helpers.sanitize(os.path.basename(bigwig)) + 'agg',
        source=bigwig,
示例#16
0
import trackhub

# Minimal hub with a single bigWig track, "uploaded" to localhost.
BIGWIG_DATA_URL = (
    "http://genome.ucsc.edu/goldenPath/help/examples/bigWigExample.bw"
)

hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name="bigWig",
    defaultPos="chr21:33031597-33041570",
    short_label="bigWig",
    long_label="bigWig",
    genome="hg19",
    email="*****@*****.**",
)

# The track references its data by URL and is shown at full height.
track = trackhub.Track(
    name="bigWig",
    tracktype="bigWig",
    bigDataUrl=BIGWIG_DATA_URL,
    shortLabel="Example Hub",
    longLabel="Example Hub",
    visibility="full",
)
trackdb.add_tracks(track)

trackhub.upload.upload_hub(hub=hub,
                           host="localhost",
                           remote_dir="example_hubs/example_bigWig_hub")
示例#17
0
def create_trackhub(OutFolder,ListFile,Genome,EMAIL,DesignFile,Postfix,PathPrefix=''):
    """Render a track hub for the files named in ``ListFile``.

    The design file is a comma-separated table whose first four columns must
    be ``group, replicate, fastq_1, fastq_2``.  Any further column named
    ``track_<param>`` defines a per-sample parameter.  Each design row
    registers two sample names, ``<group><Postfix[1]>`` and
    ``<group>_R<replicate><Postfix[0]>`` (dots replaced by underscores, then
    sanitized), both carrying the row's parameters.  When such columns
    exist, a composite track is created with one subgroup per parameter
    (except ``color``) and matching files are placed in its signal view.
    All other files become stand-alone tracks.

    The list file holds one ``<path><TAB><colour>`` entry per line.  A
    ``track_color`` value from the design file (hex ``RRGGBB``, optional
    leading ``#``) overrides the listed colour; a missing or empty colour
    falls back to ``0,0,178``.  Blank lines in either file are ignored.

    Files with a ``bed``/``broadpeak``/``narrowpeak`` extension, or an
    extension missing from the module-level ``TrackType`` mapping, are
    skipped.  For every other file a symlink is created under
    ``<OutFolder>/<Genome>/`` before the hub is rendered into ``OutFolder``.

    :param OutFolder: Directory the hub is rendered into (created first).
    :param ListFile: Path of the tab-separated file/colour list.
    :param Genome: Genome name for the hub and the symlink sub-directory.
    :param EMAIL: Contact e-mail stored in the hub.
    :param DesignFile: Path of the comma-separated design file.
    :param Postfix: Indexable with at least two entries; ``Postfix[1]`` is
        appended to group names and ``Postfix[0]`` to replicate names.
    :param PathPrefix: Prefix (whitespace-stripped) put in front of every
        listed path.
    :raises SystemExit: If the design file header is not as expected.
    """
    ERROR_STR = 'ERROR: Please check design file'
    HEADER = ['group', 'replicate', 'fastq_1', 'fastq_2']
    DEFAULT_COLOUR = '0,0,178'

    def clean_name(raw):
        # Same normalisation for design-file sample names and data file stems.
        return trackhub.helpers.sanitize(raw.replace(".", "_"), strict=False)

    makedir(OutFolder)

    sampleDesignDict = {}  # sample name -> {param: value}
    designDict = {}        # param -> {value: value}, used as subgroup mapping

    # The context manager closes the design file on every path, including
    # the sys.exit() below.
    with open(DesignFile, 'r') as dIn:
        header = dIn.readline().strip().split(',')
        if header[:4] != HEADER:
            print("{} header: {} != {}".format(ERROR_STR,','.join(header),','.join(HEADER)))
            sys.exit(1)

        # Column index of every "track_<param>" column, keyed by <param>.
        paramColn = {
            column[6:]: index
            for index, column in enumerate(header)
            if column[:6] == "track_"
        }

        if paramColn:
            for line in dIn:
                if not line.strip():
                    continue  # tolerate blank lines
                lspl = [x.strip() for x in line.strip().split(',')]
                group, replicate = lspl[0], lspl[1]
                params = {k: lspl[index] for k, index in paramColn.items()}

                # Register the group-level name and the replicate-level
                # name, each with its own copy of the parameters.
                sample_names = (
                    clean_name(group + Postfix[1]),
                    clean_name(group + '_R' + replicate + Postfix[0]),
                )
                for sample_name in sample_names:
                    sampleDesignDict[sample_name] = dict(params)
                for k, value in params.items():
                    designDict.setdefault(k, {})[value] = value

    fileList = []
    # Previously fin.close() sat after `break` and never ran.
    with open(ListFile, 'r') as fin:
        for line in fin:
            if not line.strip():
                continue  # tolerate blank lines
            parts = line.strip().split('\t')
            if len(parts) == 1:
                # strip() removes a trailing tab, so an entry with an empty
                # colour column arrives here as a single field.
                parts.append('')
            ifile, colour = parts

            design_colour = None
            if sampleDesignDict:
                kfile = clean_name(os.path.splitext(os.path.basename(ifile))[0])
                design_colour = sampleDesignDict.get(kfile, {}).get("color")
            if design_colour:
                # Hex "RRGGBB" -> "R,G,B" decimal string.
                h = design_colour.lstrip('#')
                colour = ','.join(str(int(h[i:i + 2], 16)) for i in (0, 2, 4))
            if not colour.strip():
                colour = DEFAULT_COLOUR
            fileList.append((PathPrefix.strip() + ifile, colour))

    # Initialize the components of a track hub, already connected together
    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name="RNISRS_hub",
        short_label='Regeneromics Shared Resource hub',
        long_label='Regeneration Next Initiative Regeneromics Shared Resource hub',
        genome=Genome,
        email=EMAIL)

    # create compositeTracks
    if sampleDesignDict:
        composite = trackhub.CompositeTrack(
            name='composite',
            short_label='signal'  # was misspelled 'singlal'
        )
        # One subgroup per design parameter; 'color' is a display setting,
        # not a grouping dimension.
        subgroups = [
            trackhub.SubGroupDefinition(name=k, label=k, mapping=mapping)
            for k, mapping in designDict.items()
            if k != 'color'
        ]
        composite.add_subgroups(subgroups)

        # Add the composite track to the trackDb
        trackdb.add_tracks(composite)
        signal_view = trackhub.ViewTrack(
            name='signalviewtrack',
            view='signal',
            short_label='Signal')
        composite.add_view(signal_view)
        # The regions view is attached but no tracks are added to it below.
        regions_view = trackhub.ViewTrack(
            name='regionsviewtrack',
            view='regions',
            short_label='Regions')
        composite.add_view(regions_view)

    for ifile, color in fileList:
        extension = os.path.splitext(ifile)[1].replace(".", "").lower()
        if extension in ['bed', 'broadpeak', 'narrowpeak']:
            continue
        if extension not in TrackType:
            continue

        filename = clean_name(os.path.splitext(os.path.basename(ifile))[0])
        track_params = dict(
            name=filename,
            source=ifile,
            color=color,
            visibility=Visibility[extension],
            tracktype=TrackType[extension],
            autoScale='on',
        )
        if filename in sampleDesignDict:
            # NOTE(review): the subgroups dict still contains a 'color'
            # entry when a track_color column exists, although no 'color'
            # subgroup is defined on the composite -- confirm this is fine.
            track_params['subgroups'] = sampleDesignDict[filename]
            signal_view.add_tracks(trackhub.Track(**track_params))
        else:
            trackdb.add_tracks(trackhub.Track(**track_params))

        linkname = os.path.join(OutFolder, Genome, filename + "." + TrackType[extension])
        makedir(os.path.join(OutFolder, Genome))
        os.symlink(ifile, linkname)

    hub.render(staging=OutFolder)
示例#18
0
    orderKey=4800
)

# Wire up the hub components by hand. `hub` and `genome` are created earlier
# in the script, outside this excerpt.
genomes_file = trackhub.GenomesFile()
trackdb = trackhub.TrackDb()
hub.add_genomes_file(genomes_file)
genomes_file.add_genome(genome)
genome.add_trackdb(trackdb)

# The result of str() is discarded.
# NOTE(review): presumably left over from interactive use or meant to force
# rendering of the genome -- confirm before removing.
str(genome)

# One bigWig track per file in data/.
for bw in glob.glob("data/*.bw"):
    # The three-way unpack requires the file name to contain exactly two
    # dots; any other shape raises ValueError. Only the first part is used.
    name, _, _ = os.path.basename(bw).split(".")
    # NOTE(review): the other examples in this file pass source= rather than
    # local_fn= and autoScale='on' rather than True -- confirm which form
    # the installed trackhub version expects.
    track = trackhub.Track(
        name=name,
        local_fn=bw,
        tracktype='bigWig',
        autoScale=True)
    trackdb.add_tracks(track)

# One bigBed track per file in data/.
for bb in glob.glob("data/*.bigBed"):
    # Here the file name must contain exactly one dot (stem + extension).
    name, _ = os.path.basename(bb).split(".")
    track = trackhub.Track(
        name=name,
        local_fn=bb,
        tracktype='bigBed')
    trackdb.add_tracks(track)

example_group = trackhub.groups.GroupDefinition(
    "example_tracks",
    label="Example Tracks",
示例#19
0
def make_trackhub():
    """Build and upload the C1_peng_20180710_cluster bigWig hub.

    Cluster memberships are loaded with
    ``read_peng_20180710_cluster_memberships()``.  For every distinct
    (cluster_name, value, color) combination two bigWig tracks are created,
    one for the ``all`` file and one for the ``uniq`` file, both referenced
    by URL.  The track database is printed and the hub is uploaded to a
    directory on localhost.
    """
    import trackhub

    def to_rgb_string(channels):
        # Scale each channel by 255 and join as an "R,G,B" string.
        return ','.join([str(int(level * 255)) for level in channels])

    memberships = pandas.DataFrame(
        read_peng_20180710_cluster_memberships(),
        columns=['cell_id', 'cluster_name', 'value', 'color'])
    memberships['rgb'] = memberships['color'].apply(to_rgb_string)

    cluster_colors = memberships[
        ['cluster_name', 'value', 'color', 'rgb']].drop_duplicates()

    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name="C1_peng_20180710_cluster",
        short_label="C1_peng_20180710_cluster",
        genome="mm10",
        email="*****@*****.**")

    multiread_subgroup = trackhub.SubGroupDefinition(
        name='multiread',
        label='multiread',
        mapping={
            'all': 'all_reads',
            'uniq': 'unique_only',
        },
    )

    bigwig_composite = trackhub.CompositeTrack(
        name='composite',
        short_label='bigwigs',
        dimensions='dimX=multiread',
        sortOrder='multiread',
        visibility='full',
        tracktype='bigWig',
    )
    bigwig_composite.add_subgroups([multiread_subgroup])
    trackdb.add_tracks(bigwig_composite)

    signal_view = trackhub.ViewTrack(
        name='signal',
        view='signal',
        visibility='full',
        tracktype='bigWig',
    )
    bigwig_composite.add_view(signal_view)

    # File-name suffix -> multiread subgroup; iterated in insertion order,
    # so every 'all' track is created before the 'uniq' tracks.
    suffix_to_read_type = {
        '-mm10-M4-male_all.bw': 'all',
        '-mm10-M4-male_uniq.bw': 'uniq',
    }
    base_url = 'http://woldlab.caltech.edu/~diane/C1_peng_20180710_cluster_bigwigs/'

    for suffix, read_type in suffix_to_read_type.items():
        for _, row in cluster_colors.iterrows():
            signal_view.add_tracks(
                trackhub.Track(
                    name=trackhub.helpers.sanitize(row.value + read_type),
                    long_label=row.cluster_name + ' ' + read_type,
                    url=base_url + row.value + suffix,
                    visibility='full',
                    tracktype='bigWig',
                    subgroups={'multiread': read_type},
                    color=row.rgb,
                )
            )

    print(trackdb)
    trackhub.upload.upload_hub(
        hub=hub,
        host='localhost',
        remote_dir='/woldlab/loxcyc/home/diane/public_html/C1_peng_20180710_cluster_bigwigs/',
    )
示例#20
0
    hub_name="bigBarChart",
    short_label="bigBarChart",
    long_label="bigBarChart",
    genome="hg38",
    defaultPos="chr14:95081796-95436280",
    email="*****@*****.**",
)
# Define a single bigBarChart track. `hub` and `trackdb` come from the
# default_hub() call that starts above this excerpt.
track = trackhub.Track(
    name="bigBarChart",
    tracktype="bigBarChart",
    visibility="full",
    shortLabel="bigBarChart",
    longLabel="Simple example bar chart track",
    # Five space-separated bar names ...
    barChartBars=
    "adiposeSubcut breastMamTissue colonTransverse muscleSkeletal wholeBlood",
    # ... and five space-separated hex colours.
    barChartColors="#FF6600 #33CCCC #CC9955 #AAAAFF #FF00BB",
    barChartLabel="Tissues",
    barChartMetric="median",
    barChartUnit="RPKM",
    # The data file, matrix and sample table are all referenced by URL.
    bigDataUrl=
    "http://genome.ucsc.edu/goldenPath/help/examples/barChart/hg38.gtexTranscripts.bb",
    barChartMatrixUrl=
    "http://genome.ucsc.edu/goldenPath/help/examples/barChart/exampleMatrix.txt",
    barChartSampleUrl=
    "http://genome.ucsc.edu/goldenPath/help/examples/barChart/exampleSampleData.txt",
)
trackdb.add_tracks(track)

# "Upload" to localhost, writing the hub under example_hubs/.
trackhub.upload.upload_hub(hub=hub,
                           host="localhost",
                           remote_dir="example_hubs/example_bigBarChart_hub")
示例#21
0
def make_group_track(bigwigs: list,
                     key: Union[callable, str, int],
                     overlay=True) -> dict:
    """Build one UCSC super track per group of bigWig files.

    The inputs are sorted and grouped with ``key``. Each group gets a
    ``trackhub.SuperTrack`` holding one hidden bigWig track per file and,
    when ``overlay`` is truthy, an additional transparent-overlay aggregate
    track containing a ``*_subtrack`` copy of every file.

    Args:
        bigwigs: paths of the bigWig files to group.
        key: grouping key, handed to both ``sorted`` and ``itertools.groupby``.
        overlay: whether to add the aggregate overlay track to each group.

    Returns:
        dict mapping each (unsanitized) group key to its super track.
    """

    import trackhub

    tracks_by_group = dict()
    ordered = sorted(bigwigs, key=key)

    for group_key, members in groupby(ordered, key=key):
        members = list(members)
        safe_group = trackhub.helpers.sanitize(group_key)

        # Container for every track of this group
        container = trackhub.SuperTrack(name=safe_group)

        # Optional aggregate that draws all of the group's signals on top of each other
        if overlay:
            aggregate = trackhub.AggregateTrack(
                name=f"{safe_group}_overlay",
                aggregate="transparentOverlay",
                visibility="full",
                tracktype="bigWig",
                maxHeightPixels="8:80:128",
                showSubtrackColorOnUi="on",
                windowingFunction="maximum",
            )

        for bw_path, colour in zip(members, get_colors(bigwigs)):
            stem = os.path.basename(bw_path)
            stem = stem.replace(".bigWig", "")
            stem = stem.replace(".normalised.", "")
            safe_stem = trackhub.helpers.sanitize(stem)

            # Settings shared by the stand-alone track and its overlay twin
            shared = dict(
                source=bw_path,
                visibility="hide",
                color=colour,
                autoScale="off",
                tracktype="bigWig",
                windowingFunction="maximum",
            )
            standalone = trackhub.Track(name=f"{safe_stem}_{group_key}", **shared)
            overlay_member = trackhub.Track(
                name=f"{safe_stem}_{group_key}_subtrack", **shared)

            container.add_tracks(standalone)
            if overlay:
                aggregate.add_subtrack(overlay_member)

        if overlay:
            container.add_tracks(aggregate)

        tracks_by_group[group_key] = container

    return tracks_by_group
示例#22
0
import trackhub

# Hub-level settings for the bigChain example.
hub_settings = dict(
    hub_name="bigChain",
    short_label="bigChain",
    long_label="bigChain",
    genome="hg38",
    defaultPos="chr22:1-150754",
    email="*****@*****.**",
)
hub, genomes_file, genome, trackdb = trackhub.default_hub(**hub_settings)

# A single bigChain track backed by the UCSC example file.
track = trackhub.Track(
    name="bigChain",
    bigDataUrl="http://genome.ucsc.edu/goldenPath/help/examples/bigChain.bb",
    shortLabel="bigChain",
    longLabel="bigChain Example Hub",
    tracktype="bigChain",
    visibility="pack",
)
trackdb.add_tracks(track)

# Upload the hub to localhost under the given remote directory.
trackhub.upload.upload_hub(
    hub=hub,
    host="localhost",
    remote_dir="example_hubs/example_bigChain_hub",
)
示例#23
0
def create_trackhub(OutFolder,ListFile,Genome,ChrSize, EMAIL,DesignFile):
    """Build a UCSC track hub from a file list and render it into ``OutFolder``.

    Args:
        OutFolder: staging directory the hub is rendered into (created first).
        ListFile: tab-separated file; column 1 is the sample id, column 2 the
            path of the data file.
        Genome: assembly name given to ``trackhub.default_hub``; also the
            sub-directory of ``OutFolder`` that receives the data symlinks.
        ChrSize: chromosome sizes file passed to ``bedToBigBed``.
        EMAIL: contact e-mail stored in the hub.
        DesignFile: comma-separated design file. Its first four columns must
            be ``group,replicate,fastq_1,fastq_2``; any column named
            ``track_<param>`` is collected per sample (``track_color`` is used
            as a hex colour, the others become composite subgroups).

    Exits with status 1 when the design-file header is not as expected.
    """
    import shlex  # local import: only needed to quote paths for the shell

    ERROR_STR = 'ERROR: Please check design file'
    HEADER = ['group', 'replicate', 'fastq_1', 'fastq_2']

    makedir(OutFolder)

    sampleDesignDict = {}
    designDict = {}
    # The context manager closes the design file on every path, including sys.exit.
    with open(DesignFile, 'r') as dIn:
        header = dIn.readline().strip().split(',')
        if header[:4] != HEADER:
            print("{} header: {} != {}".format(ERROR_STR,','.join(header),','.join(HEADER)))
            sys.exit(1)

        # Map "<param>" -> column index for every "track_<param>" header.
        paramColn = {
            column[6:]: index
            for index, column in enumerate(header)
            if column[:6] == "track_"
        }

        if paramColn:
            for line in dIn:
                lspl = [x.strip() for x in line.strip().split(',')]
                # Each design row is registered under two sample keys:
                # the group-level one and the per-replicate one.
                sample_keys = [lspl[0] + Postfix[1], lspl[0] + '_R' + lspl[1]]
                sample_keys = [
                    trackhub.helpers.sanitize(key.replace(".", "_"), strict=False)
                    for key in sample_keys
                ]
                for key in sample_keys:
                    sampleDesignDict[key] = {}
                for param, column in paramColn.items():
                    value = lspl[column]
                    for key in sample_keys:
                        sampleDesignDict[key][param] = value
                    designDict.setdefault(param, {})[value] = value

    fileList = []
    # Previously the close() call sat after `break` and never ran.
    with open(ListFile, 'r') as fin:
        for line in fin:
            ifile = [x.strip() for x in line.strip().split('\t')]
            colour = ""
            if sampleDesignDict:
                kfile = trackhub.helpers.sanitize(ifile[0].replace(".", "_"), strict=False)
                if kfile in sampleDesignDict and "color" in sampleDesignDict[kfile]:
                    # Convert "#RRGGBB" into the "R,G,B" form used for track colours.
                    h = sampleDesignDict[kfile]["color"].lstrip('#')
                    colour = ','.join(str(int(h[i:i+2], 16)) for i in (0, 2, 4))
            if not colour.strip():
                colour = '0,0,178'
            fileList.append((ifile[1], colour, ifile[0]))

    fileList = sorted(fileList, key=lambda x: x[2])

    # Initialize the components of a track hub, already connected together
    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name="RNISRS_hub",
        short_label='Regeneromics Shared Resource hub',
        long_label='Regeneration Next Initiative Regeneromics Shared Resource hub',
        genome=Genome,
        email=EMAIL)

    # create compositeTracks
    if sampleDesignDict:
        # NOTE(review): 'singlal' looks like a typo for 'signal'; it is left
        # unchanged here because the label is written into the rendered hub.
        composite = trackhub.CompositeTrack(
            name = 'composite',
            short_label='singlal'
        )
        # Add those subgroups to the composite track
        subgroups = []
        for k in designDict.keys():
            if k != 'color':
                subg = trackhub.SubGroupDefinition(
                    name=k,
                    label=k,
                    mapping=designDict[k]
                )
                subgroups.append(subg)

        composite.add_subgroups(subgroups)

        # Add the composite track to the trackDb
        trackdb.add_tracks(composite)
        signal_view = trackhub.ViewTrack(
            name='signalviewtrack',
            view='signal',
            short_label='Signal')
        composite.add_view(signal_view)
        # The regions view is attached to the composite, but every track
        # below is added to the signal view.
        regions_view = trackhub.ViewTrack(
            name='regionsviewtrack',
            view='regions',
            short_label='Regions')
        composite.add_view(regions_view)

    for ifile, color, sample_id in fileList:
        extension = os.path.splitext(ifile)[1].replace(".", "").lower()
        filename = trackhub.helpers.sanitize(os.path.splitext(os.path.basename(ifile))[0].replace(".", "_"), strict=False)
        if extension in ['bed','broadpeak','narrowpeak']:
            # convert bed to bigbed
            # Quote every path so names with spaces or shell metacharacters
            # cannot break (or inject into) the command lines.
            q_input = shlex.quote(ifile)
            q_sorted = shlex.quote(ifile + "_sorted.bed")
            q_clipped = shlex.quote(ifile + "_srt.bed")
            q_bigbed = shlex.quote(ifile + ".bb")
            q_chrsize = shlex.quote(ChrSize)
            # sort bed file
            os.system("sort -k1,1 -k2,2n " + q_input + " >" + q_sorted)
            # drop "track" lines and cap the score column at 1000
            os.system("awk '$1 != \"track\" {$5=($5>1000)?1000:$5; print ;}' " + q_sorted + " > " + q_clipped)
            # bedToBigBed
            if extension == "bed":
                cmd = "bedToBigBed " + q_clipped + " " + q_chrsize + " " + q_bigbed
                extension = "bb"
            elif extension == "broadpeak":
                cmd = "bedToBigBed -type=bed6+3 -as=broadPeak.as " + q_clipped + " " + q_chrsize + " " + q_bigbed
            else:  # narrowpeak
                cmd = "bedToBigBed -type=bed6+4 -as=narrowPeak.as " + q_clipped + " " + q_chrsize + " " + q_bigbed
            os.system(cmd)
            # change ifile to new bigbed file
            ifile = ifile + ".bb"
        if extension in TrackType.keys():
            if sampleDesignDict:
                track = trackhub.Track(
                    name=filename,
                    source=filename,
                    short_label=sample_id,
                    long_label=filename,
                    color=color,
                    visibility=Visibility[extension],
                    tracktype=TrackType[extension],
                    subgroups=sampleDesignDict[filename],
                    autoScale='on')
                signal_view.add_tracks(track)
            else:
                track = trackhub.Track(
                    name=filename,
                    source=filename,
                    short_label=sample_id,
                    long_label=filename,
                    color=color,
                    visibility=Visibility[extension],
                    tracktype=TrackType[extension],
                    autoScale='on')
                trackdb.add_tracks(track)
            # Link the data file into <OutFolder>/<Genome>/ next to the hub files.
            linkname = os.path.join(OutFolder, Genome, filename+"."+TrackType[extension].split()[0])
            makedir(os.path.join(OutFolder, Genome))
            os.symlink("../../"+ifile, linkname)

    hub.render(staging=OutFolder)
示例#24
0
import trackhub

# The unpacking order follows the other examples in this file
# (hub, genomes_file, genome, trackdb); genome and genomes_file were
# previously bound to each other's object.
hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name="bigMaf",
    short_label="bigMaf",
    long_label="bigMaf",
    genome="hg38",
    email="*****@*****.**")

# A single bigMaf track; `frames` points at the companion frames file.
track = trackhub.Track(
    name="bigMaf",
    frames="http://genome.ucsc.edu/goldenPath/help/examples/bigMafFrames.bb",
    bigDataUrl="http://genome.ucsc.edu/goldenPath/help/examples/bigMaf.bb",
    tracktype="bigMaf",
)
trackdb.add_tracks(track)

# Upload the hub to localhost under the given remote directory.
trackhub.upload.upload_hub(
    hub=hub, host="localhost", remote_dir="example_hubs/example_bigMaf_hub"
)
示例#25
0
# Parameter validation is switched off for this example.
trackhub.settings.VALIDATE = False
# The unpacking order follows the other examples in this file
# (hub, genomes_file, genome, trackdb).
hub, genomes_file, genome, trackdb = trackhub.default_hub(
    hub_name="bigLolly",
    short_label="bigLolly",
    long_label="bigLolly",
    genome="hg38",
    defaultPos="chr21:25891755-25891870",
    email="*****@*****.**",
)

# Settings whose names contain a dot cannot be written as Python keyword
# arguments, so they are passed through a dict. The two labels need distinct
# indices: with a repeated "yAxisLabel.1" key the first entry was silently
# overwritten by the second.
add_kwargs = {
    "yAxisLabel.0": "0 on 30,30,190 0",
    "yAxisLabel.1": "5 on 30,30,190 5"
}

track = trackhub.Track(
    tracktype="bigLolly",
    bigDataUrl=
    "http://genome.ucsc.edu/goldenPath/help/examples/bigLollyExample3.bb",
    name="bigLolly",
    lollySizeField="lollySize",
    visibility="full",
    noStems="on",
    lollyMaxSize=10,
    **add_kwargs)
trackdb.add_tracks(track)

# Upload the hub to localhost under the given remote directory.
trackhub.upload.upload_hub(hub=hub,
                           host="localhost",
                           remote_dir="example_hubs/example_bigLolly_hub")
示例#26
0
# point to your own files. In this example we use the path to the data
# included with trackhub.

# Build one track per entry of data['samples'] and register it in the trackdb.
for file_info in data['samples']:
    fileURL = file_info['URL']

    # Use the explicit name when given, otherwise fall back to the short label.
    name = file_info['name'] if 'name' in file_info else file_info['shortLabel']
    # Track names can't have any spaces or special chars.
    name = trackhub.helpers.sanitize(re.sub(' ', '_', name))

    track_settings = dict(
        name=name,
        url=fileURL,  # filename to build this track from
        visibility='full',  # shows the full signal
        color=file_info['color'],
        autoScale='on',  # allow the track to autoscale
        tracktype=file_info['trackType'],  # required when making a track
    )
    track = trackhub.Track(**track_settings)

    # Each track is added to the trackdb
    trackdb.add_tracks(track)

# In this example the hub is staged locally rather than uploaded: the hub
# files are written under ``tmp_dir``.
# NOTE(review): an earlier version of this comment referred to an
# "example_hub" directory and to symlinks to the tracks' data files; the code
# here stages into ``tmp_dir`` -- confirm the directory name and whether
# symlinks are created for url-based tracks.
# The staged directory can then be pushed to GitHub or rsynced to a server.
trackhub.upload.stage_hub(hub=hub, staging=tmp_dir)

# Walk the staging directory and build the full path of every staged file.
# NOTE(review): nothing uses ``full_path`` within this snippet; the loop body
# appears to be cut off here.
for r, d, f in os.walk(tmp_dir):
    for file in f:
        full_path = os.path.join(r, file)
示例#27
0
def _build_track(config, fmt, name):
    """Create the trackhub.Track for ``name`` from the ``big<fmt>*`` config sections.

    ``fmt`` is 'Bed' or 'Wig'. The global settings of that format are overlaid
    with the optional per-file settings for ``name``; ``trackType`` becomes the
    track type and every remaining setting is added to the track as a string.
    """
    source = config['big{}Files'.format(fmt)][name]
    settings = dict(config['big{}GlobalSettings'.format(fmt)])
    track_type = settings.pop('trackType')
    per_file = (config.get('big{}FileSettings'.format(fmt)) or {}).get(name)
    if per_file is not None:
        settings.update(per_file)

    track = trackhub.Track(
        name=name,
        source=os.path.basename(source),
        url=os.path.basename(source),
        tracktype=track_type)
    for setting, value in settings.items():
        # Keyword expansion instead of exec(): works for values containing
        # quotes and for setting names that are not valid identifiers.
        track.add_params(**{setting: str(value)})
    return track


def main():
    """Stage a UCSC track hub described by a YAML configuration file.

    Command line arguments: ``config`` (the YAML file) and ``stagingDir``
    (an existing local output directory). The configuration must define
    ``hub``, ``genomesFile``, ``email`` and at least one of ``bigBedFiles`` /
    ``bigWigFiles`` together with the matching ``big<Fmt>GlobalSettings``.
    Tracks listed under ``superTracks`` are attached to their super track;
    all remaining tracks are added directly to the trackDb.

    Raises:
        OSError: if ``stagingDir`` does not exist.
        KeyError: if no ``big*Files`` key is present in the configuration.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""Wrapper to facilitate the creation of
        hub components using the package 'trackhub'.""")

    parser.add_argument('config', help="""The yaml configuration file.""")

    parser.add_argument('stagingDir', help="""The local output directory.""")
    #TODO: staging/upload, copy/link source files
    # args for trackhub
    defaultPos = None

    utils.add_file_options(parser)
    utils.add_logging_options(parser)
    args = parser.parse_args()
    utils.update_logging(args)

    msg = "[get-hub]: {}".format(' '.join(sys.argv))
    logger.info(msg)

    # check output path
    if os.path.exists(args.stagingDir):
        args.stagingDir = os.path.join(args.stagingDir, '')
    else:
        msg = "Invalid output path or wrong permission: {}. Terminating.".format(
            args.stagingDir)
        raise OSError(msg)

    # read configuration file
    # NOTE(review): FullLoader is kept as-is; consider yaml.safe_load if the
    # configuration can come from an untrusted source.
    with open(args.config) as config_handle:
        config = yaml.load(config_handle, Loader=yaml.FullLoader)
    # first check if we have at least one file format defined
    if not any(re.search('^big.+?Files$', k) for k in config):
        msg = """There are no files listed in the configuration file and/or
        the wrong key was used. See the example configuration file provided
        with the package."""
        raise KeyError(msg)
    # supported file formats so far; only those present in the config are used,
    # so a hub with only bigBed or only bigWig files no longer fails on the
    # missing section.
    formats = [
        fmt for fmt in ['Bed', 'Wig'] if 'big{}Files'.format(fmt) in config
    ]
    required_keys = ['hub', 'genomesFile', 'email']
    for fmt in formats:
        required_keys.extend(
            ['big{}Files'.format(fmt), 'big{}GlobalSettings'.format(fmt)])
    utils.check_keys_exist(config, required_keys)

    # default hub settings: the long label falls back to the short label,
    # which itself falls back to the hub name.
    short_label = config.get('shortLabel', config['hub'])
    long_label = config.get('longLabel', short_label)

    hub = trackhub.Hub(hub=config['hub'],
                       short_label=short_label,
                       long_label=long_label,
                       email=config['email'],
                       filename='hub.txt')

    genome_kwargs = {}
    if defaultPos:
        genome_kwargs['defaultPos'] = defaultPos
    genome = trackhub.Genome(config['genomesFile'], **genome_kwargs)
    genomesFile = trackhub.GenomesFile(filename='genomes.txt')
    trackDb = trackhub.TrackDb()
    hub.add_genomes_file(genomesFile)
    genomesFile.add_genome(genome)
    genome.add_trackdb(trackDb)

    # standard settings - individual tracks and/or tracks grouped with superTrack container
    # check if we have superTracks, else add tracks
    superTracks = config.get('superTracks') or {}
    superTrackSettings = config.get('superTrackSettings') or {}
    # SuperTrack objects are kept in a dict keyed by name rather than in
    # exec()-created local variables, which do not persist between exec()
    # calls on Python 3.13+ and fail for names that are not identifiers.
    superTrackObjects = {}
    for superTrack in superTracks:
        settings = superTrackSettings.get(superTrack, {})
        s = trackhub.SuperTrack(
            name=superTrack,
            short_label=settings.get("shortLabel", superTrack),
            long_label=settings.get("longLabel", superTrack),
        )
        trackDb.add_tracks(s)
        superTrackObjects[superTrack] = s

    # first add tracks associated with superTracks, then add remaining tracks if any
    for superTrack, tracks in superTracks.items():
        for name in tracks:
            for fmt in formats:
                if name in config['big{}Files'.format(fmt)]:
                    superTrackObjects[superTrack].add_tracks(
                        _build_track(config, fmt, name))
                    # First matching format wins, so the name is added once.
                    break

    # now process tracks not attached to a superTrack
    processed = {name for tracks in superTracks.values() for name in tracks}
    for fmt in formats:
        for name in config['big{}Files'.format(fmt)]:
            if name not in processed:
                trackDb.add_tracks(_build_track(config, fmt, name))

    # TODO: files need to exist to be linked to the staging dir, currently only
    # the hub files/structure is created, so this ends with ValueError: target {} not found.

    # stage the hub (no upload)
    trackhub.upload.stage_hub(hub, staging=args.stagingDir)
示例#28
0
import trackhub

# Hub-level settings for the bigInteract example.
hub_settings = dict(
    hub_name="bigInteract",
    defaultPos="chr3:63820967-63880091",
    short_label="bigInteract",
    long_label="bigInteract",
    genome="hg19",
    email="*****@*****.**",
)
hub, genomes_file, genome, trackdb = trackhub.default_hub(**hub_settings)

# A single bigInteract track backed by the UCSC example file.
track_settings = dict(
    name="bigInteract",
    tracktype="bigInteract",
    bigDataUrl="http://genome.ucsc.edu/goldenPath/help/examples/interact/interactExample3.inter.bb",
    shortLabel="bigInteract",
    longLabel="bigInteract",
    visibility="pack",
    spectrum="on",
    scoreMin="175",
    maxHeightPixels="300:150:20",
)
track = trackhub.Track(**track_settings)
trackdb.add_tracks(track)

# Upload the hub to localhost under the given remote directory.
trackhub.upload.upload_hub(
    hub=hub,
    host="localhost",
    remote_dir="example_hubs/example_bigInteract_hub",
)