def make_bigwig_trackhub(libraries, trackdb):
    """Attach a composite of bigWig signal tracks to ``trackdb``.

    A composite track named ``composite`` is created with two subgroups
    (``cluster`` on dimX, ``multi`` on dimY) and a single ``signal`` view.
    For every row yielded by ``libraries.iterrows()`` two tracks are added
    to that view, one for the ``uniq`` column and one for the ``all``
    column of the row.

    Args:
        libraries: object providing ``iterrows()``; each row must expose
            ``cluster_assignment``, ``analysis_name`` and the ``uniq`` /
            ``all`` entries that are handed to ``make_home_url``.
        trackdb: object whose ``add_tracks`` receives the composite track.
    """
    cluster_subgroup = trackhub.SubGroupDefinition(
        name='cluster', label='cluster', mapping=cluster_mapping)
    multi_subgroup = trackhub.SubGroupDefinition(
        name='multi', label='multi', mapping={'uniq': 'uniq', 'all': 'all'})

    signal_composite = trackhub.CompositeTrack(
        name='composite',
        short_label='signal',
        dimensions='dimX=cluster dimY=multi',
        tracktype='bigWig',
        visibility='dense',
    )
    signal_composite.add_subgroups([cluster_subgroup, multi_subgroup])
    trackdb.add_tracks(signal_composite)

    view = trackhub.ViewTrack(
        name='signalviewtrack',
        view='signal',
        visibility='dense',
        tracktype='bigWig',
        short_label='Signal',
    )
    signal_composite.add_view(view)

    read_kinds = ('uniq', 'all')
    for library_index, (_library_id, row) in enumerate(libraries.iterrows()):
        for kind_index, read_kind in enumerate(read_kinds):
            cluster_label = human_cluster_mapping[row.cluster_assignment]
            # Ten slots per library keep both tracks of a library adjacent.
            order = 10 * library_index + kind_index
            track_name = '_'.join(
                ("{:04d}".format(order), row.analysis_name, read_kind))
            view.add_tracks(trackhub.Track(
                url=make_home_url(row[read_kind]),
                name=track_name,
                visibility='full',
                tracktype='bigWig',
                subgroups={'cluster': cluster_label, 'multi': read_kind},
                color=hex_to_ucsc_color(colors[row.cluster_assignment]),
                priority=order,
            ))
def make_bam_trackhub(libraries, trackdb):
    """Attach a composite of BAM read tracks to ``trackdb``.

    A composite track named ``reads`` with a ``cluster`` subgroup and a
    single ``reads`` view is created. For every row of ``libraries`` a
    symlink to the BAM file and to its ``.bai`` index is created in the
    current working directory (when no such path exists yet) and one BAM
    track is added to the view.

    Args:
        libraries: object providing ``iterrows()``; each row must expose
            ``cluster_assignment``, ``analysis_name`` and ``bam``.
        trackdb: object whose ``add_tracks`` receives the composite track.
    """
    cluster_subgroup = trackhub.SubGroupDefinition(
        name='cluster', label='cluster', mapping=cluster_mapping)

    reads_composite = trackhub.CompositeTrack(
        name='reads',
        short_label='reads',
        dimensions='dimX=cluster',
        tracktype='bam',
        visibility='dense',
    )
    reads_composite.add_subgroups([cluster_subgroup])
    trackdb.add_tracks(reads_composite)

    reads_view = trackhub.ViewTrack(
        name='readview',
        view='reads',
        visibility='dense',
        tracktype='bam',
        short_label='Reads',
    )
    reads_composite.add_view(reads_view)

    # The working directory is remembered and restored on exit, even though
    # nothing in this function currently changes it.
    starting_dir = os.getcwd()

    try:
        for position, (_library_id, row) in enumerate(libraries.iterrows()):
            cluster_label = human_cluster_mapping[row.cluster_assignment]
            bam_name = os.path.basename(row.bam)

            # Link the BAM and its index side by side under the BAM's basename.
            link_plan = (
                (row.bam, bam_name),
                (row.bam + '.bai', bam_name + '.bai'),
            )
            for target, link_name in link_plan:
                if not os.path.exists(link_name):
                    os.symlink(target, link_name)

            track_name = format(position, '03d') + '_' + row.analysis_name + '_reads'
            reads_view.add_tracks(trackhub.Track(
                url=make_home_url(row['bam']),
                name=track_name,
                visibility='full',
                tracktype='bam',
                subgroups={'cluster': cluster_label},
                color=hex_to_ucsc_color(colors[row.cluster_assignment]),
                priority=position,
            ))
    finally:
        os.chdir(starting_dir)
示例#3
0
def create_trackhub(OutFolder,ListFile,Genome,ChrSize, EMAIL,DesignFile):
    """Build a track hub from a design file and a track list, then render it.

    The design file is comma-separated and its first four header columns
    must be ``group,replicate,fastq_1,fastq_2``. Every header column named
    ``track_<key>`` supplies a per-sample parameter ``<key>``. The ``color``
    key is read as a hex ``RRGGBB`` value (optional leading ``#``); every
    other key becomes a subgroup of a composite track.

    The list file is tab-separated: sample id in the first field, track file
    path in the second. ``bed``, ``broadpeak`` and ``narrowpeak`` files are
    sorted, score-capped at 1000 and converted with ``bedToBigBed`` before
    being added. Files whose extension is not a key of ``TrackType`` are
    ignored. Each added track is symlinked under ``OutFolder/Genome``.

    Args:
        OutFolder: staging directory the hub is rendered into.
        ListFile: path of the tab-separated track list.
        Genome: genome name passed to ``trackhub.default_hub``.
        ChrSize: chromosome sizes file handed to ``bedToBigBed``.
        EMAIL: contact e-mail passed to ``trackhub.default_hub``.
        DesignFile: path of the comma-separated design file.

    Raises:
        SystemExit: when the design file header does not start with the
            expected four columns.
    """
    ERROR_STR = 'ERROR: Please check design file'
    HEADER = ['group', 'replicate', 'fastq_1', 'fastq_2']

    makedir(OutFolder)

    sampleDesignDict = {}
    designDict = {}
    # ``with`` guarantees the design file is closed, including on sys.exit.
    with open(DesignFile, 'r') as dIn:
        header = dIn.readline().strip().split(',')
        if header[:4] != HEADER:
            print("{} header: {} != {}".format(ERROR_STR,','.join(header),','.join(HEADER)))
            sys.exit(1)

        # Map "<key>" -> column index for every "track_<key>" header column.
        paramColn = {
            column[6:]: index
            for index, column in enumerate(header)
            if column[:6] == "track_"
        }

        if paramColn:
            for line in dIn:
                if not line.strip():
                    # Blank lines (e.g. a trailing newline) carry no sample.
                    continue
                lspl = [x.strip() for x in line.strip().split(',')]
                # NOTE(review): Postfix is not a parameter of this function;
                # presumably a module-level global -- confirm it is defined.
                sample_keys = [lspl[0]+Postfix[1], lspl[0]+'_R'+lspl[1]]
                params = {k: lspl[col] for k, col in paramColn.items()}
                for key in sample_keys:
                    clean_key = trackhub.helpers.sanitize(key.replace(".", "_"), strict=False)
                    sampleDesignDict[clean_key] = dict(params)
                for k, value in params.items():
                    designDict.setdefault(k, {})[value] = value

    fileList = []
    with open(ListFile, 'r') as fin:
        for line in fin:
            if not line.strip():
                continue
            ifile = [x.strip() for x in line.strip().split('\t')]
            colour = ""
            if sampleDesignDict:
                kfile = trackhub.helpers.sanitize(ifile[0].replace(".", "_"), strict=False)
                h = sampleDesignDict.get(kfile, {}).get("color", "").lstrip('#')
                if h:
                    # Hex RRGGBB -> "R,G,B" decimal string.
                    colour = ','.join(str(int(h[i:i+2], 16)) for i in (0, 2, 4))
            if not colour.strip():
                colour = '0,0,178'
            fileList.append((ifile[1],colour,ifile[0]))

    fileList.sort(key=lambda entry: entry[2])

    # Initialize the components of a track hub, already connected together
    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name="RNISRS_hub",
        short_label='Regeneromics Shared Resource hub',
        long_label='Regeneration Next Initiative Regeneromics Shared Resource hub',
        genome=Genome,
        email=EMAIL)

    # A composite track (with views) is only built when the design file
    # supplied per-sample parameters.
    if sampleDesignDict:
        composite = trackhub.CompositeTrack(
            name = 'composite',
            short_label='singlal'
        )
        # 'color' is a display attribute, not a subgroup.
        subgroups = [
            trackhub.SubGroupDefinition(name=k, label=k, mapping=mapping)
            for k, mapping in designDict.items()
            if k != 'color'
        ]
        composite.add_subgroups(subgroups)

        # Add the composite track to the trackDb
        trackdb.add_tracks(composite)
        signal_view = trackhub.ViewTrack(
            name='signalviewtrack',
            view='signal',
            short_label='Signal')
        composite.add_view(signal_view)
        regions_view = trackhub.ViewTrack(
            name='regionsviewtrack',
            view='regions',
            short_label='Regions')
        composite.add_view(regions_view)

    for ifile, color, sample_id in fileList:
        extension = os.path.splitext(ifile)[1].replace(".", "").lower()
        filename = trackhub.helpers.sanitize(
            os.path.splitext(os.path.basename(ifile))[0].replace(".", "_"),
            strict=False)
        if extension in ('bed', 'broadpeak', 'narrowpeak'):
            # NOTE(review): paths are interpolated into shell commands
            # unquoted; paths containing spaces or shell metacharacters
            # will break these commands.
            # Sort by chromosome, then start position.
            os.system("sort -k1,1 -k2,2n " + ifile +" >" + ifile + "_sorted.bed")
            # Drop "track" lines and cap the score column at 1000.
            os.system("awk '$1 != \"track\" {$5=($5>1000)?1000:$5; print ;}' "+ifile+"_sorted.bed > "+ifile+"_srt.bed")
            if extension == "bed":
                cmd = "bedToBigBed "+ifile+"_srt.bed"+" "+ChrSize+" "+ifile+".bb"
                extension = "bb"
            elif extension == "broadpeak":
                cmd = "bedToBigBed -type=bed6+3 -as=broadPeak.as "+ifile+"_srt.bed"+" "+ChrSize+" "+ifile+".bb"
            else:
                cmd = "bedToBigBed -type=bed6+4 -as=narrowPeak.as "+ifile+"_srt.bed"+" "+ChrSize+" "+ifile+".bb"
            os.system(cmd)
            # From here on the track refers to the converted bigBed file.
            ifile = ifile+".bb"

        if extension not in TrackType:
            continue

        track_kwargs = dict(
            name=filename,
            source=filename,
            short_label=sample_id,
            long_label=filename,
            color=color,
            visibility=Visibility[extension],
            tracktype=TrackType[extension],
            autoScale='on')
        if filename in sampleDesignDict:
            signal_view.add_tracks(
                trackhub.Track(subgroups=sampleDesignDict[filename], **track_kwargs))
        else:
            # Files absent from the design become stand-alone tracks instead
            # of raising KeyError on the subgroup lookup.
            trackdb.add_tracks(trackhub.Track(**track_kwargs))

        # Only the first word of the track type is used as the link extension.
        linkname=os.path.join(OutFolder, Genome, filename+"."+TrackType[extension].split()[0])
        makedir(os.path.join(OutFolder, Genome))
        os.symlink("../../"+ifile, linkname)

    hub.render(staging=OutFolder)
示例#4
0
def make_trackhub():
    """Build the ``C1_peng_20180710_cluster`` bigWig hub and upload it.

    Cluster memberships are loaded via
    ``read_peng_20180710_cluster_memberships``; each distinct
    (cluster_name, value, color, rgb) combination yields two bigWig tracks,
    one for the ``all`` and one for the ``uniq`` file suffix, inside a
    composite track with a ``multiread`` subgroup. The trackdb is printed
    and the hub is uploaded to ``localhost``.
    """
    import trackhub

    def to_rgb_string(channel):
        # Scale each channel by 255 and join as "R,G,B".
        return ','.join(str(int(level * 255)) for level in channel)

    memberships = pandas.DataFrame(
        read_peng_20180710_cluster_memberships(),
        columns=['cell_id', 'cluster_name', 'value', 'color'])
    memberships['rgb'] = memberships['color'].apply(to_rgb_string)

    cluster_colors = memberships[
        ['cluster_name', 'value', 'color', 'rgb']].drop_duplicates()

    hub, _genomes_file, _genome, trackdb = trackhub.default_hub(
        hub_name="C1_peng_20180710_cluster",
        short_label="C1_peng_20180710_cluster",
        genome="mm10",
        email="*****@*****.**")

    multiread_subgroup = trackhub.SubGroupDefinition(
        name='multiread',
        label='multiread',
        mapping={
            'all': 'all_reads',
            'uniq': 'unique_only',
        })

    bigwig_composite = trackhub.CompositeTrack(
        name='composite',
        short_label='bigwigs',
        dimensions='dimX=multiread',
        sortOrder='multiread',
        visibility='full',
        tracktype='bigWig',
    )
    bigwig_composite.add_subgroups([multiread_subgroup])
    trackdb.add_tracks(bigwig_composite)

    view = trackhub.ViewTrack(
        name='signal',
        view='signal',
        visibility='full',
        tracktype='bigWig')
    bigwig_composite.add_view(view)

    base_url = 'http://woldlab.caltech.edu/~diane/C1_peng_20180710_cluster_bigwigs/'
    # (file suffix, multiread subgroup key), processed in this order.
    suffix_plan = (
        ('-mm10-M4-male_all.bw', 'all'),
        ('-mm10-M4-male_uniq.bw', 'uniq'),
    )
    for suffix, multiread in suffix_plan:
        for _index, row in cluster_colors.iterrows():
            track_name = row.value + multiread
            track_url = base_url + row.value + suffix
            view.add_tracks(trackhub.Track(
                name=trackhub.helpers.sanitize(track_name),
                long_label=row.cluster_name + ' ' + multiread,
                url=track_url,
                visibility='full',
                tracktype='bigWig',
                subgroups={'multiread': multiread},
                color=row.rgb))

    print(trackdb)
    trackhub.upload.upload_hub(
        hub=hub,
        host='localhost',
        remote_dir='/woldlab/loxcyc/home/diane/public_html/C1_peng_20180710_cluster_bigwigs/',
    )
示例#5
0
    email="*****@*****.**")

# Subgroups provide a way of tagging tracks. If you give the same tag to
# a bigwig and a bigbed, you can use that tag to select them both. This is
# useful for, e.g., ChIP-seq tracks where the tag is "sample" or
# "antibody", and you can select both the signal and the called peaks at
# the same time.
#
# Here we're making some contrived subgroups just to illustrate their use.
subgroups = [

    # A subgroup to select the replicate number
    trackhub.SubGroupDefinition(name='num',
                                label='Number',
                                mapping={
                                    '0': 'zero',
                                    '1': 'one',
                                    '2': 'two',
                                }),

    # A contrived subgroup that will only tag it as "yes" if it's sample 1.
    trackhub.SubGroupDefinition(name='s1',
                                label='is_sample_1',
                                mapping={
                                    'y': 'Yes',
                                    'n': 'No',
                                }),

    # While the two different views we create below are a good way of
    # turning on/off the signal or regions in bulk, this subgroup allows us
    # to sort the tracks by "name" and then by "kind". This is helpful for
示例#6
0
def create_trackhub(OutFolder,ListFile,Genome,EMAIL,DesignFile,Postfix,PathPrefix=''):
    """Build a track hub from a design file and a track list, then render it.

    The design file is comma-separated and its first four header columns
    must be ``group,replicate,fastq_1,fastq_2``. Every header column named
    ``track_<key>`` supplies a per-sample parameter ``<key>``. The ``color``
    key is read as a hex ``RRGGBB`` value (optional leading ``#``) and
    overrides the colour from the list file; every other key becomes a
    subgroup of a composite track.

    The list file is tab-separated: track file path in the first field and
    an optional colour in the second. ``bed``, ``broadpeak`` and
    ``narrowpeak`` files, and files whose extension is not a key of
    ``TrackType``, are skipped. Each added track is symlinked under
    ``OutFolder/Genome``.

    Args:
        OutFolder: staging directory the hub is rendered into.
        ListFile: path of the tab-separated track list.
        Genome: genome name passed to ``trackhub.default_hub``.
        EMAIL: contact e-mail passed to ``trackhub.default_hub``.
        DesignFile: path of the comma-separated design file.
        Postfix: indexable pair; ``Postfix[1]`` is appended to the group
            name and ``Postfix[0]`` to ``<group>_R<replicate>`` when
            building the sample keys.
        PathPrefix: string prepended (after stripping whitespace) to every
            track file path.

    Raises:
        SystemExit: when the design file header does not start with the
            expected four columns.
    """
    ERROR_STR = 'ERROR: Please check design file'
    HEADER = ['group', 'replicate', 'fastq_1', 'fastq_2']

    makedir(OutFolder)

    sampleDesignDict = {}
    designDict = {}
    # ``with`` guarantees the design file is closed, including on sys.exit.
    with open(DesignFile, 'r') as dIn:
        header = dIn.readline().strip().split(',')
        if header[:4] != HEADER:
            print("{} header: {} != {}".format(ERROR_STR,','.join(header),','.join(HEADER)))
            sys.exit(1)

        # Map "<key>" -> column index for every "track_<key>" header column.
        paramColn = {
            column[6:]: index
            for index, column in enumerate(header)
            if column[:6] == "track_"
        }

        if paramColn:
            for line in dIn:
                if not line.strip():
                    # Blank lines (e.g. a trailing newline) carry no sample.
                    continue
                lspl = [x.strip() for x in line.strip().split(',')]
                sample_keys = [
                    lspl[0]+Postfix[1],
                    lspl[0]+'_R'+lspl[1]+Postfix[0],
                ]
                params = {k: lspl[col] for k, col in paramColn.items()}
                for key in sample_keys:
                    clean_key = trackhub.helpers.sanitize(key.replace(".", "_"), strict=False)
                    sampleDesignDict[clean_key] = dict(params)
                for k, value in params.items():
                    designDict.setdefault(k, {})[value] = value

    fileList = []
    with open(ListFile, 'r') as fin:
        for line in fin:
            if not line.strip():
                continue
            # Strip only the line terminator before splitting so that a
            # trailing tab (empty colour field) still yields two fields.
            fields = [x.strip() for x in line.rstrip('\r\n').split('\t')]
            ifile = fields[0]
            colour = fields[1] if len(fields) > 1 else ''
            if sampleDesignDict:
                kfile = trackhub.helpers.sanitize(
                    os.path.splitext(os.path.basename(ifile))[0].replace(".", "_"),
                    strict=False)
                h = sampleDesignDict.get(kfile, {}).get("color", "").lstrip('#')
                if h:
                    # Hex RRGGBB -> "R,G,B" decimal string.
                    colour = ','.join(str(int(h[i:i+2], 16)) for i in (0, 2, 4))
            if not colour:
                colour = '0,0,178'
            fileList.append((PathPrefix.strip()+ifile,colour))

    # Initialize the components of a track hub, already connected together
    hub, genomes_file, genome, trackdb = trackhub.default_hub(
        hub_name="RNISRS_hub",
        short_label='Regeneromics Shared Resource hub',
        long_label='Regeneration Next Initiative Regeneromics Shared Resource hub',
        genome=Genome,
        email=EMAIL)

    # A composite track (with views) is only built when the design file
    # supplied per-sample parameters.
    if sampleDesignDict:
        composite = trackhub.CompositeTrack(
            name = 'composite',
            short_label='singlal'
        )
        # 'color' is a display attribute, not a subgroup.
        subgroups = [
            trackhub.SubGroupDefinition(name=k, label=k, mapping=mapping)
            for k, mapping in designDict.items()
            if k != 'color'
        ]
        composite.add_subgroups(subgroups)

        # Add the composite track to the trackDb
        trackdb.add_tracks(composite)
        signal_view = trackhub.ViewTrack(
            name='signalviewtrack',
            view='signal',
            short_label='Signal')
        composite.add_view(signal_view)
        regions_view = trackhub.ViewTrack(
            name='regionsviewtrack',
            view='regions',
            short_label='Regions')
        composite.add_view(regions_view)

    for ifile, color in fileList:
        extension = os.path.splitext(ifile)[1].replace(".", "").lower()
        filename = trackhub.helpers.sanitize(
            os.path.splitext(os.path.basename(ifile))[0].replace(".", "_"),
            strict=False)
        # Region files are not handled by this variant; unknown types are ignored.
        if extension in ('bed', 'broadpeak', 'narrowpeak') or extension not in TrackType:
            continue

        track_kwargs = dict(
            name=filename,
            source=ifile,
            color=color,
            visibility=Visibility[extension],
            tracktype=TrackType[extension],
            autoScale='on')
        if filename in sampleDesignDict:
            signal_view.add_tracks(
                trackhub.Track(subgroups=sampleDesignDict[filename], **track_kwargs))
        else:
            # Files absent from the design become stand-alone tracks.
            trackdb.add_tracks(trackhub.Track(**track_kwargs))

        # Use only the first word of the track type as the link extension so
        # a type string carrying extra fields cannot leak into the file name.
        linkname=os.path.join(OutFolder, Genome, filename+"."+TrackType[extension].split()[0])
        makedir(os.path.join(OutFolder, Genome))
        os.symlink(ifile, linkname)

    hub.render(staging=OutFolder)