Пример #1
0
    def setup(self):
        """Create an example hub with three tracks and connect all components.

        ``track1`` and ``track2`` are given a ``source`` only; ``track3``
        also passes an explicit ``filename``. Source paths are joined onto
        ``d``, which is defined outside this method.
        """
        self.hub = Hub(
            hub='example_hub',
            short_label='example hub',
            long_label='an example hub for testing',
            email='*****@*****.**',
        )
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        # Keyword arguments for each Track, in creation order.
        track_kwargs = (
            dict(name='track1', tracktype='bigBed',
                 source=os.path.join(d, 'random-hg38-0.bigBed')),
            dict(name='track2', tracktype='bigWig',
                 source=os.path.join(d, 'sine-hg38-0.bedgraph.bw')),
            dict(name='track3', tracktype='bigWig',
                 source=os.path.join(d, 'sine-hg38-1.bedgraph.bw'),
                 filename='3.bw'),
        )
        self.tracks = [Track(**kwargs) for kwargs in track_kwargs]

        # Wire the hierarchy together, starting at the hub.
        self.hub.add_genomes_file(self.genomes_file)
        self.genomes_file.add_genome(self.genome)
        self.genome.add_trackdb(self.trackdb)
        self.trackdb.add_tracks(self.tracks)
Пример #2
0
    def setup(self):
        """Build and connect an example hub inside a DeprecationWarning check.

        The whole construction runs under ``pytest.warns(DeprecationWarning)``.
        Tracks are created with ``local_fn``/``remote_fn`` keyword arguments
        -- presumably the deprecated spelling that triggers the warning
        (NOTE(review): confirm against the Track implementation).
        """
        # (name, tracktype, local basename joined onto ``d``, remote filename)
        track_files = (
            ('track1', 'bigBed', 'random-hg38-0.bigBed', '1.bigbed'),
            ('track2', 'bigWig', 'sine-hg38-0.bedgraph.bw', '2.bw'),
        )
        with pytest.warns(DeprecationWarning):
            self.hub = Hub(hub='example_hub',
                           short_label='example hub',
                           long_label='an example hub for testing',
                           email='*****@*****.**')
            self.genomes_file = GenomesFile()
            self.genome = Genome('dm3')
            self.trackdb = TrackDb()

            self.tracks = [
                Track(name=name,
                      tracktype=tracktype,
                      local_fn=os.path.join(d, basename),
                      remote_fn=remote_name)
                for name, tracktype, basename, remote_name in track_files
            ]

            # Connect the components, hub first.
            self.hub.add_genomes_file(self.genomes_file)
            self.genomes_file.add_genome(self.genome)
            self.genome.add_trackdb(self.trackdb)
            self.trackdb.add_tracks(self.tracks)
Пример #3
0
def create_hub(geo,
               email=None,
               upload=False,
               upload_dir=".",
               user=None,
               host=None):
    """Create a UCSC track hub from a Geo object.

    One bigWig track is made for every entry of ``geo.samples``. Tracks are
    grouped by the sample's ``'genome'`` value and each genome build gets
    its own ``Genome`` and ``TrackDb``. The hub is then rendered and, when
    ``upload`` is true, every track and finally the hub itself are uploaded.

    :param geo: object exposing ``gse`` (used for the hub name, labels and
        directory names) and ``samples`` (a mapping whose values are dicts
        with ``'genome'``, ``'name'`` and ``'gsm'`` keys).
    :param email: contact address passed to ``Hub``.
    :param upload: when true, upload the tracks and the hub after rendering.
    :param upload_dir: leading directory of every track's ``remote_fn``.
    :param user: forwarded to ``upload_track`` / ``upload_hub``.
    :param host: forwarded to ``upload_track`` / ``upload_hub``.
    :returns: ``None``.
    """
    hub = Hub(hub=geo.gse,
              short_label=geo.gse,
              long_label="Hub for {0}".format(geo.gse),
              email=email)
    genomes_file = GenomesFile()
    local_dir = geo.gse

    # Group the tracks by genome build.
    tracks_by_build = {}
    for sample in geo.samples.values():
        build = sample['genome']

        # Track names: every run of non-alphanumerics collapses to "_".
        name = re.sub('[^0-9a-zA-Z]+', '_', sample['name'])
        # NOTE(review): os.path.join builds the URL with the platform's path
        # separator; that is only "/" on POSIX systems.
        track = Track(
            name=name,
            url=os.path.join(HUB_URLBASE, geo.gse, build,
                             "{0}.bw".format(sample['gsm'])),
            tracktype='bigWig',
            short_label=sample['gsm'],
            long_label=name,
            color='128,128,0',
            maxHeightPixels='30:30:11',
        )
        basename = os.path.basename(track.url)
        track.local_fn = os.path.join(local_dir, basename)
        track.remote_fn = os.path.join(upload_dir, geo.gse, build, basename)
        tracks_by_build.setdefault(build, []).append(track)

    # Each build needs its own TrackDb: sharing one instance across builds
    # puts the tracks of every genome into a single trackDb that is then
    # attached to all of them.
    trackdbs = []
    for build, tracks in tracks_by_build.items():
        genome = Genome(build)
        trackdb = TrackDb()
        trackdb.add_tracks(tracks)
        genome.add_trackdb(trackdb)
        genomes_file.add_genome(genome)
        trackdbs.append(trackdb)

    # Attach the genomes file once, and only when at least one genome was
    # added (as before, nothing is attached for an empty sample set).
    if tracks_by_build:
        hub.add_genomes_file(genomes_file)

    hub.render()

    if upload:
        for trackdb in trackdbs:
            for track in trackdb.tracks:
                upload_track(track=track, host=host, user=user)

        upload_hub(hub=hub, host=host, user=user)
Пример #4
0
def create_hub(geo, email=None, upload=False, upload_dir=".", user=None, host=None):
    """Create a UCSC track hub from a Geo object.

    A bigWig track is created for each value of ``geo.samples`` and the
    tracks are grouped by the sample's ``'genome'`` entry. Every genome
    build receives its own ``Genome`` and ``TrackDb``. After rendering the
    hub, tracks and hub are uploaded when ``upload`` is true.

    :param geo: object with a ``gse`` attribute (hub name, labels and
        directory names) and a ``samples`` mapping whose values are dicts
        holding ``'genome'``, ``'name'`` and ``'gsm'``.
    :param email: contact address handed to ``Hub``.
    :param upload: upload tracks and hub after rendering when true.
    :param upload_dir: first component of each track's ``remote_fn``.
    :param user: passed through to ``upload_track`` / ``upload_hub``.
    :param host: passed through to ``upload_track`` / ``upload_hub``.
    :returns: ``None``.
    """
    hub = Hub(
        hub=geo.gse,
        short_label=geo.gse,
        long_label="Hub for {0}".format(geo.gse),
        email=email)
    genomes_file = GenomesFile()
    local_dir = geo.gse

    # genome build -> list of tracks for that build
    tracks_by_build = {}
    for sample in geo.samples.values():
        build = sample['genome']

        # Replace each run of non-alphanumeric characters with one "_".
        name = re.sub('[^0-9a-zA-Z]+', '_', sample['name'])
        # NOTE(review): the URL is assembled with os.path.join, so the
        # separator is platform dependent ("/" only on POSIX).
        track = Track(
            name=name,
            url=os.path.join(HUB_URLBASE, geo.gse, build, "{0}.bw".format(sample['gsm'])),
            tracktype='bigWig',
            short_label=sample['gsm'],
            long_label=name,
            color='128,128,0',
            maxHeightPixels='30:30:11',
        )
        basename = os.path.basename(track.url)
        track.local_fn = os.path.join(local_dir, basename)
        track.remote_fn = os.path.join(upload_dir, geo.gse, build, basename)
        tracks_by_build.setdefault(build, []).append(track)

    # One TrackDb per build. A single shared TrackDb would collect the
    # tracks of all builds and be attached to every genome.
    trackdbs = []
    for build, tracks in tracks_by_build.items():
        genome = Genome(build)
        trackdb = TrackDb()
        trackdb.add_tracks(tracks)
        genome.add_trackdb(trackdb)
        genomes_file.add_genome(genome)
        trackdbs.append(trackdb)

    # The genomes file is attached a single time, and (as before) only when
    # there was at least one genome to add.
    if tracks_by_build:
        hub.add_genomes_file(genomes_file)

    hub.render()

    if upload:
        for trackdb in trackdbs:
            for track in trackdb.tracks:
                upload_track(track=track, host=host, user=user)

        upload_hub(hub=hub, host=host, user=user)
Пример #5
0
    def setup(self):
        """Create the example hub components without connecting them.

        Only construction happens here; no ``add_*`` method is called.
        """
        hub_settings = dict(
            hub='example_hub',
            short_label='example hub',
            long_label='an example hub for testing',
            email='*****@*****.**',
        )
        self.hub = Hub(**hub_settings)
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        # (name, tracktype) for each track
        self.tracks = [
            Track(name=name, tracktype=tracktype)
            for name, tracktype in (('track1', 'bam'), ('track2', 'bigWig'))
        ]
Пример #6
0
class TestUpload(object):
    def setup(self):
        self.hub = Hub(
            hub='example_hub',
            short_label='example hub',
            long_label='an example hub for testing',
            email='*****@*****.**')
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        self.tracks = [
            Track(
                name='track1',
                tracktype='bam',
                local_fn='data/track1.bam'
            ),
            Track(
                name='track2',
                tracktype='bigWig',
                local_fn='data/track2.bigwig',
            ),
        ]
        self.hub.add_genomes_file(self.genomes_file)
        self.genomes_file.add_genome(self.genome)
        self.genome.add_trackdb(self.trackdb)
        self.trackdb.add_tracks(self.tracks)


    @unittest.skipUnless(os.path.exists('data/track1.bam'), 'No test data')
    def test_upload(self):
        self.hub.remote_fn = os.path.join(
            'uploaded_version',
            self.hub.remote_fn)
        self.hub.render()
        upload.upload_hub(
            'localhost',
            None,
            self.hub,
            symlink=True,
            symlink_dir='staging',
            run_local=True,)
        for t, level in self.hub.leaves(Track):
            upload.upload_track(
                track=t, host='localhost', user=None, run_local=True)

    def test_render(self):
        trackdb = str(self.trackdb)
        # make sure some of the trackdb rendered correctly
        assert 'track track1' in trackdb
        assert 'bigDataUrl track1.bam' in trackdb
Пример #7
0
    def setup(self):
        """Create a hub with a BAM and a bigWig track, then connect everything.

        Both tracks are given a ``local_fn`` below ``data/``.
        """
        self.hub = Hub(hub='example_hub',
                       short_label='example hub',
                       long_label='an example hub for testing',
                       email='*****@*****.**')
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        bam_track = Track(name='track1',
                          tracktype='bam',
                          local_fn='data/track1.bam')
        bigwig_track = Track(name='track2',
                             tracktype='bigWig',
                             local_fn='data/track2.bigwig')
        self.tracks = [bam_track, bigwig_track]

        # Link hub -> genomes file -> genome -> trackDb -> tracks.
        self.hub.add_genomes_file(self.genomes_file)
        self.genomes_file.add_genome(self.genome)
        self.genome.add_trackdb(self.trackdb)
        self.trackdb.add_tracks(self.tracks)
Пример #8
0
    def setup(self):
        """Create and connect an example hub holding three ``source`` tracks.

        Every source path is ``os.path.join(d, <basename>)`` where ``d`` is
        defined outside this method; ``track3`` also sets ``filename``.
        """
        def under_d(basename):
            # Path of ``basename`` joined onto ``d``.
            return os.path.join(d, basename)

        self.hub = Hub(hub='example_hub',
                       short_label='example hub',
                       long_label='an example hub for testing',
                       email='*****@*****.**')
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        self.tracks = [
            Track(name='track1', tracktype='bigBed',
                  source=under_d('random-hg38-0.bigBed')),
            Track(name='track2', tracktype='bigWig',
                  source=under_d('sine-hg38-0.bedgraph.bw')),
            Track(name='track3', tracktype='bigWig',
                  source=under_d('sine-hg38-1.bedgraph.bw'),
                  filename='3.bw'),
        ]

        # Connect the components from the hub downwards.
        self.hub.add_genomes_file(self.genomes_file)
        self.genomes_file.add_genome(self.genome)
        self.genome.add_trackdb(self.trackdb)
        self.trackdb.add_tracks(self.tracks)
Пример #9
0
class TestUpload(object):
    def setup(self):
        self.hub = Hub(hub='example_hub',
                       short_label='example hub',
                       long_label='an example hub for testing',
                       email='*****@*****.**')
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        self.tracks = [
            Track(name='track1', tracktype='bam', local_fn='data/track1.bam'),
            Track(
                name='track2',
                tracktype='bigWig',
                local_fn='data/track2.bigwig',
            ),
        ]
        self.hub.add_genomes_file(self.genomes_file)
        self.genomes_file.add_genome(self.genome)
        self.genome.add_trackdb(self.trackdb)
        self.trackdb.add_tracks(self.tracks)

    @unittest.skipUnless(os.path.exists('data/track1.bam'), 'No test data')
    def test_upload(self):
        self.hub.remote_fn = os.path.join('uploaded_version',
                                          self.hub.remote_fn)
        self.hub.render()
        upload.upload_hub(
            'localhost',
            None,
            self.hub,
            symlink=True,
            symlink_dir='staging',
            run_local=True,
        )
        for t, level in self.hub.leaves(Track):
            upload.upload_track(track=t,
                                host='localhost',
                                user=None,
                                run_local=True)

    def test_render(self):
        trackdb = str(self.trackdb)
        # make sure some of the trackdb rendered correctly
        assert 'track track1' in trackdb
        assert 'bigDataUrl track1.bam' in trackdb
Пример #10
0
class TestUpload(object):
    def setup(self):
        self.hub = Hub(
            hub='example_hub',
            short_label='example hub',
            long_label='an example hub for testing',
            email='*****@*****.**')
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        self.tracks = [
            Track(
                name='track1',
                tracktype='bam',
                local_fn='data/track1.bam'
            ),
            Track(
                name='track2',
                tracktype='bigWig',
                local_fn='data/track2.bigwig',
            ),
        ]
        self.hub.add_genomes_file(self.genomes_file)
        self.genomes_file.add_genome(self.genome)
        self.genome.add_trackdb(self.trackdb)
        self.trackdb.add_tracks(self.tracks)


    def test_upload(self):
        self.hub.remote_fn = os.path.join(
            'uploaded_version',
            self.hub.remote_fn)
        self.hub.render()
        upload.upload_hub(
            'localhost',
            None,
            self.hub,
            symlink=True,
            symlink_dir='staging',
            run_local=True,)
        for t, level in self.hub.leaves(Track):
            upload.upload_track(
                track=t, host='localhost', user=None, run_local=True)
Пример #11
0
    def setup(self):
        """Create unconnected example components and two tracks.

        No ``add_*`` call is made here, so nothing is linked together.
        """
        self.hub = Hub(hub='example_hub',
                       short_label='example hub',
                       long_label='an example hub for testing',
                       email='*****@*****.**')
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        bam_track = Track(name='track1', tracktype='bam')
        bigwig_track = Track(name='track2', tracktype='bigWig')
        self.tracks = [bam_track, bigwig_track]
Пример #12
0
    def setup(self):
        """Build the example hub with two ``local_fn`` tracks and connect it."""
        hub_kwargs = dict(
            hub='example_hub',
            short_label='example hub',
            long_label='an example hub for testing',
            email='*****@*****.**',
        )
        self.hub = Hub(**hub_kwargs)
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        # Keyword arguments for each Track, in creation order.
        track_kwargs = (
            dict(name='track1', tracktype='bam', local_fn='data/track1.bam'),
            dict(name='track2', tracktype='bigWig',
                 local_fn='data/track2.bigwig'),
        )
        self.tracks = [Track(**kwargs) for kwargs in track_kwargs]

        # Attach each component to its parent, starting at the hub.
        self.hub.add_genomes_file(self.genomes_file)
        self.genomes_file.add_genome(self.genome)
        self.genome.add_trackdb(self.trackdb)
        self.trackdb.add_tracks(self.tracks)
Пример #13
0
class TestUpload(object):
    def setup(self):
        self.hub = Hub(hub='example_hub',
                       short_label='example hub',
                       long_label='an example hub for testing',
                       email='*****@*****.**')
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        self.tracks = [
            Track(name='track1', tracktype='bam', local_fn='data/track1.bam'),
            Track(
                name='track2',
                tracktype='bigWig',
                local_fn='data/track2.bigwig',
            ),
        ]
        self.hub.add_genomes_file(self.genomes_file)
        self.genomes_file.add_genome(self.genome)
        self.genome.add_trackdb(self.trackdb)
        self.trackdb.add_tracks(self.tracks)

    def test_upload(self):
        self.hub.remote_fn = os.path.join('uploaded_version',
                                          self.hub.remote_fn)
        self.hub.render()
        upload.upload_hub(
            'localhost',
            None,
            self.hub,
            symlink=True,
            symlink_dir='staging',
            run_local=True,
        )
        for t, level in self.hub.leaves(Track):
            upload.upload_track(track=t,
                                host='localhost',
                                user=None,
                                run_local=True)
Пример #14
0
    def setup(self):
        """Instantiate the example components; nothing is connected here."""
        self.hub = Hub(hub='example_hub',
                       short_label='example hub',
                       long_label='an example hub for testing',
                       email='*****@*****.**')
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        # Track name -> track type, in creation order.
        track_types = (('track1', 'bam'), ('track2', 'bigWig'))
        self.tracks = [
            Track(name=name, tracktype=tracktype)
            for name, tracktype in track_types
        ]
Пример #15
0
def ENCSR2viz(experiments_to_visualize):
    """Build an ENCODE track hub for some experiments and open it at UCSC.

    Each accession in ``experiments_to_visualize`` is fetched with
    ``get_ENCODE``; every file of the experiment whose ``file_format`` is
    bigWig, bigBed, broadPeak or narrowPeak becomes a track. The hub is
    rendered, the ``trackhub`` directory is copied with rsync, and the
    resulting UCSC URL is printed and opened in a web browser.

    ``print`` is used in call form with a single argument so the function
    parses on Python 3 and prints the same text on Python 2.

    :param experiments_to_visualize: iterable of experiment accessions
        accepted by ``get_ENCODE``.
    :returns: ``None``.
    """
    if DEBUG:
        print("Visualizing")
        print(experiments_to_visualize)

    import subprocess
    import webbrowser

    from trackhub import Hub, GenomesFile, Genome, TrackDb, Track

    HUBHOST = 'http://cherry-vm45.stanford.edu'
    HUBDIR = 'jseth/trackhubs'
    EDWBASE = 'http://encodedcc.sdsc.edu/warehouse'
    GENOME = 'hg19'

    # ENCODE file format -> track type used for it in the hub.
    track_types = {
        'bigWig': 'bigWig',
        'bigBed': 'bigBed',
        'broadPeak': 'bigBed',
        'narrowPeak': 'bigBed',
    }

    hub = Hub(
        hub='ENCODE',
        short_label='ENCODE',
        long_label='ENCODE',
        email='*****@*****.**')

    genomes_file = GenomesFile()
    genome = Genome(GENOME)
    trackdb = TrackDb()

    for accession in experiments_to_visualize:
        experiment_obj = get_ENCODE(accession)
        if DEBUG:
            print(experiment_obj)
        for file_id in experiment_obj['files']:
            file_obj = get_ENCODE(file_id)
            track_type = track_types.get(file_obj['file_format'])
            if track_type is None:
                # Formats outside the table get no track.
                continue
            track = Track(
                name=str(file_obj['accession']),
                url=os.path.join(EDWBASE, str(file_obj['download_path'])),
                tracktype=track_type,
                long_label=str(file_obj['accession']),
                short_label=str(file_obj['output_type']),
                color='128,0,0',
                visibility='dense',
                metadata='cell_type=primary')
            print(file_obj['accession'])

            trackdb.add_tracks([track])

    genome.add_trackdb(trackdb)
    genomes_file.add_genome(genome)
    hub.add_genomes_file(genomes_file)

    hub.render()
    if DEBUG:
        print(hub)
        print('...')
        print(genomes_file)
        print('...')
        print(genome)
        print('...')
        print(trackdb)

    # The hub is copied with rsync; the original author noted that
    # trackhub's upload_hub "doesn't seem to work" here.
    # NOTE(review): the rsync destination looks like a redacted address
    # ("[email protected]"); restore the real user@host before use.
    subprocess.call("cd .. && rsync -r trackhub [email protected]:/www/html/jseth/trackhubs", shell=True)

    hubfile = str(hub.hub) + '.hub.txt'
    UCSC_url = ('http://genome.ucsc.edu/cgi-bin/hgTracks?udcTimeout=1&db=' + GENOME +
                '&hubUrl=' + os.path.join(HUBHOST, HUBDIR, 'trackhub', hubfile))
    print(UCSC_url)
    webbrowser.open(UCSC_url)
Пример #16
0
def files2viz(files_to_visualize):
    """Build a track hub from ENCODE file accessions and open it at UCSC.

    Each accession in ``files_to_visualize`` is fetched with
    ``get_ENCODE``; files whose ``file_format`` is ``'bigWig'`` become
    tracks, all others are ignored. The hub is rendered, the ``trackhub``
    directory is copied with rsync, and the UCSC URL is printed and opened
    in a web browser.

    ``print`` is used in call form with a single argument so the function
    parses on Python 3 and prints the same text on Python 2.

    :param files_to_visualize: iterable of file accessions accepted by
        ``get_ENCODE``.
    :returns: ``None``.
    """
    if DEBUG:
        print("Visualizing")
        print(files_to_visualize)

    import subprocess
    import webbrowser

    from trackhub import Hub, GenomesFile, Genome, TrackDb, Track

    HUBHOST = 'http://cherry-vm45.stanford.edu'
    HUBDIR = 'trackhubs'
    EDWBASE = 'http://encodedcc.sdsc.edu/warehouse'
    GENOME = 'hg19'

    hub = Hub(hub='Selected_ENCODE_Tracks',
              short_label='Selected_ENCODE_Tracks_short',
              long_label='Selected_ENCODE_Tracks_long',
              email='*****@*****.**')

    genomes_file = GenomesFile()
    genome = Genome(GENOME)
    trackdb = TrackDb()

    for accession in files_to_visualize:
        file_obj = get_ENCODE(accession)
        if DEBUG:
            print(file_obj)
        if file_obj['file_format'] != 'bigWig':
            # Only bigWig files are turned into tracks.
            continue
        track = Track(name=accession,
                      url=os.path.join(EDWBASE,
                                       str(file_obj['download_path'])),
                      tracktype='bigWig',
                      short_label=accession,
                      long_label=accession,
                      color='128,0,0',
                      visibility='full')
        trackdb.add_tracks([track])

    genome.add_trackdb(trackdb)
    genomes_file.add_genome(genome)
    hub.add_genomes_file(genomes_file)

    hub.render()
    if DEBUG:
        print(hub)
        print('...')
        print(genomes_file)
        print('...')
        print(genome)
        print('...')
        print(trackdb)

    # The hub is copied with rsync; the original author noted that
    # trackhub's upload_hub "doesn't seem to work" here.
    # NOTE(review): the rsync destination looks like a redacted address
    # ("[email protected]"); restore the real user@host before use.
    subprocess.call(
        "cd .. && rsync -r trackhub [email protected]:/www/html/trackhubs",
        shell=True)

    hubfile = str(hub.hub) + '.hub.txt'
    UCSC_url = ('http://genome.ucsc.edu/cgi-bin/hgTracks?udcTimeout=1&db=' + GENOME +
                '&hubUrl=' + os.path.join(HUBHOST, HUBDIR, 'trackhub', hubfile))
    print(UCSC_url)
    webbrowser.open(UCSC_url)
Пример #17
0
class TestComponents(object):
    def setup(self):
        self.hub = Hub(
            hub='example_hub',
            short_label='example hub',
            long_label='an example hub for testing',
            email='*****@*****.**')
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        self.tracks = [
            Track(name='track1', tracktype='bam'),
            Track(name='track2', tracktype='bigWig'),
        ]

    def CONNECT(self):
        """
        Connect the components together. The default setup creates the objects
        but does not connect them.
        """
        self.hub.add_genomes_file(self.genomes_file)
        self.genomes_file.add_genome(self.genome)
        self.genome.add_trackdb(self.trackdb)
        self.trackdb.add_tracks(self.tracks)

    def DISCONNECT(self):
        """
        Re-run the setup, which results in unconnected components. Run
        CONNECT() to connect them up.
        """
        self.setup()

    def test_self_connection(self):
        """
        meta test: make sure the test class's connect/disconnect is working
        """
        assert self.hub.genomes_file is None

        self.CONNECT()
        assert self.hub.genomes_file is self.genomes_file

        self.DISCONNECT()
        assert self.hub.genomes_file is None

    # Filenames ---------------------------------------------------------------

    def test_hub_fns(self):
        # Default unconnected
        assert self.hub.filename == 'example_hub.hub.txt'

        # Connecting components should not change hub
        self.CONNECT()
        assert self.hub.filename == 'example_hub.hub.txt'
        self.DISCONNECT()

    def test_genome_file_fns(self):
        with pytest.raises(AttributeError):
            getattr(self.genomes_file, 'url')

        # When unconnected, filenames should be None
        assert self.genomes_file.filename is None

        #...though you can set them manually
        self.genomes_file.filename = 'local.genomes'
        assert self.genomes_file.filename == 'local.genomes'
        self.genomes_file.filename = None

        self.CONNECT()
        assert self.genomes_file.filename == 'example_hub.genomes.txt'

        # when connected, overriding works
        self.genomes_file.filename = 'local.genomes'
        assert self.genomes_file.filename == 'local.genomes'
        self.genomes_file.filename = None

        # disconnecting brings it back to None
        self.DISCONNECT()
        assert self.genomes_file.filename is None

        # set the hub's local_dir; genomes_file should follow.
        self.CONNECT()

        # what happens if the hub's local FN is changed?
        self.hub.filename = 'localhub/hub.txt'
        assert self.genomes_file.filename == 'localhub/example_hub.genomes.txt'

    def test_trackdb_fns(self):

        # when unconnected, no defaults
        assert self.trackdb.filename is None

        self.CONNECT()
        assert self.trackdb.filename == 'dm3/trackDb.txt'

        # setting filename overrides
        self.trackdb.filename = 'mytrackdb.txt'
        assert self.trackdb.filename == 'mytrackdb.txt', self.trackdb.filename

        # genomes_file fn overrides
        self.trackdb.filename = None
        self.genomes_file.filename = 'anotherdir/genomes.txt'
        assert self.trackdb.filename == 'anotherdir/dm3/trackDb.txt'

        # reset parent hub and genomes file to get back to the default
        self.genomes_file.filename = None
        assert self.trackdb.filename == 'dm3/trackDb.txt'

    def test_track_fns(self):

        self.CONNECT()
        # local fns should still be None
        for track in self.tracks:
            assert track.source is None

        # filename is relative to the hub's filename
        assert self.tracks[0].filename == 'dm3/track1.bam'
        assert self.tracks[1].filename == 'dm3/track2.bigWig'

        # URL is relative to the trackDb
        assert self.tracks[0].url == 'track1.bam'


    def test_track_creation(self):
        track = Track(name='track0', tracktype='bam', source='t0.bam')
        assert track.source == 't0.bam'
Пример #18
0
class TestComponents(object):
    def setup(self):
        self.hub = Hub(
            hub="example_hub",
            short_label="example hub",
            long_label="an example hub for testing",
            email="*****@*****.**",
        )
        self.genomes_file = GenomesFile()
        self.genome = Genome("dm3")
        self.trackdb = TrackDb()

        self.tracks = [Track(name="track1", tracktype="bam"), Track(name="track2", tracktype="bigWig")]

    def CONNECT(self):
        """
        Connect the components together. The default setup creates the objects
        but does not connect them.
        """
        self.hub.add_genomes_file(self.genomes_file)
        self.genomes_file.add_genome(self.genome)
        self.genome.add_trackdb(self.trackdb)
        self.trackdb.add_tracks(self.tracks)

    def DISCONNECT(self):
        """
        Re-run the setup, which results in unconnected components. Run
        CONNECT() to connect them up.
        """
        self.setup()

    def test_self_connection(self):
        """
        meta test: make sure the test class's connect/disconnect is working
        """
        assert self.hub.genomes_file is None

        self.CONNECT()
        assert self.hub.genomes_file is self.genomes_file

        self.DISCONNECT()
        assert self.hub.genomes_file is None

    # Filenames ---------------------------------------------------------------

    def test_hub_fns(self):
        # Default unconnected
        assert self.hub.local_fn == "example_hub.hub.txt"
        assert self.hub.remote_fn == "example_hub.hub.txt"
        assert self.hub.url is None

        # Connecting components should not change hub
        self.CONNECT()
        assert self.hub.local_fn == "example_hub.hub.txt"
        assert self.hub.remote_fn == "example_hub.hub.txt"
        assert self.hub.url is None
        self.DISCONNECT()

        # set local/remote dir and fns should follow
        self.hub.local_dir = "/data/hubs"
        self.hub.remote_dir = "/remote/hubs"
        assert self.hub.local_fn == "/data/hubs/example_hub.hub.txt", self.hub.local_fn
        assert self.hub.remote_fn == "/remote/hubs/example_hub.hub.txt", self.hub.remote_fn

        # resetting them should not change
        self.hub.local_dir = None
        self.hub.remote_dir = None
        assert self.hub.local_fn == "example_hub.hub.txt"
        assert self.hub.remote_fn == "example_hub.hub.txt"

        # changing local_fn or remote_fn should stick -- and should override
        # any set local/remote dir
        self.hub.local_fn = "dummy.txt"
        self.hub.remote_fn = "remote.txt"
        self.hub.local_dir = "no_dir"
        self.hub.remote_dir = "remote_dir"
        assert self.hub.local_fn == "dummy.txt"
        assert self.hub.remote_fn == "remote.txt"

    def test_genome_file_fns(self):
        assert_raises(AttributeError, getattr, self.genomes_file, "url")
        # When unconnected, filenames should be None
        assert self.genomes_file.local_fn is None
        assert self.genomes_file.remote_fn is None

        # ...though you can set them manually
        self.genomes_file.local_fn = "local.genomes"
        assert self.genomes_file.local_fn == "local.genomes"
        self.genomes_file.local_fn = None

        self.CONNECT()
        assert self.genomes_file.local_fn == "example_hub.genomes.txt"

        # when connected, overriding works
        self.genomes_file.local_fn = "local.genomes"
        assert self.genomes_file.local_fn == "local.genomes"
        self.genomes_file.local_fn = None

        # disconnecting brings it back to None
        self.DISCONNECT()
        assert self.genomes_file.local_fn is None

        # set the hub's local_dir; genomes_file should follow.
        self.CONNECT()
        self.hub.local_dir = "local/"
        assert self.genomes_file.local_fn == "local/example_hub.genomes.txt"

        # what happens if the hub's local FN is changed?
        self.hub.local_fn = "localhub/hub.txt"
        assert self.genomes_file.local_fn == "localhub/example_hub.genomes.txt"

    def test_genome_fns(self):
        # should be easy -- filenames should raise attribute errors
        assert_raises(AttributeError, getattr, self.genome, "local_fn")
        assert_raises(AttributeError, getattr, self.genome, "remote_fn")
        assert_raises(AttributeError, getattr, self.genome, "url")

    def test_trackdb_fns(self):
        assert_raises(AttributeError, getattr, self.trackdb, "url")

        # when unconnected, no defaults
        assert self.trackdb.local_fn is None
        assert self.trackdb.remote_fn is None

        self.CONNECT()
        assert self.trackdb.local_fn == "dm3/trackDb.txt"
        assert self.trackdb.remote_fn == "dm3/trackDb.txt"

        # setting the local dir on the hub trickles down
        self.hub.local_dir = "localdir"
        assert self.trackdb.local_fn == "localdir/dm3/trackDb.txt"

        # setting local_fn overrides
        self.trackdb.local_fn = "mytrackdb.txt"
        assert self.trackdb.local_fn == "mytrackdb.txt", self.trackdb.local_fn

        # ...and back to None to reset
        self.trackdb.local_fn = None
        assert self.trackdb.local_fn == "localdir/dm3/trackDb.txt"

        # genomes_file fn overrides
        self.genomes_file.local_fn = "anotherdir/genomes.txt"
        assert self.trackdb.local_fn == "anotherdir/dm3/trackDb.txt"

        # reset parent hub and genomes file to get back to the default
        self.genomes_file.local_fn = None
        self.hub.local_dir = None
        assert self.trackdb.local_fn == "dm3/trackDb.txt"

    def test_track_fns(self):
        for track in self.tracks:
            assert track.local_fn is None
            assert track.remote_fn is None
            assert track.url is None

        self.CONNECT()
        # local fns should still be None
        for track in self.tracks:
            assert track.local_fn is None

        # remote_fn is relative to the hub's remote_fn
        assert self.tracks[0].remote_fn == "dm3/track1.bam"
        assert self.tracks[1].remote_fn == "dm3/track2.bigWig"

        self.hub.remote_fn = "/var/www/hubs/hub.txt"
        self.hub.url = "http://example.com/hubs/hub.txt"

        # URL is relative to the trackDb
        assert self.tracks[0].url == "track1.bam"

    def test_track_creation(self):
        track = Track(name="track0", tracktype="bam", local_fn="t0.bam")
        assert track.local_fn == "t0.bam"
Пример #19
0
parser.add_argument("--email", help="Required. your email to contact")
parser.add_argument("--composite_track_name",
                    help="Required. the name of your composite track")

args = parser.parse_args()

# Report missing options through parser.error() instead of assert: assert
# statements are removed when Python runs with -O, and parser.error() prints
# the usage message and exits with status 2 rather than raising a traceback.
_required_options = (
    (args.hub_name, "please provide the hub_name"),
    (args.base_url, "please provide the base_url"),
    (args.composite_track_name, "please provide the composite track name"),
    (args.email, "please provide your email"),
    (args.input_dir,
     "please provide the path to the bigwig and bigbed files on your local computer"),
)
for _value, _message in _required_options:
    if _value is None:
        parser.error(_message)

from trackhub import Hub, GenomesFile, Genome, TrackDb

# The hub name is used for the hub identifier and both labels.
hub = Hub(hub='%s' % args.hub_name,
          short_label='%s' % args.hub_name,
          long_label='%s ChIP-seq hub' % args.hub_name,
          email='%s' % args.email)

genomes_file = GenomesFile()
genome = Genome('hg19')
trackdb = TrackDb()

# Bottom-up
genome.add_trackdb(trackdb)
genomes_file.add_genome(genome)
hub.add_genomes_file(genomes_file)

# make a composite track
from trackhub import CompositeTrack

composite = CompositeTrack(name="%s" % args.composite_track_name,
Пример #20
0
class TestComponents(object):
    """
    Filename and URL behaviour of Hub, GenomesFile, Genome, TrackDb and Track
    objects, both unconnected and connected to each other.
    """

    def setup(self):
        """
        Create fresh, unconnected components: a hub, a genomes file, a 'dm3'
        genome, a trackDb and two tracks (one bam, one bigWig).
        """
        self.hub = Hub(
            hub='example_hub',
            short_label='example hub',
            long_label='an example hub for testing',
            email='*****@*****.**')
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        self.tracks = [
            Track(name='track1', tracktype='bam'),
            Track(name='track2', tracktype='bigWig'),
        ]

    def CONNECT(self):
        """
        Connect the components together. The default setup creates the objects
        but does not connect them.
        """
        self.hub.add_genomes_file(self.genomes_file)
        self.genomes_file.add_genome(self.genome)
        self.genome.add_trackdb(self.trackdb)
        self.trackdb.add_tracks(self.tracks)

    def DISCONNECT(self):
        """
        Re-run the setup, which results in unconnected components. Run
        CONNECT() to connect them up.
        """
        # Note: this replaces every component with a new object, so any
        # attribute set on the old objects is discarded as well.
        self.setup()

    def test_self_connection(self):
        """
        meta test: make sure the test class's connect/disconnect is working
        """
        assert self.hub.genomes_file is None

        self.CONNECT()
        assert self.hub.genomes_file is self.genomes_file

        self.DISCONNECT()
        assert self.hub.genomes_file is None

    # Filenames ---------------------------------------------------------------

    def test_hub_fns(self):
        """
        Hub local_fn/remote_fn: defaults, following local_dir/remote_dir, and
        explicit filenames taking precedence over the directories.
        """
        # Default unconnected
        assert self.hub.local_fn == 'example_hub.hub.txt'
        assert self.hub.remote_fn == 'example_hub.hub.txt'
        assert self.hub.url is None

        # Connecting components should not change hub
        self.CONNECT()
        assert self.hub.local_fn == 'example_hub.hub.txt'
        assert self.hub.remote_fn == 'example_hub.hub.txt'
        assert self.hub.url is None
        self.DISCONNECT()

        # set local/remote dir and fns should follow
        self.hub.local_dir = '/data/hubs'
        self.hub.remote_dir = '/remote/hubs'
        assert self.hub.local_fn == '/data/hubs/example_hub.hub.txt', \
            self.hub.local_fn
        assert self.hub.remote_fn == '/remote/hubs/example_hub.hub.txt', \
            self.hub.remote_fn

        # resetting the dirs to None brings back the bare default filenames
        self.hub.local_dir = None
        self.hub.remote_dir = None
        assert self.hub.local_fn == 'example_hub.hub.txt'
        assert self.hub.remote_fn == 'example_hub.hub.txt'

        # changing local_fn or remote_fn should stick -- and should override
        # any set local/remote dir
        self.hub.local_fn = 'dummy.txt'
        self.hub.remote_fn = 'remote.txt'
        self.hub.local_dir = 'no_dir'
        self.hub.remote_dir = 'remote_dir'
        assert self.hub.local_fn == 'dummy.txt'
        assert self.hub.remote_fn == 'remote.txt'

    def test_genome_file_fns(self):
        """
        GenomesFile local_fn/remote_fn: None when unconnected, derived from
        the hub when connected, and overridable in both states.
        """
        # a genomes file has no `url` attribute at all
        assert_raises(AttributeError, getattr, self.genomes_file, 'url')
        # When unconnected, filenames should be None
        assert self.genomes_file.local_fn is None
        assert self.genomes_file.remote_fn is None

        #...though you can set them manually
        self.genomes_file.local_fn = 'local.genomes'
        assert self.genomes_file.local_fn == 'local.genomes'
        self.genomes_file.local_fn = None

        self.CONNECT()
        assert self.genomes_file.local_fn == 'example_hub.genomes.txt'

        # when connected, overriding works
        self.genomes_file.local_fn = 'local.genomes'
        assert self.genomes_file.local_fn == 'local.genomes'
        self.genomes_file.local_fn = None

        # disconnecting brings it back to None
        self.DISCONNECT()
        assert self.genomes_file.local_fn is None

        # set the hub's local_dir; genomes_file should follow.
        self.CONNECT()
        self.hub.local_dir = 'local/'
        assert self.genomes_file.local_fn == 'local/example_hub.genomes.txt'

        # what happens if the hub's local FN is changed?
        # (the genomes file moves to the directory of the hub's local_fn)
        self.hub.local_fn = 'localhub/hub.txt'
        assert self.genomes_file.local_fn == 'localhub/example_hub.genomes.txt'

    def test_genome_fns(self):
        """Genome objects expose none of local_fn, remote_fn or url."""
        # should be easy -- filenames should raise attribute errors
        assert_raises(AttributeError, getattr, self.genome, 'local_fn')
        assert_raises(AttributeError, getattr, self.genome, 'remote_fn')
        assert_raises(AttributeError, getattr, self.genome, 'url')

    def test_trackdb_fns(self):
        """
        TrackDb local_fn/remote_fn: None when unconnected, derived from the
        genome name and parent locations when connected, and overridable.
        """
        # a trackDb has no `url` attribute at all
        assert_raises(AttributeError, getattr, self.trackdb, 'url')

        # when unconnected, no defaults
        assert self.trackdb.local_fn is None
        assert self.trackdb.remote_fn is None

        self.CONNECT()
        assert self.trackdb.local_fn == 'dm3/trackDb.txt'
        assert self.trackdb.remote_fn == 'dm3/trackDb.txt'

        # setting the local dir on the hub trickles down
        self.hub.local_dir = 'localdir'
        assert self.trackdb.local_fn == 'localdir/dm3/trackDb.txt'

        # setting local_fn overrides
        self.trackdb.local_fn = 'mytrackdb.txt'
        assert self.trackdb.local_fn == 'mytrackdb.txt', self.trackdb.local_fn

        # ...and back to None to reset
        self.trackdb.local_fn = None
        assert self.trackdb.local_fn == 'localdir/dm3/trackDb.txt'

        # genomes_file fn overrides
        self.genomes_file.local_fn = 'anotherdir/genomes.txt'
        assert self.trackdb.local_fn == 'anotherdir/dm3/trackDb.txt'

        # reset parent hub and genomes file to get back to the default
        self.genomes_file.local_fn = None
        self.hub.local_dir = None
        assert self.trackdb.local_fn == 'dm3/trackDb.txt'

    def test_track_fns(self):
        """
        Track local_fn/remote_fn/url: all None when unconnected; remote_fn and
        url take values once connected, local_fn stays None.
        """
        for track in self.tracks:
            assert track.local_fn is None
            assert track.remote_fn is None
            assert track.url is None

        self.CONNECT()
        # local fns should still be None
        for track in self.tracks:
            assert track.local_fn is None

        # remote_fn is relative to the hub's remote_fn
        assert self.tracks[0].remote_fn == 'dm3/track1.bam'
        assert self.tracks[1].remote_fn == 'dm3/track2.bigWig'

        self.hub.remote_fn = '/var/www/hubs/hub.txt'
        self.hub.url = 'http://example.com/hubs/hub.txt'

        # URL is relative to the trackDb
        assert self.tracks[0].url == 'track1.bam'


    def test_track_creation(self):
        """A local_fn given to the Track constructor is readable afterwards."""
        track = Track(name='track0', tracktype='bam', local_fn='t0.bam')
        assert track.local_fn == 't0.bam'
Пример #21
0
class TestUpload(object):
    """
    Staging, upload and rendering tests for a small, fully connected hub.
    """

    def setup(self):
        """
        Build a connected hub with one genome ('dm3') and two tracks.

        Everything is constructed inside ``pytest.warns(DeprecationWarning)``,
        so the construction must emit at least one DeprecationWarning.
        """
        with pytest.warns(DeprecationWarning):
            self.hub = Hub(
                hub='example_hub',
                short_label='example hub',
                long_label='an example hub for testing',
                email='*****@*****.**')
            self.genomes_file = GenomesFile()
            self.genome = Genome('dm3')
            self.trackdb = TrackDb()

            self.tracks = [
                Track(
                    name='track1',
                    tracktype='bigBed',
                    local_fn=os.path.join(d, 'random-hg38-0.bigBed'),
                    remote_fn='1.bigbed',
                ),
                Track(
                    name='track2',
                    tracktype='bigWig',
                    local_fn=os.path.join(d, 'sine-hg38-0.bedgraph.bw'),
                    remote_fn='2.bw',
                ),
            ]
            self.hub.add_genomes_file(self.genomes_file)
            self.genomes_file.add_genome(self.genome)
            self.genome.add_trackdb(self.trackdb)
            self.trackdb.add_tracks(self.tracks)

    def test_staging(self):
        """
        Stage the hub and compare the staged genomes file and hub file with
        their expected contents.
        """
        staging_dir, linknames = upload.stage_hub(self.hub)

        # Read through context managers so each handle is closed right away
        # instead of being left open until garbage collection.
        with open(os.path.join(staging_dir, 'example_hub.genomes.txt')) as fin:
            genomes_contents = fin.read()
        assert genomes_contents == dedent(
            """\
            genome dm3
            trackDb dm3/trackDb.txt

            """)

        with open(os.path.join(staging_dir, 'example_hub.hub.txt')) as fin:
            hub_contents = fin.read()
        assert hub_contents == dedent(
            """\
            hub hub
            shortLabel example hub
            longLabel an example hub for testing
            genomesFile example_hub.genomes.txt
            email [email protected]""")

        print(staging_dir)

    #@unittest.skipUnless(os.path.exists('data/track1.bam'), 'No test data')
    def test_upload(self):
        """
        Upload the hub into a fresh temporary directory.

        Passes when ``upload.upload_hub`` does not raise; the temporary
        directory is removed afterwards whether or not the upload succeeded.
        """
        import shutil

        # Named `remote_dir` rather than `d` so it does not shadow the
        # module-level `d` that setup() uses to locate the data files.
        remote_dir = tempfile.mkdtemp()
        try:
            upload.upload_hub(
                hub=self.hub,
                remote_dir=remote_dir,
                user=None,
                host=None,
            )
        finally:
            # Do not leave a new directory behind on every test run.
            shutil.rmtree(remote_dir, ignore_errors=True)

    def test_render(self):
        """The rendered trackDb names track1 and points at both data files."""
        trackdb = str(self.trackdb)
        print(self.trackdb)
        # make sure some of the trackdb rendered correctly
        assert 'track track1' in trackdb
        assert 'bigDataUrl ../1.bigbed' in trackdb
        assert 'bigDataUrl ../2.bw' in trackdb
Пример #22
0
    if not args.long_label:
        args.long_label = args.short_label

    upload_dir = os.path.join(args.upload_dir, args.hub)
    #THIS IS REALLY BAD AND NOT INTUITIVE
    if args.no_s3:
        URLBASE = os.path.join(
            "https://s3-us-west-1.amazonaws.com/sauron-yeo/", args.hub)
    else:
        URLBASE = os.path.join("http://sauron.ucsd.edu/Hubs", args.hub)

    GENOME = args.genome

    hub = Hub(
        hub=args.hub,
        short_label=args.short_label,
        long_label=args.long_label,
        email=args.email,
    )

    genomes_file = GenomesFile()
    genome = Genome(GENOME)
    trackdb = TrackDb()
    supertrack = SuperTrack(name=args.hub,
                            short_label=args.hub,
                            long_label=args.hub)
    genome.add_trackdb(trackdb)
    genomes_file.add_genome(genome)
    hub.add_genomes_file(genomes_file)
    hub.upload_fn = upload_dir

    files = args.files
Пример #23
0
    
    args = parser.parse_args()

    #default setting
    if not args.short_label:
        args.short_label = args.hub
    if not args.long_label:
        args.long_label = args.short_label
        
    upload_dir = os.path.join("/zfs/Hubs", args.hub)
    URLBASE= os.path.join("http://sauron.ucsd.edu/Hubs", args.hub)
    GENOME = args.genome
    
    hub = Hub(hub=args.hub,
              short_label=args.short_label,
              long_label=args.long_label,
              email = args.email,
              )
    
    genomes_file = GenomesFile()
    genome = Genome(GENOME)
    trackdb = TrackDb()
    
    genome.add_trackdb(trackdb)
    genomes_file.add_genome(genome)
    hub.add_genomes_file(genomes_file)
    hub.upload_fn = upload_dir
    
    files = args.files
    #logic for doing pos and neg as the same multi trackhub
    #process bw files first, do the rest with old logic
Пример #24
0
class TestComponents(object):
    """
    Filename and URL behaviour of Hub, GenomesFile, Genome, TrackDb and Track
    objects, both unconnected and connected to each other.
    """

    def setup(self):
        """
        Create fresh, unconnected components: a hub, a genomes file, a 'dm3'
        genome, a trackDb and two tracks (one bam, one bigWig).
        """
        self.hub = Hub(
            hub='example_hub',
            short_label='example hub',
            long_label='an example hub for testing',
            email='*****@*****.**')
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        self.tracks = [
            Track(name='track1', tracktype='bam'),
            Track(name='track2', tracktype='bigWig'),
        ]

    def CONNECT(self):
        """
        Connect the components together. The default setup creates the objects
        but does not connect them.
        """
        self.hub.add_genomes_file(self.genomes_file)
        self.genomes_file.add_genome(self.genome)
        self.genome.add_trackdb(self.trackdb)
        self.trackdb.add_tracks(self.tracks)

    def DISCONNECT(self):
        """
        Re-run the setup, which results in unconnected components. Run
        CONNECT() to connect them up.
        """
        self.setup()

    def test_self_connection(self):
        """
        meta test: make sure the test class's connect/disconnect is working
        """
        assert self.hub.genomes_file is None

        self.CONNECT()
        assert self.hub.genomes_file is self.genomes_file

        self.DISCONNECT()
        assert self.hub.genomes_file is None

    # Filenames ---------------------------------------------------------------

    def test_hub_fns(self):
        """
        Hub local_fn/remote_fn: defaults, following local_dir/remote_dir, and
        explicit filenames taking precedence over the directories.
        """
        # Default unconnected
        assert self.hub.local_fn == 'example_hub.hub.txt'
        assert self.hub.remote_fn == 'example_hub.hub.txt'
        assert self.hub.url is None

        # Connecting components should not change hub
        self.CONNECT()
        assert self.hub.local_fn == 'example_hub.hub.txt'
        assert self.hub.remote_fn == 'example_hub.hub.txt'
        assert self.hub.url is None
        self.DISCONNECT()

        # set local/remote dir and fns should follow
        self.hub.local_dir = '/data/hubs'
        self.hub.remote_dir = '/remote/hubs'
        assert self.hub.local_fn == '/data/hubs/example_hub.hub.txt', \
            self.hub.local_fn
        assert self.hub.remote_fn == '/remote/hubs/example_hub.hub.txt', \
            self.hub.remote_fn

        # resetting the dirs to None brings back the bare default filenames
        self.hub.local_dir = None
        self.hub.remote_dir = None
        assert self.hub.local_fn == 'example_hub.hub.txt'
        assert self.hub.remote_fn == 'example_hub.hub.txt'

        # changing local_fn or remote_fn should stick -- and should override
        # any set local/remote dir
        self.hub.local_fn = 'dummy.txt'
        self.hub.remote_fn = 'remote.txt'
        self.hub.local_dir = 'no_dir'
        self.hub.remote_dir = 'remote_dir'
        assert self.hub.local_fn == 'dummy.txt'
        assert self.hub.remote_fn == 'remote.txt'

    def test_genome_file_fns(self):
        """
        GenomesFile local_fn/remote_fn: None when unconnected, derived from
        the hub when connected, and overridable in both states.
        """
        # a genomes file has no `url` attribute at all
        assert_raises(AttributeError, getattr, self.genomes_file, 'url')
        # When unconnected, filenames should be None
        assert self.genomes_file.local_fn is None
        assert self.genomes_file.remote_fn is None

        #...though you can set them manually
        self.genomes_file.local_fn = 'local.genomes'
        assert self.genomes_file.local_fn == 'local.genomes'
        self.genomes_file.local_fn = None

        self.CONNECT()
        assert self.genomes_file.local_fn == 'example_hub.genomes.txt'

        # when connected, overriding works
        self.genomes_file.local_fn = 'local.genomes'
        assert self.genomes_file.local_fn == 'local.genomes'
        self.genomes_file.local_fn = None

        # disconnecting brings it back to None
        self.DISCONNECT()
        assert self.genomes_file.local_fn is None

        # set the hub's local_dir; genomes_file should follow.
        self.CONNECT()
        self.hub.local_dir = 'local/'
        assert self.genomes_file.local_fn == 'local/example_hub.genomes.txt'

        # what happens if the hub's local FN is changed?
        # (the genomes file moves to the directory of the hub's local_fn)
        self.hub.local_fn = 'localhub/hub.txt'
        assert self.genomes_file.local_fn == 'localhub/example_hub.genomes.txt'

    def test_genome_fns(self):
        """Genome objects expose none of local_fn, remote_fn or url."""
        # should be easy -- filenames should raise attribute errors
        assert_raises(AttributeError, getattr, self.genome, 'local_fn')
        assert_raises(AttributeError, getattr, self.genome, 'remote_fn')
        assert_raises(AttributeError, getattr, self.genome, 'url')

    def test_trackdb_fns(self):
        """
        TrackDb local_fn/remote_fn: None when unconnected, derived from the
        genome name and parent locations when connected, and overridable.
        """
        # a trackDb has no `url` attribute at all
        assert_raises(AttributeError, getattr, self.trackdb, 'url')

        # when unconnected, no defaults
        assert self.trackdb.local_fn is None
        assert self.trackdb.remote_fn is None

        self.CONNECT()
        assert self.trackdb.local_fn == 'dm3/trackDb.txt'
        assert self.trackdb.remote_fn == 'dm3/trackDb.txt'

        # setting the local dir on the hub trickles down
        self.hub.local_dir = 'localdir'
        assert self.trackdb.local_fn == 'localdir/dm3/trackDb.txt'

        # setting local_fn overrides
        self.trackdb.local_fn = 'mytrackdb.txt'
        assert self.trackdb.local_fn == 'mytrackdb.txt', self.trackdb.local_fn

        # ...and back to None to reset
        self.trackdb.local_fn = None
        assert self.trackdb.local_fn == 'localdir/dm3/trackDb.txt'

        # genomes_file fn overrides
        self.genomes_file.local_fn = 'anotherdir/genomes.txt'
        assert self.trackdb.local_fn == 'anotherdir/dm3/trackDb.txt'

        # reset parent hub and genomes file to get back to the default
        self.genomes_file.local_fn = None
        self.hub.local_dir = None
        assert self.trackdb.local_fn == 'dm3/trackDb.txt'

    def test_track_fns(self):
        """
        Track local_fn/remote_fn/url: all None when unconnected; once
        connected, remote_fn is set and, after the hub is given a URL, the
        track URL is the full address asserted at the end.
        """
        for track in self.tracks:
            assert track.local_fn is None
            assert track.remote_fn is None
            assert track.url is None

        self.CONNECT()
        # local fns should still be None
        for track in self.tracks:
            assert track.local_fn is None

        assert self.tracks[0].remote_fn == 'dm3/track1.bam'
        assert self.tracks[1].remote_fn == 'dm3/track2.bigWig'

        self.hub.remote_fn = '/var/www/hubs/hub.txt'
        self.hub.url = 'http://example.com/hubs/hub.txt'

        # Debug output. A single-argument print() call produces the same text
        # on Python 2 and Python 3; the bare print statement is Python 2 only.
        print(self.tracks[0].remote_fn)
        print(self.hub.remote_fn)
        assert self.tracks[0].url == 'http://example.com/hubs/dm3/track1.bam'


    def test_track_creation(self):
        """A local_fn given to the Track constructor is readable afterwards."""
        track = Track(name='track0', tracktype='bam', local_fn='t0.bam')
        assert track.local_fn == 't0.bam'
Пример #25
0
def main():
    """
    Build a UCSC track hub from bigWig/bigBed files and publish it.

    Command-line arguments: one or more track files, ``--hub`` and
    ``--genome`` (both required), ``--hub_email``, and ``--sep`` /
    ``--num_sep``, which control how file names are split into name parts.

    bigWig files are grouped by their first ``--num_sep`` name parts and each
    group becomes one AggregateTrack; bigBed files become individual tracks.
    Everything is collected under a single SuperTrack, the hub is rendered,
    uploaded with ``run_local=True`` and finally copied with
    ``copy_dir_to_aws``.

    NOTE: the upload host, user and the S3 domain/directory are hard-coded
    below rather than taken from the command line.
    """
    parser = argparse.ArgumentParser(
        description=
        'Takes in files to turn into trackhub. This version automatically ')

    # track files
    parser.add_argument('files',
                        nargs='+',
                        help='Files to turn into track hub')

    # naming
    parser.add_argument('--hub', help="hub name (no spaces)", required=True)
    parser.add_argument('--genome', help="genome name", required=True)

    # hub contact
    parser.add_argument('--hub_email',
                        default='*****@*****.**',
                        help="email for hub")

    # name-part grouping
    parser.add_argument('--sep', default=".", help="Seperator")
    parser.add_argument('--num_sep',
                        default=2,
                        type=int,
                        help="Number of seperators deep to group on")

    args = parser.parse_args()

    # TODO: unhack this, but let's keep all trackhubs here for now.
    urldomain = "s3-us-west-2.amazonaws.com"
    urldir = "yeolab-trackhubs"
    uploaddir = "yeolab-trackhubs"
    hub_name = args.hub
    hub_email = args.hub_email

    # Both hub labels default to the hub name.
    hub_short_label = hub_name
    hub_long_label = hub_name

    # Hard-coded upload target; the upload itself runs locally.
    HOST = "localhost"
    USER = "******"

    GENOME = args.genome
    # hack for tutorial dataset so it is easy to view in ucsd genome browser
    if GENOME == 'hg19chr19kbp255':
        # This used to be `GENOME == 'hg19'`, a comparison whose result was
        # discarded, so the remapping never took effect.
        GENOME = 'hg19'

    uploaddir = os.path.join(uploaddir, hub_name)

    URLBASE = os.path.join("http://" + urldomain + "/" + urldir + "/",
                           hub_name)

    # create data structures
    hub = Hub(
        hub=hub_name,
        short_label=hub_short_label,
        long_label=hub_long_label,
        email=hub_email,
    )
    hub.upload_fn = uploaddir

    genomes_file = GenomesFile()
    hub.add_genomes_file(genomes_file)

    genome = Genome(GENOME)
    genomes_file.add_genome(genome)

    trackdb = TrackDb()
    genome.add_trackdb(trackdb)

    supertrack = SuperTrack(name=hub_name,
                            short_label=hub_short_label,
                            long_label=hub_long_label)

    # Separate bigWigs and bigBeds; files with any other suffix are ignored.
    bigwig_suffixes = (".posbw", ".negbw", ".bw", ".bigWig", ".bigwig")
    bigbed_suffixes = (".bb", ".bigBed", ".bigbed")
    bigwig_files = [
        fname for fname in args.files if fname.endswith(bigwig_suffixes)
    ]
    bigbed_files = [
        fname for fname in args.files if fname.endswith(bigbed_suffixes)
    ]

    # Process bigWig files, grouped by their leading name parts, as
    # multi-tracks. groupby() only merges adjacent items, hence the sort
    # with the same key.
    key_func = lambda x: x.split(args.sep)[:args.num_sep]
    for group_key, group_bigwig_files in groupby(
            sorted(bigwig_files, key=key_func), key_func):

        group_bigwig_files_list = list(group_bigwig_files)
        print("args sep: {}".format(args.sep))
        print("args num sep: {}".format(args.num_sep))
        print("split filename: {}".format(bigwig_files[0].split(
            args.sep)[:args.num_sep]))
        print("-----------------------------------------")
        print("processing bigwig files group with key : {}".format(group_key))
        print("comprised of following files: {}".format(
            group_bigwig_files_list))
        print("-----------------------------------------")

        long_name = remove_plus_and_pct(
            os.path.basename(args.sep.join(group_key[:args.num_sep])))
        aggregate = AggregateTrack(name=long_name,
                                   tracktype='bigWig',
                                   short_label=long_name,
                                   long_label=long_name,
                                   aggregate='transparentOverlay',
                                   showSubtrackColorOnUi='on',
                                   autoScale='on',
                                   priority='1.4',
                                   alwaysZero='on',
                                   visibility="full")

        for bigwigfile in group_bigwig_files_list:
            print("--------------------------")
            print("bigwigfile {}".format(bigwigfile))
            print("--------------------------")
            base_track = remove_plus_and_pct(os.path.basename(bigwigfile))
            split_track = base_track.split(args.sep)
            # Track name: the grouping parts plus the last three name parts.
            long_name = args.sep.join(split_track[:args.num_sep] +
                                      split_track[-3:])
            # Paths containing "pos" are drawn green, all others red.
            color = "0,100,0" if "pos" in bigwigfile else "100,0,0"
            track = Track(name=long_name,
                          url=os.path.join(URLBASE, GENOME, base_track),
                          tracktype="bigWig",
                          short_label=long_name,
                          long_label=long_name,
                          color=color,
                          local_fn=bigwigfile,
                          remote_fn=os.path.join(uploaddir, GENOME,
                                                 base_track))
            aggregate.add_subtrack(track)
        supertrack.add_track(aggregate)

    # process bigbed files as single track
    for bigbed_file in bigbed_files:
        color = "0,100,0" if "pos" in bigbed_file else "100,0,0"
        base_track = remove_plus_and_pct(os.path.basename(bigbed_file))
        long_name = args.sep.join(base_track.split(
            args.sep)[:args.num_sep]) + ".bb"
        track = Track(name=long_name,
                      url=os.path.join(URLBASE, GENOME, base_track),
                      tracktype="bigBed",
                      short_label=long_name,
                      long_label=long_name,
                      color=color,
                      local_fn=bigbed_file,
                      remote_fn=os.path.join(uploaddir, GENOME, base_track),
                      visibility="full")
        supertrack.add_track(track)

    trackdb.add_tracks(supertrack)
    hub.render()
    hub.remote_fn = os.path.join(uploaddir, "hub.txt")

    # 'uploading' (locally)
    for track in trackdb.tracks:
        upload_track(track=track, host=HOST, user=USER, run_local=True)

    upload_hub(hub=hub, host=HOST, user=USER, run_local=True)

    print("UPLOADDIR: {}".format(uploaddir))
    print("BUCKET: {}".format(uploaddir))
    copy_dir_to_aws(
        src=uploaddir,
        dest=uploaddir,
    )
    print("FINAL URL: {}/hub.txt".format(URLBASE))
Пример #26
0
def main():
    """
    Build a composite bigBed track hub from a directory of ``.bb`` files.

    Positional arguments are the input directory (searched for ``.bb`` files
    through ``get_files_from_dir``) and the staging directory.  Flags:
    ``--quiet`` suppresses the echo of the inputs, ``--render`` stages the
    hub into the staging directory, ``--upload`` uploads it to the hard-coded
    web server, ``--mm9`` selects mm9 instead of mm10, ``--has_strand``
    switches the track type from "bigBed 5" to "bigBed 6", and ``--suffix``
    is appended to the hub name and labels.

    Every file becomes one Track inside a single ViewTrack of a
    CompositeTrack, with a "sample" subgroup keyed by the sample names
    returned by ``get_files_from_dir``.
    """
    parser = argparse.ArgumentParser(
        description='Make trackhubs for UCSC browser using bigBed files. \
                                     Outputs to CURRENT DIRECTORY.')
    parser.add_argument('inputdir',
                        metavar='INDIR',
                        help='Directory containing bigBed files .bb ending')
    parser.add_argument('outdir',
                        metavar='OUTDIR',
                        help='Directory for staging files')
    parser.add_argument('--quiet',
                        '-q',
                        action='store_true',
                        help='Suppress some print statements')
    parser.add_argument('--render',
                        '-r',
                        action='store_true',
                        help='Render file to current dir')
    parser.add_argument('--upload',
                        '-u',
                        action='store_true',
                        help='Upload file to webserver')
    parser.add_argument('--mm9',
                        '-m',
                        action='store_true',
                        help='Switch from mm10 to mm9')
    parser.add_argument('--has_strand',
                        '-s',
                        action='store_true',
                        help='Bed has strand (changes from 5 columns to 6)')
    parser.add_argument('--suffix',
                        '-S',
                        metavar="trackhub label suffix",
                        default="",
                        help='Suffix to label, for example H3K4me1')
    args = parser.parse_args()

    # store command line arguments for reproducibility
    CMD_INPUTS = ' '.join(['python'] + sys.argv)  # easy printing later
    # store argparse inputs for reproducibility / debugging purposes
    # (.items() works on Python 2 and 3; .iteritems() exists on Python 2 only)
    ARG_INPUTS = ' '.join(
        ['%s=%s' % (key, val) for key, val in vars(args).items()])

    # Print arguments supplied by user
    if not args.quiet:
        print('Command line inputs:')
        print(CMD_INPUTS)
        print('Argparse variables:')
        print(ARG_INPUTS)

    # define constants (hard coded)
    genomebuild = "mm9" if args.mm9 else "mm10"
    jsuffix = "%s_%s" % (genomebuild, args.suffix)
    print("Assigning prefix: %s" % jsuffix)
    # dirname: motevo_from_peaks/H3K4me1_peaks
    dirname = "motevo_from_peaks/%s_peaks/motevo_motifs_%s" % (args.suffix,
                                                               jsuffix)
    hubname = "motevo_motifs_%s" % jsuffix
    shortlab = "motevo_%s" % jsuffix
    longlab = "Motevo motifs %s" % jsuffix
    email = "*****@*****.**"
    url = "http://upnaesrv1.epfl.ch"
    assay = "bigbed"
    jvis = "dense"
    # bigbed options loaded into ViewTrack
    jspectrum = "on"
    scoremax = 1000
    scoremin = 500

    # define URLs
    url_main = "%s/%s" % (url, dirname)
    url_base = "%s/%s/data" % (url, dirname)
    upload_main = "%s" % hubname
    upload_base = "%s/data" % hubname
    ftype = "bigBed 6" if args.has_strand else "bigBed 5"
    host = "upnaesrv1.epfl.ch"
    user = "******"

    files_dic = get_files_from_dir(args.inputdir, ext=".bb")

    # Each sample name maps to itself for the subgroup definition.
    samples_dic = {sample: sample for sample in files_dic}

    # init hub genomes file genome trackdb
    hub = Hub(hub=hubname,
              short_label=shortlab,
              long_label=longlab,
              email=email)

    hub.url = os.path.join(url_main, "%s.hub.txt" % hub.hub)

    genomes_file = GenomesFile()
    genome = Genome(genomebuild)
    trackdb = TrackDb()

    # add remote fn
    hub.remote_fn = upload_main
    genomes_file.remote_fn = upload_main
    trackdb.remote_fn = os.path.join(upload_main, genomebuild, "trackDb.txt")

    hub.add_genomes_file(genomes_file)
    genome.add_trackdb(trackdb)
    genomes_file.add_genome(genome)

    # init composite
    composite = CompositeTrack(name=hubname,
                               short_label=shortlab,
                               long_label=longlab,
                               tracktype=ftype)
    # make subgroups
    subgroups = [
        SubGroupDefinition(name="sample", label="sample", mapping=samples_dic),
    ]
    composite.add_subgroups(subgroups)
    # make viewTrack, a hierarchy containing my files, for example
    view = ViewTrack(
        name="%sViewTrack" % assay,
        view="%s" % assay,
        visibility=jvis,
        tracktype=ftype,
        short_label="%s" % assay,
        long_label="%s assay" % assay,
        # big bed labels
        spectrum=jspectrum,
        scoreMin=scoremin,
        scoreMax=scoremax)
    composite.add_view(view)

    # make track
    for sample, wfs in files_dic.items():
        for wf in wfs:
            sampname = os.path.basename(wf)
            # Track name is the file name up to its first dot.
            bname = sampname.split(".")[0]
            track = Track(name=bname,
                          tracktype=ftype,
                          url=os.path.join(url_base, "%s" % sampname),
                          local_fn=os.path.abspath(wf),
                          remote_fn=os.path.join(upload_base, "%s" % sampname),
                          visibility=jvis,
                          shortLabel=bname,
                          longLabel=bname,
                          spectrum=jspectrum,
                          scoreMin=scoremin,
                          scoreMax=scoremax,
                          subgroups={"sample": sample})
            view.add_tracks(track)
    trackdb.add_tracks(composite)

    print('Track looks like this:')
    print(trackdb)

    if args.render:
        stage_hub(hub, staging=args.outdir)

    if args.upload:
        print('Uploading to [email protected]')
        upload_hub(hub=hub,
                   host=host,
                   user=user,
                   remote_dir="/data/web/sites/motevo_from_peaks")

    print('Subgroups:')
    for sg in subgroups:
        print(sg)
    # Printed whether or not --render was given.
    print("Staging to path: %s" % args.outdir)
Пример #27
0
def files2viz(files_to_visualize):
    """Build a UCSC track hub for ENCODE files and open it in a web browser.

    Each accession is looked up with ``get_ENCODE``. Files whose
    ``file_format`` is ``'bigWig'`` become one full-visibility track whose URL
    points below ``EDWBASE``; every other format is skipped. The hub is then
    rendered, synced with ``rsync`` to the hard-coded remote target, and the
    UCSC Genome Browser URL that loads the hub is printed and opened.

    Args:
        files_to_visualize: iterable of accessions accepted by ``get_ENCODE``.
    """
    import subprocess
    import webbrowser

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    if DEBUG:
        print("Visualizing")
        print(files_to_visualize)

    from trackhub import Hub, GenomesFile, Genome, TrackDb, Track

    HUBHOST = 'http://cherry-vm45.stanford.edu'
    HUBDIR = 'trackhubs'
    EDWBASE = 'http://encodedcc.sdsc.edu/warehouse'
    GENOME = 'hg19'

    hub = Hub(
        hub='Selected_ENCODE_Tracks',
        short_label='Selected_ENCODE_Tracks_short',
        long_label='Selected_ENCODE_Tracks_long',
        email='*****@*****.**')

    genomes_file = GenomesFile()
    genome = Genome(GENOME)
    trackdb = TrackDb()

    for accession in files_to_visualize:
        file_obj = get_ENCODE(accession)
        if DEBUG:
            print(file_obj)
        if file_obj['file_format'] != 'bigWig':
            # Only bigWig files are turned into tracks.
            continue
        # URLs always use '/', so join by hand instead of os.path.join, which
        # is platform-dependent and would drop EDWBASE entirely if the
        # download path started with '/'.
        download_path = str(file_obj['download_path']).lstrip('/')
        track = Track(
            name=accession,
            url=EDWBASE + '/' + download_path,
            tracktype='bigWig',
            short_label=accession,
            long_label=accession,
            color='128,0,0',
            visibility='full')
        trackdb.add_tracks([track])

    genome.add_trackdb(trackdb)
    genomes_file.add_genome(genome)
    hub.add_genomes_file(genomes_file)

    hub.render()
    if DEBUG:
        print(hub)
        print('...')
        print(genomes_file)
        print('...')
        print(genome)
        print('...')
        print(trackdb)

    # NOTE: the original author reported that trackhub's upload_hub "doesn't
    # seem to work" here, so the rendered hub is pushed with rsync instead.
    status = subprocess.call("cd .. && rsync -r trackhub [email protected]:/www/html/trackhubs", shell=True)
    if status != 0:
        # Best effort: still open the browser, but make the failure visible.
        print('WARNING: rsync exited with status %d; the remote hub may be stale' % status)

    hubfile = str(hub.hub) + '.hub.txt'
    hub_url = '/'.join([HUBHOST, HUBDIR, 'trackhub', hubfile])
    UCSC_url = 'http://genome.ucsc.edu/cgi-bin/hgTracks?udcTimeout=1&db=hg19' + \
        '&hubUrl=' + hub_url
    # Session loading was left disabled by the original author:
    #'&hsS_doLoadUrl=submit' + '&hgS_loadUrlName=' + os.path.join(HUBHOST,HUBDIR,'trackhub','session.txt')
    print(UCSC_url)
    webbrowser.open(UCSC_url)
Пример #28
0
class TestComponents(object):
    """
    Tests for the filenames and URLs reported by Hub, GenomesFile, TrackDb and
    Track objects, both before and after they are connected to each other.
    """

    def setup_method(self, method=None):
        """
        xunit-style per-test hook that delegates to setup().

        pytest 8 removed support for nose-style ``setup()`` methods, so without
        this hook the fixtures below would never be created. setup() only
        rebuilds fresh objects, so it is harmless if an older pytest ends up
        running it twice.
        """
        self.setup()

    def setup(self):
        """
        Create fresh, unconnected hub components and two source-less tracks.
        """
        self.hub = Hub(hub='example_hub',
                       short_label='example hub',
                       long_label='an example hub for testing',
                       email='*****@*****.**')
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        self.tracks = [
            Track(name='track1', tracktype='bam'),
            Track(name='track2', tracktype='bigWig'),
        ]

    def CONNECT(self):
        """
        Connect the components together. The default setup creates the objects
        but does not connect them.
        """
        self.hub.add_genomes_file(self.genomes_file)
        self.genomes_file.add_genome(self.genome)
        self.genome.add_trackdb(self.trackdb)
        self.trackdb.add_tracks(self.tracks)

    def DISCONNECT(self):
        """
        Re-run the setup, which results in unconnected components. Run
        CONNECT() to connect them up.
        """
        self.setup()

    def test_self_connection(self):
        """
        meta test: make sure the test class's connect/disconnect is working
        """
        assert self.hub.genomes_file is None

        self.CONNECT()
        assert self.hub.genomes_file is self.genomes_file

        self.DISCONNECT()
        assert self.hub.genomes_file is None

    # Filenames ---------------------------------------------------------------

    def test_hub_fns(self):
        """
        The hub filename is derived from the hub name, connected or not.
        """
        # Default unconnected
        assert self.hub.filename == 'example_hub.hub.txt'

        # Connecting components should not change hub
        self.CONNECT()
        assert self.hub.filename == 'example_hub.hub.txt'
        self.DISCONNECT()

    def test_genome_file_fns(self):
        """
        The genomes file has no ``url`` attribute, and its filename follows the
        parent hub unless it is set explicitly.
        """
        with pytest.raises(AttributeError):
            getattr(self.genomes_file, 'url')

        # When unconnected, filenames should be None
        assert self.genomes_file.filename is None

        # ...though you can set them manually
        self.genomes_file.filename = 'local.genomes'
        assert self.genomes_file.filename == 'local.genomes'
        self.genomes_file.filename = None

        self.CONNECT()
        assert self.genomes_file.filename == 'example_hub.genomes.txt'

        # when connected, overriding works
        self.genomes_file.filename = 'local.genomes'
        assert self.genomes_file.filename == 'local.genomes'
        self.genomes_file.filename = None

        # disconnecting brings it back to None
        self.DISCONNECT()
        assert self.genomes_file.filename is None

        # reconnect, then move the hub; the genomes file should follow
        self.CONNECT()

        # what happens if the hub's local FN is changed?
        self.hub.filename = 'localhub/hub.txt'
        assert self.genomes_file.filename == 'localhub/example_hub.genomes.txt'

    def test_trackdb_fns(self):
        """
        The trackDb filename defaults to ``<genome>/trackDb.txt`` and follows
        the genomes file unless it is set explicitly.
        """
        # when unconnected, no defaults
        assert self.trackdb.filename is None

        self.CONNECT()
        assert self.trackdb.filename == 'dm3/trackDb.txt'

        # setting filename overrides
        self.trackdb.filename = 'mytrackdb.txt'
        assert self.trackdb.filename == 'mytrackdb.txt', self.trackdb.filename

        # genomes_file fn overrides
        self.trackdb.filename = None
        self.genomes_file.filename = 'anotherdir/genomes.txt'
        assert self.trackdb.filename == 'anotherdir/dm3/trackDb.txt'

        # reset parent hub and genomes file to get back to the default
        self.genomes_file.filename = None
        assert self.trackdb.filename == 'dm3/trackDb.txt'

    def test_track_fns(self):
        """
        Connected tracks get a default filename and URL even without a source.
        """
        self.CONNECT()
        # local fns should still be None
        for track in self.tracks:
            assert track.source is None

        # filename is relative to the hub's filename
        assert self.tracks[0].filename == 'dm3/track1.bam'
        assert self.tracks[1].filename == 'dm3/track2.bigWig'

        # URL is relative to the trackDb
        assert self.tracks[0].url == 'track1.bam'

    def test_track_creation(self):
        """
        A ``source`` passed to the Track constructor is stored unchanged.
        """
        track = Track(name='track0', tracktype='bam', source='t0.bam')
        assert track.source == 't0.bam'
Пример #29
0
parser.add_argument("--composite_track_name", help="Required. the name of your composite track")

args = parser.parse_args()

# Validate the required options with parser.error() instead of `assert`:
# assertions are stripped when Python runs with -O, and a failed assert gives
# the user a traceback rather than a usage message.
# NOTE(review): assumes `parser` (created above this excerpt) is an
# argparse.ArgumentParser, whose error() prints usage and exits -- confirm.
_REQUIRED_OPTIONS = (
    ("hub_name", "please provide the hub_name"),
    ("base_url", "please provide the base_url"),
    ("composite_track_name", "please provide the composite track name"),
    ("email", "please provide your email"),
    ("input_dir", "please provide the path to the bigwig and bigbed files on your local computer"),
)
for _option, _message in _REQUIRED_OPTIONS:
    if getattr(args, _option) is None:
        parser.error(_message)


from trackhub import Hub, GenomesFile, Genome, TrackDb

# The hub name doubles as the short label and as the prefix of the long label.
hub = Hub(
    hub='%s' % args.hub_name,
    short_label='%s' % args.hub_name,
    long_label='%s ChIP-seq hub' % args.hub_name,
    email='%s' % args.email)

genomes_file = GenomesFile()
genome = Genome('hg19')
trackdb = TrackDb()

# Bottom-up: attach the trackDb to the genome, the genome to the genomes
# file, and the genomes file to the hub.
genome.add_trackdb(trackdb)
genomes_file.add_genome(genome)
hub.add_genomes_file(genomes_file)

# make a composite track
from trackhub import CompositeTrack
Пример #30
0
class TestUpload(object):
    """
    Staging, upload and rendering tests for a small, fully connected hub with
    one bigBed and two bigWig tracks.
    """

    def setup_method(self, method=None):
        """
        xunit-style per-test hook that delegates to setup().

        pytest 8 removed support for nose-style ``setup()`` methods, so without
        this hook the fixtures below would never be created. setup() only
        rebuilds fresh objects, so it is harmless if an older pytest ends up
        running it twice.
        """
        self.setup()

    def setup(self):
        """
        Build the hub, genomes file, genome, trackDb and tracks, and connect
        them. Track sources live in the module-level data directory ``d``.
        """
        self.hub = Hub(
            hub='example_hub',
            short_label='example hub',
            long_label='an example hub for testing',
            email='*****@*****.**')
        self.genomes_file = GenomesFile()
        self.genome = Genome('dm3')
        self.trackdb = TrackDb()

        self.tracks = [
            Track(
                name='track1',
                tracktype='bigBed',
                source=os.path.join(d, 'random-hg38-0.bigBed')
            ),
            Track(
                name='track2',
                tracktype='bigWig',
                source=os.path.join(d, 'sine-hg38-0.bedgraph.bw'),
            ),
            Track(
                name='track3',
                tracktype='bigWig',
                source=os.path.join(d, 'sine-hg38-1.bedgraph.bw'),
                filename='3.bw',
            )
        ]
        self.hub.add_genomes_file(self.genomes_file)
        self.genomes_file.add_genome(self.genome)
        self.genome.add_trackdb(self.trackdb)
        self.trackdb.add_tracks(self.tracks)

    def test_staging(self):
        """
        Staging the hub writes the expected genomes and hub text files.
        """
        staging_dir, _linknames = upload.stage_hub(self.hub)

        # Read through context managers so the file handles are closed
        # promptly instead of leaking until garbage collection.
        with open(os.path.join(staging_dir, 'example_hub.genomes.txt')) as handle:
            genomes_text = handle.read()
        assert genomes_text == dedent(
            """\
            genome dm3
            trackDb dm3/trackDb.txt

            """)

        with open(os.path.join(staging_dir, 'example_hub.hub.txt')) as handle:
            hub_text = handle.read()
        assert hub_text == dedent(
            """\
            hub hub
            shortLabel example hub
            longLabel an example hub for testing
            genomesFile example_hub.genomes.txt
            email [email protected]""")

    #@unittest.skipUnless(os.path.exists('data/track1.bam'), 'No test data')
    def test_upload(self):
        """
        Smoke test: upload the hub to a fresh temporary directory with no
        user or host given; passes as long as upload_hub does not raise.
        """
        # Named remote_dir (not `d`) so it does not shadow the module-level
        # data directory that setup() reads.
        remote_dir = tempfile.mkdtemp()
        print(remote_dir)
        upload.upload_hub(
            hub=self.hub,
            remote_dir=remote_dir,
            user=None,
            host=None,
        )

    def test_render(self):
        """
        Spot-check the rendered trackDb text for track names and data URLs.
        """
        trackdb = str(self.trackdb)
        # make sure some of the trackdb rendered correctly
        assert 'track track1' in trackdb
        assert 'bigDataUrl track1.bigBed' in trackdb
        assert 'bigDataUrl ../3.bw' in trackdb