Пример #1
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Lift over all tracks of the selected GSuite using an rsID mapping for
        the chosen reference genome.

        For every input track a new GTrack file is created in the hidden track
        storage through GSuitePrimaryTrackModifier.liftOverGTrack. Tracks that
        are processed without raising are collected in the output GSuite
        (composed to galaxyFn); tracks that raise get an explanatory comment
        and are collected in the error GSuite (composed to the history element
        named cls.HISTORY_ERROR_TITLE).
        '''
        from quick.webtools.clustering.GSuitePrimaryTrackModifier import GSuitePrimaryTrackModifier

        targetSuffix = 'gtrack'

        # Input suite, result containers and progress reporting.
        # NOTE(review): the extra 24 progress steps presumably cover the work
        # reported by RsidMapper below - confirm.
        inputGSuite = getGSuiteFromGalaxyTN(choices.gSuite)
        liftedGSuite = GSuite()
        failedGSuite = GSuite()
        progress = ProgressViewer(
            [('Manipulate tracks', inputGSuite.numTracks() + 24)],
            cls.extraGalaxyFn[cls.HISTORY_PROGRESS_TITLE])
        storageFn = cls.extraGalaxyFn[cls.HISTORY_HIDDEN_TRACK_STORAGE]

        # rsID mapping for the chosen reference genome
        rsidMapping = RsidMapper.createRsidMappingFromStaticFiles(progress, choices.refGenome)

        for inTrack in inputGSuite.allTracks():
            gtrackFileName = cls.getFilenameWithGTrackSuffix(inTrack.path)
            gtrackTitle = getTitleWithSuffixReplaced(inTrack.title, targetSuffix)

            try:
                outTrack = GSuiteTrack(
                    GalaxyGSuiteTrack.generateURI(galaxyFn=storageFn,
                                                  extraFileName=gtrackFileName,
                                                  suffix=targetSuffix),
                    title=gtrackTitle,
                    genome=inTrack.genome,
                    trackType=inTrack.trackType,
                    attributes=inTrack.attributes)

                outPath = outTrack.path
                GSuitePrimaryTrackModifier.liftOverGTrack(inTrack.path, outPath, rsidMapping)
                liftedGSuite.addTrack(outTrack)
            except Exception as e:
                # Any failure moves the original track to the error GSuite
                inTrack.comment = 'An error occurred for the following track: ' + str(e)
                failedGSuite.addTrack(inTrack)

            progress.update()

        # All lifted tracks now refer to the chosen reference genome
        liftedGSuite.setGenomeOfAllTracks(choices.refGenome)

        composeToFile(liftedGSuite, galaxyFn)
        composeToFile(failedGSuite, cls.extraGalaxyFn[cls.HISTORY_ERROR_TITLE])
        writeGSuiteHiddenTrackStorageHtml(storageFn)
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Convert every track of the selected GSuite into a point track in
        GTrack format via CreateLDTrack.parseFileIntoPointTrack.

        The conversion is given the LD dictionary obtained for the chosen
        r-square value and the rsID mapping for the chosen reference genome.
        Tracks converted without raising end up in the output GSuite (composed
        to galaxyFn); tracks that raise get an explanatory comment and end up
        in the error GSuite (composed to the history element named
        cls.HISTORY_ERROR_TITLE).
        '''
        from quick.webtools.clustering.CreateLDTrack import CreateLDTrack
        from quick.webtools.clustering.RsidMapper import RsidMapper

        targetSuffix = 'gtrack'

        inputGSuite = getGSuiteFromGalaxyTN(choices.gSuite)
        convertedGSuite = GSuite()
        failedGSuite = GSuite()

        # NOTE(review): the extra 24 progress steps presumably cover the work
        # reported by RsidMapper below - confirm.
        progress = ProgressViewer(
            [('Manipulate tracks', inputGSuite.numTracks() + 24)],
            cls.extraGalaxyFn[cls.HISTORY_PROGRESS_TITLE])

        # Lookup structures shared by all tracks
        ldLookup = CreateLDTrack.getLDDict(float(choices.rsquare))
        rsidLookup = RsidMapper.createRsidMappingFromStaticFiles(progress, choices.refGenome)
        storageFn = cls.extraGalaxyFn[cls.HISTORY_HIDDEN_TRACK_STORAGE]

        for inTrack in inputGSuite.allTracks():
            gtrackFileName = cls.getFilenameWithGTrackSuffix(inTrack.path)
            gtrackTitle = getTitleWithSuffixReplaced(inTrack.title, targetSuffix)

            try:
                pointTrack = GSuiteTrack(
                    GalaxyGSuiteTrack.generateURI(galaxyFn=storageFn,
                                                  extraFileName=gtrackFileName,
                                                  suffix=targetSuffix),
                    title=gtrackTitle,
                    genome=inTrack.genome,
                    trackType='points',
                    attributes=inTrack.attributes)

                outPath = pointTrack.path
                CreateLDTrack.parseFileIntoPointTrack(inTrack.path, outPath, ldLookup, rsidLookup)
                convertedGSuite.addTrack(pointTrack)
            except Exception as e:
                # Any failure moves the original track to the error GSuite
                inTrack.comment = 'An error occurred for the following track: ' + str(e)
                failedGSuite.addTrack(inTrack)

            progress.update()

        convertedGSuite.setGenomeOfAllTracks(choices.refGenome)

        composeToFile(convertedGSuite, galaxyFn)
        composeToFile(failedGSuite, cls.extraGalaxyFn[cls.HISTORY_ERROR_TITLE])
        writeGSuiteHiddenTrackStorageHtml(storageFn)
Пример #3
0
    def testFullCompose(self):
        '''
        Compose a GSuite holding tracks with ftp, http, https, rsync, hb,
        galaxy and file URIs and compare the composed string with the expected
        GSuite file contents (header lines, column line and one line per
        track).
        '''
        gSuite = GSuite()
        gSuite.addTrack(GSuiteTrack('ftp://server.somewhere.com/path/to/file1.bed',
                                    title='Track',
                                    attributes=OrderedDict([('cell', 'k562'),
                                                            ('antibody', 'cMyb')])))
        gSuite.addTrack(GSuiteTrack('http://server.other.com/path/to/file2.bed',
                                    title='Track2',
                                    attributes=OrderedDict([('cell', 'GM12878'),
                                                            ('antibody', 'cMyc')])))
        # No title given: the expected output below uses the file name instead
        gSuite.addTrack(
            GSuiteTrack('https://server.other.com/path/to/file3.bed',
                        attributes=OrderedDict([('cell', 'GM12878'),
                                                ('antibody', 'cMyb')])))
        gSuite.addTrack(GSuiteTrack('rsync://server.other.com/path/to/file4;wig',
                                    title='Track4',
                                    attributes=OrderedDict([('cell', 'NHFL')])))
        # The title 'Track' is reused below; the expected output shows the
        # duplicates as 'Track (2)' and 'Track (3)'
        gSuite.addTrack(GSuiteTrack('hb:/track/name/hierarchy', title='Track'))
        gSuite.addTrack(GSuiteTrack('galaxy:/ad123dd12fg;btrack?track=track:name',
                                    title='Track',
                                    attributes=OrderedDict([('cell', 'k562'),
                                                            ('antibody', 'cMyb')])))
        gSuite.addTrack(GSuiteTrack('file:/path/to/file.btrack?track=track:name',
                                    title='Track name7',
                                    attributes=OrderedDict([('antibody', 'cMyb'),
                                                            ('extra', 'yes')])))
        gSuite.setGenomeOfAllTracks('hg18')

        output = GSuiteComposer.composeToString(gSuite)

        targetOutput = (
            '##location: multiple\n'
            '##file format: multiple\n'
            '##track type: unknown\n'
            '##genome: hg18\n'
            '###uri\ttitle\tfile_format\tcell\tantibody\textra\n'
            'ftp://server.somewhere.com/path/to/file1.bed\tTrack\tprimary\tk562\tcMyb\t.\n'
            'http://server.other.com/path/to/file2.bed\tTrack2\tprimary\tGM12878\tcMyc\t.\n'
            'https://server.other.com/path/to/file3.bed\tfile3.bed\tprimary\tGM12878\tcMyb\t.\n'
            'rsync://server.other.com/path/to/file4;wig\tTrack4\tprimary\tNHFL\t.\t.\n'
            'hb:/track/name/hierarchy\tTrack (2)\tpreprocessed\t.\t.\t.\n'
            'galaxy:/ad123dd12fg;btrack?track=track%3Aname\tTrack (3)\tpreprocessed\tk562\tcMyb\t.\n'
            'file:/path/to/file.btrack?track=track%3Aname\tTrack name7\tpreprocessed\t.\tcMyb\tyes\n'
        )

        # assertEqual instead of the deprecated assertEquals alias, matching
        # the other GSuite tests
        self.assertEqual(targetOutput, output)
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Called when the execute button is pushed by the web user.

        Runs cls._runOperation on every track of the GSuite selected in
        choices.history and stores each result as a new file in the hidden
        track storage. The file suffix of each result is decided per track by
        cls._getSuffix. Tracks processed without raising are collected in the
        'primary' output GSuite; tracks that raise get a comment holding the
        first line of the exception message and are collected in the
        'nomanipulate' GSuite. Progress is reported to galaxyFn.
        '''

        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import GalaxyGSuiteTrack, GSuiteTrack
        from gold.gsuite.GSuiteComposer import composeToFile
        from gold.gsuite.GSuiteFunctions import getTitleWithSuffixReplaced
        from quick.gsuite.GSuiteHbIntegration import \
            writeGSuiteHiddenTrackStorageHtml
        from quick.extra.ProgressViewer import ProgressViewer
        from quick.util.CommonFunctions import ensurePathExists

        sourceGSuite = getGSuiteFromGalaxyTN(choices.history)
        manipulatedGSuite = GSuite()
        failedGSuite = GSuite()

        progress = ProgressViewer(
            [('Manipulate tracks', sourceGSuite.numTracks())], galaxyFn)

        storageFn = cls.extraGalaxyFn[getGSuiteHistoryOutputName(
            'storage', cls.OUTPUT_DESCRIPTION, choices.history)]

        for srcTrack in sourceGSuite.allTracks():
            targetSuffix = cls._getSuffix(choices, srcTrack)

            outFileName = changeSuffixIfPresent(os.path.basename(srcTrack.path),
                                                oldSuffix=srcTrack.suffix,
                                                newSuffix=targetSuffix)
            outTitle = getTitleWithSuffixReplaced(srcTrack.title, targetSuffix)

            try:
                # Only pass an explicit suffix when the file name does not
                # already end with it
                uriKwArgs = dict(galaxyFn=storageFn, extraFileName=outFileName)
                if not outFileName.endswith('.' + targetSuffix):
                    uriKwArgs['suffix'] = targetSuffix

                outTrack = GSuiteTrack(GalaxyGSuiteTrack.generateURI(**uriKwArgs),
                                       title=outTitle,
                                       genome=srcTrack.genome,
                                       attributes=srcTrack.attributes)

                outPath = outTrack.path
                ensurePathExists(outPath)
                cls._runOperation(choices, srcTrack.path, outPath)

                manipulatedGSuite.addTrack(outTrack)
            except Exception as e:
                # Keep only the first line of the exception message
                firstLine = str(e).partition('\n')[0]
                srcTrack.comment = 'An error occurred for the following track: ' + firstLine
                failedGSuite.addTrack(srcTrack)

            progress.update()

        primaryFn = cls.extraGalaxyFn[getGSuiteHistoryOutputName(
            'primary', cls.OUTPUT_DESCRIPTION, choices.history)]
        composeToFile(manipulatedGSuite, primaryFn)

        errorFn = cls.extraGalaxyFn[getGSuiteHistoryOutputName(
            'nomanipulate', datasetInfo=choices.history)]
        composeToFile(failedGSuite, errorFn)

        writeGSuiteHiddenTrackStorageHtml(storageFn)
Пример #5
0
    def printGSuite(cls, choices, cols, rows, colListString, outFile):
        '''
        Write one GTrack file per disease/trait and compose a GSuite that
        refers to all of them.

        Rows are grouped on the value in the disease column. For each disease
        (in sorted order) a track is created in the hidden track storage and
        filled with one GenomeElement per row, built from the chromosome,
        start and value columns. All remaining columns are attached as extra
        values, with empty cells written as '.'. If cls.DATABASE_GENOME
        differs from cls.OUTPUT_GENOME, positions are lifted over with
        pyliftover.

        Arguments:
            choices:       not used by this method
            cols:          list of column names; must contain the disease,
                           chr, start and val column names defined on cls
            rows:          iterable of rows, each indexable by column position
            colListString: not used by this method
            outFile:       path that the resulting GSuite is composed to
        '''
        from quick.extra.ProgressViewer import ProgressViewer

        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import GSuiteTrack, GalaxyGSuiteTrack
        import gold.gsuite.GSuiteComposer as GSuiteComposer

        from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource
        from gold.origdata.GtrackComposer import ExtendedGtrackComposer
        from gold.origdata.GESourceWrapper import ListGESourceWrapper
        from gold.origdata.GenomeElement import GenomeElement

        from collections import defaultdict

        from unidecode import unidecode
        from pyliftover import LiftOver

        gSuite = GSuite()

        diseaseColIndex = cols.index(cls.DISEASE_COLUMN_NAME)
        chrColIndex = cols.index(cls.CHR_COLUMN_NAME)
        startColIndex = cols.index(cls.START_COLUMN_NAME)
        valColIndex = cols.index(cls.VAL_COLUMN_NAME)

        # Every column that is not one of the four core columns is an extra
        # column. Deriving keys and indexes from the same list keeps them
        # aligned by construction.
        coreIndexes = set([diseaseColIndex, chrColIndex, startColIndex, valColIndex])
        extraIndexes = [i for i in range(len(cols)) if i not in coreIndexes]
        orderedExtraKeys = [cls._fixColNameForGTrack(cols[i]) for i in extraIndexes]

        # Numeric/alternative chromosome names found in the rows
        chromAliases = {'chr23': 'chrX', 'chr24': 'chrY', 'chrMT': 'chrM'}

        diseaseToRowsDict = defaultdict(list)
        for row in rows:
            disease = row[diseaseColIndex]
            if isinstance(disease, unicode):
                disease = unidecode(disease).replace('\x00', '')

            diseaseToRowsDict[disease].append(row)

        progressViewer = ProgressViewer([('Create GWAS tracks for diseases/traits', len(diseaseToRowsDict))],
                                        cls.extraGalaxyFn[cls.HISTORY_PROGRESS_TITLE])

        # The lift-over object does not depend on the disease, so it is
        # created at most once (on first use) instead of once per disease.
        shouldLiftOver = cls.DATABASE_GENOME != cls.OUTPUT_GENOME
        liftOver = None

        for disease in sorted(diseaseToRowsDict.keys()):
            uri = GalaxyGSuiteTrack.generateURI(galaxyFn=cls.extraGalaxyFn[cls.HISTORY_HIDDEN_TRACK_STORAGE],
                                                extraFileName=disease.replace('/', '_') + '.gtrack')
            gSuiteTrack = GSuiteTrack(uri, title=disease, genome=cls.OUTPUT_GENOME)
            gSuite.addTrack(gSuiteTrack)

            if shouldLiftOver and liftOver is None:
                liftOver = LiftOver(cls.DATABASE_GENOME, cls.OUTPUT_GENOME)

            geList = []
            for row in diseaseToRowsDict[disease]:
                extra = {}
                for col, index in zip(orderedExtraKeys, extraIndexes):
                    cell = row[index].strip()
                    if isinstance(cell, unicode):
                        cell = unidecode(cell)

                    extra[col] = cell if cell != '' else '.'

                chrom = 'chr' + row[chrColIndex]
                chrom = chromAliases.get(chrom, chrom)

                start = int(row[startColIndex])
                if shouldLiftOver:
                    newPosList = liftOver.convert_coordinate(chrom, start)
                    if newPosList is None or len(newPosList) != 1:
                        # Fixed: position and chromosome were swapped in the
                        # message arguments.
                        msg = 'SNP with position %s on chromosome %s ' % (start, chrom) + \
                              'could not be lifted over from reference genome ' + \
                              '%s to %s (for disease/trait "%s")' % \
                              (cls.DATABASE_GENOME, cls.OUTPUT_GENOME, disease)
                        print(msg)
                        # NOTE(review): the element is still added below with
                        # its original (not lifted) coordinates - confirm
                        # whether it should be skipped instead.
                    else:
                        chrom, start = newPosList[0][0:2]

                geList.append(GenomeElement(chr=chrom, start=start,
                                            val=row[valColIndex], orderedExtraKeys=orderedExtraKeys,
                                            extra=extra))

            # Compose the elements of this disease into the track's GTrack file
            geSource = GtrackGenomeElementSource(cls.GTRACK_BLUEPRINT_PATH)
            wrappedGeSource = ListGESourceWrapper(geSource, geList)
            composer = ExtendedGtrackComposer(wrappedGeSource)
            composer.composeToFile(gSuiteTrack.path)

            progressViewer.update()

        GSuiteComposer.composeToFile(gSuite, outFile)
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Called when the execute button is pushed by the web user.

        For every track in the selected GSuite, a new track file with suffix
        cls.OUTPUT_TRACKS_SUFFIX is written to the hidden track storage:

        - if choices.withOverlaps equals cls.NO_OVERLAPS, the file is composed
          from the results of the 'TrackIntersectionStat' analysis run on the
          track together with the selected filter track;
        - otherwise the track is extracted over the regions of a
          UserBinSource built from the selected track/history element, with
          allowOverlaps=True.

        Result files whose first data line cannot be parsed are collected in
        a separate GSuite, with the exception message as comment.

        NOTE(review): the lines visible here end after the per-track loop,
        and emptyFn, primaryFn, errorFn and preprocessedFn are looked up but
        not used - the method presumably continues beyond this excerpt;
        confirm against the full file.
        '''

        import gold.gsuite.GSuiteComposer as GSuiteComposer
        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import GSuiteTrack, HbGSuiteTrack
        from gold.origdata.TrackGenomeElementSource import TrackViewListGenomeElementSource
        from gold.origdata.FileFormatComposer import getComposerClsFromFileSuffix
        from quick.multitrack.MultiTrackCommon import getGSuiteFromGalaxyTN
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from quick.application.GalaxyInterface import GalaxyInterface
        from quick.application.UserBinSource import UserBinSource
        from quick.extra.TrackExtractor import TrackExtractor

        genome = choices.genome
        gSuite = getGSuiteFromGalaxyTN(choices.gSuite)

        # Resolve what to intersect with: a track name for the analysis
        # branch, or a UserBinSource for the extraction branch
        if choices.withOverlaps == cls.NO_OVERLAPS:
            if choices.trackSource == cls.FROM_HISTORY_TEXT:
                filterTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, choices.trackHistory)
            else:
                filterTrackName = choices.track.split(':')
        else:
            if choices.trackSource == cls.FROM_HISTORY_TEXT:
                regSpec = ExternalTrackManager.extractFileSuffixFromGalaxyTN(choices.trackHistory)
                binSpec = ExternalTrackManager.extractFnFromGalaxyTN(choices.trackHistory)
            else:
                regSpec = 'track'
                binSpec = choices.track

            userBinSource = UserBinSource(regSpec, binSpec, genome)

        # Paths of the extra history elements of this tool
        desc = cls.OUTPUT_GSUITE_DESCRIPTION
        emptyFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('nointersect', description=desc, datasetInfo=choices.gSuite)]
        primaryFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('primary', description=desc, datasetInfo=choices.gSuite)]
        errorFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('nopreprocessed', description=desc, datasetInfo=choices.gSuite)]
        preprocessedFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('preprocessed', description=desc, datasetInfo=choices.gSuite)]
        hiddenStorageFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('storage', description=desc, datasetInfo=choices.gSuite)]

        analysisDef = '-> TrackIntersectionStat'
#         analysisDef = '-> TrackIntersectionWithValStat'

        # Two progress phases, both initially sized to the number of tracks
        numTracks = gSuite.numTracks()
        progressViewer = ProgressViewer([(cls.PROGRESS_INTERSECT_MSG, numTracks),
                                         (cls.PROGRESS_PREPROCESS_MSG, numTracks)], galaxyFn)
        emptyGSuite = GSuite()
        primaryGSuite = GSuite()

        for track in gSuite.allTracks():
            newSuffix = cls.OUTPUT_TRACKS_SUFFIX
            extraFileName = os.path.sep.join(track.trackName)
            extraFileName = changeSuffixIfPresent(extraFileName, newSuffix=newSuffix)
            title = getTitleWithSuffixReplaced(track.title, newSuffix)

            # Pass an empty suffix when the file name already ends with the
            # output suffix
            primaryTrackUri = GalaxyGSuiteTrack.generateURI(
                galaxyFn=hiddenStorageFn, extraFileName=extraFileName,
                suffix=newSuffix if not extraFileName.endswith(newSuffix) else '')
            primaryTrack = GSuiteTrack(primaryTrackUri, title=title,
                                       genome=track.genome, attributes=track.attributes)

            if choices.withOverlaps == cls.NO_OVERLAPS:
                res = GalaxyInterface.runManual([track.trackName, filterTrackName], analysisDef, '*', '*',
                                                 genome=genome, galaxyFn=galaxyFn, username=username)

                # One 'Result' entry per key of the analysis result, in
                # sorted key order
                trackViewList = [res[key]['Result'] for key in sorted(res.keys())]

                tvGeSource = TrackViewListGenomeElementSource(genome, trackViewList)

                composerCls = getComposerClsFromFileSuffix(cls.OUTPUT_TRACKS_SUFFIX)
                composerCls(tvGeSource).composeToFile(primaryTrack.path)
            else:
                TrackExtractor.extractOneTrackManyRegsToOneFile( \
                    track.trackName, userBinSource, primaryTrack.path, fileFormatName=cls.OUTPUT_TRACKS_SUFFIX, \
                    globalCoords=True, asOriginal=False, allowOverlaps=True)

            # Temporary hack until better solution for empty result tracks have been implemented

            from gold.origdata.GenomeElementSource import GenomeElementSource
            geSource = GenomeElementSource(primaryTrack.path, genome=genome, suffix=cls.OUTPUT_TRACKS_SUFFIX)

            # A result whose first data line cannot be parsed is moved to
            # emptyGSuite and the element count of the preprocess phase is
            # reduced by one
            try:
                geSource.parseFirstDataLine()
                primaryGSuite.addTrack(primaryTrack)
            except Exception, e: # Most likely empty file
                primaryTrack.comment = e.message
                emptyGSuite.addTrack(primaryTrack)
                numTracks -= 1
                progressViewer.updateProgressObjectElementCount(
                    cls.PROGRESS_PREPROCESS_MSG, numTracks)
            #

            progressViewer.update()
Пример #7
0
    def testAddGSuiteTracks(self):
        '''
        Add tracks to a GSuite in two rounds and check the summary properties
        of the suite (location, file format, track type, genome, attributes,
        titles) after each round, as well as the rejection of a duplicate
        title when allowDuplicateTitles=False.
        '''
        suite = GSuite()

        # Setting the genome on an empty suite leaves it as 'unknown'
        suite.setGenomeOfAllTracks('hg18')
        self.assertEqual('unknown', suite.genome)

        # Round 1: four remote tracks, the last two without explicit titles
        remoteTrackSpecs = [
            ('ftp://server.somewhere.com/path/to/file1.bed',
             dict(title='Track1',
                  attributes=OrderedDict([('cell', 'k562'), ('antibody', 'cMyb')]))),
            ('http://server.other.com/path/to/file2.bed',
             dict(title='Track2',
                  attributes=OrderedDict([('cell', 'GM12878'), ('antibody', 'cMyc')]))),
            ('https://server.other.com/path/to/file3.bed',
             dict(attributes=OrderedDict([('cell', 'GM12878'), ('antibody', 'cMyb')]))),
            ('rsync://server.other.com/other/path/to/file3.bed;bed9',
             dict(attributes=OrderedDict([('cell', 'NHFL')]))),
        ]
        for uri, kwArgs in remoteTrackSpecs:
            suite.addTrack(GSuiteTrack(uri, **kwArgs))
        suite.setGenomeOfAllTracks('hg18')

        self.assertEqual('remote', suite.location)
        self.assertEqual('primary', suite.fileFormat)
        self.assertEqual('unknown', suite.trackType)
        self.assertEqual('hg18', suite.genome)
        self.assertEqual(['cell', 'antibody'], suite.attributes)

        self.assertEqual(False, suite.isPreprocessed())
        self.assertEqual(True, suite.hasCustomTitles())

        self.assertEqual(4, suite.numTracks())
        genomesAfterRound1 = [trk.genome for trk in suite.allTracks()]
        self.assertEqual(['hg18'] * 4, genomesAfterRound1)
        self.assertEqual(['Track1', 'Track2', 'file3.bed', 'file3.bed (2)'],
                         suite.allTrackTitles())
        self.assertEqual(['unknown'], suite.allTrackTypes())

        # Round 2: a track with another genome, then two tracks without genome
        suite.addTrack(GSuiteTrack('hb:/track/name/hierarchy',
                                   title='Track1',
                                   genome='hg19',
                                   trackType='segments'))
        self.assertEqual('multiple', suite.genome)

        localTrackSpecs = [
            ('galaxy:/ad123dd12fg;btrack?track=track:name',
             OrderedDict([('cell', 'k562'), ('antibody', 'cMyb')])),
            ('file:/path/to/file.btrack?track=track:name',
             OrderedDict([('antibody', 'cMyb'), ('extra', 'yes')])),
        ]
        for uri, trackAttributes in localTrackSpecs:
            suite.addTrack(GSuiteTrack(uri, title='Track2', attributes=trackAttributes))

        self.assertEqual('multiple', suite.location)
        self.assertEqual('multiple', suite.fileFormat)
        self.assertEqual('unknown', suite.trackType)
        self.assertEqual('unknown', suite.genome)
        self.assertEqual(['cell', 'antibody', 'extra'], suite.attributes)

        self.assertEqual(False, suite.isPreprocessed())
        self.assertEqual(True, suite.hasCustomTitles())

        self.assertEqual(7, suite.numTracks())
        genomesAfterRound2 = [trk.genome for trk in suite.allTracks()]
        self.assertEqual(['hg18'] * 4 + ['hg19'] + ['unknown'] * 2,
                         genomesAfterRound2)
        expectedTitles = ['Track1', 'Track2', 'file3.bed', 'file3.bed (2)',
                          'Track1 (2)', 'Track2 (2)', 'Track2 (3)']
        self.assertEqual(expectedTitles, suite.allTrackTitles())
        self.assertEqual(['segments', 'unknown'], suite.allTrackTypes())

        # A title that is already present is rejected when duplicates are
        # not allowed
        duplicateTitleTrack = GSuiteTrack('https://server.third.com/path/to/file3.bed')
        self.assertRaises(InvalidFormatError,
                          suite.addTrack,
                          duplicateTitleTrack,
                          allowDuplicateTitles=False)
Пример #8
0
    def testSimpleTitleDuplicate(self):
        '''
        Add four untitled tracks that share the file name 'file1.bed' and
        check that the resulting titles are numbered ' (2)', ' (3)', ' (4)'.
        '''
        suite = GSuite()

        uriList = [
            'ftp://server.somewhere.com/path/to/file1.bed',
            'ftp://server.somewhere.com/other/path/to/file1.bed',
            'ftp://server.somewhere.com/third/path/to/file1.bed',
            'ftp://server.somewhere.com/last/path/to/file1.bed',
        ]
        for uri in uriList:
            suite.addTrack(GSuiteTrack(uri))

        expectedTitles = ['file1.bed', 'file1.bed (2)', 'file1.bed (3)', 'file1.bed (4)']
        self.assertEqual(expectedTitles, suite.allTrackTitles())
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Run the Tandem Repeats Finder executable (cls.TRF_PATH) on every track
        of the selected GSuite and report the results.

        For each track the TRF result file is parsed with
        cls.parseTRFResultFile, filtered on the optional min/max consensus
        length and min copy number. Depending on the choices, GSuites of BED
        tracks with the repeat regions and/or the repeat monomers are written
        to extra history elements. Finally two HTML tables are printed: the
        selected parameters, and per-track repeat statistics.
        '''
        import os
        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        # match = int(choices.match)
        # mismatch = int(choices.mismatch)
        # delta = int(choices.delta)
        # pm = int(choices.pm)
        # pi = int(choices.pi)
        # minscore = int(choices.minscore)
        # maxperiod = int(choices.maxperiod)
        # Optional filters: anything that is not a plain digit string
        # disables the filter (None)
        minConsensusLength = int(choices.minconsensus) if choices.minconsensus.isdigit() else None
        maxConsensusLength = int(choices.maxconsensus) if choices.maxconsensus.isdigit() else None
        minCopyNumber = int(choices.mincopynumber) if choices.mincopynumber.isdigit() else None
        # Command line parameters, passed on to TRF as given (strings)
        parameters = [choices.match, choices.mismatch, choices.delta, choices.pm,
                      choices.pi, choices.minscore, choices.maxperiod]
        # Track title -> list of parsed results; only tracks with a non-empty
        # result list are included
        resultsDict = OrderedDict()
        for gsTrack in gsuite.allTracks():
            resFile = GalaxyRunSpecificFile(['trf', gsTrack.title, gsTrack.title + '.tmp'], galaxyFn)
            ensurePathExists(resFile.getDiskPath())
            # TRF is run with this directory as working directory
            trackDirName = os.path.dirname(os.path.realpath(resFile.getDiskPath()))
            # parameters = ["2", "5", "7", "80", "10", "50", "300"] #Madeleine suggestion
            instruction = [cls.TRF_PATH, gsTrack.path] + parameters + ["-d", "-h"]
            pipe = subprocess.Popen(instruction, cwd=trackDirName, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            # stdout/stderr are captured but not used below; the exit status
            # is not checked
            results, errors = pipe.communicate()

            # Expected result file: '<input basename>.<parameters...>.dat' in
            # the working directory (presumably where TRF writes it because
            # of the -d flag - confirm)
            outFileName = ".".join([os.path.basename(gsTrack.path)] + parameters + ["dat"])
            outFilePath = os.path.join(trackDirName, outFileName)

            # print outFilePath

            resultList = cls.parseTRFResultFile(outFilePath, minConsensusLength, maxConsensusLength, minCopyNumber)
            if resultList:
                resultsDict[gsTrack.title] = resultList

        # Optional output: one BED track of repeat regions per input track
        if choices.regionsGSuite:
            repeatRegionsBedTracksGSuite = GSuite()
            for trackName, trfResultList in resultsDict.iteritems():
                trackUri = GalaxyGSuiteTrack.generateURI(galaxyFn=galaxyFn,
                                                         extraFileName=("Repeat_regions_"+trackName),
                                                         suffix='bed')
                gsTrack = GSuiteTrack(trackUri, title=("Repeat regions " + trackName), genome=gsuite.genome)
                ensurePathExists(gsTrack.path)
                with open(gsTrack.path, 'w') as bedFile:
                    header = 'track name="' + trackName + '" description="' + trackName + '" priority=1'
                    bedFile.write(header + os.linesep)
                    for trfResult in trfResultList:
                        for repeatRegion in trfResult._repeatRegionList:
                            # if not repeatRegion.strand:
                            #     DebugUtil.insertBreakPoint()
                            # End position depends on the selected cutoff
                            endPosition = repeatRegion.endPositionFullCopies if choices.rrCutoff == cls.CUTOFF_REPEAT_REGION else repeatRegion.endPosition
                            bedFile.write('\t'.join([repeatRegion.chromosome,
                                                     str(repeatRegion.startPosition),
                                                     str(endPosition),
                                                     repeatRegion.bedName,
                                                     '0',
                                                     str(repeatRegion.strand)]) + os.linesep)
                repeatRegionsBedTracksGSuite.addTrack(gsTrack)

            GSuiteComposer.composeToFile(repeatRegionsBedTracksGSuite, cls.extraGalaxyFn['Repeat regions (bed) GSuite'])

        # Optional output: one BED track of repeat monomers per input track
        if choices.monomersGSuite:
            monomersBedTracksGSuite = GSuite()
            for trackName, trfResultList in resultsDict.iteritems():
                trackUri = GalaxyGSuiteTrack.generateURI(galaxyFn=galaxyFn,
                                                         extraFileName=("Repeat_monomers_"+trackName),
                                                         suffix='bed')
                gsTrack = GSuiteTrack(trackUri, title=("Repeat monomers " + trackName), genome=gsuite.genome)
                ensurePathExists(gsTrack.path)
                with open(gsTrack.path, 'w') as bedFile:
                    header = 'track name="' + trackName + '" description="' + trackName + '" priority=1'
                    bedFile.write(header + os.linesep)
                    for trfResult in trfResultList:
                        for repeatRegion in trfResult._repeatRegionList:
                            # if not repeatRegion.strand:
                            #     DebugUtil.insertBreakPoint()
                            for repeatMonomer in repeatRegion._monomers:
                            # endPosition = repeatRegion.endPositionFullCopies if choices.rrCutoff == cls.CUTOFF_REPEAT_REGION else repeatRegion.endPosition
                                # Chromosome and strand come from the
                                # enclosing region, positions and name from
                                # the monomer
                                bedFile.write('\t'.join([repeatRegion.chromosome,
                                                         str(repeatMonomer.startPosition),
                                                         str(repeatMonomer.endPosition),
                                                         repeatMonomer.bedName,
                                                         '0',
                                                         str(repeatRegion.strand)]) + os.linesep)

                monomersBedTracksGSuite.addTrack(gsTrack)

            GSuiteComposer.composeToFile(monomersBedTracksGSuite, cls.extraGalaxyFn['Repeat monomers (bed) GSuite'])


        ###################
        # Table 1: the parameters the analysis was run with
        analysisParamsTableColumnTitles = ['Parameter', 'Selected value']
        analysisParamsDict = OrderedDict()
        analysisParamsDict['Tandem Repeat Finder tool version'] = cls.TRF_VERSION
        analysisParamsDict.update(
            OrderedDict([
            ('Match', choices.match),
            ('Mismatch', choices.mismatch),
            ('Delta', choices.delta),
            ('Matching probability (Pm)', choices.pm),
            ('Indel probability (Pi)', choices.pi),
            ('Min score', choices.minscore),
            ('Max period', choices.maxperiod),
            ('Min consensus length', choices.minconsensus),
            ('Max consensus length', choices.maxconsensus),
            ('Min copy number', choices.mincopynumber)]
            )
        )
        ###################

        ###################
        # Table 2: per-track counts and mean/min/max of copy numbers and
        # consensus lengths, in the order of countTableColumnTitles
        countTableColumnTitles = ['Name', 'Nr of repeat regions', 'Avg copy number', 'Min copy number',
                                  'Max copy number', 'Avg consensus length', 'Min consensus length',
                                  'Max consensus length']
        countTableDict = OrderedDict()
        from numpy import mean
        for trackName, trfResultList in resultsDict.iteritems():
            countTableDict[trackName] = []
            repeatRegionsNr = sum([x.repeatRegionsCount for x in trfResultList])
            countTableDict[trackName].append(repeatRegionsNr)
            copyNumberList =[]
            for trfRes in trfResultList:
                copyNumberList += trfRes.copyNumberList if \
                    choices.rrCutoff == cls.CUTOFF_REPEAT_REGION else trfRes.realCopyNumberList
            # NOTE(review): min()/max() raise ValueError on an empty list;
            # assumes every parsed result contributes at least one copy
            # number - confirm
            countTableDict[trackName].append(mean(copyNumberList))
            countTableDict[trackName].append(min(copyNumberList))
            countTableDict[trackName].append(max(copyNumberList))

            consensusLengthList = []
            for trfRes in trfResultList:
                consensusLengthList += trfRes.consensusLengths
            countTableDict[trackName].append(mean(consensusLengthList))
            countTableDict[trackName].append(min(consensusLengthList))
            countTableDict[trackName].append(max(consensusLengthList))

        ###################

        # Build the HTML page holding both tables and print it
        core = HtmlCore()
        core.begin()
        core.divBegin()
        # core.paragraph('''This tool reports repeat regions discovered by the TRF tool
        #                 ''')
        core.tableFromDictionary(analysisParamsDict, columnNames=analysisParamsTableColumnTitles, sortable=False)
        core.divEnd()
        core.divBegin()
        core.tableFromDictionary(countTableDict, columnNames=countTableColumnTitles,
                                 tableId='repeatCounts', sortable=True, presorted=0)
        core.divEnd()
        # core.divBegin()
        # for k, v in resultsDict.iteritems():
        #     core.line('track: ' + k)
        #
        #     for val in v:
        #         core.line(str(val))
        # core.divEnd()
        core.end()

        print core
Пример #10
0
def selectRowsFromGSuiteByIndex(gSuite, idxList):
    '''
    Return a new GSuite holding the tracks of gSuite found at the positions
    listed in idxList, in the order given by idxList.
    '''
    allTracks = list(gSuite.allTracks())
    return GSuite(trackList=[allTracks[idx] for idx in idxList])
Пример #11
0
def selectRowsFromGSuiteByTitle(gSuite, titleList):
    '''
    Return a new GSuite holding the tracks of gSuite looked up by the titles
    in titleList, in the order given by titleList.
    '''
    selectedTracks = []
    for trackTitle in titleList:
        selectedTracks.append(gSuite.getTrackFromTitle(trackTitle))
    return GSuite(trackList=selectedTracks)
from collections import OrderedDict
from gold.gsuite.GSuite import GSuite
from gold.gsuite.GSuiteTrack import GSuiteTrack, FtpGSuiteTrack, HbGSuiteTrack
import gold.gsuite.GSuiteComposer as GSuiteComposer
import gold.gsuite.GSuiteParser as GSuiteParser

# Demo script: build a small GSuite in memory, compose it to text, print it
# and parse the text back into a second GSuite.
gSuite = GSuite()

# Track 1: URI generated for an FTP location, with explicit title and the
# attributes 'a' and 'b'
uri1 = FtpGSuiteTrack.generateURI(netloc='server.com', path='file.bed')
gSuite.addTrack(
    GSuiteTrack(uri1,
                title='Track1',
                attributes=OrderedDict([('a', 'yes'), ('b', 'no')])))

# Track 2: URI generated from a track name (list of name parts); no title
# given, attributes 'b' and 'c'
uri2 = HbGSuiteTrack.generateURI(
    trackName=['Genes and gene subsets', 'Genes', 'Refseq'])
gSuite.addTrack(
    GSuiteTrack(uri2, attributes=OrderedDict([('b', 'no'), ('c', 'yes')])))

gSuite.setGenomeOfAllTracks('hg19')

# Serialize the suite to a string in GSuite file format
contents = GSuiteComposer.composeToString(gSuite)

print 'GSuite file contents'
print '--------------------'
print contents

# Parse the composed text back into a new GSuite object
gSuite2 = GSuiteParser.parseFromString(contents)

# NOTE(review): the statements following this heading are not visible in
# this excerpt
print 'Various ways of direct access'
print '-----------------------------'