Пример #1
0
    def createRSquareGraph(cls, ldGraphTrackName, r2_threshold):
        """
        Creates a dictionary of all pairs in a linked point track.
        Variants in LD must have rsquare >= the rsquare threshold passed to the function.

        :param ldGraphTrackName: linked point track, as chosen in tool (choices.ldtrack)
        :param r2_threshold: Lower limit of rsquare value; anything float() accepts
        :return: Dictionary filled through cls.addEdge (documented as sorted
                 key = (rsid1, rsid2), value = rSquare)
        :raises ValueError, TypeError: if r2_threshold cannot be converted to float
        """
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource

        # Convert the threshold once instead of once per edge; this also makes
        # an invalid threshold fail before the track file is read.
        minR2 = float(r2_threshold)

        fileName = ExternalTrackManager.extractFnFromGalaxyTN(ldGraphTrackName)
        suffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(ldGraphTrackName)
        gtSource = GtrackGenomeElementSource(fileName, suffix=suffix)

        r2graph = {}

        for ge in gtSource:
            rsid = ge.id
            weights = ge.weights

            # weights[i] is the rsquare value belonging to edges[i]; indexing
            # (rather than zip) keeps a length mismatch loud.
            for i, ldRsid in enumerate(ge.edges):
                r2 = weights[i]
                if r2 >= minR2:
                    cls.addEdge(r2graph, rsid, ldRsid, r2)

        return r2graph
Пример #2
0
 def getGeSourceList(cls, genome, tracks):
     """
     Build one genome element source and one display name per track.

     Each track is first treated as a Galaxy history track name: the source
     class is chosen from the file suffix ('category.bed', 'gtrack', anything
     else is read as plain BED) and the name comes from
     ExternalTrackManager.extractNameFromHistoryTN. If any of that fails, the
     track is read with FullTrackGenomeElementSource (allowOverlaps=True) and
     named by joining its parts with ':'.

     :param genome: genome passed to FullTrackGenomeElementSource
     :param tracks: iterable of track names (sequences of strings)
     :return: (geSourceList, trackNamesWithoutPath), index-aligned
     """
     from quick.application.ExternalTrackManager import ExternalTrackManager
     from gold.origdata.BedGenomeElementSource import BedGenomeElementSource, BedCategoryGenomeElementSource
     from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource
     from gold.origdata.TrackGenomeElementSource import FullTrackGenomeElementSource
     geSourceList = []
     trackNamesWithoutPath = []
     for track in tracks:
         # Resolve both values before appending anything, so a failure halfway
         # through cannot leave the two lists with different lengths.
         try:
             fileType = ExternalTrackManager.extractFileSuffixFromGalaxyTN(
                 track)
             fn = ExternalTrackManager.extractFnFromGalaxyTN(track)
             if fileType == 'category.bed':
                 geSource = BedCategoryGenomeElementSource(fn)
             elif fileType == 'gtrack':
                 geSource = GtrackGenomeElementSource(fn)
             else:
                 geSource = BedGenomeElementSource(fn)
             trackName = ExternalTrackManager.extractNameFromHistoryTN(track)
         except Exception:  # it is not a history, must be in HB track repository
             # 'except Exception' lets SystemExit/KeyboardInterrupt propagate.
             geSource = FullTrackGenomeElementSource(genome,
                                                     track,
                                                     allowOverlaps=True)
             trackName = ':'.join(track)
         geSourceList.append(geSource)
         trackNamesWithoutPath.append(trackName)
     return geSourceList, trackNamesWithoutPath
    def execute(cls, choices, galaxyFn=None, username=''):
        """
        Compose a GSuite from the selected history elements and write it to
        galaxyFn.

        Every entry of choices.history whose value is not None becomes one
        GSuiteTrack, titled with the history element name. The genome is only
        attached when choices.selectGenome == 'Yes'.
        """
        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import GalaxyGSuiteTrack, GSuiteTrack
        import gold.gsuite.GSuiteComposer as GSuiteComposer
        from quick.application.ExternalTrackManager import ExternalTrackManager

        if choices.selectGenome == 'Yes':
            genome = choices.genome
        else:
            genome = None

        gSuite = GSuite()

        for histGalaxyId, histChoice in choices.history.iteritems():
            if histChoice is None:
                # Unselected history element.
                continue

            galaxyTrackName = histChoice.split(':')
            histGalaxyFn = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTrackName)
            histName = ExternalTrackManager.extractNameFromHistoryTN(galaxyTrackName)
            histSuffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(galaxyTrackName)

            trackUri = GalaxyGSuiteTrack.generateURI(galaxyFn=histGalaxyFn,
                                                     suffix=histSuffix)
            gSuite.addTrack(GSuiteTrack(trackUri, title=histName, genome=genome))

        GSuiteComposer.composeToFile(gSuite, galaxyFn)
 def getOutputFormat(choices):
     '''
     Return the output format of the tool: the file suffix of the history
     element selected in choices[0].

     Returns None when choices[0] is empty or unset.
     '''
     historyChoice = choices[0]
     if not historyChoice:
         return None
     return ExternalTrackManager.extractFileSuffixFromGalaxyTN(
         historyChoice.split(':'))
 def _getGESource(choices):
     """
     Create a GenomeElementSource (warnings off) for the history element in
     choices.history. The genome is passed on only when
     choices.selectGenome == 'Yes'; otherwise None is used.
     """
     if choices.selectGenome == 'Yes':
         genome = choices.genome
     else:
         genome = None

     historyTN = choices.history.split(':')
     fileSuffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(historyTN)
     fileName = ExternalTrackManager.extractFnFromGalaxyTN(historyTN)

     return GenomeElementSource(fileName, genome=genome, printWarnings=False,
                                suffix=fileSuffix)
Пример #6
0
    def validateAndReturnErrors(cls, choices):
        '''
        Should validate the selected input parameters. If the parameters are not
        valid, an error text explaining the problem should be returned. The GUI
        then shows this text to the user (if not empty) and greys out the
        execute button (even if the text is empty). If all parameters are valid,
        the method should return None, which enables the execute button.

        Checks, in order: the 'snp' history track, that the first SNP element
        exposes the required columns, the GSuite file, the GSuite requirements
        and the genome. The first error text found is returned.
        '''

        errorString = cls._checkHistoryTrack(choices, 'snp', choices.genome)
        if errorString:
            return errorString

        from quick.application.ExternalTrackManager import ExternalTrackManager
        fileName = choices.snp
        if fileName is not None and fileName != "":
            fName = ExternalTrackManager.extractFnFromGalaxyTN(fileName)
            suffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(fileName)
            from gold.origdata.GenomeElementSource import GenomeElementSource
            geSource = GenomeElementSource(fName, suffix=suffix)

            # Hacky way to check validity: read every column named in the
            # error message from the first element only, and treat any error
            # as "column missing".
            # NOTE(review): assumes a missing column raises on attribute
            # access; a column that is present but None is not detected.
            try:
                for ge in geSource:
                    _ = (ge.chr, ge.start, ge.end,
                         ge.mutated_from_allele, ge.mutated_to_allele)
                    break
            except Exception:
                return "Invalid SNP data file. The SNP data file should as a minimum contain the following columns:" + \
                        " seqid, start, end, mutated_from_allele, mutated_to_allele"

        errorString = cls._checkGSuiteFile(choices.gsuite)
        if errorString:
            return errorString

        gSuite = getGSuiteFromGalaxyTN(choices.gsuite)

        errorString = cls._checkGSuiteRequirements(
            gSuite,
            allowedLocations=cls.GSUITE_ALLOWED_LOCATIONS,
            allowedFileFormats=cls.GSUITE_ALLOWED_FILE_TYPES,
            allowedTrackTypes=cls.GSUITE_ALLOWED_TRACK_TYPES)

        if errorString:
            return errorString

        errorString = cls._validateGenome(choices.genome)
        if errorString:
            return errorString
Пример #7
0
    def _validateFirstLine(galaxyTN, genome=None, fileStr='file'):
        """
        Parse the first data line of the file behind galaxyTN.

        Returns None when parsing succeeds; otherwise returns
        '<FileStr> invalid: <exception text>'. Any exception raised inside the
        try block, including one from the imports, is reported this way.
        """
        try:
            from quick.application.ExternalTrackManager import ExternalTrackManager
            from gold.origdata.GenomeElementSource import GenomeElementSource

            fileSuffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(galaxyTN)
            fileName = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTN)

            geSource = GenomeElementSource(fileName, genome, suffix=fileSuffix)
            geSource.parseFirstDataLine()
        except Exception as e:
            return fileStr.capitalize() + ' invalid: ' + str(e)
        return None
 def validateAndReturnErrors(choices):
     """
     Validate the GUI choices; return an error text, or None if all is well.

     Checks in order: a genome build is selected (when one is required), the
     history track passes GeneralGuiTool._checkHistoryTrack, and a conversion
     is available.
     """
     if choices.selectGenome == 'Yes':
         genome = choices.genome
     else:
         genome = None

     if genome == '':
         return 'Please select a genome build.'

     error = GeneralGuiTool._checkHistoryTrack(choices, 'history', GenomeElementSource, genome)
     if error:
         return error

     if choices.conversion is not None:
         return None

     suffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(choices.history.split(':'))
     return 'No conversions available for the selected file. Please make ' \
            'sure that the file type is correct. Current file type: %s' % suffix
 def execute(choices, galaxyFn=None, username=''):
     '''
     Run standardizeGtrackFileAndWriteToFile on the history element selected
     in choices[2], passing galaxyFn as the second argument.

     choices[0] == 'Yes' means the genome in choices[1] is used; otherwise the
     genome is None. Any exception from the call is printed to stderr instead
     of being propagated.
     '''
     import sys

     if choices[0] == 'Yes':
         genome = choices[1]
     else:
         genome = None

     galaxyTN = choices[2].split(':')
     suffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(galaxyTN)
     inFn = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTN)

     try:
         standardizeGtrackFileAndWriteToFile(inFn, galaxyFn, genome, suffix=suffix)
     except Exception as e:
         sys.stderr.write('%s\n' % (e,))
    def validateAndReturnErrors(choices):
        """
        Return an error text for invalid GUI choices, or None when valid.

        Fails if a genome is required but empty, if
        GeneralGuiTool._checkHistoryTrack reports a problem with the history
        track, or if no conversion is available for the selected file.
        """
        genome = None
        if choices.selectGenome == 'Yes':
            genome = choices.genome

        if genome == '':
            return 'Please select a genome build.'

        error = GeneralGuiTool._checkHistoryTrack(choices, 'history', genome)
        if error:
            return error

        if choices.conversion is not None:
            return None

        historyTN = choices.history.split(':')
        suffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(historyTN)
        return 'No conversions available for the selected file. Please make ' \
               'sure that the file type is correct. Current file type: %s' % suffix
 def execute(cls, choices, galaxyFn=None, username=''):
     '''
     Standardize the file behind the history element in choices[2] by calling
     standardizeGtrackFileAndWriteToFile with galaxyFn as the second argument.

     The genome is choices[1] when choices[0] == 'Yes', else None. Errors
     raised by the call are written to stderr and not re-raised.
     '''
     import sys

     genome = None
     if choices[0] == 'Yes':
         genome = choices[1]

     historyTN = choices[2].split(':')
     suffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(historyTN)
     inFn = ExternalTrackManager.extractFnFromGalaxyTN(historyTN)

     try:
         standardizeGtrackFileAndWriteToFile(inFn, galaxyFn, genome, suffix=suffix)
     except Exception as e:
         sys.stderr.write('%s\n' % (e,))
Пример #12
0
 def getRegsAndBinsSpec(choices):
     '''
     Returns the regSpec and binSpec for the choices made on the gui.

     - 'Custom specification': (choices.CustomRegion, choices.BinSize)
     - a predefined region name: (its internal key, choices.Bins)
     - anything else: choices.HistoryBins is read as a history element and
       (file suffix, file name) is returned
     '''
     compareIn = choices.CompareIn
     if compareIn == 'Custom specification':
         return choices.CustomRegion, choices.BinSize

     regsMapper = {'Chromosome arms':'__chrArms__','Chromosomes':'__chrs__','Cytobands':'__chrBands__','Genes(Ensembl)':'__genes__','ENCODE Pilot regions':'__encode__'}
     predefinedRegSpec = regsMapper.get(compareIn)
     if predefinedRegSpec:
         return predefinedRegSpec, choices.Bins

     histItem = choices.HistoryBins.split(':')
     binSpec = ExternalTrackManager.extractFnFromGalaxyTN(histItem)
     regSpec = ExternalTrackManager.extractFileSuffixFromGalaxyTN(histItem)
     return regSpec, binSpec
Пример #13
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Expand the segments of the history element in choices.history by
        calling GalaxyInterface.expandBedSegments.

        The upstream/downstream sizes are parsed with parseShortenedSizeSpec,
        the track type comes from cls._TRACK_TYPE_CONVERSION_OPTIONS, and
        segments crossing chromosome borders are removed when
        choices.chrBorderHandling == 'Removing'. galaxyFn is passed as the
        second argument (presumably the output file - confirm against
        expandBedSegments).
        '''

        galaxyTn = choices.history.split(':')
        inFn = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTn)
        suffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(galaxyTn)

        treatTrackAs = cls._TRACK_TYPE_CONVERSION_OPTIONS[choices.conversion]

        genome = choices.genome
        upstreamSize = parseShortenedSizeSpec(choices.upstream)
        downstreamSize = parseShortenedSizeSpec(choices.downstream)
        removeCrossing = (choices.chrBorderHandling == 'Removing')

        GalaxyInterface.expandBedSegments(
            inFn, galaxyFn, genome, upstreamSize, downstreamSize, treatTrackAs,
            removeChrBorderCrossing=removeCrossing, suffix=suffix)
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Write to galaxyFn the bins in which the ProportionCountStat result for
        the selected track is at least the requested proportion.

        choices[0]: genome
        choices[2]: colon-separated track name
        choices[3]: 'from history' if choices[4] is a history element
        choices[4]: bin specification (bin file, or history element)
        choices[5]: threshold; values above 1.0 are read as percentages

        Output lines are tab-separated: chr, start, end.
        '''

        analysisDef = 'dummy -> ProportionCountStat'
        genome = choices[0]
        tn1 = choices[2].split(':')
        binSpec = choices[4]
        regSpec = 'track'
        if choices[3] == 'from history':
            histItem = choices[4].split(':')
            regSpec = ExternalTrackManager.extractFileSuffixFromGalaxyTN(histItem)
            binSpec = ExternalTrackManager.extractFnFromGalaxyTN(histItem)

        # Count newlines while streaming, and close the file when done.
        with open(binSpec) as binFile:
            numBins = sum(line.count('\n') for line in binFile)
        if numBins > 330000:
            # Raise the global user-bin limit so that all bins are accepted.
            gold.application.StatRunner.MAX_NUM_USER_BINS = numBins

        percent = float(choices[5])
        if percent > 1.0:
            percent = percent / 100.0

        GalaxyInterface.ALLOW_OVERLAPPING_USER_BINS = True
        resultDict = GalaxyInterface.runManual([tn1],
                                               analysisDef,
                                               regSpec,
                                               binSpec,
                                               genome,
                                               galaxyFn,
                                               printResults=False,
                                               printProgress=True)
        overlapRegions = [
            region for region, res in resultDict.items()
            if res['Result'] >= percent
        ]
        with open(galaxyFn, 'w') as utfil:
            for region in overlapRegions:
                utfil.write('\t'.join(
                    [region.chr, str(region.start), str(region.end)]) + '\n')
Пример #15
0
def getGeSource(track, genome=None):
    """
    Return a genome element source for the given track.

    The track is first treated as a Galaxy history track name and the source
    class is chosen from its file suffix ('category.bed', 'gtrack', anything
    else is read as plain BED). If that fails, a FullTrackGenomeElementSource
    (allowOverlaps=False) is returned instead.

    :param track: track name, as a list of parts or a ':'-joined string
    :param genome: genome passed to FullTrackGenomeElementSource
    """
    from quick.application.ExternalTrackManager import ExternalTrackManager
    from gold.origdata.BedGenomeElementSource import BedGenomeElementSource, BedCategoryGenomeElementSource
    from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource
    from gold.origdata.TrackGenomeElementSource import FullTrackGenomeElementSource

    if isinstance(track, basestring):
        track = track.split(':')

    try:
        fileType = ExternalTrackManager.extractFileSuffixFromGalaxyTN(track)
        fn = ExternalTrackManager.extractFnFromGalaxyTN(track)
        if fileType == 'category.bed':
            return BedCategoryGenomeElementSource(fn)
        elif fileType == 'gtrack':
            return GtrackGenomeElementSource(fn)
        else:
            return BedGenomeElementSource(fn)
    except Exception:
        # Presumably the track is not a history element. 'except Exception'
        # (rather than a bare except) lets SystemExit/KeyboardInterrupt through.
        return FullTrackGenomeElementSource(genome, track, allowOverlaps=False)
    def execute(cls, choices, galaxyFn=None, username=''):
        """
        Prepare the plot parameters for the history element in choices[0].

        A 'bedgraph' file is plotted as a 'line' track, anything else as a
        'highlight' track. For line tracks the value range is read from the
        last column of every line starting with 'c' and stored under 'min'
        and 'max'. If the values cannot be read, or there are none, the error
        is printed and the range is left out.
        """
        histItem = choices[0].split(':')
        filSuffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(
            histItem)
        histFile = ExternalTrackManager.extractFnFromGalaxyTN(histItem)
        galaxyOutputFile = GalaxyRunSpecificFile(['circos.png'], galaxyFn)

        outputFn = galaxyOutputFile.getDiskPath(True)
        trackType = 'line' if filSuffix == 'bedgraph' else 'highlight'
        paramDict = {histFile: {'type': trackType, 'r0': '0.90r', 'r1': '1.0r'}}
        if trackType == 'line':
            vals = []
            try:
                with open(histFile, 'r') as inFile:
                    for line in inFile:
                        stripped = line.strip()
                        # startswith() is safe on blank lines, unlike [0].
                        if stripped.startswith('c'):
                            vals.append(float(stripped.split()[-1]))
            except Exception as e:
                print(e)
                # Do not derive a range from a partially read file.
                vals = []
            if vals:
                paramDict[histFile]['max'] = max(vals)
                paramDict[histFile]['min'] = min(vals)
Пример #17
0
 def _checkHistoryTrack(prevChoices, historyChoiceIndex, geSourceCls, genome=None, filetype='', validateFirstLine=True):
     fileStr = filetype + ' file' if filetype else 'file'
     
     if type(historyChoiceIndex) == int:
         historyTrackName = prevChoices[historyChoiceIndex]
     else:
         historyTrackName = getattr(prevChoices, historyChoiceIndex)
     
     if historyTrackName is None:
         return 'Please select a ' + fileStr + ' from history.'
     
     if validateFirstLine:
         from quick.application.ExternalTrackManager import ExternalTrackManager
         galaxyTN = historyTrackName.split(':')
         suffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(galaxyTN)
         fn = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTN)
         
         try:
             geSourceCls(fn , genome, suffix=suffix).parseFirstDataLine()
 
         except Exception, e:
             return fileStr.capitalize() + ' invalid: ' + str(e)
Пример #18
0
    def execute(choices, galaxyFn=None, username=''):
        '''
        Build a GSuite with one track per selected history element and write
        it to galaxyFn.

        A history element counts as selected when its value in choices.history
        is not None. The track title is the history element name; the genome
        is set only when choices.selectGenome == 'Yes'.
        '''

        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import GalaxyGSuiteTrack, GSuiteTrack
        import gold.gsuite.GSuiteComposer as GSuiteComposer
        from quick.application.ExternalTrackManager import ExternalTrackManager

        genome = None
        if choices.selectGenome == 'Yes':
            genome = choices.genome

        gSuite = GSuite()

        for histGalaxyId, selection in choices.history.iteritems():
            if selection is not None:
                galaxyTN = selection.split(':')

                fn = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTN)
                title = ExternalTrackManager.extractNameFromHistoryTN(galaxyTN)
                suffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(galaxyTN)

                uri = GalaxyGSuiteTrack.generateURI(galaxyFn=fn, suffix=suffix)
                gSuite.addTrack(GSuiteTrack(uri, title=title, genome=genome))

        GSuiteComposer.composeToFile(gSuite, galaxyFn)
Пример #19
0
    def createPositionDict(cls, ldGraphTrackName):
        """
        Creates a position dictionary from a linked point track, for empiric
        exploration of positions based on LD correlation (rsquare values).

        :param ldGraphTrackName: linked point track, as chosen in tool (choices.ldtrack)
        :return: Dictionary filled through cls.addPosition (documented as
                 key = rsid, value = position)
        """
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource

        gtSource = GtrackGenomeElementSource(
            ExternalTrackManager.extractFnFromGalaxyTN(ldGraphTrackName),
            suffix=ExternalTrackManager.extractFileSuffixFromGalaxyTN(ldGraphTrackName))

        positionDict = {}
        for ge in gtSource:
            # Node id and its start coordinate.
            cls.addPosition(positionDict, ge.id, ge.start)

        return positionDict
    def execute(cls, choices, galaxyFn=None, username=''):
        """
        Rewrite the GSuite in choices.gsuite so that every track is referenced
        through a FileGSuiteTrack URI, and write the result to galaxyFn.

        If choices.genome differs from the genome of the input GSuite, it is
        first set on all input tracks. Title, track type, genome and
        attributes are copied from each input track.
        """
        inGSuite = getGSuiteFromGalaxyTN(choices.gsuite)

        if choices.genome != inGSuite.genome:
            inGSuite.setGenomeOfAllTracks(choices.genome)

        registerGSuiteTrackClass(FileGSuiteTrack)

        outGSuite = GSuite()

        for inTrack in inGSuite.allTracks():
            galaxyTN = ETM.createGalaxyTnFromExternalTn(inTrack.trackName)
            filePath = ETM.extractFnFromGalaxyTN(galaxyTN)
            fileSuffix = ETM.extractFileSuffixFromGalaxyTN(galaxyTN)

            fileUri = FileGSuiteTrack.generateURI(path=filePath, suffix=fileSuffix)
            outGSuite.addTrack(
                GSuiteTrack(fileUri,
                            title=inTrack.title,
                            trackType=inTrack.trackType,
                            genome=inTrack.genome,
                            attributes=inTrack.attributes))

        GSuiteComposer.composeToFile(outGSuite, galaxyFn)
    def _getSnpData(self, fileName):
        """
        Read the SNP track behind the Galaxy track name fileName and append
        one SNP object per element to the module-level `snps` container,
        indexed by the chromosome number from chrToNum.
        """
        global snps

        fName = ExternalTrackManager.extractFnFromGalaxyTN(fileName)
        suffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(fileName)
        from gold.origdata.GenomeElementSource import GenomeElementSource
        geSource = GenomeElementSource(fName, suffix=suffix)

        for ge in geSource:
            chromosome = chrToNum(ge.chr)
            chromosomeSnps = snps[chromosome]
            chromosomeSnps.append(
                SNP(chromosome, int(ge.start), ge.mutated_from_allele,
                    ge.mutated_to_allele))
Пример #22
0
    def validateUserBins(choices):
        '''
        Validate the region/bin choices and translate them to a specification.

        - 'Custom specification': choices.BinSize must consist of digits
          optionally followed by a single 'm' or 'k'; returns
          (choices.CustomRegion, choices.BinSize)
        - a predefined region name: returns (its internal key, choices.Bins)
        - anything else: choices.HistoryBins is read as a history element and
          (file suffix, file name) is returned

        Returns the (regSpec, binSpec) tuple on success, or an error string if
        the custom bin size is malformed or empty.
        '''
        regsMapper = {'Chromosome arms':'__chrArms__','Chromosomes':'__chrs__','Cytobands':'__chrBands__','Genes(Ensembl)':'__genes__','ENCODE Pilot regions':'__encode__'}
        if choices.CompareIn == 'Custom specification':

            regSpec = choices.CustomRegion
            binSpec = choices.BinSize
            # re.match returns None when the value does not even start with a
            # digit (or is empty); that must give the error text, not a crash.
            match = re.match('[0-9]+[mk]?', binSpec) if binSpec else None
            if match is None or match.end() != len(binSpec):
                return 'Invalid Syntax for Bin size(only numbers and the characters "mk" allowed)'

        elif regsMapper.get(choices.CompareIn):
            regSpec = regsMapper[choices.CompareIn]
            binSpec = choices.Bins
        else:
            histItem = choices.HistoryBins.split(':')
            binSpec = ExternalTrackManager.extractFnFromGalaxyTN(histItem)
            regSpec = ExternalTrackManager.extractFileSuffixFromGalaxyTN(histItem)

        return regSpec, binSpec
 def _getGESource(choices):
     """
     Return a GenomeElementSource, with warnings disabled, for the history
     element selected in choices.history. choices.genome is used only when
     choices.selectGenome == 'Yes'.
     """
     genome = None
     if choices.selectGenome == 'Yes':
         genome = choices.genome

     galaxyTN = choices.history.split(':')
     sourceKwArgs = {
         'genome': genome,
         'printWarnings': False,
         'suffix': ExternalTrackManager.extractFileSuffixFromGalaxyTN(galaxyTN),
     }
     fn = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTN)
     return GenomeElementSource(fn, **sourceKwArgs)
Пример #24
0
    def execute(cls, choices, galaxyFn=None, username=''):
        """
        Write to galaxyFn the regions covered by exactly the requested
        combination of tracks.

        choices[0]:  genome
        choices[1]:  dict whose non-empty values are URL-quoted, ':'-separated
                     track names
        choices[2]:  comma-separated ints, one per track; a non-zero value
                     means the track must cover the region, zero means it
                     must not
        choices[3:]: further ':'-separated track names (placeholder values
                     are skipped)

        Each track is given its own prime from primeList (so at most
        len(primeList) tracks are supported). While sweeping the sorted
        start/end positions on a chromosome, currentState is the product of
        the primes of the tracks currently covering the position. A stretch
        between two consecutive positions is written as a tab-separated
        chrom, start, end line when abs(currentState) equals the product of
        the primes of the requested tracks.
        """

        from quick.application.ExternalTrackManager import ExternalTrackManager
        from collections import defaultdict
        from gold.origdata.BedGenomeElementSource import BedGenomeElementSource, BedCategoryGenomeElementSource
        from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource
        from gold.origdata.TrackGenomeElementSource import FullTrackGenomeElementSource
        from urllib import unquote
        print(choices)

        genome = choices[0]
        geSourceList, labelNames = [], []
        selectedHists = [
            unquote(val).split(':') for _histId, val in choices[1].iteritems()
            if val
        ]
        inorout = [int(x) for x in choices[2].split(',')]
        selectedHists += [
            v.split(':') for v in choices[3:]
            if v not in ['-----  Select  -----', 'no', 'yes', None, '']
        ]
        for track in selectedHists:
            # Resolve source and label before appending, so a failure halfway
            # through cannot add two sources for one track.
            try:
                fileType = ExternalTrackManager.extractFileSuffixFromGalaxyTN(
                    track)
                fn = ExternalTrackManager.extractFnFromGalaxyTN(track)
                if fileType == 'category.bed':
                    geSource = BedCategoryGenomeElementSource(fn)
                elif fileType == 'gtrack':
                    geSource = GtrackGenomeElementSource(fn)
                else:
                    geSource = BedGenomeElementSource(fn)
                label = ExternalTrackManager.extractNameFromHistoryTN(track)
            except Exception:
                # Presumably not a history element. 'except Exception' lets
                # SystemExit/KeyboardInterrupt propagate.
                geSource = FullTrackGenomeElementSource(genome,
                                                        track,
                                                        allowOverlaps=False)
                label = ':'.join(track)
            geSourceList.append(geSource)
            labelNames.append(label)

        primeList = [
            2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59
        ]
        # Tallied per state during the sweep; not read again in this method.
        resultCounter = defaultdict(int)
        posDict = defaultdict(list)
        catDict = defaultdict(list)

        # Per chromosome, collect every start/end position together with a
        # marker: +prime for a start, -prime for an end.
        for index, geSource in enumerate(geSourceList):
            primeNum = primeList[index]
            for ge in geSource:
                posDict[ge.chr] += [ge.start, ge.end]
                catDict[ge.chr] += [primeNum, -primeNum]

        # Product of the primes of all tracks that must cover the region.
        selectedState = 1
        for n in range(len(geSourceList)):
            if inorout[n]:
                selectedState = selectedState * primeList[n]

        # 'with' closes the output file even if the sweep raises.
        with open(galaxyFn, 'w') as utfil:
            for chrom in posDict:
                posList = posDict[chrom]
                catList = catDict[chrom]
                indxSortedList = sorted(range(len(posList)),
                                        key=posList.__getitem__)
                catCoverageDepth = defaultdict(int)

                currentState = 1
                currentPos = 0

                for indx in indxSortedList:
                    pos = posList[indx]
                    primeVal = catList[indx]
                    if currentPos != pos:
                        if abs(currentState) == selectedState:
                            utfil.write('%s\t%i\t%i\n' % (chrom, currentPos, pos))
                        resultCounter[abs(currentState)] += pos - currentPos
                        currentPos = pos

                    if primeVal < 0:
                        catCoverageDepth[abs(primeVal)] -= 1
                        if catCoverageDepth[abs(primeVal)] == 0:
                            # The prime was multiplied in when the depth went
                            # 0 -> 1, so this division is exact. Dividing by
                            # the negative marker flips the sign, hence the
                            # abs() above.
                            currentState //= primeVal
                    else:
                        catCoverageDepth[primeVal] += 1
                        if catCoverageDepth[primeVal] == 1:
                            currentState *= primeVal
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results page
        in Galaxy history. If getOutputFormat is anything else than HTML, the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.
        '''

        import gold.gsuite.GSuiteComposer as GSuiteComposer
        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import GSuiteTrack, HbGSuiteTrack
        from gold.origdata.TrackGenomeElementSource import TrackViewListGenomeElementSource
        from gold.origdata.FileFormatComposer import getComposerClsFromFileSuffix
        from quick.multitrack.MultiTrackCommon import getGSuiteFromGalaxyTN
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from quick.application.GalaxyInterface import GalaxyInterface
        from quick.application.UserBinSource import UserBinSource
        from quick.extra.TrackExtractor import TrackExtractor

        genome = choices.genome
        gSuite = getGSuiteFromGalaxyTN(choices.gSuite)

        if choices.withOverlaps == cls.NO_OVERLAPS:
            if choices.trackSource == cls.FROM_HISTORY_TEXT:
                filterTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, choices.trackHistory)
            else:
                filterTrackName = choices.track.split(':')
        else:
            if choices.trackSource == cls.FROM_HISTORY_TEXT:
                regSpec = ExternalTrackManager.extractFileSuffixFromGalaxyTN(choices.trackHistory)
                binSpec = ExternalTrackManager.extractFnFromGalaxyTN(choices.trackHistory)
            else:
                regSpec = 'track'
                binSpec = choices.track

            userBinSource = UserBinSource(regSpec, binSpec, genome)

        desc = cls.OUTPUT_GSUITE_DESCRIPTION
        emptyFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('nointersect', description=desc, datasetInfo=choices.gSuite)]
        primaryFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('primary', description=desc, datasetInfo=choices.gSuite)]
        errorFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('nopreprocessed', description=desc, datasetInfo=choices.gSuite)]
        preprocessedFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('preprocessed', description=desc, datasetInfo=choices.gSuite)]
        hiddenStorageFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('storage', description=desc, datasetInfo=choices.gSuite)]

        analysisDef = '-> TrackIntersectionStat'
#         analysisDef = '-> TrackIntersectionWithValStat'

        numTracks = gSuite.numTracks()
        progressViewer = ProgressViewer([(cls.PROGRESS_INTERSECT_MSG, numTracks),
                                         (cls.PROGRESS_PREPROCESS_MSG, numTracks)], galaxyFn)
        emptyGSuite = GSuite()
        primaryGSuite = GSuite()

        for track in gSuite.allTracks():
            newSuffix = cls.OUTPUT_TRACKS_SUFFIX
            extraFileName = os.path.sep.join(track.trackName)
            extraFileName = changeSuffixIfPresent(extraFileName, newSuffix=newSuffix)
            title = getTitleWithSuffixReplaced(track.title, newSuffix)

            primaryTrackUri = GalaxyGSuiteTrack.generateURI(
                galaxyFn=hiddenStorageFn, extraFileName=extraFileName,
                suffix=newSuffix if not extraFileName.endswith(newSuffix) else '')
            primaryTrack = GSuiteTrack(primaryTrackUri, title=title,
                                       genome=track.genome, attributes=track.attributes)

            if choices.withOverlaps == cls.NO_OVERLAPS:
                res = GalaxyInterface.runManual([track.trackName, filterTrackName], analysisDef, '*', '*',
                                                 genome=genome, galaxyFn=galaxyFn, username=username)

                trackViewList = [res[key]['Result'] for key in sorted(res.keys())]

                tvGeSource = TrackViewListGenomeElementSource(genome, trackViewList)

                composerCls = getComposerClsFromFileSuffix(cls.OUTPUT_TRACKS_SUFFIX)
                composerCls(tvGeSource).composeToFile(primaryTrack.path)
            else:
                TrackExtractor.extractOneTrackManyRegsToOneFile( \
                    track.trackName, userBinSource, primaryTrack.path, fileFormatName=cls.OUTPUT_TRACKS_SUFFIX, \
                    globalCoords=True, asOriginal=False, allowOverlaps=True)

            # Temporary hack until better solution for empty result tracks have been implemented

            from gold.origdata.GenomeElementSource import GenomeElementSource
            geSource = GenomeElementSource(primaryTrack.path, genome=genome, suffix=cls.OUTPUT_TRACKS_SUFFIX)

            try:
                geSource.parseFirstDataLine()
                primaryGSuite.addTrack(primaryTrack)
            except Exception, e: # Most likely empty file
                primaryTrack.comment = e.message
                emptyGSuite.addTrack(primaryTrack)
                numTracks -= 1
                progressViewer.updateProgressObjectElementCount(
                    cls.PROGRESS_PREPROCESS_MSG, numTracks)
            #

            progressViewer.update()
Пример #26
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Compute, for every reference track of the selected GSuite, the
        'ProportionCountStat' result in each local region, and present the
        resulting track-by-region matrix.

        Output produced:
          * a tabular history element titled 'result' holding the matrix
            (one row per reference track, one column per region);
          * static heat map files 'heatmap.png' and 'heatmap.pdf', created
            through cls.generateStaticRPlot;
          * an HTML page printed to standard out. The page embeds the static
            image when the matrix has 10000 cells or more; otherwise it embeds
            an interactive heat map (values min-max normalised when they are
            not all equal) linked to a chart of region lengths and gaps.

        :param choices: selections made by the web-user; the attributes read
            here are gsuite, targetTrack, isBasic, selectColumns and
            colorMapSelectList (plus whatever UserBinMixin reads).
        :param galaxyFn: path of the Galaxy output dataset, used to derive the
            locations of the run-specific files.
        :param username: passed on to GalaxyInterface.runManual.
        '''
        from quick.multitrack.MultiTrackCommon import getGSuiteDataFromGalaxyTN
        trackTitles, refTrackNameList, genome = getGSuiteDataFromGalaxyTN(choices.gsuite)

        queryTrackName = ExternalTrackManager.extractFnFromGalaxyTN(choices.targetTrack)
        if choices.isBasic:
            # Basic mode: the target track file itself defines the regions.
            suffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(choices.targetTrack, False)
            regSpec = suffix
            binSpec = queryTrackName
        else:
            regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)

        from gold.gsuite.GSuiteConstants import TITLE_COL
        from gold.gsuite.GSuite import GSuite
        from proto.hyperbrowser.StaticFile import GalaxyRunSpecificFile
        from gold.gsuite.GSuiteEditor import selectColumnsFromGSuite

        # One row of per-region results, and one pair of download links, per
        # reference track.
        staticFile = []
        results = []
        analysisDef = '-> ProportionCountStat'
        for refTrack in refTrackNameList:
            res = GalaxyInterface.runManual([refTrack], analysisDef, regSpec, binSpec, genome, username=username, galaxyFn=galaxyFn, printRunDescription=False, printResults=False, printProgress=False)
            results.append([res[seg]['Result'] for seg in res.getAllRegionKeys()])

            regFileNamer = GalaxyRunSpecificFile(refTrack, galaxyFn)
            staticFile.append([regFileNamer.getLink('Download bed-file'), regFileNamer.getLoadToHistoryLink('Download bed-file to History')])

        refGSuite = getGSuiteFromGalaxyTN(choices.gsuite)

        if TITLE_COL == choices.selectColumns:
            selected = trackTitles
        else:
            selected = refGSuite.getAttributeValueList(choices.selectColumns)

        # Row names for the heat map. When a metadata column is selected, the
        # name is '<title> --- <metadata>' and the metadata value is recorded
        # so that rows can be coloured by it.
        yAxisNameOverMouse = []
        metadataAll = []
        for idx, value in enumerate(selected):
            if value is None:
                yAxisNameOverMouse.append(str(trackTitles[idx]) + ' --- ' + 'None')
            elif TITLE_COL == choices.selectColumns:
                yAxisNameOverMouse.append(value.replace('\'', '').replace('"', ''))
            else:
                metadata = str(value.replace('\'', '').replace('"', ''))
                yAxisNameOverMouse.append(str(trackTitles[idx]) + ' --- ' + metadata)
                metadataAll.append(metadata)

        colorListForYAxisNameOverMouse = []
        if len(metadataAll) > 0:
            import quick.webtools.restricted.visualization.visualizationGraphs as vg
            cList = vg.colorList().fullColorList()
            uniqueCList = list(set(metadataAll))

            # Rows sharing a metadata value get the same colour.
            for m in metadataAll:
                colorListForYAxisNameOverMouse.append(cList[uniqueCList.index(m)])

        # Region labels, region lengths and gaps, in the order of the regions
        # of the last result object.
        startEndInterval = []
        startEnd = []
        extraX = []
        rowLabel = []
        regionKeys = res.getAllRegionKeys()
        lastIdx = len(regionKeys) - 1
        prevEnd = None
        for idx, region in enumerate(regionKeys):
            rowLabel.append(str(region.chr) + ":" + str(region.start) + "-" + str(region.end) + str(' (Pos)' if region.strand else ' (Neg)'))
            # NOTE(review): no gap entry is produced for the first or the last
            # region, so startEnd is shorter than startEndInterval - confirm
            # that the plotting code expects this.
            if idx != 0 and idx != lastIdx:
                gap = region.start - prevEnd
                if gap > 0:
                    startEnd.append(gap)
                else:
                    # Non-positive gap: no value is plotted; a plot line is
                    # drawn between the two regions instead.
                    startEnd.append('null')
                    extraX.append("""{ color: 'orange', width: 5, value: '""" + str(idx-0.5) + """' }""")
            startEndInterval.append(region.end - region.start)
            prevEnd = region.end

        extraXAxis = 'plotLines: [ '
        extraXAxis = extraXAxis + ",".join(extraX)
        extraXAxis = extraXAxis + """ ],  """

        import quick.webtools.restricted.visualization.visualizationPlots as vp

        htmlCore = HtmlCore()
        htmlCore.begin()
        htmlCore.divBegin(divId='results-page')
        htmlCore.divBegin(divClass='results-section')
        htmlCore.divBegin('plotDiv')
        htmlCore.line(vp.addJSlibs())
        htmlCore.line(vp.useThemePlot())
        htmlCore.line(vp.addJSlibsExport())
        htmlCore.line(vp.axaddJSlibsOverMouseAxisisPopup())
        htmlCore.line(vp._addGuidelineV1())

        htmlCore.line(vp.addJSlibsHeatmap())

        from config.Config import DATA_FILES_PATH

        # This import rebinds GalaxyRunSpecificFile for the rest of the method.
        from proto.StaticFile import StaticFile, GalaxyRunSpecificFile

        htmlCore.divBegin()
        resultFn = cls.makeHistElement(galaxyExt='tabular',
                                       title='result')
        htmlCore.divEnd()

        # Write the matrix as a tab-separated table; the context manager makes
        # sure the file is flushed and closed.
        with open(resultFn, 'w') as writeFile:
            writeFile.write('Track' + '\t' + '\t'.join(rowLabel) + '\n')
            for idx, rList in enumerate(results):
                writeFile.write(str(yAxisNameOverMouse[idx]) + '\t' + '\t'.join([str(r) for r in rList]) + '\n')

        fileOutput = GalaxyRunSpecificFile(['heatmap.png'],
                                           galaxyFn)
        ensurePathExists(fileOutput.getDiskPath())

        fileOutputPdf = GalaxyRunSpecificFile(['heatmap.pdf'],
                                              galaxyFn)
        ensurePathExists(fileOutputPdf.getDiskPath())

        cls.generateStaticRPlot(results, colorListForYAxisNameOverMouse, rowLabel, yAxisNameOverMouse,
                                colorMaps[choices.colorMapSelectList],
                                fileOutput.getDiskPath(), fileOutputPdf.getDiskPath())

        htmlCore.divBegin(divId='heatmap', style="padding: 10px 0 px 10 px 0px;margin: 10px 0 px 10 px 0px")
        htmlCore.link('Download heatmap image', fileOutputPdf.getURL())
        htmlCore.divEnd()

        # All rows have one value per region, so the first row gives the row
        # length. (Indexing row 1 would fail for a single reference track.)
        if len(results) * len(results[0]) >= 10000:
            # Large matrix: show only the static image.
            htmlCore.image(fileOutput.getURL())
        else:
            minVal = 1000000000
            maxVal = -1000000000
            for rList in results:
                for r in rList:
                    if minVal > r:
                        minVal = r
                    if maxVal < r:
                        maxVal = r

            if maxVal - minVal != 0:
                resultNormalised = []
                for rList in results:
                    resultNormalised.append([(r - minVal) / (maxVal - minVal) for r in rList])
                addText = '(normalised to [0, 1])'
            else:
                # All values equal: normalising would divide by zero.
                resultNormalised = results
                addText = ''

            hm, heatmapPlotNumber, heatmapPlot = vp.drawHeatMap(
                                                    resultNormalised,
                                                    colorMaps[choices.colorMapSelectList],
                                                    label='this.series.xAxis.categories[this.point.x] + ' + "'<br >'" + ' + yAxisNameOverMouse[this.point.y] + ' + "'<br>Overlap proportion" + str(addText) + ": <b>'" + ' + this.point.value + ' + "'</b>'",
                                                    yAxisTitle= 'Reference tracks',
                                                    categories=rowLabel,
                                                    tickInterval=1,
                                                    plotNumber=3,
                                                    interaction=True,
                                                    otherPlotNumber=1,
                                                    titleText='Overlap with reference tracks for each local region',
                                                    otherPlotData=[startEnd, startEndInterval],
                                                    overMouseAxisX=True,
                                                    overMouseAxisY=True,
                                                    yAxisNameOverMouse=yAxisNameOverMouse,
                                                    overMouseLabelY=" + 'Track: '" + ' + this.value + ' + "' '" + ' + yAxisNameOverMouse[this.value] + ',
                                                    overMouseLabelX = ' + this.value.substring(0, 20) +',
                                                    extrOp = staticFile
                                                    )
            htmlCore.line(hm)
            htmlCore.line(vp.drawChartInteractionWithHeatmap(
                [startEndInterval, startEnd],
                tickInterval=1,
                type='line',
                categories=[rowLabel, rowLabel],
                seriesType=['line', 'column'],
                minWidth=300,
                height=500,
                lineWidth=3,
                titleText=['Lengths of segments (local regions)','Gaps between consecutive segments'],
                label=['<b>Length: </b>{point.y}<br/>', '<b>Gap length: </b>{point.y}<br/>'],
                subtitleText=['',''],
                yAxisTitle=['Lengths','Gap lengths'],
                seriesName=['Lengths','Gap lengths'],
                xAxisRotation=90,
                legend=False,
                extraXAxis=extraXAxis,
                heatmapPlot=heatmapPlot,
                heatmapPlotNumber=heatmapPlotNumber,
                overMouseAxisX=True,
                overMouseLabelX = ' + this.value.substring(0, 20) +'
                ))

        htmlCore.divEnd()
        htmlCore.divEnd()
        htmlCore.divEnd()
        htmlCore.end()

        htmlCore.hideToggle(styleClass='debug')

        print(htmlCore)
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Collapse every category of a track into one bounding region per
        chromosome and write the regions to galaxyFn.

        For each (chromosome, category) pair the smallest start and the
        largest end (or start, for elements without an end) over all elements
        are collected. One tab-separated line 'chrom, lower, upper + 1,
        category' is written per pair. When overlaps are not allowed and the
        bounding regions of two categories on the same chromosome conflict,
        the output file is instead overwritten with an error message listing
        the conflicting pairs.

        choices is a list of selections made by the web-user in each options
        box: [0] genome, [1] track source, [2] track name (colon-separated),
        [3] 'Yes' if overlapping resulting regions are allowed.
        '''
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from gold.origdata.BedGenomeElementSource import BedCategoryGenomeElementSource
        from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource
        from gold.origdata.TrackGenomeElementSource import TrackGenomeElementSource
        from gold.track.GenomeRegion import GenomeRegion
        from quick.util.GenomeInfo import GenomeInfo
        from collections import defaultdict

        genome = choices[0]
        track = choices[2].split(':')
        allowOverlaps = choices[3] == 'Yes'

        # One region per chromosome, covering the whole chromosome.
        regionList = [GenomeRegion(genome, chrom, 0, GenomeInfo.getChrLen(genome, chrom))
                      for chrom in GenomeInfo.getChrList(genome)]

        if choices[1] == 'From Hyperbrowser repository':
            geSource = TrackGenomeElementSource(genome, track, regionList)
        else:
            fileType = ExternalTrackManager.extractFileSuffixFromGalaxyTN(track)
            fn = ExternalTrackManager.extractFnFromGalaxyTN(track)
            if fileType == 'category.bed':
                geSource = BedCategoryGenomeElementSource(fn)
            else:
                geSource = GtrackGenomeElementSource(fn)

        # chrom -> category -> smallest start / largest end seen so far.
        resultMinDict = defaultdict(dict)
        resultMaxDict = defaultdict(dict)
        for ge in geSource:
            # Elements without an end contribute their start as upper bound.
            upperBound = ge.end if ge.end else ge.start
            minForChr = resultMinDict[ge.chr]
            maxForChr = resultMaxDict[ge.chr]
            if ge.val in maxForChr:
                if maxForChr[ge.val] < upperBound:
                    maxForChr[ge.val] = upperBound
                if minForChr[ge.val] > ge.start:
                    minForChr[ge.val] = ge.start
            else:
                maxForChr[ge.val] = upperBound
                minForChr[ge.val] = ge.start

        quitFlag = False
        catsConflicting = []
        with open(galaxyFn, 'w') as utfil:
            for chrom in sorted(resultMinDict.keys()):
                minForChr = resultMinDict[chrom]
                maxForChr = resultMaxDict[chrom]

                for category in minForChr.keys():
                    lower, upper = minForChr[category], maxForChr[category]
                    if not allowOverlaps:
                        for cat in minForChr:
                            if cat == category:
                                continue
                            l, u = minForChr[cat], maxForChr[cat]
                            # Disjoint bounding regions never conflict.
                            if l >= upper or u <= lower:
                                continue
                            if l > lower or u < upper:
                                quitFlag = True
                                catsConflicting.append(
                                    '(Category: %s,  Region: %i - %i) vs. (Category: %s, Region: %i - %i)'
                                    % (category, lower, upper, cat, l, u))

                    utfil.write('\t'.join(
                        [chrom, str(lower),
                         str(upper + 1), category]) + '\n')

        if quitFlag:
            # Replace the regions written above with the error report.
            with open(galaxyFn, 'w') as errorFile:
                errorFile.write(
                    'Error: overlapping resulting regions are not allowed with selected preferences:\n'
                    + '\n'.join(catsConflicting))
Пример #28
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Run 'RawVisualizationDataStat' on the selected track and print an
        HTML page holding the resulting figure and the R script behind it.

        The analysis regions are whole chromosomes by default, chromosome
        arms, or the bins of a history track. A history bins file with more
        non-comment lines than cls.MAX_NUM_ROWS is truncated to a
        run-specific copy before the analysis is run.

        choices is a list of selections made by the web-user in each options
        box: [0] genome, [1] track source, [2] repository track name,
        [3] history track, [4] row scaling, [5] row centering, [6] region
        type, [7] history track with bins.
        '''

        genome = choices[0]
        regSpec = '__chrs__'
        binSpec = '*'
        if choices[6] == 'Chromosome arms':
            regSpec = '__chrArms__'
        elif choices[6] == 'Track from history...':
            regSpec = ExternalTrackManager.extractFileSuffixFromGalaxyTN(choices[7].split(':'))
            binSpec = ExternalTrackManager.extractFnFromGalaxyTN(choices[7].split(':'))

            # Keep at most MAX_NUM_ROWS non-comment lines (comment lines in
            # between are kept, but not counted).
            lineList, counter, tooManyBins = [], 0, False
            with open(binSpec) as binFile:
                for line in binFile:
                    stripped = line.strip()
                    if stripped != '':
                        if counter == cls.MAX_NUM_ROWS:
                            tooManyBins = True
                            break
                        lineList.append(line)
                        counter += 1 if stripped[0] != '#' else 0

            if tooManyBins:
                newHist = GalaxyRunSpecificFile(['newHistFile.%s' % regSpec], galaxyFn)
                binSpec = newHist.getDiskPath(ensurePath=True)
                # Closed explicitly so the content is on disk before the
                # analysis below reads the file.
                with open(binSpec, 'w') as truncatedFile:
                    truncatedFile.write(''.join(lineList))

        print(GalaxyInterface.getHtmlBeginForRuns(galaxyFn))
        print(GalaxyInterface.getHtmlForToggles(withRunDescription=False))

        core = HtmlCore()
        core.styleInfoBegin(styleClass='debug')

        figImage = GalaxyRunSpecificFile(['VizTrackOnGenome.png'], galaxyFn)
        analysisDef = ' [normalizeRows=%s] [centerRows=%s]  -> RawVisualizationDataStat' % \
            (choices[4] == 'Scale to same size', choices[5] == 'Center')

        if choices[1] == 'HyperBrowser repository':
            trackName = choices[2].split(':')
        else:
            trackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, choices[3].split(':'))

        res = GalaxyInterface.runManual([trackName], analysisDef, regSpec, binSpec, genome, username=username, printResults=False, printHtmlWarningMsgs=False)

        core.styleInfoEnd()
        core.line('')

        core.tableHeader(None)
        rScript = cls.customRExecution(res, figImage.getDiskPath(ensurePath=True), '')

        # Image height grows with the number of results, capped at
        # MAX_NUM_ROWS rows of 20 pixels each.
        figUrl = figImage.getURL()
        figLinkText = '<img src="%s" alt="Figure" height="%i" width="800"/>' % (figUrl, 20 * min(cls.MAX_NUM_ROWS, len(res)))
        core.tableLine([figImage.getLink(figLinkText)])

        rScriptGalaxyFile = GalaxyRunSpecificFile(['RScript.R'], galaxyFn)
        with open(rScriptGalaxyFile.getDiskPath(ensurePath=True), 'w') as rScriptFile:
            rScriptFile.write(rScript)

        core.tableLine([rScriptGalaxyFile.getLink('R script')])

        core.tableFooter()

        print(core)
        print(GalaxyInterface.getHtmlEndForRuns())