示例#1
0
def createAssemblyGapsFile(genome, assemblyChars='ACGTacgt'):
    """Write the assembly gaps of a genome to the file 'assemblyGaps.bed'.

    For every chromosome in the extended chromosome list, each maximal run
    of sequence characters that are not in assemblyChars is written as one
    tab-separated line: chromosome, start index, end index. Indexes are
    positions in the sequence array of the whole chromosome; the end index
    is one past the last gap position.

    genome: genome identifier handed to GenomeInfo and the path helpers.
    assemblyChars: characters counted as assembled sequence; every other
        character is treated as part of a gap.

    If no gap is found at all, a single placeholder line (first chromosome,
    '1', '1', without a trailing newline) is written instead.
    """
    basePath = gcf.createOrigPath(genome, GenomeInfo.getPropertyTrackName(genome, 'gaps'), '')
    outFn = basePath + 'assemblyGaps.bed'
    qcf.ensurePathExists(outFn)

    seqTrack = PlainTrack(GenomeInfo.getSequenceTrackName(genome))

    anyGaps = False
    # The context manager closes the file even if reading a track fails.
    with open(outFn, 'w') as outFile:
        for chrom in GenomeInfo.getExtendedChrList(genome):
            chrRegion = GenomeRegion(genome, chrom, 0, GenomeInfo.getChrLen(genome, chrom))
            seq = seqTrack.getTrackView(chrRegion).valsAsNumpyArray()

            isAssembled = numpy.logical_or.reduce([seq == x for x in assemblyChars])
            gapIndexes = numpy.arange(len(seq))[numpy.logical_not(isAssembled)]

            # A difference of 1 between neighbouring gap indexes means the
            # second index continues the run started by the first one.
            gapIndexDiff = gapIndexes[1:] - gapIndexes[:-1]
            isContinuation = gapIndexDiff == 1
            diffPositions = numpy.arange(len(gapIndexDiff))

            # Keep only the first index of each run as begin, and only the
            # (last index + 1) of each run as end.
            gapBeginIndexes = numpy.delete(gapIndexes, (diffPositions + 1)[isContinuation])
            gapEndIndexes = numpy.delete(gapIndexes + 1, diffPositions[isContinuation])

            assert len(gapBeginIndexes) == len(gapEndIndexes)

            for gapBegin, gapEnd in zip(gapBeginIndexes, gapEndIndexes):
                anyGaps = True
                # '\n' rather than os.linesep: the file is opened in text
                # mode, which already translates the line ending.
                outFile.write('\t'.join([chrom, str(gapBegin), str(gapEnd)]) + '\n')

        if not anyGaps:
            outFile.write('\t'.join([GenomeInfo.getExtendedChrList(genome)[0], '1', '1']))
 def getAnchor(genome, trackName):
     """Return the genome anchor of the track, as a string, per chromosome.

     For every chromosome in GenomeInfo.getChrList(genome) a track view
     covering the whole chromosome is fetched, and str(genomeAnchor) of that
     view is collected. The result list follows the chromosome list order.
     """
     plainTrack = PlainTrack(trackName)
     anchors = []
     for chrName in GenomeInfo.getChrList(genome):
         chrLength = GenomeInfo.getChrLen(genome, chrName)
         wholeChr = GenomeRegion(genome, chrName, 0, chrLength)
         view = plainTrack.getTrackView(wholeChr)
         anchors.append(str(view.genomeAnchor))

     return anchors
 def getNumberElements(genome, trackName):
     """Return the number of track elements per chromosome.

     The count for a chromosome is the length of the start-position array of
     the track view covering the whole chromosome. The result list follows
     the order of GenomeInfo.getChrList(genome).
     """
     plainTrack = PlainTrack(trackName)
     counts = []
     for chrName in GenomeInfo.getChrList(genome):
         chrLength = GenomeInfo.getChrLen(genome, chrName)
         wholeChr = GenomeRegion(genome, chrName, 0, chrLength)
         view = plainTrack.getTrackView(wholeChr)
         counts.append(len(view.startsAsNumpyArray()))

     return counts
示例#4
0
 def createNmerChains(self, n):
     for chr in GenomeInfo.getChrList(self._genome):
         print 'Creating chains of nmers of length ', n, ' for chromosome ', chr
         chrLen = GenomeInfo.getChrLen(self._genome,chr)
         chrReg = GenomeRegion( self._genome, chr, 0, chrLen )
         seqTV = PlainTrack( GenomeInfo.getSequenceTrackName(self._genome) ).getTrackView(chrReg)
         
         #nmersAsInts = NmerAsIntSlidingWindow(n, FuncValTvWrapper(seqTV))
         nmersAsInts = NmerAsIntSlidingWindow(n, seqTV.valsAsNumpyArray())
         SameValueIndexChainsFactory.generate( nmersAsInts, chrLen, 4**n, self._createPath(n), chr )
    def createNmerChains(self, n):
        for chr in GenomeInfo.getChrList(self._genome):
            print 'Creating chains of nmers of length ', n, ' for chromosome ', chr
            chrLen = GenomeInfo.getChrLen(self._genome, chr)
            chrReg = GenomeRegion(self._genome, chr, 0, chrLen)
            seqTV = PlainTrack(GenomeInfo.getSequenceTrackName(
                self._genome)).getTrackView(chrReg)

            #nmersAsInts = NmerAsIntSlidingWindow(n, FuncValTvWrapper(seqTV))
            nmersAsInts = NmerAsIntSlidingWindow(n, seqTV.valsAsNumpyArray())
            SameValueIndexChainsFactory.generate(nmersAsInts, chrLen, 4**n,
                                                 self._createPath(n), chr)
示例#6
0
    def execute(cls, choices, galaxyFn=None, username=''):
        """Pass the elements of the selected track to WriteExpandedElementsToFile.

        The output is written to the file at galaxyFn.

        choices[0] is the genome and choices[1] selects the source:
        - 'history': choices[2] (a colon-separated Galaxy track name) is
          copied to a temporary file, read as BED when the track type is
          'valued.bed', 'category.bed' or 'bed', and passed on in one call.
          The temporary file is removed afterwards, also on failure.
        - anything else: choices[3] (a colon-separated track name) is read
          chromosome by chromosome; the header flag is only set for the
          first chromosome.

        In the history branch the username argument is overwritten by six
        random lowercase letters that become part of the temporary file name.
        """
        from gold.util.RandomUtil import random

        genome = choices[0]
        trackItem = choices[3]

        chrSizeDict = dict([(chrom, GenomeInfo.getChrLen(genome, chrom))
                            for chrom in GenomeInfo.getChrList(genome)])
        geSource = headLinesStr = None

        # The context manager closes the output file even when a step fails.
        with open(galaxyFn, 'w') as outputFile:
            if choices[1] == 'history':
                trackType = choices[2].split(':')[1]
                username = ''.join(
                    [chr(random.randint(97, 122)) for i in range(6)])
                tempFn = createCollectedPath(
                    genome, [],
                    username + '_'.join([str(v) for v in time.localtime()[:6]]) +
                    '.' + trackType)
                fnSource = ExternalTrackManager.extractFnFromGalaxyTN(
                    choices[2].split(':'))

                try:
                    with open(fnSource, 'r') as sourceFile:
                        with open(tempFn, 'w') as tempFile:
                            tempFile.write(sourceFile.read())

                    if trackType in ['valued.bed', 'category.bed', 'bed']:
                        geSource = GenomeElementSorter(
                            BedGenomeElementSource(tempFn, genome=genome)).__iter__()

                    # NOTE: gtrack input is disabled in this variant, so
                    # geSource stays None for any other track type.
                    #elif trackType == 'gtrack':
                    #    geSource = GenomeElementSorter(GtrackGenomeElementSource(tempFn, genome=genome)).__iter__()
                    #    headLinesStr = geSource.getHeaderLines().replace('##','\n##')

                    cls.WriteExpandedElementsToFile(geSource,
                                                    chrSizeDict,
                                                    outputFile,
                                                    headLinesStr,
                                                    writeHeaderFlag=True)
                finally:
                    # Do not leave the temporary copy behind on errors.
                    if os.path.exists(tempFn):
                        os.remove(tempFn)

            else:
                # The track does not depend on the chromosome; build it once.
                plTrack = PlainTrack(trackItem.split(':'))
                writeHeaderFlag = True
                for chrom in GenomeInfo.getChrList(genome):
                    gRegion = GenomeRegion(genome, chrom, 0, chrSizeDict[chrom])
                    geSource = GenomeElementTvWrapper(
                        plTrack.getTrackView(gRegion)).__iter__()
                    cls.WriteExpandedElementsToFile(geSource, chrSizeDict,
                                                    outputFile, headLinesStr,
                                                    writeHeaderFlag)
                    writeHeaderFlag = False
 def getSegmentSizes(genome, trackName):
     """Return the summed segment length of a track, one value per chromosome.

     For every chromosome in GenomeInfo.getChrList(genome) the segment
     lengths (ends - starts) of the whole-chromosome track view are summed.
     The sums are returned as plain Python numbers, in chromosome list order.
     """
     track = PlainTrack(trackName)
     sumSegmentSize = []
     for chrom in GenomeInfo.getChrList(genome):
         chromLen = GenomeInfo.getChrLen(genome, chrom)
         region = GenomeRegion(genome, chrom, 0, chromLen)
         tv = track.getTrackView(region)
         sizeSegments = tv.endsAsNumpyArray() - tv.startsAsNumpyArray()
         # Only the per-chromosome sum is returned, so the individual sizes
         # are not converted to Python lists.
         sumSegmentSize.append(sizeSegments.sum().tolist())

     return sumSegmentSize
示例#8
0
    def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands,
                                     ids, edges, weights, extras, region):
        """Create randomized arrays by sampling from the intensity track.

        Loads the track view of self._trackNameIntensity for region and
        delegates to _createRandomizedNumpyArraysFromIntensityFunction,
        whose result is returned unchanged.

        Raises InvalidRunSpecException if the intensity track has no values
        in region, and NotImplementedError if the intensity track is not
        dense.
        """
        intensityTV = PlainTrack(self._trackNameIntensity).getTrackView(region)
        if len(intensityTV.valsAsNumpyArray()) == 0:
            raise InvalidRunSpecException(
                'Error: No intensity data available for sampling randomized locations in region ' +
                str(region) +
                '. Please check that the intensity track was created with the same main track that is being randomized in this analysis.')

        #intensityTV = PlainTrack(self._trackNameIntensity).getTrackView(self._origRegion) #Dependence on origRegion is not nice, but not a big problem..

        if not intensityTV.trackFormat.isDense():
            raise NotImplementedError(
                'Sampling from a non-dense intensity track is not implemented')

        assert intensityTV.trackFormat.isValued('number')
        return self._createRandomizedNumpyArraysFromIntensityFunction(
            binLen, starts, ends, vals, strands, ids, edges, weights, extras,
            intensityTV)
    def _addPeaks(self):
        """Append a Peak to self.peaks for every segment of the first regions.

        Only the first three regions of the global bin source are processed;
        iteration stops as soon as a fourth region is reached.
        """
        #trackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, self.trackName)
        plainTrack = PlainTrack(self.trackName)
        # NOTE(review): 'genome' is neither a parameter nor a local here;
        # presumably a module-level name - confirm.
        regionSource = GlobalBinSource(genome)
        for regionIndex, region in enumerate(regionSource):
            if regionIndex > 2:
                break
            view = plainTrack.getTrackView(region)
            segmentStarts = view.startsAsNumpyArray()
            segmentEnds = view.endsAsNumpyArray()

            for (segStart, segEnd) in zip(segmentStarts, segmentEnds):
                self.peaks.append(Peak(self, region.chr, segStart, segEnd))
    def __iter__(self):
        """Yield the weighted sum of two tracks for every chromosome position.

        The values of trackName1 and trackName2 are read for the whole
        chromosome self.chr; position i yields w1 * vals1[i] + w2 * vals2[i].
        The number of yielded items is the length of the first value array.
        """
        chrom = self.chr
        firstName, secondName = self.trackName1, self.trackName2
        weight1, weight2 = self.w1, self.w2
        genome = self.genome

        wholeChr = GenomeRegion(genome, chrom, 0, GenomeInfo.getChrLen(genome, chrom))

        firstVals = PlainTrack(firstName).getTrackView(wholeChr).valsAsNumpyArray()
        secondVals = PlainTrack(secondName).getTrackView(wholeChr).valsAsNumpyArray()

        for idx in xrange(len(firstVals)):
            yield weight1 * firstVals[idx] + weight2 * secondVals[idx]
    def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands, ids, edges, weights, extras, origTrackFormat, region):
        """Create randomized arrays by sampling from the intensity track.

        When self._minimal is set, an 8-tuple of an empty numpy array, six
        None values and an empty OrderedDict is returned without touching
        the intensity track. Otherwise the track view of
        self._trackNameIntensity is loaded for region and the result of
        _createRandomizedNumpyArraysFromIntensityFunction is returned.

        Raises InvalidRunSpecException if the intensity track has no values
        in region, and NotImplementedError if the intensity track is not
        dense.
        """
        if self._minimal:
            return numpy.array([]), None, None, None, None, None, None, OrderedDict()

        intensityTV = PlainTrack(self._trackNameIntensity).getTrackView(region)
        if len(intensityTV.valsAsNumpyArray()) == 0:
            raise InvalidRunSpecException(
                'Error: No intensity data available for sampling randomized locations in region ' +
                str(region) +
                '. Please check that the intensity track was created with the same main track that is being randomized in this analysis.')

        #intensityTV = PlainTrack(self._trackNameIntensity).getTrackView(self._origRegion) #Dependence on origRegion is not nice, but not a big problem..

        if not intensityTV.trackFormat.isDense():
            raise NotImplementedError(
                'Sampling from a non-dense intensity track is not implemented')

        assert intensityTV.trackFormat.isValued('number')
        return self._createRandomizedNumpyArraysFromIntensityFunction(
            binLen, starts, ends, vals, strands, ids, edges, weights, extras,
            intensityTV)
示例#12
0
 def execute(choices, galaxyFn=None, username=''):
     '''
     Is called when execute-button is pushed by web-user. Should print
     output as HTML to standard out, which will be directed to a results page
     in Galaxy history. If getOutputFormat is anything else than HTML, the
     output should be written to the file with path galaxyFn. If needed,
     StaticFile can be used to get a path where additional files can be put
     (e.g. generated image files). choices is a list of selections made by
     web-user in each options box.
     '''
     print "<h2>Test tool<h2>"
     fastaTrack = PlainTrack(['Sequence', 'DNA'])
     for i in range(0, 500):
         seqTv = fastaTrack.getTrackView(
             GenomeRegion("hg19", "chr1", 1000000, 1001000))
         sequence = seqTv.valsAsNumpyArray()
         print sequence
 def getGenomicElements(genome, trackName):
     """Return all elements of a track as [chromosome, start, end] lists.

     Chromosomes are visited in the order of GenomeInfo.getChrList(genome);
     within a chromosome the elements come in the iteration order of the
     whole-chromosome track view.
     """
     track = PlainTrack(trackName)
     genElements = []
     for chrom in GenomeInfo.getChrList(genome):
         chromLen = GenomeInfo.getChrLen(genome, chrom)
         region = GenomeRegion(genome, chrom, 0, chromLen)
         tv = track.getTrackView(region)
         for el in tv:
             # append() instead of list concatenation: rebuilding the list
             # for every element is quadratic in the number of elements.
             genElements.append([chrom, el.start(), el.end()])

     return genElements
 
     #print numpy.version.version # 1.7.1 !!
     #unique, counts = numpy.unique(segmentSize, return_counts=True) # This is for numpy 1.9
     #print numpy.asarray((unique, counts)).T
     
     '''track.setFormatConverter('SegmentToMidPointFormatConverter')
    def __iter__(self):
        """Yield [pos, pos + 1] for randomly selected chromosome positions.

        The values of trackName1 on chromosome self.chr are rescaled
        linearly to the range 0..1. A position is yielded when a fresh
        r.runif(1) draw is below self.factor times the rescaled value.
        A track with a single constant value yields nothing.
        """
        from gold.application.RSetup import r
        chrom = self.chr
        trackName1, genome = self.trackName1, self.genome
        factor = self.factor
        region = GenomeRegion(genome, chrom, 0, GenomeInfo.getChrLen(genome, chrom))

        track1 = PlainTrack(trackName1)
        tv1 = track1.getTrackView(region)
        vals1 = tv1.valsAsNumpyArray()

        #scale between 0 and 1..:
        minVal, maxVal = vals1.min(), vals1.max()
        valRange = maxVal - minVal
        if valRange == 0:
            # A constant track cannot be rescaled; the division would give
            # inf/nan, against which no draw compares as smaller.
            return
        # 1.0 forces true division: with integer values 1/valRange would be
        # truncated to 0 under Python 2.
        vals1 = (vals1 - minVal) * (1.0 / valRange)
        for pos in xrange(len(vals1)):
            #print r.runif(1), vals1[pos]
            if r.runif(1) < factor * vals1[pos]:
                yield [pos, pos + 1]
    def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands, ids, edges,
                                     weights, extras, region):
        """Create randomized arrays restricted to a binary universe track.

        Loads the track view of self._trackNameUniverse for region and
        returns the result of _createRandomizedNumpyArraysFromBinaryUniverse.

        Raises InvalidRunSpecException if the universe track is dense.
        """
        universeView = PlainTrack(self._trackNameUniverse).getTrackView(region)

        if not universeView.trackFormat.isDense():
            return self._createRandomizedNumpyArraysFromBinaryUniverse(
                binLen, starts, ends, vals, strands, ids, edges, weights,
                extras, universeView)

        raise InvalidRunSpecException('Error: Universe needs to be a binary (non-dense) track')
    def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands,
                                     ids, edges, weights, extras, region):
        """Create randomized arrays sampled by distance to a reference track.

        The reference track is read from self._trackNameIntensity (the
        attribute name is kept for convenience with respect to inheritance).
        The result of _createRandomizedNumpyArraysFromDistanceToReference is
        returned unchanged.

        Raises InvalidRunSpecException if the reference track has no values
        in region or if it is a dense track.
        """
        referenceTV = PlainTrack(self._trackNameIntensity).getTrackView(region)
        if len(referenceTV.valsAsNumpyArray()) == 0:
            raise InvalidRunSpecException(
                'Error: No reference data available for sampling randomized locations in region ' +
                str(region) +
                '. Please check that the reference track was created with the same main track that is being randomized in this analysis.')

        if referenceTV.trackFormat.isDense():
            raise InvalidRunSpecException(
                'Error: Cannot sample by distance to reference if reference is a dense track'
            )

        return self._createRandomizedNumpyArraysFromDistanceToReference(
            binLen, starts, ends, vals, strands, ids, edges, weights,
            extras, referenceTV)
    def __iter__(self):
        """Yield the positions of the lower-order chain that match the full n-mer.

        For every position in self._lowerOrderChain the genome sequence of
        length len(self._fullNmer) starting there is read, lower-cased and
        compared with self._fullNmer. Matching positions are yielded.

        Raises AssertionError if the sequence at a chain position does not
        start with the same prefix (of length len(self._nmerPrefix)) as
        self._fullNmer.
        """
        from gold.track.Track import PlainTrack
        from quick.util.GenomeInfo import GenomeInfo
        from gold.track.GenomeRegion import GenomeRegion

        # None of these depend on the position, so they are set up once
        # instead of on every loop iteration.
        track = PlainTrack(GenomeInfo.getSequenceTrackName(self._genome))
        fullNmer = self._fullNmer
        nmerLen = len(fullNmer)
        pl = len(self._nmerPrefix)

        for pos in self._lowerOrderChain:
            region = GenomeRegion(self._genome, self._chr, pos, pos + nmerLen)
            fullSubstring = (''.join(
                track.getTrackView(region).valsAsNumpyArray())).lower()
            assert fullNmer[0:pl] == fullSubstring[0:pl], \
                'The prefix of lower order does not match at the positions given by the chain. %s vs %s. Region: %s' % (
                    fullNmer[0:pl], fullSubstring[0:pl], region)
            #print 'Comparing nmers: %s VS %s (at pos:%i).' % (self._fullNmer, fullSubstring, pos)
            if fullNmer == fullSubstring:
                yield pos
    def __iter__(self):
        """Yield [pos, pos + 1] for randomly selected chromosome positions.

        The values of trackName1 on chromosome self.chr are rescaled
        linearly to the range 0..1. A position is yielded when a fresh
        r.runif(1) draw is below self.factor times the rescaled value.
        A track with a single constant value yields nothing.
        """
        from proto.RSetup import r
        chrom = self.chr
        trackName1, genome = self.trackName1, self.genome
        factor = self.factor
        region = GenomeRegion(genome, chrom, 0,
                              GenomeInfo.getChrLen(genome, chrom))

        track1 = PlainTrack(trackName1)
        tv1 = track1.getTrackView(region)
        vals1 = tv1.valsAsNumpyArray()

        #scale between 0 and 1..:
        minVal, maxVal = vals1.min(), vals1.max()
        valRange = maxVal - minVal
        if valRange == 0:
            # A constant track cannot be rescaled; the division would give
            # inf/nan, against which no draw compares as smaller.
            return
        # 1.0 forces true division: with integer values 1/valRange would be
        # truncated to 0 under Python 2.
        vals1 = (vals1 - minVal) * (1.0 / valRange)
        for pos in xrange(len(vals1)):
            #print r.runif(1), vals1[pos]
            if r.runif(1) < factor * vals1[pos]:
                yield [pos, pos + 1]
示例#19
0
def getFlatTracksTS(genome, guiSelectedGSuite):
    """Build a FlatTracksTS holding one SingleTrackTS per track of a GSuite.

    The GSuite is loaded from the Galaxy track name guiSelectedGSuite. Each
    entry is stored under the title of its track. Its metadata starts with
    'title' and 'genome' (as str), followed by the attributes of the track.

    Raises AssertionError if a GSuite track has no track name.
    """
    ts = FlatTracksTS()
    gsuite = getGSuiteFromGalaxyTN(guiSelectedGSuite)

    for gsTrack in gsuite.allTracks():
        assert gsTrack.trackName is not None, "Gstrack name is None %s" % gsTrack
        track = PlainTrack(gsTrack.trackName)
        # Built from pairs: keyword arguments are not guaranteed to reach
        # OrderedDict in call order on Python 2 (or Python 3 before 3.6).
        metadata = OrderedDict([('title', gsTrack.title),
                                ('genome', str(genome))])
        metadata.update(gsTrack.attributes)
        assert track is not None
        ts[gsTrack.title] = SingleTrackTS(track, metadata)
    return ts
    def execute(cls, choices, galaxyFn=None, username=''):
        """Pass the elements of the selected track to WriteExpandedElementsToFile.

        The output is written to the file at galaxyFn.

        choices[0] is the genome and choices[1] selects the source:
        - 'history': choices[2] (a colon-separated Galaxy track name) is
          copied to a run-specific temporary file, read as BED ('valued.bed',
          'category.bed', 'bed') or GTrack ('gtrack') and passed on in one
          call. The temporary file is removed afterwards, also on failure.
        - anything else: choices[3] (a colon-separated track name) is read
          chromosome by chromosome; the header flag is only set for the
          first chromosome.
        """
        genome = choices[0]
        trackItem = choices[3]

        chrSizeDict = dict([(chrom, GenomeInfo.getChrLen(genome, chrom))
                            for chrom in GenomeInfo.getChrList(genome)])
        geSource = headLinesStr = None

        # The context manager closes the output file even when a step fails.
        with open(galaxyFn, 'w') as outputFile:
            if choices[1] == 'history':
                trackType = choices[2].split(':')[1]

                from proto.hyperbrowser.StaticFile import GalaxyRunSpecificFile
                tempFn = GalaxyRunSpecificFile(['fromHistory.' + trackType], galaxyFn).getDiskPath(True)

                fnSource = ExternalTrackManager.extractFnFromGalaxyTN(choices[2].split(':'))

                try:
                    with open(fnSource, 'r') as sourceFile:
                        with open(tempFn, 'w') as tempFile:
                            tempFile.write(sourceFile.read())

                    if trackType in ['valued.bed', 'category.bed', 'bed']:
                        geSource = GenomeElementSorter(BedGenomeElementSource(tempFn, genome=genome)).__iter__()

                    elif trackType == 'gtrack':
                        geSource = GenomeElementSorter(GtrackGenomeElementSource(tempFn, genome=genome)).__iter__()
                        headLinesStr = geSource.getHeaderLines().replace('##', '\n##')

                    cls.WriteExpandedElementsToFile(geSource, chrSizeDict, outputFile, headLinesStr, writeHeaderFlag=True)
                finally:
                    # Do not leave the temporary copy behind on errors.
                    if os.path.exists(tempFn):
                        os.remove(tempFn)

            else:
                # The track does not depend on the chromosome; build it once.
                plTrack = PlainTrack(trackItem.split(':'))
                writeHeaderFlag = True
                for chrom in GenomeInfo.getChrList(genome):
                    gRegion = GenomeRegion(genome, chrom, 0, chrSizeDict[chrom])
                    geSource = GenomeElementTvWrapper(plTrack.getTrackView(gRegion)).__iter__()
                    cls.WriteExpandedElementsToFile(geSource, chrSizeDict, outputFile, headLinesStr, writeHeaderFlag)
                    writeHeaderFlag = False
示例#21
0
    def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands,
                                     ids, edges, weights, extras, region):
        """Create randomized arrays from a binary (non-dense) intensity track.

        The track is read from self._trackNameIntensity (the attribute name
        is kept for convenience with respect to inheritance). The result of
        _createRandomizedNumpyArraysFromBinaryIntensity is returned.

        Raises InvalidRunSpecException if the track is dense.
        """
        binaryView = PlainTrack(self._trackNameIntensity).getTrackView(region)

        if not binaryView.trackFormat.isDense():
            return self._createRandomizedNumpyArraysFromBinaryIntensity(
                binLen, starts, ends, vals, strands, ids, edges, weights,
                extras, binaryView)

        raise InvalidRunSpecException(
            'Error: Intensity needs to be a binary (non-dense) track')
示例#22
0
    def getMutatedSequence(cls, genome, regionDict, pointDict=None):
        """Return FASTA-style entries for regions, with point mutations applied.

        regionDict maps a chromosome to (start, end) pairs. pointDict, if
        given, maps a chromosome to (regionStart, index, value) entries: for
        the region beginning at regionStart, the sequence character at index
        is replaced by value, or by the last character of value if value
        contains '>'.

        Returns a defaultdict(list) mapping each chromosome to one string
        per region: a header '>chrom start+1-end', a newline and the
        (mutated) sequence.
        """
        resultDict = defaultdict(list)
        fastaTrack = PlainTrack(['Sequence', 'DNA'])
        for chrom in regionDict:
            # get(): a chromosome without listed mutations must not raise
            # KeyError when pointDict is a plain dict.
            chromPoints = pointDict.get(chrom, ()) if pointDict else ()
            for start, end in regionDict[chrom]:
                seqTv = fastaTrack.getTrackView(
                    GenomeRegion(genome, chrom, start, end))
                valList = list(seqTv.valsAsNumpyArray())

                for point in chromPoints:
                    if point[0] != start:
                        continue
                    index, val = point[1:]
                    valList[index] = val[-1] if val.find('>') >= 0 else val

                resultDict[chrom].append(
                    '>%s %i-%i\n%s' %
                    (chrom, start + 1, end, ''.join(valList)))

        return resultDict
def smoothPoints(genome, inTrackName, windowSize, chr):
    from gold.extra.SlidingWindow import SlidingWindow
    from quick.util.GenomeInfo import GenomeInfo
    from gold.track.Track import PlainTrack
    from gold.track.GenomeRegion import GenomeRegion
    
    #func = lambda x: ( sum( [r.dnorm(i-len(x)/2.0,0,2000)*x[i].end for i in range(len(x)) if x[i]!=None] ) / sum( [r.dnorm(i-len(x)/2.0,0,2000)*1 for i in range(len(x)) if x[i]!=None] ) ) if len([y for y in x if y!=None])>0 else 0    
    
    chrReg = GenomeRegion(genome, chr, 0, GenomeInfo.getChrLen(genome,chr) )
            #chrReg = GenomeElement(genome, chr, 0, 3000)
    inTrackView = PlainTrack(inTrackName).getTrackView(chrReg)
    print [x.end() for x in inTrackView]
    slidingWindows = SlidingWindow(GenomeElementTvWrapper(inTrackView), windowSize)
    print [x for x in weightedValForWindowsYielder(slidingWindows, windowSize)]
 def execute(cls, choices, galaxyFn=None, username=''):
     """Pass the elements of the selected track to WriteExpandedElementsToFile.

     The output is written to the file at galaxyFn.

     choices[0] is the genome and choices[1] selects the source:
     - 'History': choices[2] (a colon-separated Galaxy track name) is
       copied to a temporary file, read as BED ('marked.bed',
       'category.bed', 'bed') or GTrack ('gtrack') and passed on in one
       call. The temporary file is removed afterwards, also on failure.
     - anything else: choices[3] (a colon-separated track name) is read
       chromosome by chromosome; the header flag is only set for the first
       chromosome.

     In the history branch the username argument is overwritten by six
     random lowercase letters that become part of the temporary file name.
     """
     genome = choices[0]
     trackItem = choices[3]

     chrSizeDict = dict([(chrom, GenomeInfo.getChrLen(genome, chrom)) for chrom in GenomeInfo.getChrList(genome)])
     geSource = headLinesStr = None

     # The context manager closes the output file even when a step fails.
     with open(galaxyFn, 'w') as outputFile:
         if choices[1] == 'History':
             trackType = choices[2].split(':')[1]
             username = ''.join([chr(random.randint(97, 122)) for i in range(6)])
             tempFn = createCollectedPath(genome, [], username + '_'.join([str(v) for v in time.localtime()[:6]]) + '.' + trackType)
             fnSource = ExternalTrackManager.extractFnFromGalaxyTN(choices[2].split(':'))

             try:
                 with open(fnSource, 'r') as sourceFile:
                     with open(tempFn, 'w') as tempFile:
                         tempFile.write(sourceFile.read())

                 if trackType in ['marked.bed', 'category.bed', 'bed']:
                     geSource = GenomeElementSorter(BedGenomeElementSource(tempFn, genome=genome)).__iter__()

                 elif trackType == 'gtrack':
                     geSource = GenomeElementSorter(GtrackGenomeElementSource(tempFn, genome=genome)).__iter__()
                     headLinesStr = geSource.getHeaderLines().replace('##', '\n##')

                 cls.WriteExpandedElementsToFile(geSource, chrSizeDict, outputFile, headLinesStr, writeHeaderFlag=True)
             finally:
                 # Do not leave the temporary copy behind on errors.
                 if os.path.exists(tempFn):
                     os.remove(tempFn)

         else:
             # The track does not depend on the chromosome; build it once.
             plTrack = PlainTrack(trackItem.split(':'))
             writeHeaderFlag = True
             for chrom in GenomeInfo.getChrList(genome):
                 gRegion = GenomeRegion(genome, chrom, 0, chrSizeDict[chrom])
                 geSource = GenomeElementTvWrapper(plTrack.getTrackView(gRegion)).__iter__()
                 cls.WriteExpandedElementsToFile(geSource, chrSizeDict, outputFile, headLinesStr, writeHeaderFlag)
                 writeHeaderFlag = False
示例#25
0
 def extractToFile(self, fn, outTrackName):
     append = False
     for region in GlobalBinSource(self._genome):
         print 'Creating segmentation for chr: ', region.chr
         trackView = PlainTrack(self._inTrackName).getTrackView(region)
         teSource = FunctionCategorizerWrapper(trackView,
                                               self._categorizerMethod,
                                               minSegLen=self._minSegLen)
         teSource.trackFormat = TrackFormat.createInstanceFromPrefixList(
             ['start', 'end', 'val'])
         TrackExtractor._extract(teSource,
                                 outTrackName,
                                 region,
                                 fn,
                                 append=append,
                                 globalCoords=True,
                                 addSuffix=True)
         append = True
    def get_reference_allele(genome, chr, pos, len=1):
        """Return the upper-cased reference sequence starting at position pos.

        genome, chr: genome and chromosome the region is built from.
        pos: position as a string; it is converted to a region start by
            subtracting 1, so the smallest valid position is 1.
        len: number of bases to read.

        Returns None if pos is not a positive integer string, the sequence
        on success, and '-' if reading the sequence fails for any reason
        (the exception is printed).
        """
        pos = pos.strip()
        # isdigit() already rules out negative numbers; 0 must be rejected
        # too, because it would give the region start -1.
        if not pos.isdigit() or int(pos) < 1:
            return None
        bpos = int(pos) - 1

        try:
            genReg = GenomeRegion(genome, chr, bpos, bpos + len)
            seqTV = PlainTrack(
                GenomeInfo.getSequenceTrackName(genome)).getTrackView(genReg)
            return ''.join([ge.val().upper() for ge in seqTV])
        except Exception as e:
            # Deliberately broad: any lookup failure is reported as '-'.
            print(e)
            return '-'
    def __iter__(self):
        """Yield w1 * vals1[i] + w2 * vals2[i] for each chromosome position.

        vals1 and vals2 are the values of trackName1 and trackName2 over the
        whole chromosome self.chr. The number of yielded items is the length
        of vals1.
        """
        genome = self.genome
        chrom = self.chr
        nameA, nameB = self.trackName1, self.trackName2
        weightA, weightB = self.w1, self.w2

        chromLength = GenomeInfo.getChrLen(genome, chrom)
        wholeChr = GenomeRegion(genome, chrom, 0, chromLength)

        valsA = PlainTrack(nameA).getTrackView(wholeChr).valsAsNumpyArray()
        valsB = PlainTrack(nameB).getTrackView(wholeChr).valsAsNumpyArray()

        for position in xrange(len(valsA)):
            yield weightA * valsA[position] + weightB * valsB[position]
from gold.track.Track import PlainTrack
from gold.track.GenomeRegion import GenomeRegion
from gold.statistic.CountStat import CountStat

# Create a track.
track = PlainTrack(['Genes and gene subsets', 'Genes', 'Refseq'])

# Create a region of interest.
region = GenomeRegion('hg18', 'chr1', 1000, 900000)

# Create a statistic for the region and the track.
stat = CountStat(region, track)

print stat.getResult()

# What happens now:
# CountStat inherits MagicStatFactory.
# MagicStatFactory determines that the region may be split into smaller bins and looks for a CountStatSplittable.
# CountStatSplittable exists, and is instantiated.
# getResult first calls createChildren. CountStatSplittable now creates a new CountStat for a smaller first region.
# This time, when MagicStatFactory handles the CountStat creation, it sees that the region in question should not be split.
# MagicStatFactory thus instantiates a CountStatUnsplittable, which loads track data and does the count for its small bin.
# This is repeated for each small bin, and the results are collected by CountStatSplittable.
# Finally, the method combineResults (of CountStatSplittable) computes the total result for the queried region and returns it.
from gold.track.Track import Track, PlainTrack
from proto.hyperbrowser.HtmlCore import HtmlCore
from quick.application.ExternalTrackManager import ExternalTrackManager
from quick.application.UserBinSource import GlobalBinSource
from quick.multitrack.MultiTrackCommon import getGSuiteFromGalaxyTN

# Module-level state.

# 25 empty lists, one per index 0..24 (the loop variable is named
# 'chromosome', so presumably one list of SNPs per chromosome - confirm).
snps = [[] for chromosome in range(0, 25)]
snpsTrack = None

# Same layout as snps: 25 empty lists (presumably peaks per chromosome).
peaks = [[] for chromosome in range(0, 25)]
transcription_factors = []
motif = None
motifs = {}  # Dict holding all the motifs
regions = []
genome = None
# Track with the track name ['Sequence', 'DNA'], created at import time.
fastaTrack = PlainTrack(['Sequence', 'DNA'])

# NOTE(review): presumably a probability threshold and a prior probability
# for binding; their use is not visible here - confirm.
BINDING_PROB_TRESHOLD = 0.5
BINDING_A_PRIORI_PROB = 0.01


class SNP():
    """Plain record for one SNP: chromosome, position and the two alleles."""

    def __init__(self, chromosome, position, mutationFrom, mutationTo):
        # The four values are stored verbatim; nothing is validated or converted.
        self.chromosome, self.position = chromosome, position
        self.mutationFrom, self.mutationTo = mutationFrom, mutationTo

    def __repr__(self):
        # The chromosome is not part of the textual form.
        shown = (self.position, self.mutationFrom, self.mutationTo)
        return "Pos: %s %s - %s" % shown
示例#30
0
 def _getGeSourceForRegion(cls, genome, outTrackName, region, inTrackName,
                           windowSize, func):
     """Build a genome element source over sliding windows of the input track.

     The track view of `inTrackName` within `region` is wrapped in a
     TrackViewBasedSlidingWindow of size `windowSize`; that window object is
     handed to CustomTrackGenomeElementSource together with `genome`,
     `outTrackName`, the chromosome of `region` and `func`.
     """
     sourceTrack = PlainTrack(inTrackName)
     windows = TrackViewBasedSlidingWindow(sourceTrack.getTrackView(region), windowSize)
     return CustomTrackGenomeElementSource(windows, genome, outTrackName, region.chr, func)
示例#31
0
 def execute(cls, choices, galaxyFn=None, username=''):
     """Render per-chromosome RGB heatmaps for up to three tracks.

     Inputs read from `choices`:
       * choices[0]  - genome
       * choices[10] - bin size (parsed with parseShortenedSizeSpec)
       * choices[1], choices[4], choices[7] - track selector for the red,
         green and blue channel respectively. 'history' takes the track from
         choices[index+1]; an empty / '-- No track --' value disables the
         channel; anything else takes the track name from choices[index+2].

     For every chromosome that has data, a small and a big PNG plus a
     zoomable HTML page are written as run-specific files, and a tabbed
     overview page linking to them is written to `galaxyFn`.

     Returns None.
     """
     # Zoom page template. Placeholders, in order: slider max, value lookup
     # table, chromosome (JS literal), track names (JS literal), scale factor,
     # chromosome (heading), big image URL, small image URL, legend HTML,
     # line chart HTML.
     htmlTemplate = '''<html><head>\n\n<link href="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8/themes/base/jquery-ui.css" rel="stylesheet" type="text/css"/>\n  <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.5/jquery.min.js"></script>\n  <script src="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8/jquery-ui.min.js"></script>\n
     <script type='text/javascript' src='https://www.google.com/jsapi'></script>
     <script type='text/javascript'> 
       google.load("visualization", "1", {packages:["corechart"]});\n google.setOnLoadCallback(drawLine);
       function drawLine(divId) {\n}
   </script>
     <style type="text/css">\n    #slider { margin: 10px; }\n  </style>\n  <script type="text/javascript">\n  jQuery(document).ready(function() {\n    jQuery("#slider").slider({min: 0, value: 370, max: %i });\n  });\n  </script>\n\n\n  <link rel="stylesheet" type="text/css" href="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/styles/stylesheet.css" />
                 \n<script language="javascript" type="text/javascript" src="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/scripts/mootools-1.2.1-core.js">\n</script><script language="javascript" type="text/javascript" src="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/scripts/mootools-1.2-more.js">\n</script><script language="javascript" type="text/javascript" src="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/scripts/ImageZoom.js"></script>\n
     \n\n\n\n<script type="text/javascript" >\nliste =%s;\ncounter = 0;\n\n\nfunction point_it2(event){\n
     document.myform.posAnchor.value = "";
     chrom = %s;\n
     trackNames = %s;
     pos_x = event.offsetX?(event.offsetX):event.pageX-document.getElementById("zoomer_image").offsetLeft;\n	pos_y = event.offsetY?(event.offsetY):event.pageY-document.getElementById("zoomer_image").offsetTop;\n        factor = %i;\n        pos_x = Math.floor(pos_x/factor);\n	pos_y = Math.floor(pos_y/factor);\n	counter++;\n
     var strVal = liste[pos_y][pos_x];
     var strTab = strVal.split(",");
     
     
     val = strTab[0];
     streng = chrom+":"+strTab[0]+"k | ";
     for(i=0; i<trackNames.length; i++) { 
         streng = streng + trackNames[i]+': '+strTab[i+1]+'%% | ';
        }
         
     document.myform.posAnchor.value = streng;\n
     jQuery( "#slider" ).slider( "option", "value", val );\n
     
             }\n</script>\n\n\n\n\n</head>
     <body>
     <h2 align="center" style="color:#FF7400;">Heatmap for chromosome %s</h2> 
     <div id="slider" ></div><br>
     \n<form name="myform" action="http://www.mydomain.com/myformhandler.cgi" method="POST">\n<div align="center">\n\n<input type="text" name="posAnchor" size="250" value=".">\n<br>\n</div>\n</form>\n<br>
     <div id="container"><!-- Image zoom start --><div id="zoomer_big_container"></div><div id="zoomer_thumb">\n<a href="%s" target="_blank" >\n<img src="%s" /></a></div><!-- Image zoom end --></div>\n\n\n%s
      
      <br/>%s</body></html>'''

     tableRowEntryTemplate = """<div class="tabbertab"><h2>%s</h2><a href="%s"><img src="%s" /></a></div>"""
     htmlPageTemplate = """<html><head>\n<script type="text/javascript" src="/gsuite/static/scripts/tabber.js"></script>\n<link href="/gsuite/static/style/tabber.css" rel="stylesheet" type="text/css" />\n
                 </head><body>%s</body></html>"""

     binsize = parseShortenedSizeSpec(choices[10])

     tnList = []         # per channel: None, a file name, or (track, regionList)
     trackNameList = []  # per channel: pretty track name, or '.' when unused
     genome = choices[0]
     chrLength = GenomeInfo.getStdChrLengthDict(genome)

     for index in [1,4,7]:
         if choices[index] in ['-- No track --','',None]:
             tnList.append(None)
             trackNameList.append('.')
             continue
         elif choices[index] == 'history':
             trackName = choices[index+1].split(':')
             tnList.append(ExternalTrackManager.extractFnFromGalaxyTN(trackName))
             trackNameList.append(prettyPrintTrackName(trackName))
         else:
             trackName = choices[index+2].split(':')
             track = PlainTrack(trackName)
             regionList = [GenomeRegion(genome, chrom, 0, chrLength[chrom]) for chrom in GenomeInfo.getChrList(genome)]
             tnList.append((track, regionList))
             trackNameList.append(prettyPrintTrackName(trackName))

     # JavaScript array literal with the names of the channels in use.
     trackNames = repr([v for v in trackNameList if v!='.'])
     # A lone track is drawn in grey scale instead of a single colour channel.
     singleTrack = len([1 for v in tnList if v]) == 1

     ResultDicts = []
     maxVals = []  # maximum coverage in a bin per channel (None when unused); used for normalization
     chrsWithData = set()  # chromosomes with any data; no point in generating images without data
     # (track name, microDict) pairs. Only the currently disabled line chart
     # would consume these. The label is the track's own name; the previous
     # code indexed into the repr() string and stored a single character.
     microDictList = []
     channelColors = [(1,0,0), (0,1,0), (0,0,1)]
     for tr, trName, color in zip(tnList, trackNameList, channelColors):
         maxVal = None
         if tr:
             if singleTrack:
                 color = (1,1,1)

             res, microDict, maxVal, trackChrs = cls.getValuesFromBedFile(genome, tr, color, binsize)
             microDictList.append((trName, microDict))
             chrsWithData = chrsWithData|trackChrs
             ResultDicts += [res]
         maxVals.append(maxVal)

     htmlTableContent = []
     resultDict = cls.syncResultDict(ResultDicts)
     binfactor = binsize/1000
     for chrom in sorted(chrsWithData, cmp=alphanum):
         valList = resultDict[chrom]
         # Pixel scale factors for the recursive pattern picture. Clamped to
         # at least 1: a factor of 0 (very small bin sizes) would yield an
         # empty lookup table, a zero-sized resize and a division by zero in
         # the page's JavaScript.
         bigFactor = max(1, int(10*(binsize/10000.0)**(0.5)))
         smallFactor = max(1, bigFactor // 3)
         posMatrix = cls.getResult(len(valList), 2,2)
         rowLen = len(posMatrix[0])
         numRows = len(posMatrix)
         # Lookup table used by the page to show values under the cursor.
         # Unused cells stay 0 and are stripped below.
         javaScriptList = [[0] * (rowLen*bigFactor) for _ in xrange(numRows*bigFactor)]

         im = Image.new("RGB", (rowLen, numRows), "white")
         for yIndex, row in enumerate(posMatrix):
             for xIndex, elem in enumerate(row):
                 im.putpixel((xIndex, yIndex), valList[elem])
                 # "<bin start in kb>,<percent channel 1>,..." for the channels in use.
                 javaScriptList[yIndex][xIndex] = ','.join([str(elem*binfactor)]+[ str(round(v*100/255.0, 2)) for indx, v in enumerate(valList[elem]) if trackNameList[indx] !='.'] )
         javaScriptList = [[v for v in jsRow if v != 0] for jsRow in javaScriptList]

         imSmall = im.resize((rowLen*smallFactor, numRows*smallFactor))
         im2 = im.resize((rowLen*bigFactor, numRows*bigFactor))

         smallImgFile = GalaxyRunSpecificFile(['Recursive', chrom+'.png' ], galaxyFn )
         bigImgFile = GalaxyRunSpecificFile(['Recursive', chrom+'Big.png' ], galaxyFn)
         zoomPageFile = GalaxyRunSpecificFile(['Recursive', chrom+'Zooming.html' ], galaxyFn)
         imSmall.save(smallImgFile.getDiskPath(ensurePath=True))
         im2.save(bigImgFile.getDiskPath(ensurePath=True))

         # Colour legend: one gradient bar per channel combination whose
         # tracks are all present.
         trackAndValRangeTab = zip(trackNameList, maxVals)
         colorTab = []
         onlyOneTrack = len([v for v in maxVals if v]) == 1
         for color, vals in [('Red_combination',[1,0,0]), ('Green_combination',[0,1,0]), ('Blue_combination',[0,0,1]),('Red-Green_combination',[1,1,0]), ('Red-Blue_combination',[1,0,1]), ('Green-Blue_combination',[0,1,1]), ('Red-Green-Blue_combination',[1,1,1])]:

             if not None in [maxVals[i] for i in range(len(vals)) if vals[i]>0]:
                 legendIm = Image.new("RGB", (256 , 1), "white")
                 tracksInvolved = ' & '.join([str(index+1) for index, v in enumerate(vals) if v>0])
                 if onlyOneTrack:
                     vals = [1,1,1]
                 for val in range(256):
                     legendIm.putpixel((val,0), tuple([val*v for v in vals]))
                 imColFile = GalaxyRunSpecificFile(['Recursive', color+'.png' ], galaxyFn)
                 imCol = legendIm.resize((256, 10))
                 imCol.save(imColFile.getDiskPath(ensurePath=True))
                 colorTab.append('<tr><td>Track %s</td><td>  <img src="%s" /></td></tr>'% (tracksInvolved, imColFile.getURL()))

         htmlTnRangeVals= '<br/><br/><table align="center"  cellspacing="10"><tr><th>Track number</th><th>Track name</th><th>Value range</th></tr>\n'
         # Rows open with <tr> (previously the malformed self-closing <tr/>).
         htmlTnRangeVals += '\n'.join(['<tr><td>Track %i </td><td>%s</td><td> 0 - %i</td></tr>' % (index+1, v[0], v[1]) for index, v in  enumerate(trackAndValRangeTab) if v[1]] )
         htmlTnRangeVals+='</table> <br/><table align="center"  cellspacing="10"><tr><th>Track combination</th><th>Colour range</th></tr>' + '\n'.join(colorTab) + '</table>\n'
         # The detailed line chart is disabled; the template still expects its slot.
         lineTabStr= ''

         zoomPageHtml = htmlTemplate % (int(GenomeInfo.getChrLen(genome, chrom)/1000.0)+1, repr(javaScriptList), repr(chrom), trackNames, bigFactor, chrom, bigImgFile.getURL(), smallImgFile.getURL(), htmlTnRangeVals, lineTabStr)
         # Close the file deterministically instead of relying on garbage collection.
         with open(zoomPageFile.getDiskPath(ensurePath=True),'w') as zoomPage:
             zoomPage.write(zoomPageHtml)
         htmlTableContent.append(tableRowEntryTemplate % (chrom, zoomPageFile.getURL(), smallImgFile.getURL()))

     tabberMal = '<div class="tabber">%s</div>'
     res = [tabberMal % v for v in htmlTableContent]
     with open(galaxyFn,'w') as outFile:
         outFile.write(htmlPageTemplate % ('<br/>'.join(res)))
示例#32
0
def getSingleTrackTS(genome, guiSelectedTrack, title='Dummy'):
    """Wrap a GUI-selected track in a SingleTrackTS.

    The selection is first resolved to a preprocessed track name via
    ExternalTrackManager; the resulting PlainTrack is stored together with a
    metadata dict holding `title` under the key 'title'.
    """
    preProcessedTN = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
        genome, guiSelectedTrack)
    selectedTrack = PlainTrack(preProcessedTN)
    metadata = {'title': title}
    return SingleTrackTS(selectedTrack, metadata)
示例#33
0
 def __init__(self, userBinSource, genome, **kwArgs):
     """Set up a StatJob running AssemblyGapCoverageStat on the 'gaps' track.

     The track is the genome's 'gaps' property track; no second track is
     used (None is passed in its place). Extra keyword arguments are
     forwarded to StatJob.__init__ unchanged.
     """
     gapsTrackName = GenomeInfo.getPropertyTrackName(genome, 'gaps')
     gapsTrack = PlainTrack(gapsTrackName)
     StatJob.__init__(self, userBinSource, gapsTrack, None,
                      AssemblyGapCoverageStat, **kwArgs)
示例#34
0
def create_track(file_name, trackName, genome="hg18"):
    """Import a file as a track and return it wrapped in a SingleTrackTS.

    Args:
        file_name: path of the file handed to gtrackcore's importFile.
        trackName: name under which the track is imported; also used as the
            single-element track name of the PlainTrack and as its title.
        genome: genome build to import into. Defaults to "hg18", which was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        A SingleTrackTS holding the PlainTrack, with metadata
        {"title": trackName}.
    """
    # Imported locally, as in the original, so gtrackcore is only needed when
    # this function is actually called.
    from gtrackcore.core.Api import importFile
    importFile(file_name, genome=genome, trackName=trackName)
    importedTrack = PlainTrack([trackName])
    return SingleTrackTS(importedTrack, {"title": trackName})
示例#35
0
    def execute(cls, choices, galaxyFn=None, username=''):
        """Run one statistic step by step, printing progress for debugging.

        Inputs read from `choices`:
          * choices[0] - statistic class name
          * choices[1] - genome
          * choices[2] / choices[4] - 'yes' to prepend cls.STD_PREFIX_TN to
            the first / second track name
          * choices[3] / choices[5] - ':'-separated track names, or
            cls.NO_TRACK_SHORTNAME for "no track"
          * boxes from cls.FIRST_EXTRA_PARAM_BOX_NUMBER onwards - alternating
            parameter label / value pairs

        The selected values are first saved in the shelve at cls.SHELVE_FN
        under the statistic name. Then the Unsplittable variant of the
        statistic is instantiated on the first bin and driven manually
        (createChildren, getResult). Finally the full StatJob is run over all
        bins and shown through GalaxyInterface._viewResults.

        Any exception is printed and re-raised.
        """
        shelveDict = {
            'track1': choices[3] if choices[3] != cls.NO_TRACK_SHORTNAME else None,
            'track2': choices[5] if choices[5] != cls.NO_TRACK_SHORTNAME else None,
        }
        print(len(choices))
        print(cls._extraParams)
        for i in range(len(cls._extraParams)):
            index = i * 2 + cls.FIRST_EXTRA_PARAM_BOX_NUMBER + 1
            shelveDict[index] = choices[index].strip()

        DebugInfoShelve = safeshelve.open(cls.SHELVE_FN)
        try:
            DebugInfoShelve[choices[0]] = shelveDict
        finally:
            # Release the shelve even if storing the entry fails.
            DebugInfoShelve.close()

        try:
            from gold.application.LogSetup import setupDebugModeAndLogging
            setupDebugModeAndLogging()

            print('Getting Unsplittable statClass')
            statClassName = choices[0]

            print('Preparing arguments to init')
            unsplittableStatClass = MagicStatFactory._getClass(
                statClassName, 'Unsplittable')
            genome = choices[1]

            from gold.track.Track import PlainTrack
            prefixTN1 = cls.STD_PREFIX_TN if choices[2] == 'yes' else []
            tn1 = prefixTN1 + choices[3].split(':')
            track1 = PlainTrack(
                tn1) if choices[3] != cls.NO_TRACK_SHORTNAME else None
            prefixTN2 = cls.STD_PREFIX_TN if choices[4] == 'yes' else []
            tn2 = prefixTN2 + choices[5].split(':')
            track2 = PlainTrack(
                tn2) if choices[5] != cls.NO_TRACK_SHORTNAME else None

            kwArgs = {}
            regVal = choices[cls.FIRST_EXTRA_PARAM_BOX_NUMBER + 1]
            binSpecVal = choices[cls.FIRST_EXTRA_PARAM_BOX_NUMBER + 3]
            ubSource = UserBinSource(regVal, binSpecVal, genome=genome)
            # Only the first bin is used for the manual Unsplittable run.
            region = list(ubSource)[0]

            if len(cls._extraParams) > 3:
                for i in range(len(cls._extraParams)):
                    labelBox = i * 2 + cls.FIRST_EXTRA_PARAM_BOX_NUMBER
                    paramName = choices[labelBox]
                    # The keyword is the label text before the first '('.
                    # split() keeps the whole label when there is no '(';
                    # the former find()-based slice cut off its last
                    # character in that case.
                    param = paramName.split('(', 1)[0].strip()
                    val = choices[labelBox + 1].strip()
                    if val != '':
                        kwArgs[param] = val

            print('Calling __init__')
            statObj = unsplittableStatClass(region, track1, track2, **kwArgs)

            print('Calling createChildren')
            statObj.createChildren()

            print('Calling getResult')
            statObj.getResult()

            print('Running StatJob')
            magicStatClass = STAT_CLASS_DICT[statClassName]
            res = StatJob(ubSource, track1, track2, magicStatClass,
                          **kwArgs).run()
            from quick.application.GalaxyInterface import GalaxyInterface
            GalaxyInterface._viewResults([res], galaxyFn)

        except Exception as e:
            # Two spaces after the colon reproduce the output of the former
            # `print 'Error: ', e` statement exactly.
            print('Error:  %s' % (e,))
            raise
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    The Genomic HyperBrowser is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with The Genomic HyperBrowser.  If not, see <http://www.gnu.org/licenses/>.

from gold.track.Track import PlainTrack
from gold.track.GenomeRegion import GenomeRegion

#create a track
track = PlainTrack(['Genes and gene subsets','Genes','Refseq'])
#track = PlainTrack(['DNA structure','Bendability'])

#create a region of interest
region = GenomeRegion('hg18','chr1',1000,900000)

#Could instead have been iterator of regions, e.g. genome-wide:
#from quick.application.UserBinSource import UserBinSource
#regionIter = UserBinSource('*','*','hg18')
#for region in regionIter:
#    pass
#print 'Last region of iter: ', region

#iterate through elements of the track in this region
for element in track.getTrackView(region):
    #just print the intervals for now..