Exemplo n.º 1
0
    def getTrackData(self, trackName, genome, chr, allowOverlaps, forceChrFolders=False):
        trackData = TrackData()
        
        brShelve = BoundingRegionShelve(genome, trackName, allowOverlaps)        
        if not forceChrFolders and brShelve.fileExists():
            chr = None
        
        dir = createDirPath(trackName, genome, chr, allowOverlaps)

        for fn in os.listdir(dir):
            fullFn = dir + os.sep + fn
            
            if fn[0] == '.' or os.path.isdir(fullFn):
                continue
                
            if isBoundingRegionFileName(fn):
                if fullFn not in self._fileDict:
                    self._fileDict[fullFn] = brShelve
                trackData.boundingRegionShelve = self._fileDict[fullFn]
                continue
            
            prefix, elementDim, dtypeDim, dtype = parseMemmapFileFn(fn)
            
            assert prefix not in trackData
            trackData[prefix] = self._getFile(chr, dir, fullFn, elementDim, dtype, dtypeDim)
        
        return trackData
Exemplo n.º 2
0
    def merge(genome, trackName, allowOverlaps):
        path = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)

        collector = PreProcMetaDataCollector(genome, trackName)
        chrList = collector.getPreProcessedChrs(allowOverlaps)
        if not collector.getTrackFormat().reprIsDense():
            chrList = sorted(chrList)
        
        existingChrList = [chr for chr in ChrMemmapFolderMerger._existingChrIter(path, chrList)]
        if len(existingChrList) == 0:
            raise EmptyGESourceError('No data lines has been read from source file (probably because it is empty).')
            
        firstChrTrackData = TrackSource().getTrackData(trackName, genome, existingChrList[0], allowOverlaps, forceChrFolders=True)
        arrayList = firstChrTrackData.keys()
        for arrayName in arrayList:
            mergedArray = firstChrTrackData[arrayName][:]
            elementDim, dtypeDim = parseMemmapFileFn(firstChrTrackData[arrayName].filename)[1:3]
            del firstChrTrackData[arrayName]
            
            for chr in existingChrList[1:]:
                chrTrackData = TrackSource().getTrackData(trackName, genome, chr, allowOverlaps, forceChrFolders=True)
            
                mergedArray = ChrMemmapFolderMerger.mergeArrays(mergedArray, np.array(chrTrackData[arrayName][:]))
                elementDimNew, dtypeDimNew = parseMemmapFileFn(chrTrackData[arrayName].filename)[1:3]
                elementDim = max(elementDim, elementDimNew)
                dtypeDim = max(dtypeDim, dtypeDimNew)
                
                del chrTrackData[arrayName]
            
            mergedFn = createMemmapFileFn(path, arrayName, elementDim, dtypeDim, str(mergedArray.dtype))
            
            f = np.memmap(mergedFn, dtype=mergedArray.dtype, mode='w+', shape=mergedArray.shape)
            f[:] = mergedArray
            f.flush()
            del f
            del mergedArray