Пример #1
0
def renameProcTrack(genome, oldTn, newTn):
    """Move the preprocessed data directory of a track to a new track name.

    The move is attempted twice, once for each value of allowOverlaps
    (False, then True). A variant whose source directory does not exist is
    reported and skipped. When the module-level ONLY_SIMULATION flag is
    set, the intended move is only printed.

    Args:
        genome: genome identifier, passed on to createDirPath.
        oldTn: current track name, passed on to createDirPath.
        newTn: new track name, passed on to createDirPath.

    Raises:
        AssertionError: if the target directory already exists (real runs
            only).
    """
    for allowOverlaps in [False, True]:
        # The leading space in ' without overlaps' is kept on purpose so the
        # printed messages stay byte-identical to the previous output.
        overlapsDesc = 'with overlaps' if allowOverlaps else ' without overlaps'

        oldPath = createDirPath(oldTn, genome, allowOverlaps=allowOverlaps)
        if not os.path.exists(oldPath):
            print('Warning: TN did not exist as preproc ' + overlapsDesc)
            continue

        print('(renaming TN in preproc ' + overlapsDesc + ')')
        newPath = createDirPath(newTn, genome, allowOverlaps=allowOverlaps)

        if ONLY_SIMULATION:
            print('Would move %s to %s' % (oldPath, newPath))
            continue

        # Raised explicitly (not via `assert`) so that the guard against
        # moving into an existing directory also holds when Python runs
        # with -O. The exception type is unchanged for callers.
        if os.path.exists(newPath):
            raise AssertionError('ERROR: Target path already exists: ' + newPath)
        ensurePathExists(newPath)
        shutil.move(oldPath, newPath)
Пример #2
0
    def _renameTrackNameIfIllegal(self, trackName):
        """Return trackName with illegal elements replaced.

        Every element of trackName is passed through
        replaceIllegalElementsInTrackNames. If the result differs from
        trackName and a directory exists for the original name, the track
        is renamed via renameTrack before the new name is returned.
        """
        sanitized = []
        for part in trackName:
            sanitized.append(replaceIllegalElementsInTrackNames(part))

        nameChanged = sanitized != trackName
        if nameChanged and os.path.exists(createDirPath(trackName, self._genome)):
            renameTrack(self._genome, trackName, sanitized)

        return sanitized
Пример #3
0
 def removeOutdatedPreProcessedFiles(genome, trackName, allowOverlaps, mode):
     """Delete outdated preprocessed files of a track and reset its timestamp.

     Cleanup happens only if preprocessed files exist and the collector
     has not already recorded their removal. In mode 'Real' all regular
     files in the track directory are unlinked and sub-directories
     recognised by PreProcessUtils._isOldTypeChromDirectory are removed
     recursively; in any other mode only a message is printed. The
     "removed" flag on the collector is updated in both modes.

     In mode 'Real' the preprocessing time stored in TrackInfo is reset,
     whether or not any cleanup was done.

     Args:
         genome: genome identifier.
         trackName: track name, passed on to createDirPath.
         allowOverlaps: selects which preprocessed variant to clean.
         mode: 'Real' to delete; any other value is a dry run.

     Raises:
         AssertionError: if the directory to clean does not start with
             Config.PROCESSED_DATA_PATH.
     """
     collector = PreProcMetaDataCollector(genome, trackName)
     if PreProcessUtils.preProcFilesExist(genome, trackName, allowOverlaps) and not \
         collector.hasRemovedPreProcFiles(allowOverlaps):
         dirPath = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)

         # Safety guard before deleting anything. Raised explicitly instead
         # of via `assert` so it cannot be stripped by running with -O.
         if not dirPath.startswith(Config.PROCESSED_DATA_PATH):
             raise AssertionError("Processed data path '%s' does not start with '%s'" %
                                  (dirPath, Config.PROCESSED_DATA_PATH))

         if mode == 'Real':
             # '%s %s' reproduces the separator the former print statement
             # inserted between its two arguments.
             print('%s %s' % ('Removing outdated preprocessed data: ', dirPath))
             for fn in os.listdir(dirPath):
                 fullFn = os.path.join(dirPath, fn)
                 if os.path.isfile(fullFn):
                     os.unlink(fullFn)
                 elif os.path.isdir(fullFn) and \
                         PreProcessUtils._isOldTypeChromDirectory(fullFn, genome):
                     shutil.rmtree(fullFn)
         else:
             print('%s %s' % ('Would now have removed outdated preprocessed data if real run: ', dirPath))

         collector.updateRemovedPreProcFilesFlag(allowOverlaps, True)

     if mode == 'Real':
         ti = TrackInfo(genome, trackName)
         ti.resetTimeOfPreProcessing()
Пример #4
0
 def removeChrMemmapFolders(genome, trackName, allowOverlaps):
     """Recursively delete the per-chromosome directories of a track.

     The chromosomes are those reported by
     PreProcMetaDataCollector.getPreProcessedChrs for the given
     allowOverlaps value. Each directory is asserted to exist and to be a
     directory before it is removed.
     """
     collector = PreProcMetaDataCollector(genome, trackName)
     for chrName in collector.getPreProcessedChrs(allowOverlaps):
         chrDir = createDirPath(trackName, genome, chrName, allowOverlaps)
         assert os.path.exists(chrDir), 'Path does not exist: ' + chrDir
         assert os.path.isdir(chrDir), 'Path is not a directory: ' + chrDir
         shutil.rmtree(chrDir)
Пример #5
0
    def getTrackData(self, trackName, genome, chr, allowOverlaps, forceChrFolders=False):
        """Collect the files of a track directory into a TrackData object.

        Unless forceChrFolders is set, the track-level directory (chr set
        to None) is used whenever the bounding region shelve file exists;
        otherwise the directory for the given chr is used.

        Hidden entries (name starting with '.') and sub-directories are
        skipped. A bounding region file is attached as
        boundingRegionShelve, reusing the object stored in self._fileDict
        for that path. Every other file name is parsed with
        parseMemmapFileFn and stored under its prefix via self._getFile.
        """
        trackData = TrackData()

        brShelve = BoundingRegionShelve(genome, trackName, allowOverlaps)
        useChrFolder = forceChrFolders or not brShelve.fileExists()
        if not useChrFolder:
            chr = None

        folder = createDirPath(trackName, genome, chr, allowOverlaps)

        for fileName in os.listdir(folder):
            fullPath = folder + os.sep + fileName

            if fileName[0] == '.' or os.path.isdir(fullPath):
                continue

            if not isBoundingRegionFileName(fileName):
                prefix, elementDim, dtypeDim, dtype = parseMemmapFileFn(fileName)

                # Each prefix may appear only once per directory.
                assert prefix not in trackData
                trackData[prefix] = self._getFile(chr, folder, fullPath, elementDim, dtype, dtypeDim)
                continue

            # Register the shelve on first sight, then hand out the stored one.
            if fullPath not in self._fileDict:
                self._fileDict[fullPath] = brShelve
            trackData.boundingRegionShelve = self._fileDict[fullPath]

        return trackData
Пример #6
0
def renameTrack(genome, oldTn, newTn):
    """Rename a track: first its track info, then its standardised data.

    Args:
        genome: genome identifier.
        oldTn: current track name as a sequence of strings (it is sliced
            and joined with ':').
        newTn: new track name, same form as oldTn.

    Raises:
        AssertionError: if newTn equals the leading len(newTn) elements of
            oldTn, or if the directory for oldTn does not exist.

    A failure in renameStdTrack is printed and otherwise ignored
    (best effort).
    """
    # NOTE(review): this rejects newTn being equal to, or a leading part of,
    # oldTn. The message talks about moving a track "into itself", which
    # would rather be oldTn being a leading part of newTn -- confirm intent.
    assert newTn != oldTn[:len(newTn)], 'ERROR: it is not allowed to move a track into itself (%s -> %s)' % (':'.join(oldTn), ':'.join(newTn))

    # First check to filter out misspellings.
    oldPath = createDirPath(oldTn, genome)
    assert os.path.exists(oldPath), 'ERROR: TN did not exist in processed tracks: ' + oldPath

    # Rename the track info first, in case of problems such as incomplete
    # records.
    renameTrackInfo(genome, oldTn, newTn)
    try:
        renameStdTrack(genome, oldTn, newTn)
    except Exception as e:
        # `except ... as` and print() are valid on both Python 2.6+ and 3.
        print(e)
Пример #7
0
    def _preProcess(self, trackName, noOverlapsFileCount=None, withOverlapsFileCount=None, \
                    noOverlapsChrElCount=None, withOverlapsChrElCount=None, customBins=None):
        """Preprocess a test track from scratch and optionally verify the result.

        Both preprocessed directories (without and with overlaps) of the
        prefixed track name are removed, then PreProcessAllTracksJob is run
        through self._runWithProfiling.

        Args:
            trackName: track name; self.TRACK_NAME_PREFIX is prepended.
            noOverlapsFileCount: if not None, expected number of non-hidden
                entries in the directory without overlaps.
            withOverlapsFileCount: same for the directory with overlaps.
            noOverlapsChrElCount: if not None, passed to
                self.assertChrElCounts for allowOverlaps=False.
            withOverlapsChrElCount: same for allowOverlaps=True.
            customBins: passed on to self.assertChrElCounts; defaults to a
                fresh empty dict.
        """
        # A new dict per call, so calls never share one mutable default.
        if customBins is None:
            customBins = {}

        trackName = self.TRACK_NAME_PREFIX + trackName
        noOverlapsPath = createDirPath(trackName, self.GENOME, allowOverlaps=False)
        withOverlapsPath = createDirPath(trackName, self.GENOME, allowOverlaps=True)
        self._removeDir(noOverlapsPath, trackName)
        self._removeDir(withOverlapsPath, trackName)

        self._runWithProfiling('PreProcessAllTracksJob(' + repr(self.GENOME) + ',' + repr(trackName) + ', username="******").process()',\
                                   globals(), locals())

        if noOverlapsFileCount is not None:
            self.assertEqual(noOverlapsFileCount, len([x for x in os.listdir(noOverlapsPath) if not x.startswith('.')]))

        if withOverlapsFileCount is not None:
            self.assertEqual(withOverlapsFileCount, len([x for x in os.listdir(withOverlapsPath) if not x.startswith('.')]))

        if noOverlapsChrElCount is not None:
            self.assertChrElCounts(trackName, noOverlapsChrElCount, False, customBins)

        if withOverlapsChrElCount is not None:
            self.assertChrElCounts(trackName, withOverlapsChrElCount, True, customBins)
Пример #8
0
    def getSubtypes(genome, trackName, fullAccess=False):
        """Return the subtype names of a track, sorted case-insensitively.

        A directory entry is a subtype unless its name starts with '.' or
        '_', it is a regular file, it is a valid chromosome name for the
        genome, or it is 'external' or 'ucsc'.

        fullAccess is currently unused: the filter on private tracks below
        is commented out.
        """
        dirPath = createDirPath(trackName, genome)
        hiddenPrefixes = ['.', '_']
        # fixme, just temporarily: these dirs should start with _
        excludedNames = ['external', 'ucsc']

        subtypes = []
        for entry in ProcTrackOptions._getDirContents(genome, trackName):
            if entry[0] in hiddenPrefixes:
                continue
            if os.path.isfile(dirPath + os.sep + entry):
                continue
            if GenomeInfo.isValidChr(genome, entry):
                continue
            if entry in excludedNames:
                continue
            subtypes.append(entry)

        #if not fullAccess and not ProcTrackOptions._isLiteratureTrack(genome, trackName):
        #    subtypes = [x for x in subtypes if not TrackInfo(genome, trackName+[x]).private]

        return sorted(subtypes, key=str.lower)
Пример #9
0
 def __init__(self, genome, trackName, allowOverlaps):
     """Set up the shelve file path and empty in-memory state.

     The shelve file is BR_SHELVE_FILE_NAME inside the directory that
     createDirPath returns for the track. self._minimalRegion is the first
     element of MinimalBinSource(genome), or None when that source is None.
     """
     assert allowOverlaps in [False, True]

     self._genome = genome
     self._trackName = trackName

     trackDir = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
     self._fn = os.sep.join([trackDir, BR_SHELVE_FILE_NAME])
     self._contents = {}
     self._updatedChrs = set()

     from gtrackcore_memmap.input.userbins.UserBinSource import MinimalBinSource
     minimalBins = MinimalBinSource(genome)
     if minimalBins is not None:
         self._minimalRegion = minimalBins[0]
     else:
         self._minimalRegion = None
Пример #10
0
 def _createOutputDirectory(self, genome, chr, trackName, allowOverlaps, geSourceManager):
     """Create the OutputDirectory object for one chromosome of a track.

     The target directory comes from createDirPath; the chromosome length
     comes from GenomeInfo.getChrLen; all remaining constructor arguments
     are queried from geSourceManager.
     """
     outDir = createDirPath(trackName, genome, chr, allowOverlaps)

     from gtrackcore_memmap.metadata.GenomeInfo import GenomeInfo

     # Queried in the order of OutputDirectory's positional arguments.
     prefixList = geSourceManager.getPrefixList()
     numElements = geSourceManager.getNumElementsForChr(chr)
     chrLen = GenomeInfo.getChrLen(genome, chr)
     valDataType = geSourceManager.getValDataType()
     valDim = geSourceManager.getValDim()
     edgeWeightDataType = geSourceManager.getEdgeWeightDataType()
     edgeWeightDim = geSourceManager.getEdgeWeightDim()
     maxNumEdges = geSourceManager.getMaxNumEdgesForChr(chr)
     maxStrLens = geSourceManager.getMaxStrLensForChr(chr)
     isSorted = geSourceManager.isSorted()

     return OutputDirectory(outDir, prefixList, numElements, chrLen,
                            valDataType, valDim,
                            edgeWeightDataType, edgeWeightDim,
                            maxNumEdges, maxStrLens, isSorted)
Пример #11
0
 def preProcFilesExist(genome, trackName, allowOverlaps):
     """Tell whether preprocessed files exist for a track.

     The value stored in PreProcMetaDataCollector is returned if it is not
     None. Otherwise the answer is True when the bounding region shelve
     file exists; else, when the track directory exists, it is whatever
     PreProcessUtils._hasOldTypeChromSubDirs reports; else False. A newly
     computed answer is stored on the collector before it is returned.
     """
     collector = PreProcMetaDataCollector(genome, trackName)
     knownAnswer = collector.preProcFilesExist(allowOverlaps)
     if knownAnswer is not None:
         return knownAnswer

     dirPath = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
     if BoundingRegionShelve(genome, trackName, allowOverlaps).fileExists():
         # A check of the individual 'start'/'end'/'val'/'edges' files was
         # sketched here earlier but is disabled.
         filesExist = True
     elif os.path.exists(dirPath):
         filesExist = PreProcessUtils._hasOldTypeChromSubDirs(dirPath, genome)
     else:
         filesExist = False

     collector.updatePreProcFilesExistFlag(allowOverlaps, filesExist)
     return filesExist
Пример #12
0
    def extract(cls, trackName, regionList, fn, fileFormatName=DEFAULT_FILE_FORMAT_NAME, globalCoords=True, \
                addSuffix=False, asOriginal=False, allowOverlaps=False, ignoreEmpty=False):
        """Compose the track data of the given regions into a file.

        Args:
            trackName: track name.
            regionList: non-empty collection of regions; the genome is taken
                from the first region.
            fn: output file name.
            fileFormatName: format used to select the composer class, unless
                asOriginal selects one first.
            globalCoords: passed on to TrackGenomeElementSource.
            addSuffix: if true, the extension of fn is replaced by the
                composer's default file name suffix.
            asOriginal: if true, first try the composer matching the file
                type stored in TrackInfo; fall back to fileFormatName if the
                file type is empty or the lookup fails.
            allowOverlaps: if true, overlaps are used only when the
                with-overlaps directory of the track exists.
            ignoreEmpty: passed on to composer.composeToFile.

        Returns:
            The (possibly suffix-adjusted) file name if composeToFile
            reports success, otherwise None.

        Raises:
            AssertionError: if regionList is empty.
        """
        from gtrackcore_memmap.input.adapters.TrackGenomeElementSource import TrackGenomeElementSource
        from gtrackcore_memmap.extract.fileformats.FileFormatComposer import getComposerClsFromFileFormatName, getComposerClsFromFileSuffix

        assert len(regionList) > 0
        genome = next(iter(regionList)).genome

        # To silently extract correctly if track type is dense
        if allowOverlaps:
            allowOverlaps = os.path.exists(createDirPath(trackName, genome, allowOverlaps=True))

        trackGESource = TrackGenomeElementSource(genome, trackName, regionList, globalCoords=globalCoords, \
                                                 allowOverlaps=allowOverlaps, printWarnings=False)

        composerCls = None
        if asOriginal:
            ti = TrackInfo(genome, trackName)
            if ti.fileType != '':
                try:
                    composerCls = getComposerClsFromFileSuffix(ti.fileType)
                except Exception:
                    # Best effort: an unknown suffix falls back to
                    # fileFormatName below. Narrowed from a bare `except` so
                    # KeyboardInterrupt and SystemExit are not swallowed.
                    pass

        if composerCls is None:
            composerCls = getComposerClsFromFileFormatName(fileFormatName)

        if addSuffix:
            fn = os.path.splitext(fn)[0] + '.' + composerCls.getDefaultFileNameSuffix()

        composer = composerCls(trackGESource)
        ok = composer.composeToFile(fn, ignoreEmpty=ignoreEmpty)

        return fn if ok else None
Пример #13
0
    def merge(genome, trackName, allowOverlaps):
        """Merge the per-chromosome memmap arrays of a track into track-level files.

        For every array found in the first existing chromosome folder, the
        arrays of all existing chromosome folders are combined with
        ChrMemmapFolderMerger.mergeArrays and written as one new memmap
        file in the track directory.

        Raises:
            EmptyGESourceError: if none of the preprocessed chromosomes is
                yielded by ChrMemmapFolderMerger._existingChrIter.
        """
        path = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)

        collector = PreProcMetaDataCollector(genome, trackName)
        chrList = collector.getPreProcessedChrs(allowOverlaps)
        # The chromosome order is only sorted when the track format reports
        # a non-dense representation.
        if not collector.getTrackFormat().reprIsDense():
            chrList = sorted(chrList)
        
        existingChrList = [chr for chr in ChrMemmapFolderMerger._existingChrIter(path, chrList)]
        if len(existingChrList) == 0:
            raise EmptyGESourceError('No data lines has been read from source file (probably because it is empty).')
            
        # forceChrFolders=True makes getTrackData read the chromosome folder.
        firstChrTrackData = TrackSource().getTrackData(trackName, genome, existingChrList[0], allowOverlaps, forceChrFolders=True)
        # NOTE(review): entries are deleted from firstChrTrackData inside the
        # loop below, so this relies on keys() returning a snapshot list (as a
        # Python 2 dict does) -- confirm against the TrackData type.
        arrayList = firstChrTrackData.keys()
        for arrayName in arrayList:
            mergedArray = firstChrTrackData[arrayName][:]
            # Items 1 and 2 of the parsed file name are elementDim and dtypeDim.
            elementDim, dtypeDim = parseMemmapFileFn(firstChrTrackData[arrayName].filename)[1:3]
            # Drop the reference to the first chromosome's array once copied.
            del firstChrTrackData[arrayName]
            
            for chr in existingChrList[1:]:
                # Track data is fetched again for every (array, chromosome) pair.
                chrTrackData = TrackSource().getTrackData(trackName, genome, chr, allowOverlaps, forceChrFolders=True)
            
                mergedArray = ChrMemmapFolderMerger.mergeArrays(mergedArray, np.array(chrTrackData[arrayName][:]))
                elementDimNew, dtypeDimNew = parseMemmapFileFn(chrTrackData[arrayName].filename)[1:3]
                # The merged file is named with the largest dimensions seen.
                elementDim = max(elementDim, elementDimNew)
                dtypeDim = max(dtypeDim, dtypeDimNew)
                
                del chrTrackData[arrayName]
            
            mergedFn = createMemmapFileFn(path, arrayName, elementDim, dtypeDim, str(mergedArray.dtype))
            
            # mode='w+' creates the file or overwrites an existing one.
            f = np.memmap(mergedFn, dtype=mergedArray.dtype, mode='w+', shape=mergedArray.shape)
            f[:] = mergedArray
            f.flush()
            # Drop the references before the next array is processed.
            del f
            del mergedArray
Пример #14
0
 def _removeAllTrackData(self, trackName, removeOrigData=True):
     """Remove both preprocessed directories of a track and, optionally, its original data.

     The directories without and with overlaps are passed to
     self._removeDir in that order; the path from createOrigPath follows
     when removeOrigData is true.
     """
     for allowOverlaps in (False, True):
         procDir = createDirPath(trackName, self.GENOME, allowOverlaps=allowOverlaps)
         self._removeDir(procDir, trackName)

     if not removeOrigData:
         return
     self._removeDir(createOrigPath(self.GENOME, trackName), trackName)
Пример #15
0
 def _preProcess(self, trackName):
     """Remove both preprocessed directories of the track, then preprocess it again.

     The directories without and with overlaps are removed in that order
     before PreProcessAllTracksJob is run for self.GENOME and trackName.
     """
     for allowOverlaps in (False, True):
         procDir = createDirPath(trackName, self.GENOME, allowOverlaps=allowOverlaps)
         self._removeDir(procDir, trackName)

     job = PreProcessAllTracksJob(self.GENOME, trackName, username="******")
     job.process()
Пример #16
0
def _getDirPath(genome=""):
    """Return the directory path for an empty track name and empty genome.

    The path is passed to ensurePathExists before it is returned.

    NOTE(review): the genome argument is not used; createDirPath is always
    called with an empty track name and "" as the genome -- confirm that
    this is intended.
    """
    from gtrackcore_memmap.util.CommonFunctions import createDirPath, ensurePathExists

    baseDir = createDirPath([], "")
    ensurePathExists(baseDir)
    return baseDir
Пример #17
0
 def _getDirContents(genome, trackName):
     """Return the entries of the track directory, or an empty list if it does not exist."""
     trackDir = createDirPath(trackName, genome)
     if not os.path.exists(trackDir):
         return []
     return os.listdir(trackDir)
 def setUp(self):
     """Store the test track directory and the shelve file path inside it."""
     testDir = createDirPath(['testBoundingRegionShelve'], 'TestGenome', allowOverlaps=False)
     self._path = testDir
     self._fn = os.sep.join([testDir, 'boundingRegions.shelve'])