def _writeTestFile(self, case):
    """Write a test case's header and data lines to a file and return its path.

    The path is obtained from createOrigPath using self.GENOME, the track
    name (self.TRACK_NAME_PREFIX + case.trackName) and the file name
    'testfile' + case.suffix. Missing directories are created through
    ensurePathExists before the file is opened.

    Args:
        case: object providing trackName, suffix, headerLines and lines.
            headerLines and lines are concatenated and written joined by
            newline characters (no trailing newline is added).

    Returns:
        The path of the file that was written.
    """
    fn = createOrigPath(self.GENOME,
                        self.TRACK_NAME_PREFIX + case.trackName,
                        'testfile' + case.suffix)
    ensurePathExists(fn)

    # The context manager closes the handle even when the write fails.
    with open(fn, 'w') as testfile:
        testfile.write('\n'.join(case.headerLines + case.lines))

    return fn
def renameStdTrack(genome, oldTn, newTn):
    """Move the original-data path of a track from its old to its new track name.

    Both paths are built with createOrigPath. The source path must exist.
    When the module-level ONLY_SIMULATION flag is set, nothing is moved and
    only the intended move is printed; in that mode the target path is not
    checked for existence.

    Args:
        genome: genome identifier passed to createOrigPath.
        oldTn: current track name.
        newTn: track name to move the data to.

    Raises:
        AssertionError: if the source path does not exist, or (when not
            simulating) if the target path already exists.
    """
    srcPath = createOrigPath(genome, oldTn)
    assert os.path.exists(srcPath), 'ERROR: TN did not exist in stdTracks: ' + srcPath

    print('(renaming track in stdTracks..)')
    dstPath = createOrigPath(genome, newTn)
    pathPair = (srcPath, dstPath)

    # Dry run: report what would happen and leave the file system untouched.
    if ONLY_SIMULATION:
        print('Would move %s to %s' % pathPair)
        return

    assert not os.path.exists(dstPath), 'ERROR: Target path already exists: ' + dstPath
    ensurePathExists(dstPath)
    print('Moving from %s to %s' % pathPair)
    shutil.move(srcPath, dstPath)
def __new__(cls, genome, trackName):
    """Return the object stored for (genome, trackName), or a new blank instance.

    The shelve at cls.SHELVE_FN is opened with flag 'c' and
    protocol cls.PROTOCOL, and looked up under
    constructKey(genome, trackName). If an entry is found it is returned
    as-is; otherwise a fresh, uninitialised instance of cls is created.

    Args:
        genome: genome identifier; 'hg18' is treated as 'NCBI36'.
        trackName: track name used, together with genome, to build the key.

    Returns:
        The stored object if one exists, else object.__new__(cls).
    """
    # Temporary hack
    if genome in ['hg18', 'NCBI36']:
        genome = 'NCBI36'

    ensurePathExists(cls.SHELVE_FN)
    trackInfoShelve = safeshelve.open(cls.SHELVE_FN, 'c', protocol=cls.PROTOCOL)
    try:
        stored = trackInfoShelve.get(constructKey(genome, trackName))
    finally:
        # Always release the shelve, also when key construction or the
        # lookup raises.
        trackInfoShelve.close()

    if stored is not None:
        return stored
    return object.__new__(cls)
def renameProcTrack(genome, oldTn, newTn):
    """Move the preprocessed directories of a track to a new track name.

    The move is attempted twice, once for the directory without overlaps and
    once for the directory with overlaps (paths come from createDirPath).
    A variant whose source directory is missing only produces a warning.
    When the module-level ONLY_SIMULATION flag is set, the intended move is
    printed instead of being carried out.

    Args:
        genome: genome identifier passed to createDirPath.
        oldTn: current track name.
        newTn: track name to move the data to.

    Raises:
        AssertionError: if, when not simulating, a target path already exists.
    """
    for allowOverlaps in [False, True]:
        # Reproduced verbatim from the messages, including the leading space
        # of the second alternative.
        overlapText = 'with overlaps' if allowOverlaps else ' without overlaps'
        srcPath = createDirPath(oldTn, genome, allowOverlaps=allowOverlaps)

        if not os.path.exists(srcPath):
            print('Warning: TN did not exist as preproc ' + overlapText)
            continue

        print('(renaming TN in preproc ' + overlapText + ')')
        dstPath = createDirPath(newTn, genome, allowOverlaps=allowOverlaps)

        # Dry run: report only.
        if ONLY_SIMULATION:
            print('Would move %s to %s' % (srcPath, dstPath))
            continue

        assert not os.path.exists(dstPath), 'ERROR: Target path already exists: ' + dstPath
        ensurePathExists(dstPath)
        shutil.move(srcPath, dstPath)
def extractOneTrackManyToRegionFilesInOneZipFile(cls, trackName, regionList, zipFn,
                                                 fileFormatName=DEFAULT_FILE_FORMAT_NAME,
                                                 globalCoords=False, asOriginal=False,
                                                 allowOverlaps=False, ignoreEmpty=True):
    """Extract one track for several regions, one file per region, into a zip file.

    For each region, cls.extract is called for that single region with a
    temporary output path located next to the zip file and named after the
    region (':' replaced by '_'). If cls.extract returns a truthy value, it
    is used as the path of the produced file: the file is added to the
    archive under its base name and then deleted from disk.

    Args:
        trackName: track to extract; passed on to cls.extract.
        regionList: iterable of regions; str(region) names the member file.
        zipFn: path of the zip archive to create (opened in mode 'w').
        fileFormatName, globalCoords, asOriginal, allowOverlaps, ignoreEmpty:
            passed on unchanged to cls.extract.
    """
    ensurePathExists(zipFn)
    zipDir = os.path.dirname(zipFn)

    zipFile = ZipFile(zipFn, 'w')
    try:
        for region in regionList:
            # os.path.join keeps the temporary file relative when zipFn has no
            # directory part; plain concatenation with os.sep would have
            # pointed at the file system root in that case.
            fn = os.path.join(zipDir, str(region).replace(':', '_'))
            okFn = cls.extract(trackName, [region], fn, fileFormatName=fileFormatName,
                               globalCoords=globalCoords, addSuffix=True,
                               asOriginal=asOriginal, allowOverlaps=allowOverlaps,
                               ignoreEmpty=ignoreEmpty)
            if okFn:
                zipFile.write(okFn, os.path.basename(okFn))
                os.remove(okFn)
    finally:
        # Close the archive even when an extraction or write fails, so the
        # handle is released.
        zipFile.close()
def importFile(fileName, genome, trackName):
    """fileName genome trackName"""
    # NOTE(review): the docstring above looks like a usage string listing the
    # arguments and is kept verbatim in case it is read at runtime - confirm.
    #
    # Copies fileName into the original-data location of the given track
    # (as computed by createOrigPath) and then runs preprocessing for it.
    trackName = _convertTrackName(trackName)

    from gtrackcore_memmap.util.CommonFunctions import createOrigPath, ensurePathExists
    targetFn = createOrigPath(genome, trackName, os.path.basename(fileName))
    targetDir = os.path.dirname(targetFn)

    # If a file with the same name was imported before, its whole directory
    # is removed before the new copy is made.
    if os.path.exists(targetFn):
        shutil.rmtree(targetDir)

    ensurePathExists(targetFn)
    shutil.copy(fileName, targetFn)
    os.chmod(targetFn, 0o664)  # rw-rw-r--

    from gtrackcore_memmap.preprocess.PreProcessTracksJob import PreProcessAllTracksJob
    job = PreProcessAllTracksJob(genome, trackName)
    job.process()
def _getDirPath(genome=""):
    """Return the directory given by createDirPath([], ""), creating it if needed.

    NOTE(review): the genome argument is accepted but not used; the path is
    always requested with an empty track name and an empty genome string.
    Confirm whether that is intended.

    Returns:
        The directory path, after it has been passed to ensurePathExists.
    """
    from gtrackcore_memmap.util.CommonFunctions import createDirPath
    from gtrackcore_memmap.util.CommonFunctions import ensurePathExists

    path = createDirPath([], "")
    ensurePathExists(path)
    return path
def composeToFile(self, fn, ignoreEmpty=False, **kwArgs):
    """Compose the output into the file fn and return the result of composing.

    Missing directories for fn are created through ensurePathExists, the
    file is opened for writing, and the actual work is delegated to
    self._composeCommon.

    Args:
        fn: path of the file to write (opened in mode "w").
        ignoreEmpty: passed on to self._composeCommon.
        **kwArgs: passed on to self._composeCommon.

    Returns:
        Whatever self._composeCommon returns.
    """
    ensurePathExists(fn)

    # The context manager closes the file even if composing raises.
    with open(fn, "w") as f:
        return self._composeCommon(f, ignoreEmpty, **kwArgs)
def storeBoundingRegions(self, boundingRegionTuples, genomeElementChrList, sparse):
    """Validate the given bounding regions and store them in the shelve self._fn.

    The regions are checked and collected per chromosome, converted to one
    BrInfoHolder per chromosome and written to the shelve file. The method
    then waits until self.fileExists() reports that the file is present.

    Args:
        boundingRegionTuples: iterable of objects with a `region` attribute
            (providing chr, start, end, len(), `<` comparison and
            overlaps()) and an `elCount` attribute. Regions must be grouped
            by chromosome and, within a chromosome, be sorted,
            non-overlapping and separated by a gap.
        genomeElementChrList: chromosomes that contain data; every one of
            them must be covered by at least one bounding region.
        sparse: if false (dense representation), each region gets its own
            element index range and its length must equal its element
            count. If true, all regions of a chromosome share the element
            index range and the bin index range of that chromosome.

    Raises:
        AssertionError: if sparse is neither False nor True.
        InvalidFormatError: if the regions are ungrouped, unsorted,
            overlapping, adjoining, of non-positive length, inconsistent
            with their element count, or if a chromosome with data has no
            bounding region.
    """
    assert sparse in [False, True]

    tempContents = OrderedDict()

    genomeElementChrs = set(genomeElementChrList)
    lastRegion = None
    chrStartIdxs = OrderedDict()
    chrEndIdxs = OrderedDict()
    totElCount = 0
    totBinCount = 0

    for br in boundingRegionTuples:
        if lastRegion is None or br.region.chr != lastRegion.chr:
            # First region of a chromosome: the chromosome must not have been
            # seen earlier in the input.
            if br.region.chr in tempContents:
                raise InvalidFormatError("Error: bounding region (%s) is not grouped with previous bounding regions of the same chromosome (sequence)." % br.region)

            lastRegion = None
            tempContents[br.region.chr] = OrderedDict()
            if sparse:
                chrStartIdxs[br.region.chr] = totElCount
        else:
            # Same chromosome as the previous region: check ordering and gaps.
            if br.region < lastRegion:
                raise InvalidFormatError("Error: bounding regions in the same chromosome (sequence) are unsorted: %s > %s." % (lastRegion, br.region))
            if lastRegion.overlaps(br.region):
                raise InvalidFormatError("Error: bounding regions '%s' and '%s' overlap." % (lastRegion, br.region))
            if lastRegion.end == br.region.start:
                raise InvalidFormatError("Error: bounding regions '%s' and '%s' are adjoining (there is no gap between them)." % (lastRegion, br.region))

        if len(br.region) < 1:
            raise InvalidFormatError("Error: bounding region '%s' does not have positive length." % br.region)

        if not sparse and len(br.region) != br.elCount:
            raise InvalidFormatError("Error: track type representation is dense, but the length of bounding region '%s' is not equal to the element count: %s != %s" % (br.region, len(br.region), br.elCount))

        # Dense: per-region element index range. Sparse: filled in per
        # chromosome in the second pass below.
        startIdx, endIdx = (totElCount, totElCount + br.elCount) if not sparse else (None, None)
        totElCount += br.elCount
        if sparse:
            # Overwritten for each region, so it ends at the running total
            # after the last region of the chromosome.
            chrEndIdxs[br.region.chr] = totElCount

        tempContents[br.region.chr][br.region.start] = BoundingRegionInfo(br.region.start, br.region.end, startIdx, endIdx, 0, 0)

        lastRegion = br.region

    if sparse:
        totBinCount = 0
        for chr in tempContents:
            chrLen = GenomeInfo.getChrLen(self._genome, chr)
            numBinsInChr = CompBinManager.getNumOfBins(GenomeRegion(start=0, end=chrLen))
            for key in tempContents[chr].keys():
                # Every region of the chromosome receives the same bin range;
                # totBinCount only advances after the inner loop.
                startBinIdx = totBinCount
                endBinIdx = totBinCount + numBinsInChr
                brInfo = tempContents[chr][key]

                if chr in genomeElementChrs:
                    tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, \
                                                                chrStartIdxs[chr], chrEndIdxs[chr], \
                                                                startBinIdx, endBinIdx)
                else:
                    # Chromosome without data: it must not claim any elements.
                    if chrEndIdxs[chr] - chrStartIdxs[chr] > 0:
                        raise InvalidFormatError("Error: bounding region '%s' has incorrect element count: %s > 0" % (GenomeRegion(chr=chr, start=brInfo.start, end=brInfo.end), chrEndIdxs[chr] - chrStartIdxs[chr]))
                    tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, 0, 0, 0, 0)

            if chr in genomeElementChrs:
                totBinCount += numBinsInChr

    if len(genomeElementChrs - set(tempContents.keys())) > 0:
        raise InvalidFormatError('Error: some chromosomes (sequences) contains data, but has no bounding regions: %s' % ', '.join(genomeElementChrs - set(tempContents.keys())))

    ensurePathExists(self._fn)

    # Replace each per-chromosome dict by a holder of two parallel tuples
    # (the region start positions and the corresponding infos).
    for chr in tempContents:
        brInfoDict = tempContents[chr]
        tempContents[chr] = BrInfoHolder(tuple(brInfoDict.keys()), tuple(brInfoDict.values()))

    brShelve = safeshelve.open(self._fn)
    try:
        brShelve.update(tempContents)
    finally:
        # Close the shelve even if the update fails, so the handle is not
        # left open.
        brShelve.close()

    # Poll until the shelve file is reported as existing, logging each wait.
    while not self.fileExists():
        from gtrackcore_memmap.application.LogSetup import logMessage
        logMessage("Bounding region shelve file '%s' has yet to be created" % self._fn)
        import time
        time.sleep(0.2)