def _assertWriteElement(self, filePrefix, contents, dataType, dataTypeDim, maxNumEdges, maxStrLens,
                        assertContents, assertDataType, assertDataTypeDim, assertElementDim):
    """Write ``contents`` element by element via OutputFile and check the file on disk.

    A GEList is populated with ``contents`` (either through its
    ``_<filePrefix>List`` member, when it has one, or through ``_extraList``),
    every element is written with ``OutputFile.writeElement``, and the
    resulting file is read back with ``memmap`` and compared against
    ``assertContents``.

    ``dataType`` / ``dataTypeDim`` are applied to the value file when
    ``filePrefix`` is 'val' and to the weight file when it is 'weights';
    whichever is not under test gets 'float64' and dimension 1.

    Returns the Setup object created for the test.
    """
    numElements = len(assertContents)
    setup = Setup(filePrefix, numElements, assertDataType, assertDataTypeDim, assertElementDim)

    if filePrefix == 'val':
        valDataType, valDim = dataType, dataTypeDim
    else:
        valDataType, valDim = 'float64', 1

    if filePrefix == 'weights':
        weightDataType, weightDim = dataType, dataTypeDim
    else:
        weightDataType, weightDim = 'float64', 1

    elements = GEList()
    attrName = '_%sList' % filePrefix
    if hasattr(elements, attrName):
        # Overwrite the instance-level list directly, bypassing any setter.
        vars(elements)[attrName] = contents
    else:
        # No dedicated list for this prefix: supply one dict per element instead.
        elements._extraList = [{filePrefix: item} for item in contents]

    outFile = OutputFile(setup.path, setup.filePrefix, numElements, valDataType, valDim,
                         weightDataType, weightDim, maxNumEdges, maxStrLens)
    for element in elements:
        outFile.writeElement(element)
    outFile.close()

    self.assertTrue(os.path.exists(setup.fn))

    onDisk = memmap(setup.fn, dtype=setup.dataType, shape=setup.shape, mode='r')
    self.assertListsOrDicts(assertContents, list(onDisk))
    return setup
def _assertWrite(self, filePrefix, dataType, contents):
    """Write each item of ``contents`` with OutputFile.write and check the file on disk.

    The OutputFile is created with default type settings, closed after the
    last write, and the file is then read back with ``memmap`` using
    ``dataType`` and compared against ``contents``.
    """
    count = len(contents)
    setup = Setup(filePrefix, count, dataType, 1, None)

    outFile = OutputFile(setup.path, setup.filePrefix, count)
    for value in contents:
        outFile.write(value)
    outFile.close()

    self.assertTrue(os.path.exists(setup.fn))

    written = list(memmap(setup.fn, dataType, mode='r'))
    self.assertListsOrDicts(contents, written)
def writeIndexes(self):
    """Create and fill the 'leftIndex' and 'rightIndex' files.

    Both index files hold one entry per index bin, where the bin size comes
    from ``CompBinManager.getIndexBinSize()`` and the number of bins is
    ``ceil(chrSize / binSize)``.

    Left/right element borders are derived from whichever of the start and
    end files is present:

    * start file present: lefts are its contents; otherwise lefts are 0
      followed by all end values except the last.
    * end file present: rights are its contents (minus the first value when
      there is no start file); otherwise rights are the start values + 1.

    At least one of the two files must be present; with neither, the method
    fails with AttributeError, as before.
    """
    binSize = CompBinManager.getIndexBinSize()  # loop-invariant, fetch once
    numIndexElements = int(math.ceil(1.0 * self._chrSize / binSize))

    self._leftIndexFile = OutputFile(self._path, 'leftIndex', numIndexElements, allowAppend=False)
    self._rightIndexFile = OutputFile(self._path, 'rightIndex', numIndexElements, allowAppend=False)

    # Compare against None instead of relying on truthiness: the files
    # support len(), so an empty but existing file would otherwise be
    # mistaken for a missing one.
    hasStartFile = self._startFile is not None
    hasEndFile = self._endFile is not None

    if hasStartFile:
        lefts = self._startFile.getContents()
    else:
        lefts = np.r_[0, self._endFile.getContents()[:-1]]

    if hasEndFile:
        rights = self._endFile.getContents()
        if not hasStartFile:
            rights = rights[1:]
    else:
        rights = self._startFile.getContents() + 1

    # Left index: for every bin, in order, record the index of the first
    # element whose right border lies beyond the start of that bin.
    # NOTE(review): this relies on ``rights`` being non-decreasing - confirm.
    bin_i = 0
    i = 0
    for i, right in enumerate(rights):
        while right > bin_i * binSize:
            self._leftIndexFile.write(i)
            bin_i += 1

    # Right index: for every bin, in order, record the index of the first
    # element whose left border lies at or beyond the end of that bin.
    bin_j = 0
    j = 0
    for j, left in enumerate(lefts):
        while left >= (bin_j + 1) * binSize:
            self._rightIndexFile.write(j)
            bin_j += 1

    # Hand the first unwritten bin and "last index + 1" to the helper;
    # presumably it pads the remaining bins with that value - see
    # _fillRestOfIndexFile.
    self._fillRestOfIndexFile(bin_i, i + 1, self._leftIndexFile)
    self._fillRestOfIndexFile(bin_j, j + 1, self._rightIndexFile)
def __init__(self, path, prefixList, fileArraySize, chrSize, valDataType='float64', valDim=1,
             weightDataType='float64', weightDim=1, maxNumEdges=0, maxStrLens=None,
             elementsAreSorted=False):
    """Create one OutputFile per prefix under ``path``, plus index files when applicable.

    path              -- directory for the files; created if it does not exist.
    prefixList        -- prefixes to create an OutputFile for; kept in this order.
    fileArraySize     -- size passed to every OutputFile.
    chrSize           -- passed on to OutputIndexFilePair.
    valDataType, valDim, weightDataType, weightDim, maxNumEdges, maxStrLens
                      -- passed unchanged to every OutputFile. ``maxStrLens``
                         defaults to a fresh empty dict for each call.
    elementsAreSorted -- stored as ``self._elementsAreSorted``.

    An OutputIndexFilePair is created only when 'start' or 'end' is among
    the prefixes; otherwise ``self._indexFiles`` is None.
    """
    # Use a None sentinel so calls never share one module-level default dict.
    if maxStrLens is None:
        maxStrLens = {}

    self._files = OrderedDict()

    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError:
            # Another process may have created the directory between the
            # existence check and makedirs; only a real failure is an error.
            if not os.path.isdir(path):
                raise

    for prefix in prefixList:
        self._files[prefix] = OutputFile(path, prefix, fileArraySize, valDataType, valDim,
                                         weightDataType, weightDim, maxNumEdges, maxStrLens)

    if 'start' in self._files or 'end' in self._files:
        # A missing start or end file is passed on as None.
        self._indexFiles = OutputIndexFilePair(path, chrSize,
                                               self._files.get('start'),
                                               self._files.get('end'))
    else:
        self._indexFiles = None

    self._elementsAreSorted = elementsAreSorted
def testLen(self):
    """len() of a newly created OutputFile equals the size it was created with."""
    expectedLen = 123
    setup = Setup('start', expectedLen, 'int32', 1, None)
    outFile = OutputFile(setup.path, setup.filePrefix, expectedLen)
    self.assertEqual(expectedLen, len(outFile))