def __init__(self, chroms,  # pylint: disable=too-many-locals
             stranded=True,
             conditions=None,
             typecode='d',
             datatags=None,
             resolution=1,
             order=1,
             store_whole_genome=True,
             cache=True,
             overwrite=False,
             loader=None,
             loader_args=None):
    """Create a numpy-backed genomic array, optionally cached as ``storage.npz``.

    One zero-initialised array is allocated per chromosome with shape
    ``(ceil(length / resolution), 2 if stranded else 1, n_conditions)``.
    When ``cache`` is true the arrays are written with ``numpy.savez`` and
    then read back, so the instance always reflects what is on disk.

    Parameters
    ----------
    chroms : dict
        Maps chromosome name to chromosome length.
    stranded : bool
        Whether to keep two strand slots per position. Also appends
        ``'stranded'`` to ``datatags``.
    conditions, typecode, resolution, order, store_whole_genome
        Forwarded unchanged to the superclass constructor.
    datatags : list or None
        Tags handed to ``_get_output_data_location`` to obtain the
        storage directory.
    cache : bool
        If true, store the arrays in (and reload them from) the storage
        directory. If false, nothing is written to disk.
    overwrite : bool
        Rebuild the arrays even if a cached file already exists.
    loader : callable or None
        Called as ``loader(self, *loader_args)`` after allocation to
        fill the arrays.
    loader_args : sequence or None
        Extra positional arguments for ``loader``; ``None`` means no
        extra arguments.
    """
    super(NPGenomicArray, self).__init__(stranded, conditions, typecode,
                                         resolution, order,
                                         store_whole_genome)

    if stranded:
        datatags = datatags + ['stranded'] if datatags else ['stranded']

    memmap_dir = _get_output_data_location(datatags)
    storage_path = os.path.join(memmap_dir, 'storage.npz')

    # The directory is only needed when something will be written to it.
    if cache and not os.path.exists(memmap_dir):
        os.makedirs(memmap_dir)

    # (Re)build when there is no usable cache file, when the caller asked
    # for a rebuild, or when caching is disabled altogether.
    if overwrite or not cache or not os.path.exists(storage_path):
        n_strands = 2 if stranded else 1
        data = {
            chrom: numpy.zeros(
                shape=(int(numpy.ceil(chroms[chrom] / self.resolution)),
                       n_strands,
                       len(self.condition)),
                dtype=self.typecode)
            for chrom in chroms
        }
        self.handle = data

        # invoke the loader; a missing loader_args must not break the
        # argument unpacking
        if loader:
            loader(self, *(loader_args or ()))

        # numpy.bytes_ is the same type formerly also exposed as
        # numpy.string_, which no longer exists in NumPy 2.
        condition = [numpy.bytes_(x) for x in self.condition]
        # Remember the chromosome keys before the metadata entries are
        # added to the same dict.
        names = list(data)
        data['conditions'] = condition
        data['order'] = order
        data['resolution'] = resolution

        if cache:
            numpy.savez(storage_path, **data)

    if cache:
        print('reload {}'.format(storage_path))
        # Read every entry while the archive is open so that the file
        # handle can be released right away.
        with numpy.load(storage_path) as archive:
            data = {key: archive[key] for key in archive.files}

        names = [x for x in data
                 if x not in ['conditions', 'order', 'resolution']]
        condition = data['conditions']
        order = data['order']
        resolution = data['resolution']

    # here we get either the freshly loaded data or the reloaded
    # data from numpy.load.
    self.handle = {key: data[key] for key in names}

    self.condition = condition
    self.resolution = resolution
    self.order = order
def _get_cachefile(cachestr, tags, fileending): """ Determine cache file location """ filename = None if cachestr is not None: memmap_dir = _get_output_data_location(tags) if not os.path.exists(memmap_dir): os.makedirs(memmap_dir) filename = str(cachestr) + fileending filename = os.path.join(memmap_dir, filename) return filename return None
def __init__(self, chroms,  # pylint: disable=too-many-locals
             stranded=True,
             conditions=None,
             typecode='d',
             datatags=None,
             resolution=1,
             order=1,
             store_whole_genome=True,
             cache=True,
             overwrite=False,
             loader=None,
             loader_args=None):
    """Create an HDF5-backed genomic array stored in ``storage.h5``.

    If the file does not exist yet (or ``overwrite`` is set) it is created
    with one gzip-compressed, zero-initialised dataset per chromosome of
    shape ``(ceil(length / resolution), 2 if stranded else 1,
    n_conditions)``, the ``conditions``/``order``/``resolution`` file
    attributes are written and the loader is run. The file is then
    reopened read-only and the three attributes are read back from it.

    Parameters
    ----------
    chroms : dict
        Maps chromosome name to chromosome length.
    stranded : bool
        Whether to keep two strand slots per position. Also appends
        ``'stranded'`` to ``datatags``.
    conditions, typecode, resolution, order, store_whole_genome
        Forwarded unchanged to the superclass constructor.
    datatags : list or None
        Tags handed to ``_get_output_data_location`` to obtain the
        storage directory.
    cache : bool
        Must be true for this storage type.
    overwrite : bool
        Recreate the file even if it already exists.
    loader : callable or None
        Called as ``loader(self, *loader_args)`` while the file is open
        for writing.
    loader_args : sequence or None
        Extra positional arguments for ``loader``; ``None`` means no
        extra arguments.

    Raises
    ------
    ValueError
        If ``cache`` is false.
    """
    super(HDF5GenomicArray, self).__init__(stranded, conditions, typecode,
                                           resolution, order,
                                           store_whole_genome)

    if not cache:
        raise ValueError('HDF5 format requires cache=True')

    if stranded:
        datatags = datatags + ['stranded'] if datatags else ['stranded']

    memmap_dir = _get_output_data_location(datatags)
    storage_path = os.path.join(memmap_dir, 'storage.h5')

    if not os.path.exists(memmap_dir):
        os.makedirs(memmap_dir)

    if not os.path.exists(storage_path) or overwrite:
        self.handle = h5py.File(storage_path, 'w')
        try:
            n_strands = 2 if stranded else 1
            for chrom in chroms:
                shape = (int(numpy.ceil(chroms[chrom] / self.resolution)),
                         n_strands,
                         len(self.condition))
                self.handle.create_dataset(
                    chrom, shape,
                    dtype=self.typecode,
                    compression='gzip',
                    data=numpy.zeros(shape, dtype=self.typecode))

            # numpy.bytes_ is the same type formerly also exposed as
            # numpy.string_, which no longer exists in NumPy 2.
            self.handle.attrs['conditions'] = [numpy.bytes_(x)
                                               for x in self.condition]
            self.handle.attrs['order'] = self.order
            self.handle.attrs['resolution'] = self.resolution

            # invoke the loader; a missing loader_args must not break
            # the argument unpacking
            if loader:
                loader(self, *(loader_args or ()))
        finally:
            # Release the writable handle even when creating the
            # datasets or running the loader fails.
            # NOTE(review): a partially written file is left on disk in
            # that case; pass overwrite=True to rebuild it.
            self.handle.close()

    print('reload {}'.format(storage_path))
    self.handle = h5py.File(storage_path, 'r', driver='stdio')

    self.condition = self.handle.attrs['conditions']
    self.order = self.handle.attrs['order']
    self.resolution = self.handle.attrs['resolution']
def __init__(self, chroms,  # pylint: disable=too-many-locals
             stranded=True,
             conditions=None,
             typecode='d',
             datatags=None,
             resolution=1,
             order=1,
             store_whole_genome=True,
             cache=True,
             overwrite=False,
             loader=None,
             collapser=None):
    """Create a sparse genomic array, optionally cached as ``sparse.npz``.

    One ``dok_matrix`` per chromosome is allocated with
    ``_get_iv_length(length, resolution)`` rows and
    ``(2 if stranded else 1) * n_conditions`` columns, and filled by the
    loader. For storage each matrix is converted to COO form and saved as
    a three-column array ``(value, row, col)`` together with a
    ``'shape.<chrom>'`` entry. The matrices are finally rebuilt from that
    representation as CSR matrices, either from the in-memory dict or,
    when ``cache`` is true, from the file on disk.

    Parameters
    ----------
    chroms : dict
        Maps chromosome name to chromosome length.
    stranded : bool
        Whether to keep two strand slots per condition. Also appends
        ``'stranded'`` to ``datatags``.
    conditions, typecode, resolution, order, store_whole_genome, collapser
        Forwarded unchanged to the superclass constructor.
    datatags : list or None
        Tags handed to ``_get_output_data_location`` to obtain the
        storage directory.
    cache : bool
        If true, store the data in (and reload it from) the storage
        directory.
    overwrite : bool
        Rebuild the data even if a cached file already exists.
    loader : callable or None
        Called as ``loader(self)`` after allocation to fill the matrices.
    """
    super(SparseGenomicArray, self).__init__(stranded, conditions, typecode,
                                             resolution, order,
                                             store_whole_genome, collapser)

    if stranded:
        datatags = datatags + ['stranded'] if datatags else ['stranded']

    memmap_dir = _get_output_data_location(datatags)
    storage_path = os.path.join(memmap_dir, 'sparse.npz')

    if not os.path.exists(memmap_dir):
        os.makedirs(memmap_dir)

    # (Re)build when there is no usable cache file, when the caller asked
    # for a rebuild, or when caching is disabled altogether.
    if overwrite or not cache or not os.path.exists(storage_path):
        n_columns = (2 if stranded else 1) * len(self.condition)
        data = {
            chrom: sparse.dok_matrix(
                (_get_iv_length(chroms[chrom], self.resolution), n_columns),
                dtype=self.typecode)
            for chrom in chroms
        }
        self.handle = data

        # invoke the loader
        if loader:
            loader(self)

        # Pick the handle up again after the loader has run.
        data = self.handle
        data = {chrom: data[chrom].tocoo() for chrom in data}

        # np.bytes_ is the same type formerly also exposed as
        # np.string_, which no longer exists in NumPy 2.
        condition = [np.bytes_(x) for x in self.condition]
        names = list(data)

        storage = {
            chrom: np.column_stack([data[chrom].data,
                                    data[chrom].row,
                                    data[chrom].col])
            for chrom in data
        }
        storage.update({'shape.' + chrom: np.asarray(data[chrom].shape)
                        for chrom in data})
        storage['conditions'] = condition
        storage['order'] = order
        # A missing resolution is encoded as 0 in the stored data.
        storage['resolution'] = resolution if resolution is not None else 0

        if cache:
            np.savez(storage_path, **storage)

    if cache:
        print('reload {}'.format(storage_path))
        # Read every entry once while the archive is open; this releases
        # the file handle and avoids re-reading an entry on each access.
        with np.load(storage_path) as archive:
            storage = {key: archive[key] for key in archive.files}

        names = [
            x for x in storage
            if x not in ['conditions', 'order', 'resolution']
            and x[:6] != 'shape.'
        ]
        condition = storage['conditions']
        order = storage['order']
        # Undo the 0-for-None encoding used when the data was stored.
        resolution = storage['resolution'] \
            if storage['resolution'] > 0 else None

    self.handle = {
        key: sparse.coo_matrix(
            (storage[key][:, 0],
             (storage[key][:, 1].astype('int'),
              storage[key][:, 2].astype('int'))),
            shape=tuple(storage['shape.' + key])).tocsr()
        for key in names
    }

    self.condition = condition
    self.resolution = resolution
    self.order = order