def create_chunking_instance(self, current_list, nnext_list, nProcs):
    """Build a Chunking object backed by a dummy 'tomoRaw' experiment.

    :param current_list: pattern description used for the 'current' entry.
    :param nnext_list: pattern description used for the 'next' entry.
    :param nProcs: number of dummy processes to register on the experiment.
    :returns: a Chunking instance configured with the two patterns.
    """
    pattern_now = self.create_pattern('a', current_list)
    pattern_next = self.create_pattern('b', nnext_list)

    options = tu.set_experiment('tomoRaw')
    options['processes'] = list(range(nProcs))
    # set a dummy process list so the Experiment can initialise
    options['process_file'] = \
        tu.get_test_process_path('loaders/basic_tomo_process.nxs')

    experiment = Experiment(options)
    pattern_dict = {'current': pattern_now, 'next': pattern_next}
    return Chunking(experiment, pattern_dict)
def _create_entries(self, data, key, current_and_next):
    """Create the hdf5 group and 'data' dataset for ``data`` in its backing file.

    :param data: data object; its ``data`` attribute is set to the new
        h5py dataset and its info receives the group name.
    :param key: metadata key used to look up the group name.
    :param current_and_next: 0 to create the dataset without chunking,
        otherwise a pattern dictionary handed to Chunking to calculate
        the chunk shape.
    :returns: tuple of (group_name, group).
    """
    self.exp._barrier()
    expInfo = self.exp.meta_data
    group_name = expInfo.get(["group_name", key])
    data.data_info.set('group_name', group_name)
    try:
        group_name = group_name + '_' + data.name
    except AttributeError:
        # data has no name attribute; keep the plain group name
        pass
    self.exp._barrier()
    group = data.backing_file.create_group(group_name)
    self.exp._barrier()
    shape = data.get_shape()

    # bugfix: 'is 0' compared identity, which only works by accident via
    # CPython small-int caching; use equality instead.
    if current_and_next == 0:
        # bugfix: logging.warn is deprecated in favour of logging.warning
        logging.warning('Creating the dataset without chunks')
        data.data = group.create_dataset("data", shape, data.dtype)
    else:
        # change cache properties so nSlots matches the number of chunks
        propfaid = group.file.id.get_access_plist()
        settings = list(propfaid.get_cache())
        settings[2] *= 1
        propfaid.set_cache(*settings)
        # calculate total number of chunks and set nSlots=nChunks
        chunking = Chunking(self.exp, current_and_next)
        chunks = chunking._calculate_chunking(
            shape, data.dtype, chunk_max=settings[2])
        self.exp._barrier()
        data.data = self.__create_dataset_nofill(
            group, "data", shape, data.dtype, chunks=chunks)
    self.exp._barrier()
    return group_name, group
def _create_entries(self, data, key: str, current_and_next):
    """Create or reuse the hdf5 group and 'data' dataset for ``data``.

    Unlike the plain create variant, this uses ``require_group`` and
    reuses an existing 'data' dataset when present.

    :param data: data object; its ``data`` attribute is set to the
        (new or existing) h5py dataset.
    :param key: metadata key used to look up the group name.
    :param current_and_next: 0 to create the dataset without chunking,
        otherwise a pattern dictionary handed to Chunking.
    :returns: tuple of (group_name, group).
    """
    msg = self.__class__.__name__ + '_create_entries'
    self.exp._barrier(msg=msg + '1')
    expInfo = self.exp.meta_data
    group_name = expInfo.get(["group_name", key])
    data.data_info.set('group_name', group_name)
    try:
        group_name = group_name + '_' + data.name
    except AttributeError:
        # data has no name attribute; keep the plain group name
        pass
    self.exp._barrier(msg=msg + '2')
    group = data.backing_file.require_group(group_name)
    self.exp._barrier(msg=msg + '3')
    shape = data.get_shape()

    if 'data' in group:
        # dataset already exists (e.g. checkpoint/restart): reuse it
        data.data = group['data']
    # bugfix: 'is 0' compared identity, which only works by accident via
    # CPython small-int caching; use equality instead.
    elif current_and_next == 0:
        logging.warning('Creating the dataset without chunks')
        data.data = group.create_dataset("data", shape, data.dtype)
    else:
        chunk_max = self.__set_optimal_hdf5_chunk_cache_size(data, group)
        chunking = Chunking(self.exp, current_and_next)
        chunks = chunking._calculate_chunking(
            shape, data.dtype, chunk_max=chunk_max)
        self.exp._barrier(msg=msg + '4')
        data.data = self.create_dataset_nofill(
            group, "data", shape, data.dtype, chunks=chunks)
    self.exp._barrier(msg=msg + '5')
    return group_name, group
def _create_dosna_dataset(self, object_id, data, key, current_and_next):
    """Create a DosNa dataset for ``data`` and cache the handle.

    :param object_id: identifier whose digits are appended to the
        dataset name to keep it unique.
    :param data: data object; its ``data`` attribute is set to the new
        DosNa dataset.
    :param key: metadata key used to look up the group name.
    :param current_and_next: 0 to create the dataset without a chunk
        size, otherwise a pattern dictionary handed to Chunking.
    """
    group_name = self.exp.meta_data.get(["group_name", key])
    data.data_info.set('group_name', group_name)
    try:
        group_name = group_name + '_' + data.name
    except AttributeError:
        # data has no name attribute; keep the plain group name
        pass
    shape = data.get_shape()
    dataset_name = "{}_{}".format(
        group_name, self._extract_digits(object_id))

    # bugfix: 'is 0' compared identity, which only works by accident via
    # CPython small-int caching; use equality instead.
    if current_and_next == 0:
        data.data = self.dosna_connection.create_dataset(
            dataset_name, shape, data.dtype)
    else:
        chunking = Chunking(self.exp, current_and_next)
        chunks = chunking._calculate_chunking(shape, data.dtype)
        data.data = self.dosna_connection.create_dataset(
            dataset_name, shape, data.dtype, chunk_size=chunks)
    # keep a reference so the dataset can be cleaned up later
    self.dataset_cache.append(data.data)
def pre_process(self):
    """Create the hdf5 output file and its chunked 'data' dataset."""
    self.hdf5 = Hdf5Utils(self.exp)

    self.in_data = self.get_in_datasets()[0]
    self.data_name = self.in_data.get_name()

    pattern_idx = {'current': self.__set_current_pattern(), 'next': []}

    self.filename = self.__get_file_name()
    self.group_name = self._get_group_name(self.data_name)
    logging.debug("creating the backing file %s", self.filename)

    self.backing_file = self.hdf5._open_backing_h5(self.filename, 'w')
    group = self.backing_file.create_group(self.group_name)
    # mark the group as NeXus data with 'data' as its signal
    group.attrs['NX_class'] = 'NXdata'
    group.attrs['signal'] = 'data'
    self.exp._barrier()

    # work out the chunk layout for the output dataset
    shape = self.in_data.get_shape()
    dtype = self.in_data.data.dtype
    chunks = Chunking(self.exp, pattern_idx)._calculate_chunking(shape, dtype)
    self.exp._barrier()

    self.out_data = group.create_dataset("data", shape, dtype, chunks=chunks)
def __get_backing_file(self, data_obj):
    """Create (or reopen) an hdf5 backing file filled with random data.

    If the file already exists it is simply reopened read-only;
    otherwise it is created, populated frame-by-frame with random
    integers, closed and reopened read-only.

    :param data_obj: data object supplying the available data patterns.
    :returns: a read-only h5py File handle.
    """
    fname = '%s/%s.h5' % \
        (self.exp.get('out_path'), self.parameters['file_name'])

    if os.path.exists(fname):
        # file was created by a previous run: reuse it
        return h5py.File(fname, 'r')

    self.hdf5 = Hdf5Utils(self.exp)

    size = tuple(self.parameters['size'])
    patterns = data_obj.get_data_patterns()
    # bugfix: the original indexed dict.keys() (a TypeError in Python 3)
    # and then unconditionally overwrote the chosen name, discarding the
    # 'pattern' parameter; honour the parameter when given, otherwise
    # fall back to the first available pattern.
    p_name = self.parameters['pattern'] if \
        self.parameters['pattern'] is not None else next(iter(patterns))
    p_dict = patterns[p_name]
    p_dict['max_frames_transfer'] = 1
    nnext = {p_name: p_dict}

    pattern_idx = {'current': nnext, 'next': nnext}
    chunking = Chunking(self.exp, pattern_idx)
    chunks = chunking._calculate_chunking(size, np.int16)

    h5file = self.hdf5._open_backing_h5(fname, 'w')
    dset = h5file.create_dataset('test', size, chunks=chunks)

    # need an mpi barrier after creating the file before populating it
    self.exp._barrier()

    # bugfix: dict.values() is not subscriptable in Python 3; use the
    # pattern dict directly.
    slice_dirs = p_dict['slice_dims']
    nDims = len(dset.shape)
    total_frames = np.prod([dset.shape[i] for i in slice_dirs])
    # one frame spans the full extent of every non-slice dimension
    sub_size = \
        [1 if i in slice_dirs else dset.shape[i] for i in range(nDims)]

    idx = 0
    # calculate the first slice
    sl, total_frames = \
        self.__get_start_slice_list(slice_dirs, dset.shape, total_frames)
    for i in range(total_frames):
        low, high = self.parameters['range']
        dset[tuple(sl)] = np.random.randint(
            low, high=high, size=sub_size, dtype=self.parameters['dtype'])
        if sl[slice_dirs[idx]].stop == dset.shape[slice_dirs[idx]]:
            idx += 1
            if idx == len(slice_dirs):
                break
        # advance to the next frame along the current slice dimension
        tmp = sl[slice_dirs[idx]]
        sl[slice_dirs[idx]] = slice(tmp.start + 1, tmp.stop + 1)

    self.exp._barrier()

    try:
        h5file.close()
    except Exception:
        # bugfix: bare 'except:' also swallowed SystemExit/KeyboardInterrupt
        logging.debug(
            'There was a problem trying to close the file in random_hdf5_loader'
        )

    return self.hdf5._open_backing_h5(fname, 'r')