def _create_entries(self, data, key, current_and_next):
    # (module-level imports assumed: logging and Savu's Chunking helper)
    self.exp._barrier()
    expInfo = self.exp.meta_data
    group_name = expInfo.get(["group_name", key])
    data.data_info.set('group_name', group_name)
    try:
        group_name = group_name + '_' + data.name
    except AttributeError:
        pass
    self.exp._barrier()
    group = data.backing_file.create_group(group_name)
    self.exp._barrier()
    shape = data.get_shape()
    if current_and_next == 0:
        logging.warning('Creating the dataset without chunks')
        data.data = group.create_dataset("data", shape, data.dtype)
    else:
        chunking = Chunking(self.exp, current_and_next)
        chunks = chunking._calculate_chunking(shape, data.dtype)
        self.exp._barrier()
        logging.warning('Creating the dataset with chunks.')
        data.data = self.__create_dataset_nofill(
            group, "data", shape, data.dtype, chunks=chunks)
        logging.warning('Dataset created!')
    self.exp._barrier()
    return group_name, group
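# The variant above delegates to a private no-fill helper that is not shown.
# Below is a minimal standalone sketch of how such a helper can be written
# with h5py's low-level API; the function name and exact behaviour are
# assumptions, not Savu's actual implementation. Setting the fill time to
# NEVER skips writing fill values, which makes allocating very large
# datasets much cheaper.
import h5py
import numpy as np

def create_dataset_nofill(group, name, shape, dtype, chunks=None):
    # build the dataset creation property list by hand, since the
    # high-level create_dataset() API always writes fill values
    spaceid = h5py.h5s.create_simple(tuple(shape))
    plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    if chunks:
        plist.set_chunk(tuple(chunks))
    plist.set_fill_time(h5py.h5d.FILL_TIME_NEVER)  # never write fill values
    typeid = h5py.h5t.py_create(np.dtype(dtype), logical=True)
    dsid = h5py.h5d.create(group.id, name.encode('ascii'), typeid,
                           spaceid, plist)
    return h5py.Dataset(dsid)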
def _create_entries(self, data, key, current_and_next):
    # (module-level imports assumed: logging, numpy as np, Chunking)
    self.exp._barrier()
    expInfo = self.exp.meta_data
    group_name = expInfo.get(["group_name", key])
    data.data_info.set('group_name', group_name)
    try:
        group_name = group_name + '_' + data.name
    except AttributeError:
        pass
    self.exp._barrier()
    group = data.backing_file.create_group(group_name)
    self.exp._barrier()
    shape = data.get_shape()
    if current_and_next == 0:
        logging.warning('Creating the dataset without chunks')
        data.data = group.create_dataset("data", shape, data.dtype)
    else:
        chunking = Chunking(self.exp, current_and_next)
        chunks = chunking._calculate_chunking(shape, data.dtype)
        self.exp._barrier()
        nBytes = np.prod(shape) * np.dtype(data.dtype).itemsize
        nProcs = self.exp.meta_data.get('nProcesses')
        # parallel hdf5 cannot handle data_size/nProcesses > 2GB
        self.__hdf5_file_write_failed_check(nBytes, nProcs)
        data.data = group.create_dataset(
            "data", shape, data.dtype, chunks=chunks)
    self.exp._barrier()
    return group_name, group
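# __hdf5_file_write_failed_check is called above but its body is not shown.
# A hypothetical sketch of the guard the comment describes (MPI-IO cannot
# write more than 2GB per process in one collective call); the name,
# signature, and message are illustrative only:

def hdf5_file_write_failed_check(nBytes, nProcs, limit=2 * 1024 ** 3):
    # fail fast, before the write dies deep inside parallel HDF5
    if nBytes / float(nProcs) > limit:
        raise IOError(
            "Data size per process (%d bytes over %d processes) exceeds "
            "the 2GB parallel-HDF5 write limit." % (nBytes, nProcs))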
def pre_process(self):
    # Create the hdf5 output file
    self.hdf5 = Hdf5Utils(self.exp)
    self.in_data = self.get_in_datasets()[0]
    self.data_name = self.in_data.get_name()
    current_pattern = self.__set_current_pattern()
    pattern_idx = {'current': current_pattern, 'next': []}
    self.filename = self.__get_file_name()
    self.group_name = self._get_group_name(self.data_name)
    logging.debug("creating the backing file %s", self.filename)
    self.backing_file = self.hdf5._open_backing_h5(self.filename, 'w')
    group = self.backing_file.create_group(self.group_name)
    # NeXus attributes so downstream tools can find the plottable data
    group.attrs['NX_class'] = 'NXdata'
    group.attrs['signal'] = 'data'
    self.exp._barrier()
    shape = self.in_data.get_shape()
    chunking = Chunking(self.exp, pattern_idx)
    dtype = self.in_data.data.dtype
    chunks = chunking._calculate_chunking(shape, dtype)
    self.exp._barrier()
    self.out_data = self.hdf5.create_dataset_nofill(
        group, "data", shape, dtype, chunks=chunks)
def _create_entries(self, data, key, current_and_next):
    msg = self.__class__.__name__ + '_create_entries'
    self.exp._barrier(msg=msg + '1')
    expInfo = self.exp.meta_data
    group_name = expInfo.get(["group_name", key])
    data.data_info.set('group_name', group_name)
    try:
        group_name = group_name + '_' + data.name
    except AttributeError:
        pass
    self.exp._barrier(msg=msg + '2')
    group = data.backing_file.require_group(group_name)
    self.exp._barrier(msg=msg + '3')
    shape = data.get_shape()
    if 'data' in group:
        # the dataset already exists, e.g. after a restart
        data.data = group['data']
    elif current_and_next == 0:
        logging.warning('Creating the dataset without chunks')
        data.data = group.create_dataset("data", shape, data.dtype)
    else:
        chunk_max = self.__set_optimal_hdf5_chunk_cache_size(data, group)
        chunking = Chunking(self.exp, current_and_next)
        chunks = chunking._calculate_chunking(
            shape, data.dtype, chunk_max=chunk_max)
        self.exp._barrier(msg=msg + '4')
        data.data = self.create_dataset_nofill(
            group, "data", shape, data.dtype, chunks=chunks)
    self.exp._barrier(msg=msg + '5')
    return group_name, group
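# __set_optimal_hdf5_chunk_cache_size is not shown above and its exact
# behaviour is unknown here. The same idea can also be applied when the
# file is opened: h5py (>= 2.9) exposes the raw-data chunk cache directly
# as File keyword arguments. A hedged sketch with illustrative numbers:
import h5py

def open_with_chunk_cache(fname, chunk_nbytes):
    # the cache must hold at least one whole chunk, or every partial read
    # re-reads and re-decodes that chunk from disk
    rdcc_nbytes = max(chunk_nbytes, 1024 ** 2)
    # a prime number of hash slots reduces collisions between chunks
    return h5py.File(fname, 'a', rdcc_nbytes=rdcc_nbytes, rdcc_nslots=10007)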
def __create_entries(self, data, key, current_and_next):
    expInfo = self.exp.meta_data
    group_name = expInfo.get_meta_data(["group_name", key])
    data.data_info.set_meta_data('group_name', group_name)
    try:
        group_name = group_name + '_' + data.name
    except AttributeError:
        pass
    group = data.backing_file.create_group(group_name)
    group.attrs[NX_CLASS] = 'NXdata'
    group.attrs['signal'] = 'data'
    logging.info("create_entries: 1")
    self.exp._barrier()
    shape = data.get_shape()
    if current_and_next == 0:
        data.data = group.create_dataset("data", shape, data.dtype)
    else:
        logging.info("create_entries: 2")
        self.exp._barrier()
        chunking = Chunking(self.exp, current_and_next)
        chunks = chunking._calculate_chunking(shape, data.dtype)
        logging.info("create_entries: 3")
        self.exp._barrier()
        data.data = group.create_dataset(
            "data", shape, data.dtype, chunks=chunks)
    logging.info("create_entries: 4")
    self.exp._barrier()
    return group_name, group
def __get_backing_file(self, data_obj):
    # (module-level imports assumed: os, h5py, numpy as np, logging,
    #  and Savu's Chunking and Hdf5Utils helpers)
    fname = '%s/%s.h5' % \
        (self.exp.get('out_path'), self.parameters['file_name'])

    if os.path.exists(fname):
        return h5py.File(fname, 'r')

    self.hdf5 = Hdf5Utils(self.exp)

    size = tuple(self.parameters['size'])
    patterns = data_obj.get_data_patterns()
    # use the requested pattern, falling back to the first available one
    p_name = self.parameters['pattern'] if self.parameters['pattern'] \
        is not None else list(patterns.keys())[0]
    p_dict = patterns[p_name]
    p_dict['max_frames_transfer'] = 1
    nnext = {p_name: p_dict}

    pattern_idx = {'current': nnext, 'next': nnext}
    chunking = Chunking(self.exp, pattern_idx)
    chunks = chunking._calculate_chunking(size, np.int16)

    h5file = self.hdf5._open_backing_h5(fname, 'w')
    dset = h5file.create_dataset('test', size, chunks=chunks,
                                 dtype=self.parameters['dtype'])

    # need an mpi barrier after creating the file before populating it
    self.exp._barrier()

    slice_dirs = p_dict['slice_dims']
    nDims = len(dset.shape)
    total_frames = np.prod([dset.shape[i] for i in slice_dirs])
    sub_size = \
        [1 if i in slice_dirs else dset.shape[i] for i in range(nDims)]

    # calculate the first slice
    idx = 0
    sl, total_frames = \
        self.__get_start_slice_list(slice_dirs, dset.shape, total_frames)

    for i in range(total_frames):
        low, high = self.parameters['range']
        dset[tuple(sl)] = np.random.randint(
            low, high=high, size=sub_size, dtype=self.parameters['dtype'])
        if sl[slice_dirs[idx]].stop == dset.shape[slice_dirs[idx]]:
            idx += 1
            if idx == len(slice_dirs):
                break
        tmp = sl[slice_dirs[idx]]
        sl[slice_dirs[idx]] = slice(tmp.start + 1, tmp.stop + 1)

    self.exp._barrier()

    # try:
    #     h5file.close()
    # except:
    #     logging.debug('There was a problem trying to close the file in '
    #                   'random_hdf5_loader')

    return self.hdf5._open_backing_h5(fname, 'r')
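# For reference, a minimal single-process version of the frame walk above
# (illustrative only, no MPI): write one random frame at a time into an
# array whose slice dimension is axis 0, exactly as the loader does for
# each slice dimension in turn.
import numpy as np

shape = (4, 8, 8)
arr = np.zeros(shape, dtype=np.int16)
for frame in range(shape[0]):
    sl = (slice(frame, frame + 1), slice(None), slice(None))
    arr[sl] = np.random.randint(1, high=10, size=(1,) + shape[1:],
                                dtype=np.int16)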
def _create_entries(self, data, key, current_and_next):
    self.exp._barrier()
    expInfo = self.exp.meta_data
    group_name = expInfo.get(["group_name", key])
    data.data_info.set('group_name', group_name)
    try:
        group_name = group_name + '_' + data.name
    except AttributeError:
        pass
    self.exp._barrier()
    group = data.backing_file.create_group(group_name)
    self.exp._barrier()
    shape = data.get_shape()
    if current_and_next == 0:
        logging.warning('Creating the dataset without chunks')
        data.data = group.create_dataset("data", shape, data.dtype)
    else:
        # change cache properties: get_cache() returns
        # (mdc_nelmts, rdcc_nslots, rdcc_nbytes, rdcc_w0)
        propfaid = group.file.id.get_access_plist()
        settings = list(propfaid.get_cache())
        settings[2] *= 1  # placeholder scale factor for rdcc_nbytes
        propfaid.set_cache(*settings)
        # calculate total number of chunks and set nSlots=nChunks
        chunking = Chunking(self.exp, current_and_next)
        chunks = chunking._calculate_chunking(
            shape, data.dtype, chunk_max=settings[2])
        self.exp._barrier()
        data.data = self.__create_dataset_nofill(
            group, "data", shape, data.dtype, chunks=chunks)
    self.exp._barrier()
    return group_name, group
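# The comment above mentions setting nSlots = nChunks, which the snippet
# does not actually compute. A hedged sketch of that calculation (the
# helper name is illustrative); the result would go into settings[1]
# (rdcc_nslots) before set_cache() is called:
import numpy as np

def n_chunks(shape, chunks):
    # total number of chunks = product of per-dimension chunk counts
    return int(np.prod([int(np.ceil(s / float(c)))
                        for s, c in zip(shape, chunks)]))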
def _create_dosna_dataset(self, object_id, data, key, current_and_next):
    group_name = self.exp.meta_data.get(["group_name", key])
    data.data_info.set('group_name', group_name)
    try:
        group_name = group_name + '_' + data.name
    except AttributeError:
        pass
    shape = data.get_shape()
    dataset_name = "{}_{}".format(
        group_name, self._extract_digits(object_id))
    if current_and_next == 0:
        data.data = self.dosna_connection.create_dataset(
            dataset_name, shape, data.dtype)
    else:
        chunking = Chunking(self.exp, current_and_next)
        chunks = chunking._calculate_chunking(shape, data.dtype)
        data.data = self.dosna_connection.create_dataset(
            dataset_name, shape, data.dtype, chunk_size=chunks)
    self.dataset_cache.append(data.data)