Example #1
    def create_chunking_instance(self, current_list, nnext_list, nProcs):
        current = self.create_pattern('a', current_list)
        nnext = self.create_pattern('b', nnext_list)
        options = tu.set_experiment('tomoRaw')
        options['processes'] = list(range(nProcs))
        # set a dummy process list
        options['process_file'] = \
            tu.get_test_process_path('loaders/basic_tomo_process.nxs')
        exp = Experiment(options)
        test_dict = {'current': current, 'next': nnext}
        chunking = Chunking(exp, test_dict)
        return chunking
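
For context, a hedged sketch of how a test might exercise the instance returned above. The pattern-list contents and the (91, 135, 160) float32 volume are assumptions, not taken from the original test; _calculate_chunking is called the same way the later examples call it.

    # Hypothetical test, not part of the original file.  The pattern lists
    # assume create_pattern expects [max_frames_transfer, core_dims,
    # slice_dims]; the shape and dtype are illustrative only.
    def test_chunking_sketch(self):
        import numpy as np
        chunking = self.create_chunking_instance(
            [1, (1, 2), (0,)], [1, (0,), (1, 2)], 1)
        chunks = chunking._calculate_chunking((91, 135, 160), np.float32)
        # a chunk can never exceed the dataset extent in any dimension
        self.assertTrue(all(c <= s for c, s in zip(chunks, (91, 135, 160))))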
Example #2
    def _create_entries(self, data, key, current_and_next):
        self.exp._barrier()

        expInfo = self.exp.meta_data
        group_name = expInfo.get(["group_name", key])
        data.data_info.set('group_name', group_name)
        try:
            group_name = group_name + '_' + data.name
        except AttributeError:
            pass

        self.exp._barrier()
        group = data.backing_file.create_group(group_name)
        self.exp._barrier()
        shape = data.get_shape()
        if current_and_next == 0:
            logging.warning('Creating the dataset without chunks')
            data.data = group.create_dataset("data", shape, data.dtype)
        else:

            # change cache properties
            propfaid = group.file.id.get_access_plist()
            settings = list(propfaid.get_cache())
            settings[2] *= 1
            propfaid.set_cache(*settings)
            # calculate total number of chunks and set nSlots=nChunks

            chunking = Chunking(self.exp, current_and_next)
            chunks = chunking._calculate_chunking(shape,
                                                  data.dtype,
                                                  chunk_max=settings[2])

            self.exp._barrier()
            data.data = self.__create_dataset_nofill(group,
                                                     "data",
                                                     shape,
                                                     data.dtype,
                                                     chunks=chunks)

        self.exp._barrier()

        return group_name, group
Example #3
    def _create_entries(self, data, key: str, current_and_next):
        msg = self.__class__.__name__ + '_create_entries'
        self.exp._barrier(msg=msg + '1')

        expInfo = self.exp.meta_data
        group_name = expInfo.get(["group_name", key])
        data.data_info.set('group_name', group_name)
        try:
            group_name = group_name + '_' + data.name
        except AttributeError:
            pass

        self.exp._barrier(msg=msg + '2')
        group = data.backing_file.require_group(group_name)
        self.exp._barrier(msg=msg + '3')
        shape = data.get_shape()

        if 'data' in group:
            data.data = group['data']
        elif current_and_next == 0:
            logging.warning('Creating the dataset without chunks')
            data.data = group.create_dataset("data", shape, data.dtype)
        else:
            chunk_max = self.__set_optimal_hdf5_chunk_cache_size(data, group)
            chunking = Chunking(self.exp, current_and_next)
            chunks = chunking._calculate_chunking(shape,
                                                  data.dtype,
                                                  chunk_max=chunk_max)

            self.exp._barrier(msg=msg + '4')
            data.data = self.create_dataset_nofill(group,
                                                   "data",
                                                   shape,
                                                   data.dtype,
                                                   chunks=chunks)

        self.exp._barrier(msg=msg + '5')
        return group_name, group
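
Example #3 replaces the inline cache tuning of Example #2 with a __set_optimal_hdf5_chunk_cache_size helper. Savu's actual sizing policy is not shown here, so the following is only a minimal sketch built from the property-list calls visible in Example #2 (get_access_plist, get_cache and set_cache are standard h5py low-level API); the 32 MiB floor is an assumption.

    def __set_optimal_hdf5_chunk_cache_size(self, data, group):
        # Sketch only: the real Savu policy may differ, and it presumably
        # derives the size from `data` (frame/chunk size), which this sketch
        # does not use.
        # The cache tuple is [mdc_nelmts, rdcc_nslots, rdcc_nbytes, rdcc_w0];
        # index 2 is the raw-data chunk cache size in bytes.
        propfaid = group.file.id.get_access_plist()
        settings = list(propfaid.get_cache())
        settings[2] = max(settings[2], 32 * 1024 ** 2)  # assumed 32 MiB floor
        propfaid.set_cache(*settings)
        # return the cache size so _calculate_chunking can cap the chunk
        # volume, mirroring chunk_max=settings[2] in Example #2
        return settings[2]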
Example #4
    def _create_dosna_dataset(self, object_id, data, key, current_and_next):
        group_name = self.exp.meta_data.get(["group_name", key])
        data.data_info.set('group_name', group_name)
        try:
            group_name = group_name + '_' + data.name
        except AttributeError:
            pass

        shape = data.get_shape()
        dataset_name = "{}_{}".format(group_name,
                                      self._extract_digits(object_id))

        if current_and_next == 0:
            data.data = self.dosna_connection.create_dataset(
                dataset_name, shape, data.dtype)
        else:
            chunking = Chunking(self.exp, current_and_next)
            chunks = chunking._calculate_chunking(shape, data.dtype)
            data.data = self.dosna_connection.create_dataset(dataset_name,
                                                             shape,
                                                             data.dtype,
                                                             chunk_size=chunks)
        self.dataset_cache.append(data.data)
Example #5
    def pre_process(self):
        # Create the hdf5 output file
        self.hdf5 = Hdf5Utils(self.exp)
        self.in_data = self.get_in_datasets()[0]
        self.data_name = self.in_data.get_name()
        current_pattern = self.__set_current_pattern()
        pattern_idx = {'current': current_pattern, 'next': []}

        self.filename = self.__get_file_name()
        self.group_name = self._get_group_name(self.data_name)
        logging.debug("creating the backing file %s", self.filename)
        self.backing_file = self.hdf5._open_backing_h5(self.filename, 'w')
        group = self.backing_file.create_group(self.group_name)
        group.attrs['NX_class'] = 'NXdata'
        group.attrs['signal'] = 'data'
        self.exp._barrier()
        shape = self.in_data.get_shape()
        chunking = Chunking(self.exp, pattern_idx)
        dtype = self.in_data.data.dtype
        chunks = chunking._calculate_chunking(shape, dtype)
        self.exp._barrier()
        self.out_data = \
            group.create_dataset("data", shape, dtype, chunks=chunks)
Example #6
    def __get_backing_file(self, data_obj):
        fname = '%s/%s.h5' % \
            (self.exp.get('out_path'), self.parameters['file_name'])

        if os.path.exists(fname):
            f = h5py.File(fname, 'r')
            return f

        self.hdf5 = Hdf5Utils(self.exp)

        size = tuple(self.parameters['size'])

        patterns = data_obj.get_data_patterns()
        p_name = self.parameters['pattern'] if \
            self.parameters['pattern'] is not None \
            else list(patterns.keys())[0]
        p_dict = patterns[p_name]
        p_dict['max_frames_transfer'] = 1
        nnext = {p_name: p_dict}

        pattern_idx = {'current': nnext, 'next': nnext}
        chunking = Chunking(self.exp, pattern_idx)
        chunks = chunking._calculate_chunking(size, np.int16)

        h5file = self.hdf5._open_backing_h5(fname, 'w')
        dset = h5file.create_dataset('test', size, chunks=chunks)

        self.exp._barrier()

        slice_dirs = list(nnext.values())[0]['slice_dims']
        nDims = len(dset.shape)
        total_frames = np.prod([dset.shape[i] for i in slice_dirs])
        sub_size = \
            [1 if i in slice_dirs else dset.shape[i] for i in range(nDims)]

        # need an mpi barrier after creating the file before populating it
        idx = 0
        sl, total_frames = \
            self.__get_start_slice_list(slice_dirs, dset.shape, total_frames)
        # calculate the first slice
        for i in range(total_frames):
            low, high = self.parameters['range']
            dset[tuple(sl)] = np.random.randint(low,
                                                high=high,
                                                size=sub_size,
                                                dtype=self.parameters['dtype'])
            if sl[slice_dirs[idx]].stop == dset.shape[slice_dirs[idx]]:
                idx += 1
                if idx == len(slice_dirs):
                    break
            tmp = sl[slice_dirs[idx]]
            sl[slice_dirs[idx]] = slice(tmp.start + 1, tmp.stop + 1)

        self.exp._barrier()

        try:
            h5file.close()
        except Exception:
            logging.debug(
                'There was a problem trying to close the file in random_hdf5_loader'
            )

        return self.hdf5._open_backing_h5(fname, 'r')