Example #1
    def pre_process(self):
        # Create the hdf5 output file
        self.hdf5 = Hdf5Utils(self.exp)
        self.in_data = self.get_in_datasets()[0]
        self.data_name = self.in_data.get_name()
        current_pattern = self.__set_current_pattern()
        pattern_idx = {'current': current_pattern, 'next': []}

        self.filename = self.__get_file_name()
        self.group_name = self._get_group_name(self.data_name)
        logging.debug("creating the backing file %s", self.filename)
        self.backing_file = self.hdf5._open_backing_h5(self.filename, 'w')
        group = self.backing_file.create_group(self.group_name)
        group.attrs['NX_class'] = 'NXdata'
        group.attrs['signal'] = 'data'
        self.exp._barrier()
        shape = self.in_data.get_shape()
        chunking = Chunking(self.exp, pattern_idx)
        dtype = self.in_data.data.dtype
        chunks = chunking._calculate_chunking(shape, dtype)
        self.exp._barrier()
        self.out_data = self.hdf5.create_dataset_nofill(group,
                                                        "data",
                                                        shape,
                                                        dtype,
                                                        chunks=chunks)
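
This example shows the typical Hdf5Utils output pattern: open a backing file, create an NXdata group, derive a chunk shape with Chunking, and create the dataset without a fill value. Below is a minimal sketch of the same steps in plain h5py; the file path, shape, dtype and chunk shape are illustrative assumptions, and a plain create_dataset stands in for Savu's create_dataset_nofill.

    import h5py
    import numpy as np

    shape = (180, 128, 160)   # hypothetical (frames, rows, columns)
    chunks = (1, 128, 160)    # hypothetical chunk shape; Savu derives this with Chunking

    with h5py.File('/tmp/example_backing.h5', 'w') as backing_file:
        group = backing_file.create_group('entry/data')
        group.attrs['NX_class'] = 'NXdata'
        group.attrs['signal'] = 'data'
        # plain chunked dataset; create_dataset_nofill in the example above
        # presumably also skips the HDF5 fill value for untouched chunks
        dset = group.create_dataset('data', shape, dtype=np.float32, chunks=chunks)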
Example #2
    def __get_backing_file(self, data_obj):
        fname = '%s/%s.h5' % \
            (self.exp.get('out_path'), self.parameters['file_name'])

        if os.path.exists(fname):
            return h5py.File(fname, 'r')

        self.hdf5 = Hdf5Utils(self.exp)

        size = tuple(self.parameters['size'])

        patterns = data_obj.get_data_patterns()
        p_name = self.parameters['pattern'] if \
            self.parameters['pattern'] is not None else list(patterns.keys())[0]
        p_dict = patterns[p_name]
        p_dict['max_frames_transfer'] = 1
        nnext = {p_name: p_dict}

        pattern_idx = {'current': nnext, 'next': nnext}
        chunking = Chunking(self.exp, pattern_idx)
        chunks = chunking._calculate_chunking(size, np.int16)

        h5file = self.hdf5._open_backing_h5(fname, 'w')
        dset = h5file.create_dataset('test', size, chunks=chunks)

        self.exp._barrier()

        slice_dirs = list(nnext.values())[0]['slice_dims']
        nDims = len(dset.shape)
        total_frames = np.prod([dset.shape[i] for i in slice_dirs])
        sub_size = \
            [1 if i in slice_dirs else dset.shape[i] for i in range(nDims)]

        # need an mpi barrier after creating the file before populating it
        idx = 0
        # calculate the first slice
        sl, total_frames = \
            self.__get_start_slice_list(slice_dirs, dset.shape, total_frames)
        for i in range(total_frames):
            low, high = self.parameters['range']
            dset[tuple(sl)] = np.random.randint(
                low, high=high, size=sub_size, dtype=self.parameters['dtype'])
            if sl[slice_dirs[idx]].stop == dset.shape[slice_dirs[idx]]:
                idx += 1
                if idx == len(slice_dirs):
                    break
            tmp = sl[slice_dirs[idx]]
            sl[slice_dirs[idx]] = slice(tmp.start+1, tmp.stop+1)

        self.exp._barrier()

#        try:
#            h5file.close()
#        except:
#            logging.debug('There was a problem trying to close the file in random_hdf5_loader')

        return self.hdf5._open_backing_h5(fname, 'r')
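
The loop above populates the backing dataset one frame at a time, advancing a slice list along the slice dimensions. The following is a simplified, self-contained sketch of the same fill pattern with a single slice dimension; the file path, dataset size, value range and dtype are illustrative assumptions.

    import h5py
    import numpy as np

    size = (10, 64, 64)        # hypothetical size; axis 0 is the only slice dimension
    slice_dim = 0
    low, high = 0, 100         # hypothetical value range

    with h5py.File('/tmp/random_backing.h5', 'w') as h5file:
        dset = h5file.create_dataset('test', size, dtype=np.int16)
        # each frame is a unit slice along the slice dimension and the full
        # extent along every other dimension
        sub_size = [1 if i == slice_dim else size[i] for i in range(len(size))]
        sl = [slice(None)] * len(size)
        for frame in range(size[slice_dim]):
            sl[slice_dim] = slice(frame, frame + 1)
            dset[tuple(sl)] = np.random.randint(
                low, high=high, size=sub_size, dtype=np.int16)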
Example #3
 def _add_input_data_to_nxs_file(self, transport):
     # save the loaded data to file
     h5 = Hdf5Utils(self)
     for name, data in self.index['in_data'].items():
         self.meta_data.set(['link_type', name], 'input_data')
         self.meta_data.set(['group_name', name], name)
         self.meta_data.set(['filename', name], data.backing_file)
         transport._populate_nexus_file(data)
         h5._link_datafile_to_nexus_file(data)
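
Here each loaded dataset's backing file is recorded in the experiment metadata and then linked into the NeXus output file. The Hdf5Utils internals are not shown above, but a comparable link can be made in plain h5py with an external link; the paths and group name below are illustrative assumptions, not what _link_datafile_to_nexus_file actually writes.

    import h5py

    nexus_file = '/tmp/experiment_output.nxs'   # hypothetical NeXus output file
    data_file = '/tmp/tomo_data.h5'             # hypothetical backing data file
    group_name = 'tomo'                         # hypothetical dataset/group name

    with h5py.File(nexus_file, 'a') as nxs:
        entry = nxs.require_group('entry')
        # an external link keeps the (potentially large) data in its own file
        entry[group_name] = h5py.ExternalLink(data_file, '/entry/' + group_name)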
Example #4
 def _transport_pre_plugin_list_run(self):
     # the loaders have completed; now revert to BasicTransport, so any
     # output datasets created by a plugin will use it.
     self.hdf5 = Hdf5Utils(self.exp)
     self.data_flow = self.exp.meta_data.plugin_list._get_dataset_flow()
     self.exp.meta_data.set('transport', 'basic')
     plist = self.exp.meta_data.plugin_list
     self.n_plugins = plist._get_n_processing_plugins()
     self.final_dict = plist.plugin_list[-1]
Example #5
    def _transport_post_plugin(self):
        if self.count == self.n_plugins - 2:
            self.exp.meta_data.set('transport', 'hdf5')

        elif self.count == self.n_plugins - 1:  # final plugin
            self.h5trans.exp = self.exp
            self.h5trans.hdf5 = Hdf5Utils(self.exp)
            self.h5trans._transport_post_plugin()

        self.count += 1
Example #6
    def _transport_pre_plugin_list_run(self):
        # run through the experiment (no processing) and create output files
        self.hdf5 = Hdf5Utils(self.exp)
        self.exp_coll = self.exp._get_experiment_collection()
        self.data_flow = self.exp.meta_data.plugin_list._get_dataset_flow()
        n_datasets = len(self.exp_coll['datasets'])

        for i in range(n_datasets):
            self.exp._set_experiment_for_current_plugin(i)
            self.files.append(
                self._get_filenames(self.exp_coll['plugin_dict'][i]))
            self._set_file_details(self.files[i])
            self._setup_h5_files()  # creates the hdf5 files
Example #7
    def _transport_post_plugin(self):
        # revert back to basic if a temporary transport mechanism was used
        if self.hdf5_flag:
            self.__unset_hdf5_transport()

        if self.count == self.n_plugins - 2:
            self.exp.meta_data.set('transport', 'hdf5')

        if self.count == self.n_plugins - 1:  # final plugin
            self.h5trans.exp = self.exp
            self.h5trans.hdf5 = Hdf5Utils(self.exp)
            self.h5trans._transport_post_plugin()

        self.count += 1
Example #8
 def __init__(self, exp, name='Checkpointing'):
     self._exp = exp
     self._h5 = Hdf5Utils(self._exp)
     self._filename = '_checkpoint.h5'
     self._file = None
     self._start_values = (0, 0, 0)
     self._completed_plugins = 0
     self._level = None
     self._proc_idx = 0
     self._trans_idx = 0
     self._comm = None
     self._timer = None
     self._set_timer()
     self.meta_data = MetaData()
Example #9
 def _transport_pre_plugin_list_run(self):
     # the loaders have completed; now revert to DosnaTransport, so any
     # output datasets created by a plugin will use it.
     self.hdf5 = Hdf5Utils(self.exp)
     exp_coll = self.exp._get_experiment_collection()
     self.data_flow = self.exp.meta_data.plugin_list._get_dataset_flow()
     self.exp.meta_data.set('transport', 'dosna')
     plist = self.exp.meta_data.plugin_list
     self.n_plugins = plist._get_n_processing_plugins()
     self.final_dict = plist.plugin_list[-1]
     for plugin_index in range(self.n_plugins):
         self.exp._set_experiment_for_current_plugin(plugin_index)
         self.files.append(
             self._get_filenames(exp_coll['plugin_dict'][plugin_index]))
         self._set_file_details(self.files[plugin_index])
         self._setup_dosna_objects()  # creates the dosna objects