def __init__(self, h5_main, num_components=None):
    super(SVD, self).__init__(h5_main)
    self.process_name = 'SVD'

    '''
    Calculate the size of the main data in memory and compare to max_mem
    We use the minimum of the actual dtype's itemsize and float32 since
    we don't want to read it in yet and do the proper type conversions.
    '''
    n_samples, n_features = h5_main.shape
    self.data_transform_func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_main)

    if num_components is None:
        num_components = min(n_samples, n_features)
    else:
        num_components = min(n_samples, n_features, num_components)
    self.num_components = num_components

    self.parms_dict = {'num_components': num_components}
    self.duplicate_h5_groups, self.partial_h5_groups = self._check_for_duplicates()

    # supercharge h5_main!
    self.h5_main = USIDataset(self.h5_main)

    self.__u = None
    self.__v = None
    self.__s = None
def test_compound_numpy(self):
    with h5py.File(file_path, mode='r') as h5_f:
        func, is_complex, is_compound, n_features, type_mult = dtype_utils.check_dtype(h5_f['compound'])
        self.assertEqual(func, dtype_utils.flatten_compound_to_real)
        self.assertEqual(is_complex, False)
        self.assertEqual(is_compound, True)
        self.assertEqual(n_features, 3 * h5_f['compound'].shape[1])
        self.assertEqual(type_mult, 3 * np.float32(0).itemsize)
def __init__(self, h5_main, estimator, **kwargs):
    """
    Constructs the Decomposition object. Call the
    :meth:`~pycroscopy.processing.Decomposition.test()` and
    :meth:`~pycroscopy.processing.Decomposition.compute()` methods to run the decomposition

    Parameters
    ------------
    h5_main : :class:`pyUSID.USIDataset` object
        USID Main HDF5 dataset with embedded ancillary spectroscopic, position indices and values datasets
    estimator : :module:`sklearn.decomposition` object
        configured decomposition object to apply to the data
    h5_target_group : h5py.Group, optional. Default = None
        Location where to look for existing results and to place newly computed results.
        Use this kwarg if the results need to be written to a different HDF5 file.
        By default, this value is set to the parent group containing `h5_main`
    """
    allowed_methods = [dec.factor_analysis.FactorAnalysis,
                       dec.fastica_.FastICA,
                       dec.incremental_pca.IncrementalPCA,
                       dec.sparse_pca.MiniBatchSparsePCA,
                       dec.nmf.NMF,
                       dec.pca.PCA,
                       dec.sparse_pca.SparsePCA,
                       dec.truncated_svd.TruncatedSVD]

    # Store the decomposition object
    self.estimator = estimator

    # could not find a nicer way to extract the method name yet
    self.method_name = str(estimator)[:str(estimator).index('(')]

    if type(estimator) not in allowed_methods:
        raise NotImplementedError('Cannot work with {} yet'.format(self.method_name))

    # Done with decomposition-related checks, now call super init
    super(Decomposition, self).__init__(h5_main, 'Decomposition', **kwargs)

    # set up parameters
    self.parms_dict = {'decomposition_algorithm': self.method_name}
    self.parms_dict.update(self.estimator.get_params())

    # check for existing data groups with the same results
    # Partial groups don't make any sense for statistical learning algorithms....
    self.duplicate_h5_groups, self.h5_partial_groups = self._check_for_duplicates()

    # figure out the operation that needs to be performed to convert to real scalar
    (self.data_transform_func, self.data_is_complex, self.data_is_compound,
     self.data_n_features, self.data_type_mult) = check_dtype(h5_main)

    # supercharge h5_main!
    self.h5_main = USIDataset(self.h5_main)

    self.__components = None
    self.__projection = None
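A minimal usage sketch for the constructor above. The file name and dataset path are hypothetical placeholders; NMF is one of the allowed estimators, and compute() is the method the docstring directs users to call:

import h5py
from sklearn.decomposition import NMF
from pyUSID import USIDataset
from pycroscopy.processing import Decomposition

h5_f = h5py.File('data.h5', mode='r+')  # hypothetical pyUSID-formatted file
h5_main = USIDataset(h5_f['Measurement_000/Channel_000/Raw_Data'])  # hypothetical path

# Configure the sklearn estimator first; note that NMF requires non-negative data
estimator = NMF(n_components=4, max_iter=200)

decomposer = Decomposition(h5_main, estimator)
h5_decomp_grp = decomposer.compute()  # writes components and projections into a results group

h5_f.close()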
def test_real_numpy(self):
    # real_matrix = np.random.rand(5, 7)
    # func, is_complex, is_compound, n_features, type_mult = dtype_utils.check_dtype(real_matrix)
    with h5py.File(file_path, mode='r') as h5_f:
        func, is_complex, is_compound, n_features, type_mult = dtype_utils.check_dtype(h5_f['real'])
        self.assertEqual(func, h5_f['real'].dtype.type)
        self.assertEqual(is_complex, False)
        self.assertEqual(is_compound, False)
        self.assertEqual(n_features, h5_f['real'].shape[1])
        self.assertEqual(type_mult, h5_f['real'].dtype.type(0).itemsize)
def test_check_dtype_complex_numpy(self):
    with h5py.File(file_path, mode='r') as h5_f:
        func, is_complex, is_compound, n_features, type_mult = dtype_utils.check_dtype(h5_f['complex'])
        self.assertEqual(func, dtype_utils.flatten_complex_to_real)
        self.assertEqual(is_complex, True)
        self.assertEqual(is_compound, False)
        self.assertEqual(n_features, 2 * h5_f['complex'].shape[1])
        self.assertEqual(type_mult, 2 * np.real(h5_f['complex'][0, 0]).dtype.itemsize)
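A sketch of what these assertions exercise, using an in-memory h5py file (the 'core' driver with backing_store=False is standard h5py; the dataset name and import path are assumptions). check_dtype only accepts h5py datasets, and for complex data it hands back flatten_complex_to_real, which doubles the feature count:

import h5py
import numpy as np
from pyUSID.io import dtype_utils  # assumed import path for the module under test

# In-memory HDF5 file: nothing is written to disk
with h5py.File('scratch.h5', mode='w', driver='core', backing_store=False) as h5_f:
    data = np.random.rand(3, 5) + 1j * np.random.rand(3, 5)
    h5_dset = h5_f.create_dataset('complex', data=data)

    func, is_complex, is_compound, n_features, type_mult = dtype_utils.check_dtype(h5_dset)

    flattened = func(h5_dset[()])
    print(is_complex, is_compound)  # True False
    print(n_features)               # 10: real + imaginary halves of the 5 features
    print(flattened.shape)          # (3, 10)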
def __init__(self, h5_main, estimator):
    """
    Uses the provided (preconfigured) Decomposition object to decompose the provided dataset

    Parameters
    ------------
    h5_main : HDF5 dataset object
        Main dataset with ancillary spectroscopic, position indices and values datasets
    estimator : sklearn.decomposition object
        configured decomposition object to apply to the data
    """
    allowed_methods = [dec.factor_analysis.FactorAnalysis,
                       dec.fastica_.FastICA,
                       dec.incremental_pca.IncrementalPCA,
                       dec.sparse_pca.MiniBatchSparsePCA,
                       dec.nmf.NMF,
                       dec.pca.PCA,
                       dec.sparse_pca.SparsePCA,
                       dec.truncated_svd.TruncatedSVD]

    # Store the decomposition object
    self.estimator = estimator

    # could not find a nicer way to extract the method name yet
    self.method_name = str(estimator)[:str(estimator).index('(')]

    if type(estimator) not in allowed_methods:
        raise NotImplementedError('Cannot work with {} yet'.format(self.method_name))

    # Done with decomposition-related checks, now call super init
    super(Decomposition, self).__init__(h5_main)

    # set up parameters
    self.parms_dict = {'decomposition_algorithm': self.method_name}
    self.parms_dict.update(self.estimator.get_params())

    # check for existing data groups with the same results
    self.process_name = 'Decomposition'
    # Partial groups don't make any sense for statistical learning algorithms....
    self.duplicate_h5_groups, self.h5_partial_groups = self._check_for_duplicates()

    # figure out the operation that needs to be performed to convert to real scalar
    (self.data_transform_func, self.data_is_complex, self.data_is_compound,
     self.data_n_features, self.data_type_mult) = check_dtype(h5_main)

    # supercharge h5_main!
    self.h5_main = USIDataset(self.h5_main)

    self.__components = None
    self.__projection = None
def __init__(self, h5_main, estimator):
    """
    Constructs the Decomposition object. Call the
    :meth:`~pycroscopy.processing.Decomposition.test()` and
    :meth:`~pycroscopy.processing.Decomposition.compute()` methods to run the decomposition

    Parameters
    ------------
    h5_main : :class:`pyUSID.USIDataset` object
        USID Main HDF5 dataset with embedded ancillary spectroscopic, position indices and values datasets
    estimator : :module:`sklearn.decomposition` object
        configured decomposition object to apply to the data
    """
    allowed_methods = [dec.factor_analysis.FactorAnalysis,
                       dec.fastica_.FastICA,
                       dec.incremental_pca.IncrementalPCA,
                       dec.sparse_pca.MiniBatchSparsePCA,
                       dec.nmf.NMF,
                       dec.pca.PCA,
                       dec.sparse_pca.SparsePCA,
                       dec.truncated_svd.TruncatedSVD]

    # Store the decomposition object
    self.estimator = estimator

    # could not find a nicer way to extract the method name yet
    self.method_name = str(estimator)[:str(estimator).index('(')]

    if type(estimator) not in allowed_methods:
        raise NotImplementedError('Cannot work with {} yet'.format(self.method_name))

    # Done with decomposition-related checks, now call super init
    super(Decomposition, self).__init__(h5_main)

    # set up parameters
    self.parms_dict = {'decomposition_algorithm': self.method_name}
    self.parms_dict.update(self.estimator.get_params())

    # check for existing data groups with the same results
    self.process_name = 'Decomposition'
    # Partial groups don't make any sense for statistical learning algorithms....
    self.duplicate_h5_groups, self.h5_partial_groups = self._check_for_duplicates()

    # figure out the operation that needs to be performed to convert to real scalar
    (self.data_transform_func, self.data_is_complex, self.data_is_compound,
     self.data_n_features, self.data_type_mult) = check_dtype(h5_main)

    # supercharge h5_main!
    self.h5_main = USIDataset(self.h5_main)

    self.__components = None
    self.__projection = None
def __init__(self, h5_main, num_components=None, **kwargs):
    """
    Perform the SVD decomposition on the selected dataset and write the results to h5 file.

    Parameters
    ----------
    h5_main : :class:`pyUSID.USIDataset` object
        USID Main HDF5 dataset that will be decomposed
    num_components : int, optional
        Number of components to decompose h5_main into.  Default None.
    h5_target_group : h5py.Group, optional. Default = None
        Location where to look for existing results and to place newly computed results.
        Use this kwarg if the results need to be written to a different HDF5 file.
        By default, this value is set to the parent group containing `h5_main`
    kwargs
        Arguments to be sent to Process
    """
    super(SVD, self).__init__(h5_main, 'SVD', **kwargs)

    '''
    Calculate the size of the main data in memory and compare to max_mem
    We use the minimum of the actual dtype's itemsize and float32 since
    we don't want to read it in yet and do the proper type conversions.
    '''
    n_samples, n_features = h5_main.shape
    self.data_transform_func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_main)

    if num_components is None:
        num_components = min(n_samples, n_features)
    else:
        num_components = min(n_samples, n_features, num_components)
    self.num_components = num_components

    # Check that we can actually compute the SVD with the selected number of components
    self._check_available_mem()

    self.parms_dict = {'num_components': num_components}
    self.duplicate_h5_groups, self.partial_h5_groups = self._check_for_duplicates()

    # supercharge h5_main!
    self.h5_main = USIDataset(self.h5_main)

    self.__u = None
    self.__v = None
    self.__s = None
def __init__(self, h5_main, num_components=None, **kwargs):
    """
    Perform the SVD decomposition on the selected dataset and write the results to h5 file.

    Parameters
    ----------
    h5_main : USIDataset
        Dataset to be decomposed.
    num_components : int, optional
        Number of components to decompose h5_main into.  Default None.
    kwargs
        Arguments to be sent to Process
    """
    super(SVD, self).__init__(h5_main, **kwargs)
    self.process_name = 'SVD'

    '''
    Calculate the size of the main data in memory and compare to max_mem
    We use the minimum of the actual dtype's itemsize and float32 since
    we don't want to read it in yet and do the proper type conversions.
    '''
    n_samples, n_features = h5_main.shape
    self.data_transform_func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_main)

    if num_components is None:
        num_components = min(n_samples, n_features)
    else:
        num_components = min(n_samples, n_features, num_components)
    self.num_components = num_components

    # Check that we can actually compute the SVD with the selected number of components
    self._check_available_mem()

    self.parms_dict = {'num_components': num_components}
    self.duplicate_h5_groups, self.partial_h5_groups = self._check_for_duplicates()

    # supercharge h5_main!
    self.h5_main = USIDataset(self.h5_main)

    self.__u = None
    self.__v = None
    self.__s = None
def __init__(self, h5_main, num_components=None, **kwargs):
    """
    Perform the SVD decomposition on the selected dataset and write the results to h5 file.

    Parameters
    ----------
    h5_main : :class:`pyUSID.USIDataset` object
        USID Main HDF5 dataset that will be decomposed
    num_components : int, optional
        Number of components to decompose h5_main into.  Default None.
    kwargs
        Arguments to be sent to Process
    """
    super(SVD, self).__init__(h5_main, **kwargs)
    self.process_name = 'SVD'

    '''
    Calculate the size of the main data in memory and compare to max_mem
    We use the minimum of the actual dtype's itemsize and float32 since
    we don't want to read it in yet and do the proper type conversions.
    '''
    n_samples, n_features = h5_main.shape
    self.data_transform_func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_main)

    if num_components is None:
        num_components = min(n_samples, n_features)
    else:
        num_components = min(n_samples, n_features, num_components)
    self.num_components = num_components

    # Check that we can actually compute the SVD with the selected number of components
    self._check_available_mem()

    self.parms_dict = {'num_components': num_components}
    self.duplicate_h5_groups, self.partial_h5_groups = self._check_for_duplicates()

    # supercharge h5_main!
    self.h5_main = USIDataset(self.h5_main)

    self.__u = None
    self.__v = None
    self.__s = None
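A usage sketch for the constructor above (file name and dataset path are hypothetical placeholders). compute() runs the decomposition and writes the results into the file; the 'U', 'S', 'V' dataset names match what rebuild_svd below reads back:

import h5py
from pyUSID import USIDataset
from pycroscopy.processing import SVD

with h5py.File('data.h5', mode='r+') as h5_f:  # hypothetical pyUSID-formatted file
    h5_main = USIDataset(h5_f['Measurement_000/Channel_000/Raw_Data'])  # hypothetical path

    svd_proc = SVD(h5_main, num_components=64)
    h5_svd_grp = svd_proc.compute()  # results group holding the 'U', 'S', 'V' datasets

    h5_u = h5_svd_grp['U']  # loading maps, one row per position
    h5_s = h5_svd_grp['S']  # singular values
    h5_v = h5_svd_grp['V']  # endmembers, one row per component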
def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024):
    """
    Rebuild the Image from the SVD results on the windows.
    Optionally, only use components less than n_comp.

    Parameters
    ----------
    h5_main : hdf5 Dataset
        dataset which SVD was performed on
    components : {int, iterable of int, slice} optional
        Defines which components to keep
        Default - None, all components kept

        Input Types
        integer : Components less than the input will be kept
        length 2 iterable of integers : Integers define start and stop of component slice to retain
        other iterable of integers or slice : Selection of component indices to retain
    cores : int, optional
        How many cores should be used to rebuild
        Default - None, all but 2 cores will be used, min 1
    max_RAM_mb : int, optional
        Maximum amount of memory to use when rebuilding, in Mb.
        Default - 1024Mb

    Returns
    -------
    rebuilt_data : HDF5 Dataset
        the rebuilt dataset
    """
    comp_slice, num_comps = get_component_slice(components, total_components=h5_main.shape[1])
    if isinstance(comp_slice, np.ndarray):
        comp_slice = list(comp_slice)
    dset_name = h5_main.name.split('/')[-1]

    # Ensuring that at least one core is available for use / 2 cores are available for other use
    max_cores = max(1, cpu_count() - 2)
    # print('max_cores', max_cores)
    if cores is not None:
        cores = min(round(abs(cores)), max_cores)
    else:
        cores = max_cores

    max_memory = min(max_RAM_mb * 1024 ** 2, 0.75 * get_available_memory())
    if cores != 1:
        max_memory = int(max_memory / 2)

    '''
    Get the handles for the SVD results
    '''
    try:
        h5_svd_group = find_results_groups(h5_main, 'SVD')[-1]

        h5_S = h5_svd_group['S']
        h5_U = h5_svd_group['U']
        h5_V = h5_svd_group['V']
    except KeyError:
        raise KeyError('SVD Results for {dset} were not found.'.format(dset=dset_name))
    except:
        raise

    func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_V)

    '''
    Calculate the size of a single batch that will fit in the available memory
    '''
    n_comps = h5_S[comp_slice].size
    mem_per_pix = (h5_U.dtype.itemsize + h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps
    fixed_mem = h5_main.size * h5_main.dtype.itemsize

    if cores is None:
        free_mem = max_memory - fixed_mem
    else:
        free_mem = max_memory * 2 - fixed_mem

    batch_size = int(round(float(free_mem) / mem_per_pix))
    batch_slices = gen_batches(h5_U.shape[0], batch_size)

    print('Reconstructing in batches of {} positions.'.format(batch_size))
    print('Batches should be {} Mb each.'.format(mem_per_pix * batch_size / 1024.0 ** 2))

    '''
    Loop over all batches.
    '''
    ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :]))
    rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1]))
    for ibatch, batch in enumerate(batch_slices):
        rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V)

    rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype)

    print('Completed reconstruction of data from SVD results. Writing to file.')

    '''
    Create the Group and dataset to hold the rebuilt data
    '''
    rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data')
    h5_rebuilt = write_main_dataset(rebuilt_grp, rebuild, 'Rebuilt_Data',
                                    get_attr(h5_main, 'quantity'), get_attr(h5_main, 'units'),
                                    None, None,
                                    h5_pos_inds=h5_main.h5_pos_inds,
                                    h5_pos_vals=h5_main.h5_pos_vals,
                                    h5_spec_inds=h5_main.h5_spec_inds,
                                    h5_spec_vals=h5_main.h5_spec_vals,
                                    chunks=h5_main.chunks,
                                    compression=h5_main.compression)

    if isinstance(comp_slice, slice):
        rebuilt_grp.attrs['components_used'] = '{}-{}'.format(comp_slice.start, comp_slice.stop)
    else:
        rebuilt_grp.attrs['components_used'] = components

    copy_attributes(h5_main, h5_rebuilt, skip_refs=False)

    h5_main.file.flush()

    print('Done writing reconstructed data to file.')

    return h5_rebuilt
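A usage sketch (the import path is an assumption about where rebuild_svd lives; file and dataset locations are hypothetical). Passing an integer keeps all components below that index, per the docstring:

import h5py
from pycroscopy.processing.svd_utils import rebuild_svd  # assumed module path

with h5py.File('data.h5', mode='r+') as h5_f:  # hypothetical file with existing SVD results
    h5_main = h5_f['Measurement_000/Channel_000/Raw_Data']  # hypothetical path

    # Reconstruct the data from the first 16 SVD components only,
    # capping the reconstruction at roughly 512 MB of RAM
    h5_rebuilt = rebuild_svd(h5_main, components=16, max_RAM_mb=512)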
def __init__(self, h5_main, estimator, num_comps=None, **kwargs):
    """
    Constructs the Cluster object. Call the :meth:`~pycroscopy.processing.Cluster.test()` and
    :meth:`~pycroscopy.processing.Cluster.compute()` methods to run the clustering

    Parameters
    ----------
    h5_main : :class:`pyUSID.USIDataset` object
        USID Main HDF5 dataset
    estimator : :class:`sklearn.cluster` estimator
        configured clustering algorithm to be applied to the data
    num_comps : int (unsigned), optional. Default = None / all
        Number of features / spectroscopic indices to be used to cluster the data
    h5_target_group : h5py.Group, optional. Default = None
        Location where to look for existing results and to place newly computed results.
        Use this kwarg if the results need to be written to a different HDF5 file.
        By default, this value is set to the parent group containing `h5_main`
    """
    allowed_methods = [cls.AgglomerativeClustering,
                       cls.Birch,
                       cls.KMeans,
                       cls.MiniBatchKMeans,
                       cls.SpectralClustering]

    # could not find a nicer way to extract the method name yet
    self.method_name = str(estimator)[:str(estimator).index('(')]

    if type(estimator) not in allowed_methods:
        raise TypeError('Cannot work with {} just yet'.format(self.method_name))

    # Done with clustering-related checks, now call super init
    super(Cluster, self).__init__(h5_main, 'Cluster', **kwargs)

    # Store the clustering object
    self.estimator = estimator

    # 'all' when no component restriction was requested; refined to a concrete value below
    comp_attr = 'all' if num_comps is None else None

    comp_slice, num_comps = get_component_slice(num_comps, total_components=self.h5_main.shape[1])

    self.num_comps = num_comps
    self.data_slice = (slice(None), comp_slice)

    if isinstance(comp_slice, slice):
        # cannot store slice as an attribute in hdf5
        # convert to list of integers!
        inds = comp_slice.indices(self.h5_main.shape[1])
        # much like range, inds are arranged as (start, stop, step)
        if inds[0] == 0 and inds[2] == 1:
            # starting from 0 with step of 1 = up to N components
            if inds[1] >= self.h5_main.shape[1] - 1:
                comp_attr = 'all'
            else:
                comp_attr = inds[1]
        else:
            comp_attr = range(*inds)
    elif comp_attr == 'all':
        pass
    else:
        # subset of spectral components specified as an array
        comp_attr = comp_slice

    # set up parameters
    self.parms_dict = {'cluster_algorithm': self.method_name,
                       'spectral_components': comp_attr}
    self.parms_dict.update(self.estimator.get_params())

    # update n_jobs according to the cores argument
    # print('cores reset to', self._cores)
    # a different number of cores should *not* be a reason for different results,
    # so we update this flag only after checking for duplicates
    estimator.n_jobs = self._cores
    self.parms_dict.update({'n_jobs': self._cores})

    # check for existing data groups with the same results
    # Partial groups don't make any sense for statistical learning algorithms....
    self.duplicate_h5_groups, self.partial_h5_groups = self._check_for_duplicates()

    # figure out the operation that needs to be performed to convert to real scalar
    (self.data_transform_func, self.data_is_complex, self.data_is_compound,
     self.data_n_features, self.data_type_mult) = check_dtype(h5_main)

    # supercharge h5_main!
    self.h5_main = USIDataset(self.h5_main)

    self.__labels = None
    self.__mean_resp = None
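A usage sketch for the constructor above (file name, dataset path, and the 'Labels' results-dataset name are assumptions, the latter based on the attribute names in this class). KMeans is one of the allowed estimators:

import h5py
from sklearn.cluster import KMeans
from pyUSID import USIDataset
from pycroscopy.processing import Cluster

with h5py.File('data.h5', mode='r+') as h5_f:  # hypothetical pyUSID-formatted file
    h5_main = USIDataset(h5_f['Measurement_000/Channel_000/Raw_Data'])  # hypothetical path

    estimator = KMeans(n_clusters=4)  # configure sklearn first, then hand it over
    clusterer = Cluster(h5_main, estimator, num_comps=32)  # cluster on the first 32 components

    h5_cluster_grp = clusterer.compute()
    labels = h5_cluster_grp['Labels'][()]  # assumed dataset name: per-position cluster labels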
def test_non_hdf(self):
    with self.assertRaises(TypeError):
        _ = dtype_utils.check_dtype(np.arange(15))
def __init__(self, h5_main, estimator, num_comps=None):
    """
    Constructs the Cluster object. Call the :meth:`~pycroscopy.processing.Cluster.test()` and
    :meth:`~pycroscopy.processing.Cluster.compute()` methods to run the clustering

    Parameters
    ----------
    h5_main : :class:`pyUSID.USIDataset` object
        USID Main HDF5 dataset
    estimator : :class:`sklearn.cluster` estimator
        configured clustering algorithm to be applied to the data
    num_comps : int (unsigned), optional. Default = None / all
        Number of features / spectroscopic indices to be used to cluster the data
    """
    allowed_methods = [cls.AgglomerativeClustering,
                       cls.Birch,
                       cls.KMeans,
                       cls.MiniBatchKMeans,
                       cls.SpectralClustering]

    # could not find a nicer way to extract the method name yet
    self.method_name = str(estimator)[:str(estimator).index('(')]

    if type(estimator) not in allowed_methods:
        raise TypeError('Cannot work with {} just yet'.format(self.method_name))

    # Done with clustering-related checks, now call super init
    super(Cluster, self).__init__(h5_main)

    # Store the clustering object
    self.estimator = estimator

    # 'all' when no component restriction was requested; refined to a concrete value below
    comp_attr = 'all' if num_comps is None else None

    comp_slice, num_comps = get_component_slice(num_comps, total_components=self.h5_main.shape[1])

    self.num_comps = num_comps
    self.data_slice = (slice(None), comp_slice)

    if isinstance(comp_slice, slice):
        # cannot store slice as an attribute in hdf5
        # convert to list of integers!
        inds = comp_slice.indices(self.h5_main.shape[1])
        # much like range, inds are arranged as (start, stop, step)
        if inds[0] == 0 and inds[2] == 1:
            # starting from 0 with step of 1 = up to N components
            if inds[1] >= self.h5_main.shape[1] - 1:
                comp_attr = 'all'
            else:
                comp_attr = inds[1]
        else:
            comp_attr = range(*inds)
    elif comp_attr == 'all':
        pass
    else:
        # subset of spectral components specified as an array
        comp_attr = comp_slice

    # set up parameters
    self.parms_dict = {'cluster_algorithm': self.method_name,
                       'spectral_components': comp_attr}
    self.parms_dict.update(self.estimator.get_params())

    # update n_jobs according to the cores argument
    # print('cores reset to', self._cores)
    # a different number of cores should *not* be a reason for different results,
    # so we update this flag only after checking for duplicates
    estimator.n_jobs = self._cores
    self.parms_dict.update({'n_jobs': self._cores})

    # check for existing data groups with the same results
    self.process_name = 'Cluster'
    # Partial groups don't make any sense for statistical learning algorithms....
    self.duplicate_h5_groups, self.partial_h5_groups = self._check_for_duplicates()

    # figure out the operation that needs to be performed to convert to real scalar
    (self.data_transform_func, self.data_is_complex, self.data_is_compound,
     self.data_n_features, self.data_type_mult) = check_dtype(h5_main)

    # supercharge h5_main!
    self.h5_main = USIDataset(self.h5_main)

    self.__labels = None
    self.__mean_resp = None
def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024):
    """
    Rebuild the Image from the SVD results on the windows.
    Optionally, only use components less than n_comp.

    Parameters
    ----------
    h5_main : hdf5 Dataset
        dataset which SVD was performed on
    components : {int, iterable of int, slice} optional
        Defines which components to keep
        Default - None, all components kept

        Input Types
        integer : Components less than the input will be kept
        length 2 iterable of integers : Integers define start and stop of component slice to retain
        other iterable of integers or slice : Selection of component indices to retain
    cores : int, optional
        How many cores should be used to rebuild
        Default - None, all but 2 cores will be used, min 1
    max_RAM_mb : int, optional
        Maximum amount of memory to use when rebuilding, in Mb.
        Default - 1024Mb

    Returns
    -------
    rebuilt_data : HDF5 Dataset
        the rebuilt dataset
    """
    comp_slice, num_comps = get_component_slice(components, total_components=h5_main.shape[1])
    if isinstance(comp_slice, np.ndarray):
        comp_slice = list(comp_slice)
    dset_name = h5_main.name.split('/')[-1]

    # Ensuring that at least one core is available for use / 2 cores are available for other use
    max_cores = max(1, cpu_count() - 2)
    # print('max_cores', max_cores)
    if cores is not None:
        cores = min(round(abs(cores)), max_cores)
    else:
        cores = max_cores

    max_memory = min(max_RAM_mb * 1024 ** 2, 0.75 * get_available_memory())
    if cores != 1:
        max_memory = int(max_memory / 2)

    '''
    Get the handles for the SVD results
    '''
    try:
        h5_svd_group = find_results_groups(h5_main, 'SVD')[-1]

        h5_S = h5_svd_group['S']
        h5_U = h5_svd_group['U']
        h5_V = h5_svd_group['V']
    except KeyError:
        raise KeyError('SVD Results for {dset} were not found.'.format(dset=dset_name))
    except:
        raise

    func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_V)

    '''
    Calculate the size of a single batch that will fit in the available memory
    '''
    n_comps = h5_S[comp_slice].size
    mem_per_pix = (h5_U.dtype.itemsize + h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps
    fixed_mem = h5_main.size * h5_main.dtype.itemsize

    if cores is None:
        free_mem = max_memory - fixed_mem
    else:
        free_mem = max_memory * 2 - fixed_mem

    batch_size = int(round(float(free_mem) / mem_per_pix))
    batch_slices = gen_batches(h5_U.shape[0], batch_size)

    print('Reconstructing in batches of {} positions.'.format(batch_size))
    print('Batches should be {} Mb each.'.format(mem_per_pix * batch_size / 1024.0 ** 2))

    '''
    Loop over all batches.
    '''
    ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :]))
    rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1]))
    for ibatch, batch in enumerate(batch_slices):
        rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V)

    rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype)

    print('Completed reconstruction of data from SVD results. Writing to file.')

    '''
    Create the Group and dataset to hold the rebuilt data
    '''
    rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data')
    h5_rebuilt = write_main_dataset(rebuilt_grp, rebuild, 'Rebuilt_Data',
                                    get_attr(h5_main, 'quantity'), get_attr(h5_main, 'units'),
                                    None, None,
                                    h5_pos_inds=h5_main.h5_pos_inds,
                                    h5_pos_vals=h5_main.h5_pos_vals,
                                    h5_spec_inds=h5_main.h5_spec_inds,
                                    h5_spec_vals=h5_main.h5_spec_vals,
                                    chunks=h5_main.chunks,
                                    compression=h5_main.compression)

    if isinstance(comp_slice, slice):
        rebuilt_grp.attrs['components_used'] = '{}-{}'.format(comp_slice.start, comp_slice.stop)
    else:
        rebuilt_grp.attrs['components_used'] = components

    copy_attributes(h5_main, h5_rebuilt, skip_refs=False)

    h5_main.file.flush()

    print('Done writing reconstructed data to file.')

    return h5_rebuilt