Example #1
 def test_stack_real_nd_to_target_complex_h5_legal(self):
     with h5py.File(file_path, mode='r') as h5_f:
         h5_real = h5_f['real2']
         expected = h5_real[:, :, :3] + 1j * h5_real[:, :, 3:]
     actual = dtype_utils.stack_real_to_target_dtype(h5_real,
                                                     np.complex128)
         self.assertTrue(np.allclose(actual, expected))
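
The test above encodes the complex-stacking convention: the first half of the last axis is read as the real part and the second half as the imaginary part. A minimal plain-NumPy sketch of that rule (not the library implementation):

import numpy as np

def stack_real_to_complex_sketch(real_arr):
    # first half of the last axis -> real part, second half -> imaginary part
    half = real_arr.shape[-1] // 2
    return real_arr[..., :half] + 1j * real_arr[..., half:]

real_nd = np.random.rand(2, 4, 6)
expected = real_nd[..., :3] + 1j * real_nd[..., 3:]
assert np.allclose(stack_real_to_complex_sketch(real_nd), expected)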
Example #2
 def stack_real_to_target_complex_nd(self):
     expected = 5 * np.random.rand(2, 3, 5, 8) + 7j * np.random.rand(
         2, 3, 5, 8)
     real_val = np.concatenate(
         [np.real(expected), np.imag(expected)], axis=3)
     actual = dtype_utils.stack_real_to_target_dtype(real_val, np.complex128)
     self.assertTrue(np.allclose(actual, expected))
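
The inverse direction is a flattening helper; the round trip below assumes pyUSID's flatten_complex_to_real and the pyUSID.io import path (these utilities moved to sidpy in later versions, so verify against your install):

import numpy as np
from pyUSID.io import dtype_utils  # assumed import path

complex_nd = 5 * np.random.rand(2, 3, 5, 8) + 7j * np.random.rand(2, 3, 5, 8)
flat = dtype_utils.flatten_complex_to_real(complex_nd)  # real then imag halves on the last axis
restored = dtype_utils.stack_real_to_target_dtype(flat, np.complex128)
assert np.allclose(restored, complex_nd)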
Example #3
 def test_compound_nd(self):
     num_elems = (2, 3, 5, 7)
     structured_array = np.zeros(shape=num_elems, dtype=struc_dtype)
     structured_array['r'] = r_vals = np.random.random(size=num_elems)
     structured_array['g'] = g_vals = np.random.randint(0, high=1024, size=num_elems)
     structured_array['b'] = b_vals = np.random.random(size=num_elems)
     real_val = np.concatenate((r_vals, g_vals, b_vals), axis=len(num_elems) - 1)
     actual = dtype_utils.stack_real_to_target_dtype(real_val, struc_dtype)
     self.assertTrue(compare_structured_arrays(actual, structured_array))
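
For compound targets the rule generalises: the last axis is split into one equal chunk per field of the structured dtype, in field order. A hedged standalone sketch (this struc_dtype is an assumption matching the r/g/b fields the tests use):

import numpy as np

struc_dtype = np.dtype([('r', np.float32), ('g', np.uint16), ('b', np.float32)])

def stack_real_to_compound_sketch(real_arr, compound_dtype):
    # split the last axis into len(fields) equal chunks, one chunk per field, in order
    names = compound_dtype.names
    chunk = real_arr.shape[-1] // len(names)
    out = np.zeros(shape=real_arr.shape[:-1] + (chunk,), dtype=compound_dtype)
    for ind, name in enumerate(names):
        out[name] = real_arr[..., ind * chunk:(ind + 1) * chunk]
    return out

real_val = np.random.rand(2, 3, 15)  # 3 fields x 5 elements each
assert stack_real_to_compound_sketch(real_val, struc_dtype).shape == (2, 3, 5)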
Example #4
 def __mean_resp_for_cluster(clust_ind, h5_raw, labels_vec, data_slice, xform_func):
     # get all pixels with this label
     targ_pos = np.argwhere(labels_vec == clust_ind)
     # slice to get the responses for all these pixels, ensure that it's 2d
     data_chunk = np.atleast_2d(h5_raw[:, data_slice[1]][targ_pos, :])
     # transform to real from whatever type it was
     avg_data = np.mean(xform_func(data_chunk), axis=0, keepdims=True)
     # transform back to the source data type; the caller inserts this into the mean response
     return np.squeeze(stack_real_to_target_dtype(avg_data, h5_raw.dtype))
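
The private helper above follows a transform, reduce, stack-back pattern: convert to a real view, average over the selected pixels, then restore the source dtype so the mean can sit next to the raw data. A hedged standalone version (the function name is illustrative, not pyUSID API):

import numpy as np
from pyUSID.io.dtype_utils import stack_real_to_target_dtype  # assumed import path

def mean_in_source_dtype(data_2d, xform_func, source_dtype):
    # transform to real, average over rows, then stack back to the source dtype
    avg = np.mean(xform_func(np.atleast_2d(data_2d)), axis=0, keepdims=True)
    return np.squeeze(stack_real_to_target_dtype(avg, source_dtype))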
Example #5
    def test(self, override=False):
        """
        Applies randomised SVD to the dataset. This function does NOT write results to the hdf5 file. Call compute() to
        write to the file. Handles complex, compound datasets such that the V matrix is of the same data-type as the
        input matrix.

        Parameters
        ----------
        override : bool, optional. Default = False
            Set to True to recompute results even if prior results are available; otherwise existing results are returned

        Returns
        -------
        U : :class:`numpy.ndarray`
            Abundance matrix
        S : :class:`numpy.ndarray`
            variance vector
        V : :class:`numpy.ndarray`
            eigenvector matrix
        """
        '''
        Check if a number of components has been set and ensure that the number is less than
        the minimum axis length of the data.  If both conditions are met, use fsvd.  If not
        use the regular svd.

        C.Smith -- We might need to put a lower limit on num_comps in the future.  I don't
                   know enough about svd to be sure.
        '''
        if not override:
            if isinstance(self.duplicate_h5_groups, list) and len(self.duplicate_h5_groups) > 0:
                self.h5_results_grp = self.duplicate_h5_groups[-1]
                print('Returning previously computed results from: {}'.format(self.h5_results_grp.name))
                print('set the "override" flag to True to recompute results')
                return reshape_to_n_dims(self.h5_results_grp['U'])[0], self.h5_results_grp['S'][()], \
                       reshape_to_n_dims(self.h5_results_grp['V'])[0]

        self.h5_results_grp = None

        t1 = time.time()

        self.__u, self.__s, self.__v = randomized_svd(self.data_transform_func(self.h5_main), self.num_components,
                                                      n_iter=3)
        self.__v = stack_real_to_target_dtype(self.__v, self.h5_main.dtype)

        print('Took {} to compute randomized SVD'.format(format_time(time.time() - t1)))

        u_mat, success = reshape_to_n_dims(self.__u, h5_pos=self.h5_main.h5_pos_inds,
                                           h5_spec=np.expand_dims(np.arange(self.__u.shape[1]), axis=0))
        if not success:
            raise ValueError('Could not reshape U to N-Dimensional dataset!')

        v_mat, success = reshape_to_n_dims(self.__v, h5_pos=np.expand_dims(np.arange(self.__u.shape[1]), axis=1),
                                           h5_spec=self.h5_main.h5_spec_inds)
        if not success:
            raise ValueError('Could not reshape V to N-Dimensional dataset!')

        return u_mat, self.__s, v_mat
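
A hedged usage sketch for the method above; the SVD class, constructor arguments, and dataset path are assumptions based on pycroscopy's conventions rather than a verbatim recipe:

import h5py
from pycroscopy.processing.svd_utils import SVD  # assumed import path

with h5py.File('data.h5', mode='r+') as h5_f:
    h5_main = h5_f['Measurement_000/Channel_000/Raw_Data']  # hypothetical dataset path
    svd_proc = SVD(h5_main, num_components=64)
    u_mat, s_vec, v_mat = svd_proc.test()  # in-memory trial; nothing written yet
    h5_results_grp = svd_proc.compute()    # writes U, S, and V to the file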
Example #6
 def stack_real_to_target_compound_single(self):
     num_elems = 1
     structured_array = np.zeros(shape=num_elems, dtype=struc_dtype)
     structured_array['r'] = r_vals = np.random.random(size=num_elems)
     structured_array['g'] = g_vals = np.random.randint(0,
                                                        high=1024,
                                                        size=num_elems)
     structured_array['b'] = b_vals = np.random.random(size=num_elems)
     real_val = np.concatenate((r_vals, g_vals, b_vals))
     actual = dtype_utils.stack_real_to_target_dtype(real_val, struc_dtype)
     self.assertTrue(compare_structured_arrays(actual, structured_array[0]))
Example #7
    def test(self, override=False):
        """
        Decomposes the hdf5 dataset to calculate the components and projection. This function does NOT write results to
        the hdf5 file. Call :meth:`~pycroscopy.processing.Decomposition.compute()` to write to the file. Handles
        complex, compound datasets such that the components are of the same data-type as the input matrix.

        Parameters
        ----------
        override : bool, optional. Default = False
            Set to True to recompute results even if prior results are available; otherwise existing results are returned

        Returns
        -------
        components : :class:`numpy.ndarray`
            Components
        projections : :class:`numpy.ndarray`
            Projections
        """
        if not override:
            if isinstance(self.duplicate_h5_groups, list) and len(self.duplicate_h5_groups) > 0:
                self.h5_results_grp = self.duplicate_h5_groups[-1]
                print('Returning previously computed results from: {}'.format(self.h5_results_grp.name))
                print('set the "override" flag to True to recompute results')
                return USIDataset(self.h5_results_grp['Components']).get_n_dim_form(), \
                       USIDataset(self.h5_results_grp['Projection']).get_n_dim_form()

        self.h5_results_grp = None

        print('Performing Decomposition on {}.'.format(self.h5_main.name))

        t0 = time.time()
        self._fit()
        self._transform()
        print('Took {} to compute {}'.format(format_time(time.time() - t0), self.method_name))

        self.__components = stack_real_to_target_dtype(self.estimator.components_, self.h5_main.dtype)
        projection_mat, success = reshape_to_n_dims(self.__projection, h5_pos=self.h5_main.h5_pos_inds,
                                                    h5_spec=np.expand_dims(np.arange(self.__projection.shape[1]),
                                                                           axis=0))
        if not success:
            raise ValueError('Could not reshape projections to N-Dimensional dataset!')

        components_mat, success = reshape_to_n_dims(self.__components, h5_spec=self.h5_main.h5_spec_inds,
                                                    h5_pos=np.expand_dims(np.arange(self.__components.shape[0]),
                                                                          axis=1))

        if not success:
            raise ValueError('Could not reshape components to N-Dimensional dataset!')

        return components_mat, projection_mat
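
A hedged usage sketch for the Decomposition process; the import path, constructor signature, and dataset path are assumptions based on pycroscopy's conventions:

import h5py
from sklearn.decomposition import FastICA
from pycroscopy.processing.decomposition import Decomposition  # assumed import path

with h5py.File('data.h5', mode='r+') as h5_f:
    h5_main = h5_f['Measurement_000/Channel_000/Raw_Data']  # hypothetical dataset path
    proc = Decomposition(h5_main, FastICA(n_components=16))
    components, projections = proc.test()  # in-memory trial; nothing written yet
    h5_results_grp = proc.compute()        # writes Components and Projection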
Example #8
def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024):
    """
    Rebuild the image from the SVD results on the windows.
    Optionally, use only the components selected by ``components``.

    Parameters
    ----------
    h5_main : hdf5 Dataset
        dataset which SVD was performed on
    components : {int, iterable of int, slice} optional
        Defines which components to keep
        Default - None, all components kept

        Input Types
        integer : Components less than the input will be kept
        length 2 iterable of integers : Integers define start and stop of component slice to retain
        other iterable of integers or slice : Selection of component indices to retain
    cores : int, optional
        How many cores should be used to rebuild
        Default - None, all but 2 cores will be used, min 1
    max_RAM_mb : int, optional
        Maximum amount of memory to use when rebuilding, in Mb.
        Default - 1024Mb

    Returns
    -------
    rebuilt_data : HDF5 Dataset
        the rebuilt dataset

    """
    comp_slice, num_comps = get_component_slice(
        components, total_components=h5_main.shape[1])
    if isinstance(comp_slice, np.ndarray):
        comp_slice = list(comp_slice)
    dset_name = h5_main.name.split('/')[-1]

    # Ensuring that at least one core is available for use / 2 cores are available for other use
    max_cores = max(1, cpu_count() - 2)
    if cores is not None:
        cores = min(round(abs(cores)), max_cores)
    else:
        cores = max_cores

    max_memory = min(max_RAM_mb * 1024**2, 0.75 * get_available_memory())
    if cores != 1:
        max_memory = int(max_memory / 2)
    '''
    Get the handles for the SVD results
    '''
    try:
        h5_svd_group = find_results_groups(h5_main, 'SVD')[-1]

        h5_S = h5_svd_group['S']
        h5_U = h5_svd_group['U']
        h5_V = h5_svd_group['V']

    except KeyError:
        raise KeyError(
            'SVD Results for {dset} were not found.'.format(dset=dset_name))

    func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_V)
    '''
    Calculate the size of a single batch that will fit in the available memory
    '''
    n_comps = h5_S[comp_slice].size
    mem_per_pix = (h5_U.dtype.itemsize +
                   h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps
    fixed_mem = h5_main.size * h5_main.dtype.itemsize

    if cores is None:
        free_mem = max_memory - fixed_mem
    else:
        free_mem = max_memory * 2 - fixed_mem

    batch_size = int(round(float(free_mem) / mem_per_pix))
    batch_slices = gen_batches(h5_U.shape[0], batch_size)

    print('Reconstructing in batches of {} positions.'.format(batch_size))
    print('Batches should be {} Mb each.'.format(mem_per_pix * batch_size /
                                                  1024.0**2))
    '''
    Loop over all batches.
    '''
    ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :]))
    rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1]))
    for ibatch, batch in enumerate(batch_slices):
        rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V)

    rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype)

    print(
        'Completed reconstruction of data from SVD results.  Writing to file.')
    '''
    Create the Group and dataset to hold the rebuild data
    '''
    rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data')
    h5_rebuilt = write_main_dataset(rebuilt_grp,
                                    rebuild,
                                    'Rebuilt_Data',
                                    get_attr(h5_main, 'quantity'),
                                    get_attr(h5_main, 'units'),
                                    None,
                                    None,
                                    h5_pos_inds=h5_main.h5_pos_inds,
                                    h5_pos_vals=h5_main.h5_pos_vals,
                                    h5_spec_inds=h5_main.h5_spec_inds,
                                    h5_spec_vals=h5_main.h5_spec_vals,
                                    chunks=h5_main.chunks,
                                    compression=h5_main.compression)

    if isinstance(comp_slice, slice):
        rebuilt_grp.attrs['components_used'] = '{}-{}'.format(
            comp_slice.start, comp_slice.stop)
    else:
        rebuilt_grp.attrs['components_used'] = components

    copy_attributes(h5_main, h5_rebuilt, skip_refs=False)

    h5_main.file.flush()

    print('Done writing reconstructed data to file.')

    return h5_rebuilt
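
A hedged usage sketch for rebuild_svd; it assumes the main dataset already has an SVD results group, and the dataset path is hypothetical:

import h5py
from pyUSID import USIDataset  # assumed import; rebuild_svd expects a main dataset

with h5py.File('data.h5', mode='r+') as h5_f:
    h5_main = USIDataset(h5_f['Measurement_000/Channel_000/Raw_Data'])  # hypothetical path
    # reconstruct the data keeping only the first 25 components
    h5_rebuilt = rebuild_svd(h5_main, components=25, cores=2, max_RAM_mb=512)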
Example #9
 def test_cast_str_type(self):
     uint_array = np.random.randint(0, high=15, size=(3, 4, 5),
                                    dtype=np.uint16)
     actual = dtype_utils.stack_real_to_target_dtype(uint_array, np.dtype('<f4'))
     self.assertTrue(np.allclose(actual, np.float32(uint_array)))
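
For plain numeric targets there is nothing to stack, so the call degenerates to a dtype cast; the equivalence this test relies on, in plain NumPy:

import numpy as np

uint_array = np.random.randint(0, high=15, size=(3, 4, 5), dtype=np.uint16)
assert np.allclose(uint_array.astype(np.dtype('<f4')), np.float32(uint_array))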
Example #10
 def test_complex_single(self):
     expected = 4.32 + 5.67j
     real_val = [np.real(expected), np.imag(expected)]
     actual = dtype_utils.stack_real_to_target_dtype(real_val, np.complex128)
     self.assertTrue(np.allclose(actual, expected))