示例#1
0
    def preprocess_dataset(self, ds):
        """
        Preprocesses the dataset according to the options in self.cfg.

        Each step is applied only when its option is configured:
          * dataset_input / dataset_target: the named entries are moved to
            the 'input' / 'target' keys.
          * for 3-dimensional input, 'valid' is derived from 'n_steps' (or
            'n_steps' from 'valid') when only one of them is present.
          * dataset_samples: input and target are truncated to the first
            dataset_samples entries along their last axis.
          * no_negative_data: if the input minimum is negative, it is
            subtracted from the input.
          * preprocess_pca: the input is PCA-whitened; the unwhitened input is
            kept in 'orig_input' and the PCA parameters are stored in the
            dataset and written to "pca.npz" in cfg.out_dir.
          * use_training_as_validation: the setting is copied into the dataset.

        The dataset is modified in place and also returned.

        :param ds: dataset
        :return: preprocessed dataset
        :raises ValueError: if PCA is requested and the input is neither
                            2- nor 3-dimensional
        """
        cfg = self.cfg

        # Move user-named entries to the canonical 'input' / 'target' keys.
        if cfg.has('dataset_input') and cfg.dataset_input != 'input':
            ds['input'] = ds[cfg.dataset_input]
            del ds[cfg.dataset_input]
        if cfg.has('dataset_target') and cfg.dataset_target != 'target':
            ds['target'] = ds[cfg.dataset_target]
            del ds[cfg.dataset_target]

        if ds['input'].ndim == 3:
            # ensure that n_steps and valid are both in the dataset
            if 'n_steps' in ds and 'valid' not in ds:
                ds['valid'] = n_steps_to_valid(ds['n_steps'], ds['input'].shape[1])
            elif 'valid' in ds and 'n_steps' not in ds:
                ds['n_steps'] = valid_to_n_steps(ds['valid'])

        if cfg.has('dataset_samples'):
            # NOTE(review): only 'input' and 'target' are truncated here;
            # 'n_steps' and 'valid' keep their original size -- confirm
            # whether they need the same truncation.
            ds['input'] = ds['input'][..., 0:cfg.dataset_samples]
            ds['target'] = ds['target'][..., 0:cfg.dataset_samples]
            # Single-argument print(...) behaves identically on Python 2 and 3.
            print("Using only %d samples from dataset" % ds['input'].shape[-1])

        if cfg.get('no_negative_data'):
            minval = np.min(ds['input'])
            if minval < 0:
                print("Adding %.3f to dataset inputs to ensure positive values." % (-minval))
                ds['input'] -= minval
            else:
                print("Dataset inputs are already positive.")

        if cfg.has('preprocess_pca'):
            # Keep an unwhitened copy before 'input' is replaced.
            ds['orig_input'] = np.copy(ds['input'])
            if ds['input'].ndim == 2:
                res = pca_white(ds['input'],
                                n_components=cfg.preprocess_pca,
                                return_axes=True)
            elif ds['input'].ndim == 3:
                res = for_step_data(pca_white)(ds['n_steps'], ds['input'],
                                               n_components=cfg.preprocess_pca,
                                               return_axes=True)
            else:
                raise ValueError("unrecognized dimensionality of  input variable")
            ds['input'], ds['meta_pca_vars'], ds['meta_pca_axes'], ds['meta_pca_means'] = res
            print("Keeping %d principal components (PCA) with variances:" % cfg.preprocess_pca)
            print(ds['meta_pca_vars'])
            # Persist the PCA parameters next to the other outputs.
            np.savez_compressed(join(cfg.out_dir, "pca.npz"),
                                pca_vars=ds['meta_pca_vars'],
                                pca_axes=ds['meta_pca_axes'],
                                pca_means=ds['meta_pca_means'])

        if cfg.get('use_training_as_validation'):
            ds['meta_use_training_as_validation'] = cfg.use_training_as_validation

        return ds
示例#2
0
 def perform_pca(self, data):
     """
     Performs the same PCA whitening as done during preprocessing.

     If 'preprocess_pca' is not configured, the data is returned unchanged.
     Otherwise the PCA parameters stored on self.dataset (meta_pca_vars,
     meta_pca_axes, meta_pca_means) are passed to pca_white.

     :param data: data[feature, smpl] or data[feature, step, smpl]
     :return: whitened[comp, smpl] or whitened[comp, step, smpl]
     :raises ValueError: if PCA is configured and data is neither 2- nor
                         3-dimensional
     """
     if not self.cfg.has('preprocess_pca'):
         return data

     # Fail loudly instead of implicitly returning None for unsupported
     # shapes, mirroring the check done during preprocessing.
     if data.ndim not in (2, 3):
         raise ValueError("unrecognized dimensionality of data: %d" % data.ndim)

     pca_params = dict(variances=self.dataset.meta_pca_vars,
                       axes=self.dataset.meta_pca_axes,
                       means=self.dataset.meta_pca_means)
     if data.ndim == 2:
         return pca_white(data, **pca_params)

     # 3-dimensional data: every sample is given the full number of steps
     # (data.shape[1]).
     n_steps = np.full((data.shape[2],), data.shape[1], dtype=int)
     return for_step_data(pca_white)(n_steps, data, **pca_params)
示例#3
0
 def perform_pca(self, data):
     """
     Performs the same PCA whitening as done during preprocessing.

     If 'preprocess_pca' is not configured, the data is returned unchanged.
     Otherwise the PCA parameters stored on self.dataset (meta_pca_vars,
     meta_pca_axes, meta_pca_means) are passed to pca_white.

     :param data: data[feature, smpl] or data[feature, step, smpl]
     :return: whitened[comp, smpl] or whitened[comp, step, smpl]
     :raises ValueError: if PCA is configured and data is neither 2- nor
                         3-dimensional
     """
     if not self.cfg.has('preprocess_pca'):
         return data

     n_dims = data.ndim
     # Unsupported shapes previously fell through and returned None; raise
     # instead, as preprocessing does for the same condition.
     if n_dims != 2 and n_dims != 3:
         raise ValueError("unrecognized dimensionality of data: %d" % n_dims)

     stored = self.dataset
     if n_dims == 2:
         return pca_white(data,
                          variances=stored.meta_pca_vars,
                          axes=stored.meta_pca_axes,
                          means=stored.meta_pca_means)

     # 3-dimensional data: one step count per sample, each set to the full
     # number of steps (data.shape[1]).
     n_steps = np.full((data.shape[2], ), data.shape[1], dtype=int)
     return for_step_data(pca_white)(
         n_steps,
         data,
         variances=stored.meta_pca_vars,
         axes=stored.meta_pca_axes,
         means=stored.meta_pca_means)
示例#4
0
    def preprocess_dataset(self, ds):
        """
        Preprocesses the dataset according to the options in self.cfg.

        Each step runs only when its option is configured:
          * dataset_input / dataset_target: the named entries are moved to
            the 'input' / 'target' keys.
          * for 3-dimensional input, whichever of 'n_steps' / 'valid' is
            missing is derived from the other one.
          * dataset_samples: input and target are cut to the first
            dataset_samples entries along their last axis.
          * no_negative_data: a negative input minimum is subtracted from
            the input.
          * preprocess_pca: the input is PCA-whitened; a copy of the
            unwhitened input goes to 'orig_input', and the PCA parameters are
            stored in the dataset and saved to "pca.npz" in cfg.out_dir.
          * use_training_as_validation: the setting is copied into the dataset.

        The dataset is modified in place and also returned.

        :param ds: dataset
        :return: preprocessed dataset
        :raises ValueError: if PCA is requested and the input is neither
                            2- nor 3-dimensional
        """
        cfg = self.cfg

        # Normalise user-chosen entry names to 'input' / 'target'.
        if cfg.has('dataset_input') and cfg.dataset_input != 'input':
            ds['input'] = ds[cfg.dataset_input]
            del ds[cfg.dataset_input]
        if cfg.has('dataset_target') and cfg.dataset_target != 'target':
            ds['target'] = ds[cfg.dataset_target]
            del ds[cfg.dataset_target]

        if ds['input'].ndim == 3:
            # ensure that n_steps and valid are both in the dataset
            if 'n_steps' in ds and 'valid' not in ds:
                ds['valid'] = n_steps_to_valid(ds['n_steps'],
                                               ds['input'].shape[1])
            elif 'valid' in ds and 'n_steps' not in ds:
                ds['n_steps'] = valid_to_n_steps(ds['valid'])

        if cfg.has('dataset_samples'):
            # NOTE(review): 'n_steps' and 'valid' are left at their original
            # size while 'input' and 'target' are truncated -- confirm whether
            # they should be truncated as well.
            ds['input'] = ds['input'][..., 0:cfg.dataset_samples]
            ds['target'] = ds['target'][..., 0:cfg.dataset_samples]
            # print(...) with one argument works the same on Python 2 and 3.
            print("Using only %d samples from dataset" % ds['input'].shape[-1])

        if cfg.get('no_negative_data'):
            minval = np.min(ds['input'])
            if minval < 0:
                print("Adding %.3f to dataset inputs to ensure positive values." % (
                    -minval))
                ds['input'] -= minval
            else:
                print("Dataset inputs are already positive.")

        if cfg.has('preprocess_pca'):
            # Preserve the unwhitened input before it is overwritten.
            ds['orig_input'] = np.copy(ds['input'])
            if ds['input'].ndim == 2:
                res = pca_white(ds['input'],
                                n_components=cfg.preprocess_pca,
                                return_axes=True)
            elif ds['input'].ndim == 3:
                res = for_step_data(pca_white)(
                    ds['n_steps'],
                    ds['input'],
                    n_components=cfg.preprocess_pca,
                    return_axes=True)
            else:
                raise ValueError(
                    "unrecognized dimensionality of  input variable")
            ds['input'], ds['meta_pca_vars'], ds['meta_pca_axes'], ds[
                'meta_pca_means'] = res
            print("Keeping %d principal components (PCA) with variances:" % cfg.preprocess_pca)
            print(ds['meta_pca_vars'])
            # Save the PCA parameters alongside the other outputs.
            np.savez_compressed(join(cfg.out_dir, "pca.npz"),
                                pca_vars=ds['meta_pca_vars'],
                                pca_axes=ds['meta_pca_axes'],
                                pca_means=ds['meta_pca_means'])

        if cfg.get('use_training_as_validation'):
            ds['meta_use_training_as_validation'] = cfg.use_training_as_validation

        return ds