def preprocess_dataset(self, ds):
    """
    Preprocesses the dataset according to the experiment configuration:
    renames the configured input/target fields, ensures step masks are
    present, optionally truncates samples, shifts inputs to be non-negative
    and applies PCA whitening.
    :param ds: dataset
    :return: preprocessed dataset
    """
    # assumes module-level imports: numpy as np, os.path.join, and this
    # repo's pca_white, for_step_data, n_steps_to_valid, valid_to_n_steps

    # rename the configured input/target fields to the canonical names
    if self.cfg.has('dataset_input') and self.cfg.dataset_input != 'input':
        ds['input'] = ds[self.cfg.dataset_input]
        del ds[self.cfg.dataset_input]
    if self.cfg.has('dataset_target') and self.cfg.dataset_target != 'target':
        ds['target'] = ds[self.cfg.dataset_target]
        del ds[self.cfg.dataset_target]

    if ds['input'].ndim == 3:
        # ensure that n_steps and valid are both in the dataset
        if 'n_steps' in ds and 'valid' not in ds:
            ds['valid'] = n_steps_to_valid(ds['n_steps'], ds['input'].shape[1])
        elif 'valid' in ds and 'n_steps' not in ds:
            ds['n_steps'] = valid_to_n_steps(ds['valid'])

    # optionally keep only the first dataset_samples samples
    if self.cfg.has('dataset_samples'):
        ds['input'] = ds['input'][..., 0:self.cfg.dataset_samples]
        ds['target'] = ds['target'][..., 0:self.cfg.dataset_samples]
        print "Using only %d samples from dataset" % ds['input'].shape[-1]

    # optionally shift the inputs so that all values are non-negative
    if self.cfg.get('no_negative_data'):
        minval = np.min(ds['input'])
        if minval < 0:
            print "Adding %.3f to dataset inputs to ensure positive values." % (-minval)
            ds['input'] -= minval
        else:
            print "Dataset inputs are already positive."

    # optionally whiten the inputs with PCA and store the PCA parameters,
    # keeping the original inputs in orig_input
    if self.cfg.has('preprocess_pca'):
        ds['orig_input'] = np.copy(ds['input'])
        if ds['input'].ndim == 2:
            res = pca_white(ds['input'], n_components=self.cfg.preprocess_pca,
                            return_axes=True)
        elif ds['input'].ndim == 3:
            res = for_step_data(pca_white)(ds['n_steps'], ds['input'],
                                           n_components=self.cfg.preprocess_pca,
                                           return_axes=True)
        else:
            raise ValueError("unrecognized dimensionality of input variable")
        ds['input'], ds['meta_pca_vars'], ds['meta_pca_axes'], \
            ds['meta_pca_means'] = res
        print "Keeping %d principal components (PCA) with variances:" % \
            self.cfg.preprocess_pca
        print ds['meta_pca_vars']
        np.savez_compressed(join(self.cfg.out_dir, "pca.npz"),
                            pca_vars=ds['meta_pca_vars'],
                            pca_axes=ds['meta_pca_axes'],
                            pca_means=ds['meta_pca_means'])

    if self.cfg.get('use_training_as_validation'):
        ds['meta_use_training_as_validation'] = self.cfg.use_training_as_validation

    return ds
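# A minimal sketch of the step-mask helpers used above, assuming the
# data[feature, step, smpl] layout from the docstrings. The repo's actual
# n_steps_to_valid / valid_to_n_steps implementations live elsewhere and
# may differ; the bodies below are an editor's reconstruction.
import numpy as np

def n_steps_to_valid(n_steps, max_steps):
    # valid[step, smpl] is True for step indices below n_steps[smpl]
    return np.arange(max_steps)[:, None] < n_steps[None, :]

def valid_to_n_steps(valid):
    # the number of valid steps per sample is the count of True entries
    # in that sample's column
    return np.sum(valid, axis=0).astype(int)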
def perform_pca(self, data):
    """
    Performs the same PCA whitening as done during preprocessing.
    :param data: data[feature, smpl] or data[feature, step, smpl]
    :return: whitened[comp, smpl] or whitened[comp, step, smpl]
    """
    if self.cfg.has('preprocess_pca'):
        if data.ndim == 2:
            return pca_white(data,
                             variances=self.dataset.meta_pca_vars,
                             axes=self.dataset.meta_pca_axes,
                             means=self.dataset.meta_pca_means)
        elif data.ndim == 3:
            # treat every sample as having the full number of steps
            n_steps = np.full((data.shape[2],), data.shape[1], dtype=int)
            return for_step_data(pca_white)(
                n_steps, data,
                variances=self.dataset.meta_pca_vars,
                axes=self.dataset.meta_pca_axes,
                means=self.dataset.meta_pca_means)
        else:
            # previously this case fell through and returned None implicitly;
            # raise like preprocess_dataset does for unexpected shapes
            raise ValueError("unrecognized dimensionality of data variable")
    else:
        # no PCA was configured, so the data passes through unchanged
        return data
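# The calls above imply a contract for the repo's pca_white routine:
# fitting with n_components/return_axes yields (whitened, variances, axes,
# means), and re-applying with stored variances/axes/means projects new data
# the same way. The function below is an illustrative reconstruction under
# an assumed data[feature, smpl] layout, not the actual implementation,
# hence the _sketch suffix.
import numpy as np

def pca_white_sketch(data, n_components=None, variances=None, axes=None,
                     means=None, return_axes=False):
    if axes is None:
        # fit: center the data, eigendecompose the feature covariance and
        # keep the n_components axes with the largest variances
        means = np.mean(data, axis=1, keepdims=True)
        variances, axes = np.linalg.eigh(np.cov(data - means))
        order = np.argsort(variances)[::-1][:n_components]
        variances, axes = variances[order], axes[:, order]
    # project onto the principal axes and scale each component to unit variance
    whitened = np.dot(axes.T, data - means) / np.sqrt(variances)[:, None]
    if return_axes:
        return whitened, variances, axes, means
    return whitened

# Round-trip check (illustrative): fitting and then re-applying with the
# stored parameters should give the same whitened output.
#   w1, v, a, m = pca_white_sketch(x, n_components=3, return_axes=True)
#   w2 = pca_white_sketch(x, variances=v, axes=a, means=m)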