def apply(self, dataset, can_fit=False):
    """Replace ``dataset``'s design matrix with its PCA projection.

    When no model is stored yet (``self._pca is None``) a ``CovEigPCA``
    with ``self._num_components`` components is trained on the dataset's
    design matrix, and three compiled functions are cached on ``self``:
    the forward transform, the reconstruction, and the reconstruction
    with ``add_mean=False``.  The dataset is then modified in place.

    Parameters
    ----------
    dataset : object
        Must provide ``get_design_matrix()``, ``set_design_matrix()`` and
        a ``view_converter`` attribute.
    can_fit : bool, optional
        Whether this call is allowed to train the PCA model on
        ``dataset``.  Only consulted when no model is stored yet.

    Raises
    ------
    ValueError
        If no model is stored and ``can_fit`` is false.
    AssertionError
        If the variance sanity check on the projected data fails.
    """
    if self._pca is None:
        if not can_fit:
            raise ValueError("can_fit is False, but PCA preprocessor "
                             "object has no fitted model stored")
        # Imported only on the code path that actually trains a model.
        from pylearn2 import pca
        self._pca = pca.CovEigPCA(self._num_components)
        self._pca.train(dataset.get_design_matrix())
        # `function` is resolved from the enclosing module
        # (presumably theano.function -- confirm against the file imports).
        self._transform_func = function([self._input],
                                        self._pca(self._input))
        self._invert_func = function([self._output],
                                     self._pca.reconstruct(self._output))
        self._convert_weights_func = function(
            [self._output],
            self._pca.reconstruct(self._output, add_mean=False))

    # Keep a handle on the untransformed matrix for the variance report.
    orig_data = dataset.get_design_matrix()
    dataset.set_design_matrix(
        self._transform_func(dataset.get_design_matrix()))
    proc_data = dataset.get_design_matrix()

    orig_var = orig_data.var(axis=0)
    proc_var = proc_data.var(axis=0)
    # Sanity check: the first projected column must carry more variance
    # than any single original column.
    # NOTE(review): this is strict and is also evaluated when a previously
    # fitted model is applied to a different dataset -- confirm that is
    # intended.
    assert proc_var[0] > orig_var.max()

    # TODO: logging
    # A single parenthesised argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('original variance: ' + str(orig_var.sum()))
    print('processed variance: ' + str(proc_var.sum()))

    # Only wrap an existing converter; a dataset without one is left as is.
    if dataset.view_converter is not None:
        new_converter = PCA_ViewConverter(self._transform_func,
                                          self._invert_func,
                                          self._convert_weights_func,
                                          dataset.view_converter)
        dataset.view_converter = new_converter
def apply(self, dataset, can_fit=False):
    """Replace ``dataset``'s design matrix with its PCA projection.

    When no model is stored yet (``self.pca is None``) a ``CovEigPCA``
    with ``self.num_components`` components is trained on the dataset's
    design matrix, and three compiled functions are cached on ``self``:
    the forward transform, the reconstruction, and the reconstruction
    with ``add_mean=False``.  The dataset is then modified in place and
    its ``view_converter`` is wrapped in a ``PCA_ViewConverter``.

    Parameters
    ----------
    dataset : object
        Must provide ``get_design_matrix()``, ``set_design_matrix()`` and
        a ``view_converter`` attribute.
    can_fit : bool, optional
        Whether this call is allowed to train the PCA model on
        ``dataset``.  Only consulted when no model is stored yet.

    Raises
    ------
    ValueError
        If no model is stored and ``can_fit`` is false.
    AssertionError
        If the variance sanity check on the projected data fails.
    """
    if self.pca is None:
        # Raise explicitly instead of `assert can_fit`: asserts are removed
        # under `python -O`, which would let the model be trained silently
        # on data the caller said must not be used for fitting.
        if not can_fit:
            raise ValueError("can_fit is False, but PCA preprocessor "
                             "object has no fitted model stored")
        # Imported only on the code path that actually trains a model.
        from pylearn2 import pca
        self.pca = pca.CovEigPCA(self.num_components)
        self.pca.train(dataset.get_design_matrix())
        # `function` is resolved from the enclosing module
        # (presumably theano.function -- confirm against the file imports).
        self.transform_func = function([self.input], self.pca(self.input))
        self.invert_func = function([self.output],
                                    self.pca.reconstruct(self.output))
        self.convert_weights_func = function(
            [self.output],
            self.pca.reconstruct(self.output, add_mean=False))

    # Keep a handle on the untransformed matrix for the variance report.
    orig_data = dataset.get_design_matrix()
    dataset.set_design_matrix(
        self.transform_func(dataset.get_design_matrix()))
    proc_data = dataset.get_design_matrix()

    orig_var = orig_data.var(axis=0)
    proc_var = proc_data.var(axis=0)
    # Sanity check: the first projected column must carry more variance
    # than any single original column.
    # NOTE(review): this is strict and is also evaluated when a previously
    # fitted model is applied to a different dataset -- confirm that is
    # intended.
    assert proc_var[0] > orig_var.max()

    # A single parenthesised argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('original variance: ' + str(orig_var.sum()))
    print('processed variance: ' + str(proc_var.sum()))

    # NOTE(review): the existing converter is wrapped unconditionally, so a
    # dataset whose view_converter is None ends up with a PCA_ViewConverter
    # around None -- confirm whether a None guard is wanted here.
    dataset.view_converter = PCA_ViewConverter(self.transform_func,
                                               self.invert_func,
                                               self.convert_weights_func,
                                               dataset.view_converter)