def test_solver_fill_methods_with_low_rank_random_matrix():
    """Check every SimpleFill strategy against the shared incomplete matrix.

    For each fill strategy the module-level ``XY_incomplete`` is completed
    and compared with ``XY`` through ``reconstruction_error``; the second
    value it returns (bound to ``missing_mae``) must stay below 5.
    """
    strategies = ("zero", "mean", "median", "min", "random")
    for fill_method in strategies:
        XY_completed = SimpleFill(fill_method=fill_method).fit_transform(
            XY_incomplete)
        label = "Solver with fill_method=%s" % fill_method
        _unused, missing_mae = reconstruction_error(
            XY, XY_completed, missing_mask, name=label)
        assert missing_mae < 5, (
            "Error too high for Solver with %s fill method!" % fill_method)
def run_impute(self, X, state='train'):
    """Fit each configured imputer on ``X`` and store the filled matrices.

    Work is only done when ``state == 'train'``.  In that case, for every
    name in ``self.impute_method`` a fresh imputer is created,
    ``fit_transform(X)`` is called on it, and:

    * ``self.train_data[name]`` receives the filled matrix,
    * ``self.impute_operator[name]`` receives the imputer object,
    * ``self.train_data['ave']`` ends up as the element-wise mean of all
      filled matrices (their sum divided by ``len(self.impute_method)``).

    Supported names are ``'mean'`` (``SimpleFill()``), ``'KNN'``,
    ``'IterativeSVD'`` and ``'MatrixFactorization'``; each imputer is
    built with its default arguments.

    Args:
        X: Array-like exposing ``shape[0]`` and ``shape[1]``; passed
            unchanged to every imputer's ``fit_transform``.
        state: Only the value ``'train'`` triggers fitting.

    Returns:
        ``0``.

    Raises:
        ValueError: If ``self.impute_method`` contains an unsupported name.
            Previously such a name either raised ``UnboundLocalError`` or
            silently re-used the imputer from the preceding iteration.
    """
    if state == 'train':
        # Running sum of the filled matrices; turned into a mean below.
        self.train_data['ave'] = np.zeros([X.shape[0], X.shape[1]])
        for imp_method in self.impute_method:
            if imp_method == 'mean':
                imp_ope = SimpleFill()
            elif imp_method == 'KNN':
                imp_ope = KNN()
            elif imp_method == 'IterativeSVD':
                imp_ope = IterativeSVD()
            elif imp_method == 'MatrixFactorization':
                imp_ope = MatrixFactorization()
            else:
                # Fail loudly instead of fitting a stale imputer under the
                # wrong key.
                raise ValueError(
                    "Unknown impute method: %r" % (imp_method,))
            X_filled = imp_ope.fit_transform(X)
            self.train_data[imp_method] = X_filled
            self.impute_operator[imp_method] = imp_ope
            self.train_data['ave'] += X_filled
        self.train_data['ave'] /= len(self.impute_method)
    return 0
SimpleFill)  # closes an import list that starts above this excerpt

# Demo: build a low-rank matrix, hide some entries, then fill them back in
# with several imputers.

# X will be n x m; inner_rank is the shared dimension of its two factors.
n = 200
m = 20
inner_rank = 4
# Product of an (n, inner_rank) and an (inner_rank, m) Gaussian matrix, so
# the rank of X is at most inner_rank.
X = np.dot(np.random.randn(n, inner_rank), np.random.randn(inner_rank, m))
print("Mean squared element: %0.4f" % (X**2).mean())

# X is a data matrix which we're going to randomly drop entries from:
# each entry is masked independently with probability 0.1.
missing_mask = np.random.rand(*X.shape) < 0.1
X_incomplete = X.copy()
# Missing entries are indicated with NaN.
X_incomplete[missing_mask] = np.nan

meanFill = SimpleFill("mean")
X_filled_mean = meanFill.fit_transform(X_incomplete)

# Use the 3 nearest rows which have a feature to fill in each row's missing
# features.
knnImpute = KNN(k=3)
X_filled_knn = knnImpute.fit_transform(X_incomplete)

# Matrix completion using convex optimization to find a low-rank solution
# that still matches the observed values. Slow!
X_filled_nnm = NuclearNormMinimization().fit_transform(X_incomplete)

# Rather than solving the nuclear norm objective directly, induce sparsity
# using singular value thresholding.
softImpute = SoftImpute()

# Simultaneously normalizes the rows and columns of your observed data,
# sometimes useful for low-rank imputation methods.