def test_rank1_symmetric_convex_solver():
    """Symmetric rank-1 completion: the solver, constrained to return a
    symmetric matrix, should recover a held-out entry to within 0.01."""
    ground_truth, with_missing = create_rank1_data(symmetric=True)
    solver = NuclearNormMinimization(require_symmetric_solution=True)
    recovered = solver.complete(with_missing)
    # Compare one masked entry against the known ground-truth value.
    gap = abs(recovered[1, 2] - ground_truth[1, 2])
    assert gap < 0.01, \
        "Expected %0.4f but got %0.4f" % (
            ground_truth[1, 2], recovered[1, 2])
def test_rank1_convex_solver():
    """Non-symmetric rank-1 completion via nuclear norm minimization.

    Uses an explicit iteration cap and checks a single held-out entry
    against the ground-truth matrix to within 0.01.
    """
    truth, incomplete = create_rank1_data(symmetric=False)
    solver = NuclearNormMinimization(max_iters=50000)
    completed = solver.complete(incomplete)
    expected_value = truth[1, 2]
    actual_value = completed[1, 2]
    assert abs(actual_value - expected_value) < 0.01, \
        "Expected %0.4f but got %0.4f" % (expected_value, actual_value)
def test_rank1_symmetric_convex_solver():
    """Symmetric rank-1 completion, checked to a tight 0.001 tolerance."""
    exact, masked = create_rank1_data(symmetric=True)
    # Constrain the solution to be symmetric, matching the input structure.
    result = NuclearNormMinimization(
        require_symmetric_solution=True).complete(masked)
    assert abs(result[1, 2] - exact[1, 2]) < 0.001, \
        "Expected %0.4f but got %0.4f" % (exact[1, 2], result[1, 2])
def test_nuclear_norm_minimization_with_low_rank_random_matrix():
    """Nuclear-norm completion of the first 100 rows of a low-rank matrix
    should achieve mean absolute error below 0.1 on the missing entries."""
    n_rows = 100
    truth_slice = XY[:n_rows]
    mask_slice = missing_mask[:n_rows]
    solver = NuclearNormMinimization(max_iters=2000)
    filled = solver.complete(XY_incomplete[:n_rows])
    _, missing_mae = reconstruction_error(
        truth_slice, filled, mask_slice, name="NuclearNorm")
    assert missing_mae < 0.1, "Error too high!"
from fancyimpute import NuclearNormMinimization

# Matrix completion via nuclear norm minimization, with imputed values
# clamped to [0, 1]; error_tolerance is presumably the convergence
# threshold — confirm against the fancyimpute documentation.
solver = NuclearNormMinimization(
    min_value=0.0, max_value=1.0, error_tolerance=0.0005)

# X_incomplete has missing data which is represented with NaN values
X_filled = solver.complete(X_incomplete)
from sklearn.preprocessing import Imputer imp = Imputer(missing_values='NaN', strategy="mean", axis=0) #strategy: "mean" or "median" or "most_frequent" train['N30_missing_imputed'] = imp.fit_transform(train['N30'].values.reshape( -1, 1)) imp.fit_transform( train.iloc[:, 1:]) #Removing first column as it is a text variable #Reference: https://pypi.python.org/pypi/fancyimpute/0.0.4 #pip3 install fancyimpute #ONLY NUMERIC VALUES from fancyimpute import NuclearNormMinimization, KNN, MICE solver = NuclearNormMinimization(min_value=0.0, max_value=1.0, error_tolerance=0.0005) X_filled = solver.complete(train['N30'].values.reshape(-1, 1)) X_filled = solver.complete(train) X_filled_knn = KNN(k=3).complete(train) #https://github.com/hammerlab/fancyimpute/blob/master/fancyimpute/mice.py X_filled_mice = MICE().complete(train.as_matrix()) X_filled_mice_df = pd.DataFrame(X_filled_mice) X_filled_mice_df.columns = train.columns X_filled_mice_df.index = train.index #Other methods: SimpleFill, SoftImpute, IterativeSVD, MICE, MatrixFactorization, NuclearNormMinimization, KNN, BiScaler #SimpleFill: uses mean or median; SoftImpute: Matrix completion; ###Smote #Only numeric/boolean and non_null values as input to TSNE model :: BETTER TRY THIS AFTER MISSING VALUE IMPUTATION AND ENCODING from imblearn.over_sampling import SMOTE sm = SMOTE(random_state=42) X_train_new, y_train_new = sm.fit_sample(train.dropna().iloc[:, 1:44],
def test_nuclear_norm_minimization_with_low_rank_random_matrix():
    """Unconstrained (non-symmetric) nuclear-norm completion on the first
    100 rows: missing-entry MAE must come in under 0.1."""
    # No symmetry constraint for this variant of the test.
    completer = NuclearNormMinimization(require_symmetric_solution=False)
    reconstructed = completer.complete(XY_incomplete[:100])
    _, missing_mae = reconstruction_error(
        XY[:100],
        reconstructed,
        missing_mask[:100],
        name="NuclearNorm")
    assert missing_mae < 0.1, "Error too high!"