def test_fit():
    """Fitting on labeled source and unlabeled target data must mark the model trained."""
    half = 5
    source = rnd.randn(2 * half, 2)
    labels = np.concatenate((-np.ones(half), np.ones(half)))
    # Target samples drawn from a shifted distribution (mean offset by +1).
    target = rnd.randn(2 * half, 2) + 1
    model = ImportanceWeightedClassifier()
    model.fit(source, labels, target)
    assert model.is_trained
def test_predict():
    """Predictions on target data must only use labels seen during training."""
    half = 5
    source = rnd.randn(2 * half, 2)
    labels = np.concatenate((-np.ones(half), np.ones(half)))
    # Target samples drawn from a shifted distribution (mean offset by +1).
    target = rnd.randn(2 * half, 2) + 1
    model = ImportanceWeightedClassifier()
    model.fit(source, labels, target)
    predictions = model.predict(target)
    # No predicted label may fall outside the training label set.
    assert len(np.setdiff1d(np.unique(predictions), np.unique(labels))) == 0
def run_6():
    """Demo: importance-weighted classifier (quadratic loss, KMM weights) on toy data.

    Draws 10 two-dimensional source points with +/-1 labels and 10 unlabeled
    target points, fits the adaptive classifier, and prints target predictions.
    """
    X = np.random.randn(10, 2)
    # BUGFIX: use hstack, not vstack. vstack of two (5,) arrays produces a
    # (2, 5) label matrix, which does not match X's 10 rows; hstack yields the
    # required flat (10,) label vector (consistent with the sibling tests).
    y = np.hstack((-np.ones((5,)), np.ones((5,))))
    Z = np.random.randn(10, 2)
    from libtlda.iw import ImportanceWeightedClassifier
    clf = ImportanceWeightedClassifier(loss='quadratic', iwe='kmm')
    clf.fit(X, y, Z)
    u_pred = clf.predict(Z)
    print(u_pred)
def model_build(classifier, trian_features, train_labels, test_features):
    """Build, fit, persist and return a classifier chosen by name.

    Parameters
    ----------
    classifier : str
        One of "IW", "SUBA", "TCPR" (domain-adaptive models that also consume
        the unlabeled target features) or "LR", "SVM", "RF" (standard models
        tuned with a 3-fold grid search).
    trian_features : array-like
        Source/training feature matrix. (Parameter name keeps the original
        spelling for caller compatibility.)
    train_labels : array-like
        Labels aligned with `trian_features`.
    test_features : array-like
        Target-domain feature matrix; used as unlabeled adaptation data by the
        domain-adaptive models.

    Returns
    -------
    model
        The fitted estimator, also dumped to a timestamped ``.model`` file.

    Raises
    ------
    ValueError
        If `classifier` is not one of the recognized names.
    """
    if classifier == "IW":
        # Importance-weighted classifier with kernel-mean-matching weights;
        # features are standardized before fitting.
        model = ImportanceWeightedClassifier(iwe='kmm')
        model.fit(preprocessing.scale(trian_features), train_labels,
                  preprocessing.scale(test_features))
    elif classifier == "SUBA":
        # Classifier based on subspace alignment.
        model = SubspaceAlignedClassifier(loss_function='logistic')
        model.fit(preprocessing.scale(trian_features), train_labels,
                  preprocessing.scale(test_features))
    elif classifier == "TCPR":
        # Target Contrastive Pessimistic Classifier.
        model = TargetContrastivePessimisticClassifier(l2=0.1)
        model.fit(preprocessing.scale(trian_features), train_labels,
                  preprocessing.scale(test_features))
    else:
        if classifier == "LR":
            pipe = make_pipeline(StandardScaler(), LogisticRegression())
            param_grid = [{'logisticregression__C': [1, 10, 100]}]
        elif classifier == "SVM":
            pipe = make_pipeline(StandardScaler(),
                                 SVC(kernel='linear', probability=True))
            param_grid = [{'svc__C': [0.01, 0.1, 1]}]
        elif classifier == "RF":
            pipe = make_pipeline(StandardScaler(),
                                 RandomForestClassifier(max_features='sqrt'))
            param_grid = {
                'randomforestclassifier__n_estimators': range(230, 300, 10),
                'randomforestclassifier__max_depth': range(8, 12, 1),
                'randomforestclassifier__min_samples_leaf': range(1, 5, 1),
                'randomforestclassifier__max_features': range(1, 20, 1),
            }
        else:
            # BUGFIX: the original fell through with `pipe` undefined and
            # crashed with a NameError; fail fast with a clear message instead.
            raise ValueError("Unrecognized classifier: %r" % (classifier,))
        model = GridSearchCV(pipe, param_grid, cv=3)
        model.fit(trian_features, train_labels)
        # BUGFIX: best_params_ only exists on grid-searched models, so the
        # report is printed here instead of unconditionally for every branch
        # (the IW/SUBA/TCPR estimators would raise AttributeError).
        print(model.best_params_)
    # Persist the fitted model under a timestamped filename, e.g. "RF0131-120000.model".
    model_file_name = classifier + time.strftime("%m%d-%H%M%S") + ".model"
    joblib.dump(filename=model_file_name, value=model)
    return model
def apply_NN(trainX, trainY, testX, testY, window, source_pos, target_pos):
    """Score nearest-neighbour-weighted classifiers (Loog, 2015) with three losses.

    Trains one ImportanceWeightedClassifier per loss (decision tree, logistic
    regression, Bernoulli naive Bayes) on source data, adapts to the target
    features, and returns a single-row DataFrame of accuracies.
    """
    scores = {}
    # Fixed order DT -> LR -> NB, matching the original evaluation sequence.
    for tag, loss_name in (('DT', 'dtree'), ('LR', 'logistic'), ('NB', 'berno')):
        print("\n Nearest-neighbour-based weighting (Loog, 2015) ")
        model = ImportanceWeightedClassifier(iwe='nn', loss=loss_name)
        model.fit(trainX, trainY, testX)
        predicted = model.predict(testX)
        scores[tag] = check_accuracy(testY, predicted)
    return pd.DataFrame(
        [{
            'window': window,
            'source_position': source_pos,
            'target_position': target_pos,
            'acc_LR_NN': scores['LR'][0],
            'acc_LR_NN_INFO': str(scores['LR'][1]),
            'acc_DT_NN': scores['DT'][0],
            'acc_DT_NN_INFO': str(scores['DT'][1]),
            'acc_NB_NN': scores['NB'][0],
            'acc_NB_NN_INFO': str(scores['NB'][1]),
        }]
    )
def apply_KMM(trainX, trainY, testX, testY, window, source_pos, target_pos):
    """Score kernel-mean-matching classifiers (Huang et al., 2006) with three losses.

    Trains one ImportanceWeightedClassifier per loss (decision tree, logistic
    regression, Bernoulli naive Bayes) on source data, adapts to the target
    features, and returns a single-row DataFrame of accuracies.
    """
    # Header is printed once, before the first model (as in the original flow).
    print("\n Kernel Mean Matching (Huang et al., 2006) ")
    scores = {}
    # Fixed order DT -> LR -> NB, matching the original evaluation sequence.
    for tag, loss_name in (('DT', 'dtree'), ('LR', 'logistic'), ('NB', 'berno')):
        model = ImportanceWeightedClassifier(iwe='kmm', loss=loss_name)
        model.fit(trainX, trainY, testX)
        predicted = model.predict(testX)
        scores[tag] = check_accuracy(testY, predicted)
    return pd.DataFrame(
        [{
            'window': window,
            'source_position': source_pos,
            'target_position': target_pos,
            'acc_LR_KMM': scores['LR'][0],
            'acc_LR_KMM_INFO': str(scores['LR'][1]),
            'acc_DT_KMM': scores['DT'][0],
            'acc_DT_KMM_INFO': str(scores['DT'][1]),
            'acc_NB_KMM': scores['NB'][0],
            'acc_NB_KMM_INFO': str(scores['NB'][1]),
        }]
    )
# Classifier based on subspace alignment clf = StructuralCorrespondenceClassifier(num_pivots=2, num_components=1) elif classifier == 'rba': # Robust bias-aware classifier clf = RobustBiasAwareClassifier(l2=0.1, max_iter=1000) elif classifier == 'flda': # Feature-level domain-adaptive classifier clf = FeatureLevelDomainAdaptiveClassifier(l2=0.1, max_iter=1000) elif classifier == 'tcpr': # Target Contrastive Pessimistic Classifier clf = TargetContrastivePessimisticClassifier(l2=0.1, tolerance=1e-20) else: raise ValueError('Classifier not recognized.') # Train classifier clf.fit(X, y, Z) # Make predictions pred_adapt = clf.predict(Z) # Compute error rates err_naive = np.mean(pred_naive != u, axis=0) err_adapt = np.mean(pred_adapt != u, axis=0) # Report results print('Error naive: ' + str(err_naive)) print('Error adapt: ' + str(err_adapt))
# Split out training data trn_X = X[trn_index, :] trn_Y = Y[trn_index] # Split out test data tst_X = X[tst_index, :] tst_Y = Y[tst_index] # Define classifiers clf_n = linear_model.LogisticRegression(C=0.1) clf_a = ImportanceWeightedClassifier(loss='logistic', l2=0.1) # Train classifier on data from current and previous days clf_n.fit(trn_X, trn_Y) clf_a.fit(trn_X, trn_Y, tst_X) # Make predictions preds_n = clf_n.predict(tst_X) preds_a = clf_a.predict(tst_X) # Test on data from current day and store perf_n.append(np.mean(preds_n != tst_Y)) perf_a.append(np.mean(preds_a != tst_Y)) # Store day and rumour days_array.append(days[d]) rums_array.append(rumour) # Compact to DataFrame performance = pd.DataFrame({
def build_models(trainX, trainY, testX, testY, source_pos, target_pos, window):
    """Train and score a battery of semi-supervised, baseline, domain-adaptive
    and ensemble classifiers; return one row of accuracies per run.

    Parameters
    ----------
    trainX, trainY : array-like
        Source-domain features and labels.
    testX, testY : array-like
        Target-domain features and held-out labels used for scoring.
    source_pos, target_pos, window : any
        Run identifiers, echoed unchanged into the output row.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with an accuracy column (and a stringified INFO
        column) for every model configuration evaluated below.
    """
    #######################
    ### SEMI-SUPERVISED ###
    ########################
    # Label Propagation
    label_prop_model = LabelPropagation(kernel='knn')
    label_prop_model.fit(trainX, trainY)
    Y_Pred = label_prop_model.predict(testX)
    acc_ss_propagation, acc_ss_propagation_INFO = checkAccuracy(testY, Y_Pred)
    # Label Spreading
    label_prop_models_spr = LabelSpreading(kernel='knn')
    label_prop_models_spr.fit(trainX, trainY)
    Y_Pred = label_prop_models_spr.predict(testX)
    acc_ss_spreading, acc_ss_spreading_INFO = checkAccuracy(testY, Y_Pred)
    ########################
    #### WITHOUT TL ########
    ########################
    # Plain source-trained baselines, no transfer learning.
    # LogisticRegression
    modelLR = LogisticRegression()
    modelLR.fit(trainX, trainY)
    predLR = modelLR.predict(testX)
    accLR, acc_LR_INFO = checkAccuracy(testY, predLR)
    # DecisionTreeClassifier
    modelDT = tree.DecisionTreeClassifier()
    modelDT.fit(trainX, trainY)
    predDT = modelDT.predict(testX)
    accDT, acc_DT_INFO = checkAccuracy(testY, predDT)
    # BernoulliNB
    modelNB = BernoulliNB()
    modelNB.fit(trainX, trainY)
    # NOTE(review): local is spelled "predND" (presumably a typo for predNB);
    # harmless since it is used consistently on the next line.
    predND = modelNB.predict(testX)
    accNB, acc_NB_INFO = checkAccuracy(testY, predND)
    # print("WITHOUT TL ACC_LR:", accLR, " ACC_DT:", accDT, " ACC_NB:", accNB)
    ########################
    #### WITH TL    ########
    ########################
    # Each adaptive model is fit on (trainX, trainY) plus the unlabeled testX,
    # then scored against the held-out testY.
    ####################################################
    ### Kernel Mean Matching (Huang et al., 2006)    ###
    # Decision Tree
    print("\n Kernel Mean Matching (Huang et al., 2006) ")
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_KMM, acc_DT_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_KMM)
    # Logistic Regression
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_KMM, acc_LR_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_KMM)
    # Naive Bayes Bernoulli
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_KMM, acc_NB_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_KMM)
    ####################################################
    ### Nearest-neighbour-based weighting (Loog, 2015) ###
    # Decision Tree
    print("\n Nearest-neighbour-based weighting (Loog, 2015) ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_NN, acc_DT_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_NN)
    # Logistic Regression
    print("\n Nearest-neighbour-based weighting (Loog, 2015) ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_NN, acc_LR_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_NN)
    # Naive Bayes Bernoulli
    print("\n Nearest-neighbour-based weighting (Loog, 2015) ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_NN, acc_NB_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_NN)
    ####################################################
    ### Transfer Component Analysis (Pan et al, 2009) ###
    # Decision Tree
    print("\n Transfer Component Analysis (Pan et al, 2009)")
    classifier = TransferComponentClassifier(loss="dtree", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_TCA, acc_DT_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_TCA)
    # Logistic Regression
    classifier = TransferComponentClassifier(loss="logistic", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_TCA, acc_LR_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_TCA)
    # Naive Bayes Bernoulli
    classifier = TransferComponentClassifier(loss="berno", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_TCA, acc_NB_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_TCA)
    ####################################################
    ### Subspace Alignment (Fernando et al., 2013)   ###
    # Decision Tree
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_SA, acc_DT_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_SA)
    # Logistic Regression
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_SA, acc_LR_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_SA)
    # Naive Bayes Bernoulli
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_SA, acc_NB_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_SA)
    #################################
    ############# ENSEMBLE ##########
    #################################
    # NOTE(review): twelve classifiers are instantiated here but only the three
    # decision-tree variants (TCA/NN/KMM) are actually fed to the ensemble
    # below — the other nine are unused; confirm whether that is intentional.
    classifier_SA_DT = SubspaceAlignedClassifier(loss="dtree")
    classifier_SA_LR = SubspaceAlignedClassifier(loss="logistic")
    classifier_SA_NB = SubspaceAlignedClassifier(loss="berno")
    classifier_TCA_DT = TransferComponentClassifier(loss="dtree")
    classifier_TCA_LR = TransferComponentClassifier(loss="logistic")
    classifier_TCA_NB = TransferComponentClassifier(loss="berno")
    classifier_NN_DT = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier_NN_LR = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier_NN_NB = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier_KMM_DT = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier_KMM_LR = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier_KMM_NB = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    #
    eclf = EnsembleClassifier(
        clfs=[classifier_TCA_DT, classifier_NN_DT, classifier_KMM_DT])
    eclf.fit(trainX, trainY, testX)
    pred = eclf.predict_v2(testX)
    acc_ENSEMBLE, acc_ENSEMBLE_INFO = checkAccuracy(testY, pred)
    ########################
    #### RETURN     ########
    ########################
    # One row: run identifiers plus every accuracy/INFO pair computed above.
    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_SS_propagation': acc_ss_propagation,
        'acc_SS_propagation_INFO': acc_ss_propagation_INFO,
        'acc_SS_spreading': acc_ss_spreading,
        'acc_SS_spreading_INFO': acc_ss_spreading_INFO,
        'acc_ENSEMBLE': acc_ENSEMBLE,
        'acc_LR': accLR,
        'acc_LR_INFO': str(acc_LR_INFO),
        'acc_DT': accDT,
        'acc_DT_INFO': str(acc_DT_INFO),
        'acc_NB': accNB,
        'acc_NB_INFO': str(acc_NB_INFO),
        'acc_LR_KMM': acc_LR_KMM,
        'acc_LR_KMM_INFO': str(acc_LR_KMM_INFO),
        'acc_LR_NN': acc_LR_NN,
        'acc_LR_NN_INFO': str(acc_LR_NN_INFO),
        'acc_LR_TCA': acc_LR_TCA,
        'acc_LR_TCA_INFO': str(acc_LR_TCA_INFO),
        'acc_LR_SA': acc_LR_SA,
        'acc_LR_SA_INFO': str(acc_LR_SA_INFO),
        'acc_DT_KMM': acc_DT_KMM,
        'acc_DT_KMM_INFO': str(acc_DT_KMM_INFO),
        'acc_DT_NN': acc_DT_NN,
        'acc_DT_NN_INFO': str(acc_DT_NN_INFO),
        'acc_DT_TCA': acc_DT_TCA,
        'acc_DT_TCA_INFO': str(acc_DT_TCA_INFO),
        'acc_DT_SA': acc_DT_SA,
        'acc_DT_SA_INFO': str(acc_DT_SA_INFO),
        'acc_NB_KMM': acc_NB_KMM,
        'acc_NB_KMM_INFO': str(acc_NB_KMM_INFO),
        'acc_NB_NN': acc_NB_NN,
        'acc_NB_NN_INFO': str(acc_NB_NN_INFO),
        'acc_NB_TCA': acc_NB_TCA,
        'acc_NB_TCA_INFO': str(acc_NB_TCA_INFO),
        'acc_NB_SA': acc_NB_SA,
        'acc_NB_SA_INFO': str(acc_NB_SA_INFO)
    }])
target_probas_ = clf.predict(Y) #print(classes_test.shape,Y.shape,n_test_samples) accuracy_targ = sum((target_probas_>0.5)==classes_test)/(1.0*n_test_samples) print (subject,target_subj,index,l,iw,accuracy_org,accuracy_targ) elif mode==3: # Evaluation of classifiers using the full training set as test set y = classes_train nsamples = y.shape[0] n_test_samples = classes_test.shape[0] for index in [1,2,6,7,8,9,10,11,12,13]: #range(1,n_classifiers): for iw in [0,1,2,3]: iwe = weighting_functions[iw] w_clf = ImportanceWeightedClassifier(iwe=iwe) X = np.asarray(MEG_data_train) Y = np.asarray(MEG_data_test) w_clf.fit(X,y,Y) if iwe == 'lr': w = w_clf.iwe_logistic_discrimination(X, Y) elif iwe == 'rg': w = w_clf.iwe_ratio_gaussians(X, Y) elif iwe == 'nn': w = w_clf.iwe_nearest_neighbours(X, Y) elif iwe == 'kde': w = w_clf.iwe_kernel_densities(X, Y) elif iwe == 'kmm': w = w_clf.iwe_kernel_mean_matching(X, Y) else: raise NotImplementedError('Estimator not implemented.') clf = Weighted_InitClassifier(index) # Find importance-weights
def run_5(n_samples=100):
    """Demo: importance-weighted (KMM) fit of a cubic toy problem under
    covariate shift, plotted against an OLS fit on the target data.

    Parameters
    ----------
    n_samples : int, default 100
        Number of points drawn for both the source (X) and target (Z) sets.
    """
    from libtlda.iw import ImportanceWeightedClassifier
    clf = ImportanceWeightedClassifier(loss='quadratic', iwe='kmm')
    # X ~ N(0.5, 0.5²)
    # Z ~ N(0.0, 0.3²)
    # NOTE(review): np.random.normal's second argument is a standard
    # deviation, so 0.5**2 / 0.3**2 are used as std devs here, not variances —
    # confirm which was intended given the comments above.
    x = np.random.normal(0.5, 0.5**2, (n_samples, 1))
    z = np.random.normal(0, 0.3**2, (n_samples, 1))
    x_noise = np.random.normal(0, 0.07, (n_samples, 1))
    z_noise = np.random.normal(0, 0.03, (n_samples, 1))

    def data_func(var):
        # Ground-truth signal: cubic with two turning points on [-1, 1].
        return var**3 - var
    y = data_func(x)
    y = np.array(y)
    y = y.ravel()  # + noise
    X = x + x_noise
    Z = z + z_noise  # different distribution to approximate from an initial one
    y_bis = data_func(z)
    y_bis = np.array(y_bis)
    y_bis = y_bis.ravel()
    print(X.shape)
    print(y.shape)
    print(Z.shape)
    print(y_bis.shape)
    clf.fit(X, y, Z)
    preds = clf.predict(Z)
    # NOTE(review): this measures distance between predictions and the target
    # *inputs* Z rather than the target responses y_bis — verify intent.
    print(np.linalg.norm(preds - Z))
    from sklearn.linear_model import LinearRegression
    clf_linear = LinearRegression()
    clf_linear.fit(Z, y_bis)
    true_coefs = clf_linear.coef_
    # print(clf.get_weights())
    print(preds)
    # plot facilities
    x_range = np.linspace(-0.4, 1.2, 100)
    # NOTE(review): multiplying the 100-point x_range by the n_samples-long
    # preds vector relies on broadcasting and only works for n_samples == 100;
    # confirm the intended line construction.
    kmm_line = x_range * preds
    true_line = x_range * true_coefs
    plt.axis([-0.4, 1.2, -0.5, 1])
    plt.scatter(X, y, label='X points', color='blue', marker='o')
    plt.plot(x_range, data_func(x_range), label='X distribution', color='blue')
    plt.scatter(Z, y_bis, label='Z points', color='red', marker='+')
    plt.plot(x_range, kmm_line, label='Z kmm regression line', color='red')
    plt.plot(x_range, true_line, label='Z OLS line', color='black')
    plt.legend()
    plt.show()


"""