n_features=20, n_informative=2, n_redundant=2, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None) y[np.where(y == 0)[0]] = -1. estimator = SVC(C=10, kernel='rbf', gamma=0.4, probability=True) pu_estimator = PUAdapter(estimator, hold_out_ratio=0.2) pu_estimator.fit(X, y) print pu_estimator print print "Comparison of estimator and PUAdapter(estimator):" print "Number of disagreements: ", len(np.where((pu_estimator.predict(X) == estimator.predict(X)) == False)[0]) print "Number of agreements: ", len(np.where((pu_estimator.predict(X) == estimator.predict(X)) == True)[0])
# Relabel the entries selected by `sacrifice` as -1 in the PU training labels.
# NOTE(review): `sacrifice` is built outside this chunk; presumably it indexes
# positive (+1) examples that are being hidden -- confirm.
y_train_pu[sacrifice] = -1.

# Single-argument print(...) calls produce the same output on Python 2 and
# Python 3; print("") stands in for the bare `print` that emitted a blank
# line. The double spaces reproduce the separator the original
# `print a, b` statements inserted.
print("PU transformation applied. We now have:")
n_negative = int(np.count_nonzero(y_train_pu == -1.))
n_positive = int(np.count_nonzero(y_train_pu == +1.))
# After the relabeling above, the -1 count also includes the entries that
# were just set to -1.
print("%d  are benign" % n_negative)
print("%d  are malignant" % n_positive)
print("")

# Get f1 score with pu_learning
print("PU learning in progress...")
estimator = RandomForestClassifier(n_estimators=100,
                                   criterion='gini',
                                   bootstrap=True,
                                   n_jobs=1)
pu_estimator = PUAdapter(estimator)
pu_estimator.fit(X_train, y_train_pu)
y_pred = pu_estimator.predict(X_test)
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred)
# Index 1 selects the second entry of each per-class score array.
# NOTE(review): presumably that is the +1 class, given -1/+1 labels -- confirm.
pu_f1_scores.append(f1_score[1])
print("F1 score:  %s" % f1_score[1])
print("Precision:  %s" % precision[1])
print("Recall:  %s" % recall[1])
print("")

# Get f1 score without pu_learning
# Baseline: the same kind of forest trained directly on the PU labels, with
# no PUAdapter wrapper.
print("Regular learning in progress...")
estimator = RandomForestClassifier(n_estimators=100,
                                   bootstrap=True,
                                   n_jobs=1)
estimator.fit(X_train, y_train_pu)
y_pred = estimator.predict(X_test)
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred)