n_features=20, 
                               n_informative=2, 
                               n_redundant=2, 
                               n_repeated=0, 
                               n_classes=2, 
                               n_clusters_per_class=2, 
                               weights=None, 
                               flip_y=0.01, 
                               class_sep=1.0, 
                               hypercube=True, 
                               shift=0.0, 
                               scale=1.0, 
                               shuffle=True, 
                               random_state=None)
    
    # Relabel class 0 as -1 so the labels handed to PUAdapter are -1/+1.
    y[y == 0] = -1.

    # Base classifier wrapped by the PU adapter; probability=True enables
    # SVC's probability estimates (presumably needed by PUAdapter -- confirm
    # against its implementation).
    estimator = SVC(C=10,
                    kernel='rbf',
                    gamma=0.4,
                    probability=True)
    pu_estimator = PUAdapter(estimator, hold_out_ratio=0.2)

    pu_estimator.fit(X, y)

    # Predict once per model and reuse the results for both counts.
    # NOTE(review): `estimator` is never fitted directly in this snippet; the
    # comparison relies on PUAdapter.fit having fitted the wrapped instance in
    # place -- confirm against PUAdapter.
    pu_pred = pu_estimator.predict(X)
    base_pred = estimator.predict(X)
    n_disagreements = int(np.count_nonzero(pu_pred != base_pred))
    n_agreements = int(np.count_nonzero(pu_pred == base_pred))

    # Single-argument print(...) calls emit the same text on Python 2 (where
    # the parentheses are plain grouping) and on Python 3.
    print(pu_estimator)
    print("")
    print("Comparison of estimator and PUAdapter(estimator):")
    print("Number of disagreements:  %d" % n_disagreements)
    print("Number of agreements:  %d" % n_agreements)
Exemplo n.º 2
0
 # Relabel the first n_sacrifice entries of `pos` as -1 in the PU training
 # labels (`pos` is defined outside this view; presumably indices of the
 # positive training examples -- confirm).
 sacrifice = pos[:n_sacrifice]
 y_train_pu[sacrifice] = -1.

 # Single-argument print(...) calls emit the same text on Python 2 (where
 # the parentheses are plain grouping) and on Python 3.
 print("PU transformation applied. We now have:")
 print("%d  are bening" % np.count_nonzero(y_train_pu == -1.))
 print("%d  are malignant" % np.count_nonzero(y_train_pu == +1.))
 print("")

 # Get f1 score with pu_learning
 print("PU learning in progress...")
 estimator = RandomForestClassifier(n_estimators=100,
                                    criterion='gini',
                                    bootstrap=True,
                                    n_jobs=1)
 pu_estimator = PUAdapter(estimator)
 pu_estimator.fit(X_train, y_train_pu)
 y_pred = pu_estimator.predict(X_test)
 # The metric arrays hold one entry per class; index 1 is reported below
 # (presumably the +1 / malignant class -- confirm against the label set).
 precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred)
 pu_f1_scores.append(f1_score[1])
 print("F1 score:  %s" % f1_score[1])
 print("Precision:  %s" % precision[1])
 print("Recall:  %s" % recall[1])
 print("")

 # Get f1 score without pu_learning: a plain forest trained on the same
 # PU-transformed labels (criterion is not passed explicitly here).
 print("Regular learning in progress...")
 estimator = RandomForestClassifier(n_estimators=100,
                                    bootstrap=True,
                                    n_jobs=1)
 estimator.fit(X_train, y_train_pu)
 y_pred = estimator.predict(X_test)