def setUp(self):
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9
    random_state = 42

    X, y = load_breast_cancer(return_X_y=True)
    self.X_train, self.X_test, self.y_train, self.y_test = \
        train_test_split(X, y, test_size=0.4, random_state=random_state)

    clf_weights = np.array([0.1, 0.4, 0.1, 0.2, 0.2])

    classifiers = [
        DecisionTreeClassifier(random_state=random_state),
        LogisticRegression(random_state=random_state),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=random_state),
        GradientBoostingClassifier(random_state=random_state)
    ]

    self.clf = SimpleClassifierAggregator(classifiers, method='average',
                                          weights=clf_weights)
    self.clf.fit(self.X_train, self.y_train)
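# A hedged sketch, not part of the original test file: one way the roc_floor and
# accuracy_floor fixtures set in setUp above could be exercised. It assumes the
# enclosing unittest.TestCase class and sklearn.metrics' accuracy_score /
# roc_auc_score are available; the method name is illustrative only.
def test_prediction_quality(self):
    y_test_predicted = self.clf.predict(self.X_test)
    self.assertGreaterEqual(accuracy_score(self.y_test, y_test_predicted),
                            self.accuracy_floor)

    y_test_scores = self.clf.predict_proba(self.X_test)[:, 1]
    self.assertGreaterEqual(roc_auc_score(self.y_test, y_test_scores),
                            self.roc_floor)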
    SVC(probability=True),
    KNeighborsClassifier()
]

# Define some combination methods to be compared. Note that the dict values
# below capture `classifiers` while it still names the base-estimator list
# above; the name is rebound to this dict only after the literal is evaluated.
classifiers = {
    'Logistic Regression': LogisticRegression(),
    'Gaussian NB': GaussianNB(),
    'Support Vector Machine': SVC(probability=True),
    'k Nearest Neighbors': KNeighborsClassifier(),
    'Simple Average': SimpleClassifierAggregator(base_estimators=classifiers,
                                                 method='average'),
    'Simple Maximization': SimpleClassifierAggregator(
        base_estimators=classifiers, method='maximization'),
    'Stacking': Stacking(base_estimators=classifiers, shuffle_data=True),
    'Stacking_RF': Stacking(base_estimators=classifiers, shuffle_data=True,
                            meta_clf=RandomForestClassifier(
                                random_state=random_state))
}

# Show all classifiers
for i, clf in enumerate(classifiers.keys()):
    print('Model', i + 1, clf)
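# Illustrative continuation (a sketch, not part of the original script): fit
# each model defined above and report its test ROC AUC. Assumes X_train,
# X_test, y_train, y_test exist and that np and roc_auc_score are imported,
# as in the other examples in this file.
for clf_name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_test_scores = clf.predict_proba(X_test)[:, 1]
    print(clf_name, '| ROC AUC:',
          np.round(roc_auc_score(y_test, y_test_scores), decimals=4))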
clf.fit(X_train, y_train)
evaluate_print('Random Forest |', y_test, clf.predict(X_test))
print()

# initialize a group of classifiers
classifiers = [
    DecisionTreeClassifier(random_state=random_state),
    LogisticRegression(random_state=random_state),
    KNeighborsClassifier(),
    RandomForestClassifier(random_state=random_state),
    GradientBoostingClassifier(random_state=random_state)
]

# combine by averaging
clf = SimpleClassifierAggregator(classifiers, method='average')
clf.fit(X_train, y_train)
y_test_predicted = clf.predict(X_test)
evaluate_print('Combination by avg |', y_test, y_test_predicted)

# combine by weighted averaging
clf_weights = np.array([0.1, 0.4, 0.1, 0.2, 0.2])
clf = SimpleClassifierAggregator(classifiers, method='average',
                                 weights=clf_weights)
clf.fit(X_train, y_train)
y_test_predicted = clf.predict(X_test)
evaluate_print('Combination by w_avg |', y_test, y_test_predicted)

# combine by maximization
clf = SimpleClassifierAggregator(classifiers, method='maximization')
clf.fit(X_train, y_train)
y_test_predicted = clf.predict(X_test)
evaluate_print('Combination by max |', y_test, y_test_predicted)
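# Hedged extension (not in the original excerpt): combo's documentation also
# lists 'median' and 'majority_vote' methods for SimpleClassifierAggregator,
# so the same pattern should apply; treat the method names as an assumption
# about the installed combo version.
clf = SimpleClassifierAggregator(classifiers, method='majority_vote')
clf.fit(X_train, y_train)
evaluate_print('Combination by majority vote |', y_test, clf.predict(X_test))

clf = SimpleClassifierAggregator(classifiers, method='median')
clf.fit(X_train, y_train)
evaluate_print('Combination by median |', y_test, clf.predict(X_test))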
              clf.predict_proba(X_test)[:, 1]), decimals=4))
print()

# initialize a group of classifiers
classifiers = [
    DecisionTreeClassifier(random_state=random_state),
    LogisticRegression(random_state=random_state),
    KNeighborsClassifier(),
    RandomForestClassifier(random_state=random_state),
    GradientBoostingClassifier(random_state=random_state)
]

# combine by averaging
clf = SimpleClassifierAggregator(classifiers, method='average')
clf.fit(X_train, y_train)
y_test_predicted = clf.predict_proba(X_test)
print(
    'Combination by avg |',
    np.round(roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1]),
             decimals=4))

# combine by weighted averaging
clf_weights = np.array([0.1, 0.4, 0.1, 0.2, 0.2])
clf = SimpleClassifierAggregator(classifiers, method='average',
                                 weights=clf_weights)
clf.fit(X_train, y_train)
y_test_predicted = clf.predict_proba(X_test)
print(
    'Combination by w_avg |',
    np.round(roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1]),
             decimals=4))