def worker(d_indx, key, data):
    """Cross-validate the kNN ensembles on one dataset and save the scores.

    Parameters
    ----------
    d_indx
        Not used inside this function.
    key
        Dataset name; used in the progress messages and in the name of the
        saved file.
    data
        Pair ``(X, y)``. Both are indexed with the index arrays yielded by
        the splitter — presumably NumPy arrays; confirm against the caller.

    Returns
    -------
    None
        The scores are written with ``np.save`` to ``results/knn/<key>_knn``.

    Notes
    -----
    ``scores`` has shape ``(32, n_splits * n_repeats, len(metrics))``.
    Rows 0 and 1 hold "MV-kNN" and "ACC-kNN". Starting at row 2 there is one
    row per element obtained by iterating the "PRUNE-kNN" prediction, so the
    fixed 32 rows leave room for at most 30 such elements; unused rows stay
    zero.
    """
    print("Dataset: %s start" % key)
    X, y = data

    # Base learner used as the prototype for every ensemble below.
    b = KNeighborsClassifier(weights='distance')
    n_estimators = 50
    acc_prob = False

    base_clf = StratifiedBagging(base_estimator=b, ensemble_size=n_estimators,
                                 acc_prob=True, random_state=1410)
    clfs = {
        "MV-kNN": StratifiedBagging(base_estimator=b, ensemble_size=n_estimators,
                                    acc_prob=False, random_state=1410),
        "ACC-kNN": StratifiedBagging(base_estimator=b, ensemble_size=n_estimators,
                                     acc_prob=True, random_state=1410),
        "PRUNE-kNN": CMOE(base_estimator=base_clf, random_state=1410,
                          diversity="kw", hard_voting=acc_prob),
    }

    n_splits = 5
    n_repeats = 1
    rskf = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats,
                                   random_state=1410)
    scores = np.zeros((32, n_splits * n_repeats, len(metrics)))

    for fold_id, (train, test) in enumerate(rskf.split(X, y)):
        print("FOLD %i: " % fold_id)
        y_test = y[test]
        for clf_id, prototype in enumerate(clfs.values()):
            # Fit a fresh clone so nothing fitted in one fold is carried into
            # the next. The original overwrote the clone with the prototype.
            clf = clone(prototype)
            clf.fit(X[train], y[train])
            y_pred = clf.predict(X[test])

            if clf_id < 2:
                # The two bagging variants give a single prediction vector.
                for m_indx, metric in enumerate(metrics.values()):
                    scores[clf_id, fold_id, m_indx] = metric(y_test, y_pred)
            else:
                # The prediction is iterated and every element is scored on
                # its own row, starting at row ``clf_id``.
                for e_n, ensemble_pred in enumerate(y_pred):
                    for m_indx, metric in enumerate(metrics.values()):
                        scores[clf_id + e_n, fold_id, m_indx] = metric(
                            y_test, ensemble_pred)

    np.save("results/knn/%s_knn" % key, scores)
    print("Dataset: %s end" % key)
def worker(d_indx, key, data):
    """Cross-validate the oversampled GaussianNB ensembles on one dataset.

    Parameters
    ----------
    d_indx
        Not used inside this function.
    key
        Dataset name; used in the progress messages and in the name of the
        saved file.
    data
        Pair ``(X, y)``. Both are indexed with the index arrays yielded by
        the splitter — presumably NumPy arrays; confirm against the caller.

    Returns
    -------
    None
        A score array of shape
        ``(len(clfs), n_splits * n_repeats, len(metrics))`` is written with
        ``np.save`` to ``results/gnb/<key>_gnb_preproc``.
    """
    print("Dataset: %s start" % key)
    X, y = data

    b = GaussianNB()
    n_estimators = 50

    # NOTE(review): the keys are prefixed "MV" while acc_prob=True is passed
    # to every ensemble — confirm the naming is intended.
    clfs = {
        "MV-GNB-%s" % sampler: StratifiedBagging(
            base_estimator=b, ensemble_size=n_estimators, acc_prob=True,
            random_state=1410, oversampled=sampler)
        for sampler in ("ROS", "SMOTE", "SVMSMOTE", "B2SMOTE")
    }

    n_splits = 5
    n_repeats = 1
    rskf = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats,
                                   random_state=1410)
    scores = np.zeros((len(clfs), n_splits * n_repeats, len(metrics)))

    for fold_id, (train, test) in enumerate(rskf.split(X, y)):
        print("FOLD %i: " % fold_id)
        y_test = y[test]
        for clf_id, prototype in enumerate(clfs.values()):
            # Fit a fresh clone so nothing fitted in one fold is carried into
            # the next. The original overwrote the clone with the prototype.
            clf = clone(prototype)
            clf.fit(X[train], y[train])
            y_pred = clf.predict(X[test])
            for m_indx, metric in enumerate(metrics.values()):
                scores[clf_id, fold_id, m_indx] = metric(y_test, y_pred)

    np.save("results/gnb/%s_gnb_preproc" % key, scores)
    print("Dataset: %s end" % key)
from skmultiflow.trees import HoeffdingTree

# Exactly one command-line argument is expected: the integer random state.
# The message is Polish for "provide RS". Exit with a non-zero status through
# sys.exit; the bare exit() builtin comes from the `site` module, is meant
# for interactive use, and reported success (status 0) on this error path.
if len(sys.argv) != 2:
    print("PODAJ RS")
    sys.exit(1)

random_state = int(sys.argv[1])
print(random_state)

# Select streams and methods
streams = h.toystreams(random_state)
print(len(streams))


def _bagged_gnb():
    """Return a new StratifiedBagging over GaussianNB with random_state=42."""
    return StratifiedBagging(base_estimator=GaussianNB(), random_state=42)


# Plain SEA plus one SEA per `des` variant; every ensemble gets its own
# base-estimator instance.
sea = SEA(base_estimator=_bagged_gnb())
knorau1 = SEA(base_estimator=_bagged_gnb(), des="KNORAU1")
knorau2 = SEA(base_estimator=_bagged_gnb(), des="KNORAU2")
knorae1 = SEA(base_estimator=_bagged_gnb(), des="KNORAE1")
knorae2 = SEA(base_estimator=_bagged_gnb(), des="KNORAE2")
clfs = (sea, knorau1, knorau2, knorae1, knorae2)
from sklearn.svm import SVC if len(sys.argv) != 2: print("PODAJ RS") exit() else: random_state = int(sys.argv[1]) print(random_state) # Select streams and methods streams = h.toystreams(random_state) print(len(streams)) none_knorau1 = SEA(base_estimator=StratifiedBagging(base_estimator=SVC( probability=True, random_state=42), random_state=42, oversampler="None"), oversampled="None", des="KNORAU1") rus_knorau1 = SEA(base_estimator=StratifiedBagging(base_estimator=SVC( probability=True, random_state=42), random_state=42, oversampler="RUS"), oversampled="RUS", des="KNORAU1") cnn_knorau1 = SEA(base_estimator=StratifiedBagging(base_estimator=SVC( probability=True, random_state=42), random_state=42, oversampler="CNN"), oversampled="CNN", des="KNORAU1") none_knorae1 = SEA(base_estimator=StratifiedBagging(base_estimator=SVC( probability=True, random_state=42), random_state=42, oversampler="None"), oversampled="None", des="KNORAE1") rus_knorae1 = SEA(base_estimator=StratifiedBagging(base_estimator=SVC( probability=True, random_state=42), random_state=42, oversampler="RUS"), oversampled="RUS", des="KNORAE1") cnn_knorae1 = SEA(base_estimator=StratifiedBagging(base_estimator=SVC( probability=True, random_state=42), random_state=42, oversampler = "CNN"), oversampled="CNN" ,des="KNORAE1") clfs = (none_knorau1, rus_knorau1, cnn_knorau1, none_knorae1, rus_knorae1, cnn_knorae1) # Define worker def worker(i, stream_n):
exit() else: random_state = int(sys.argv[1]) print(random_state) # Select streams and methods streams = h.streams(random_state) print(len(streams)) ob = OnlineBagging(n_estimators=20, base_estimator=GaussianNB()) oob = OOB(n_estimators=20, base_estimator=GaussianNB()) uob = UOB(n_estimators=20, base_estimator=GaussianNB()) ros_knorau2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(), random_state=42, oversampler="ROS"), oversampled="ROS", des="KNORAU2") cnn_knorau2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(), random_state=42, oversampler="CNN"), oversampled="CNN", des="KNORAU2") ros_knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(), random_state=42, oversampler="ROS"), oversampled="ROS", des="KNORAE2") cnn_knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(), random_state=42,
if len(sys.argv) != 2: print("PODAJ RS") exit() else: random_state = int(sys.argv[1]) print(random_state) # Select streams and methods streams = h.toystreams(random_state) print(len(streams)) sea = SEA(base_estimator=StratifiedBagging(base_estimator=HoeffdingTree( split_criterion='hellinger'), random_state=42)) knorau1 = SEA(base_estimator=StratifiedBagging( base_estimator=HoeffdingTree(split_criterion='hellinger'), random_state=42), des="KNORAU1") knorau2 = SEA(base_estimator=StratifiedBagging( base_estimator=HoeffdingTree(split_criterion='hellinger'), random_state=42), des="KNORAU2") knorae1 = SEA(base_estimator=StratifiedBagging( base_estimator=HoeffdingTree(split_criterion='hellinger'), random_state=42), des="KNORAE1") knorae2 = SEA(base_estimator=StratifiedBagging( base_estimator=HoeffdingTree(split_criterion='hellinger'),
# Exactly one command-line argument is expected: the integer random state.
# The message is Polish for "provide RS". Exit with a non-zero status through
# sys.exit; the bare exit() builtin comes from the `site` module, is meant
# for interactive use, and reported success (status 0) on this error path.
if len(sys.argv) != 2:
    print("PODAJ RS")
    sys.exit(1)

random_state = int(sys.argv[1])
print(random_state)

# Select streams and methods
streams = h.toystreams(random_state)
print(len(streams))


def _knn_sea(sampling, des):
    """Build a SEA over distance-weighted kNN bagging.

    `sampling` is passed both as the bagging's `oversampler` and as the SEA's
    `oversampled` argument; `des` is passed to SEA unchanged.
    """
    bagging = StratifiedBagging(
        base_estimator=KNeighborsClassifier(weights='distance'),
        random_state=42, oversampler=sampling)
    return SEA(base_estimator=bagging, oversampled=sampling, des=des)


none_knorau1 = _knn_sea("None", "KNORAU1")
ros_knorau1 = _knn_sea("ROS", "KNORAU1")
b2_knorau1 = _knn_sea("B2", "KNORAU1")
) import sys from sklearn.base import clone from sklearn.tree import DecisionTreeClassifier from skmultiflow.trees import HoeffdingTree # Select streams and methods streams = h.realstreams() print(len(streams)) ob = OnlineBagging(n_estimators=20, base_estimator=GaussianNB()) oob = OOB(n_estimators=20, base_estimator=GaussianNB()) uob = UOB(n_estimators=20, base_estimator=GaussianNB()) # sea = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42)) ros_knorau2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42, oversampler="ROS"), oversampled="ROS", des="KNORAU2") cnn_knorau2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42, oversampler="CNN"), oversampled="CNN", des="KNORAU2") ros_knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42, oversampler="ROS"), oversampled="ROS", des="KNORAE2") cnn_knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(), random_state=42, oversampler = "CNN"), oversampled="CNN" ,des="KNORAE2") clfs = (ob, oob, uob, ros_knorau2, cnn_knorau2, ros_knorae2, cnn_knorae2) # Define worker def worker(i, stream_n): stream = streams[stream_n] key = list(streams.keys())[i] cclfs = [clone(clf) for clf in clfs]
from csm import SEA, StratifiedBagging, REA, LearnppCDS, LearnppNIE, OUSE, KMeanClustering, rea, TestThenTrain from sklearn.naive_bayes import GaussianNB from strlearn.streams import StreamGenerator # from strlearn.evaluators import TestThenTrain from strlearn.metrics import (balanced_accuracy_score, f1_score, geometric_mean_score_1, precision, recall, specificity) import matplotlib.pyplot as plt from scipy.ndimage.filters import gaussian_filter1d import numpy as np from sklearn.metrics import roc_auc_score rea = REA(base_classifier=StratifiedBagging(base_estimator=GaussianNB(), random_state=42), number_of_classifiers=5) cds = LearnppCDS(base_classifier=StratifiedBagging(base_estimator=GaussianNB(), random_state=42), number_of_classifiers=5) nie = LearnppNIE(base_classifier=StratifiedBagging(base_estimator=GaussianNB(), random_state=42), number_of_classifiers=5) ouse = OUSE(base_classifier=StratifiedBagging(base_estimator=GaussianNB(), random_state=42), number_of_classifiers=5) kmc = KMeanClustering(base_classifier=StratifiedBagging( base_estimator=GaussianNB(), random_state=42), number_of_classifiers=5) sea = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(), random_state=42),
from skmultiflow.trees import HoeffdingTree if len(sys.argv) != 2: print("PODAJ RS") exit() else: random_state = int(sys.argv[1]) print(random_state) # Select streams and methods streams = h.toystreams(random_state) print(len(streams)) none_knorau2 = SEA(base_estimator=StratifiedBagging( base_estimator=GaussianNB(), random_state=42, oversampler="None"), oversampled="None", des="KNORAU2") ros_knorau2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(), random_state=42, oversampler="ROS"), oversampled="ROS", des="KNORAU2") b2_knorau2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(), random_state=42, oversampler="B2"), oversampled="B2", des="KNORAU2") none_knorae2 = SEA(base_estimator=StratifiedBagging( base_estimator=GaussianNB(), random_state=42, oversampler="None"), oversampled="None",
recall, specificity ) import sys from sklearn.base import clone from sklearn.tree import DecisionTreeClassifier from skmultiflow.trees import HoeffdingTree import time import matplotlib.pyplot as plt # Select streams and methods streams = h.timestream(100) print(len(streams)) ros_knorau2_3 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42, oversampler="ROS"), oversampled="ROS", des="KNORAU2", n_estimators=3) ros_knorau2_5 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42, oversampler="CNN"), oversampled="CNN", des="KNORAU2", n_estimators=5) ros_knorau2_10 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42, oversampler="ROS"), oversampled="ROS", des="KNORAU2", n_estimators=10) ros_knorau2_15 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42, oversampler="ROS"), oversampled="ROS", des="KNORAU2", n_estimators=15) ros_knorau2_30 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42, oversampler="ROS"), oversampled="ROS", des="KNORAU2", n_estimators=30) # cnn_knorau2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( # ), random_state=42, oversampler="CNN"), oversampled="CNN", des="KNORAU2") # ros_knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( # ), random_state=42, oversampler="ROS"), oversampled="ROS", des="KNORAE2") # cnn_knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( # ), random_state=42, oversampler="CNN"), oversampled="CNN", des="KNORAE2")
from sklearn.neighbors import KNeighborsClassifier

# Exactly one command-line argument is expected: the integer random state.
# The message is Polish for "provide RS". Exit with a non-zero status through
# sys.exit; the bare exit() builtin comes from the `site` module, is meant
# for interactive use, and reported success (status 0) on this error path.
if len(sys.argv) != 2:
    print("PODAJ RS")
    sys.exit(1)

random_state = int(sys.argv[1])
print(random_state)

# Select streams and methods
streams = h.toystreams(random_state)
print(len(streams))


def _bagged_knn():
    """Return a new StratifiedBagging over distance-weighted kNN (seed 42)."""
    return StratifiedBagging(
        base_estimator=KNeighborsClassifier(weights='distance'),
        random_state=42)


# Plain SEA plus one SEA per `des` variant; every ensemble gets its own
# base-estimator instance.
sea = SEA(base_estimator=_bagged_knn())
knorau1 = SEA(base_estimator=_bagged_knn(), des="KNORAU1")
knorau2 = SEA(base_estimator=_bagged_knn(), des="KNORAU2")
knorae1 = SEA(base_estimator=_bagged_knn(), des="KNORAE1")
knorae2 = SEA(base_estimator=_bagged_knn(), des="KNORAE2")
clfs = (sea, knorau1, knorau2, knorae1, knorae2)

# Define worker
from sklearn.svm import SVC if len(sys.argv) != 2: print("PODAJ RS") exit() else: random_state = int(sys.argv[1]) print(random_state) # Select streams and methods streams = h.toystreams(random_state) print(len(streams)) sea = SEA(base_estimator=StratifiedBagging( base_estimator=SVC(probability=True, random_state=42), random_state=42)) knorau1 = SEA(base_estimator=StratifiedBagging(base_estimator=SVC( probability=True, random_state=42), random_state=42), des="KNORAU1") knorau2 = SEA(base_estimator=StratifiedBagging(base_estimator=SVC( probability=True, random_state=42), random_state=42), des="KNORAU2") knorae1 = SEA(base_estimator=StratifiedBagging(base_estimator=SVC( probability=True, random_state=42), random_state=42), des="KNORAE1") knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=SVC( probability=True, random_state=42), random_state=42),
# Exactly one command-line argument is expected: the integer random state.
# The message is Polish for "provide RS". Exit with a non-zero status through
# sys.exit; the bare exit() builtin comes from the `site` module, is meant
# for interactive use, and reported success (status 0) on this error path.
if len(sys.argv) != 2:
    print("PODAJ RS")
    sys.exit(1)

random_state = int(sys.argv[1])
print(random_state)

# Select streams and methods
streams = h.toystreams(random_state)
print(len(streams))


def _svc_sea(sampling, des):
    """Build a SEA over bagged probability-enabled SVCs.

    `sampling` is passed both as the bagging's `oversampler` and as the SEA's
    `oversampled` argument; `des` is passed to SEA unchanged.
    """
    bagging = StratifiedBagging(
        base_estimator=SVC(probability=True, random_state=42),
        random_state=42, oversampler=sampling)
    return SEA(base_estimator=bagging, oversampled=sampling, des=des)


none_knorau1 = _svc_sea("None", "KNORAU1")
ros_knorau1 = _svc_sea("ROS", "KNORAU1")
b2_knorau1 = _svc_sea("B2", "KNORAU1")
geometric_mean_score_1, precision, recall, specificity ) import sys from sklearn.base import clone from sklearn.tree import DecisionTreeClassifier from skmultiflow.trees import HoeffdingTree # Select streams and methods streams = h.moa_streams() print(len(streams)) rea = REA(base_classifier=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42), number_of_classifiers=5) cds = LearnppCDS(base_classifier=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42), number_of_classifiers=5) nie = LearnppNIE(base_classifier=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42), number_of_classifiers=5) ouse = OUSE(base_classifier=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42), number_of_classifiers=5) kmc = KMeanClustering(base_classifier=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42), number_of_classifiers=5) ros_knorau2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42, oversampler="ROS"), oversampled="ROS", des="KNORAU2") cnn_knorau2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42, oversampler="CNN"), oversampled="CNN", des="KNORAU2") ros_knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB( ), random_state=42, oversampler="ROS"), oversampled="ROS", des="KNORAE2") cnn_knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(), random_state=42, oversampler = "CNN"), oversampled="CNN" ,des="KNORAE2")
from sklearn.neural_network import MLPClassifier if len(sys.argv) != 2: print("PODAJ RS") exit() else: random_state = int(sys.argv[1]) print(random_state) # Select streams and methods streams = h.toystreams(random_state) print(len(streams)) gnb = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(), random_state=42)) ht = SEA(base_estimator=StratifiedBagging(base_estimator=HoeffdingTree(), random_state=42)) clfs = (gnb, ht) # Define worker def worker(i, stream_n): stream = streams[stream_n] cclfs = [clone(clf) for clf in clfs] print("Starting stream %i/%i" % (i + 1, len(streams))) eval = TestThenTrain(metrics=( balanced_accuracy_score, geometric_mean_score_1, f1_score,
from sklearn.base import clone
from sklearn.tree import DecisionTreeClassifier
from skmultiflow.trees import HoeffdingTree

# Streams to evaluate on, and how many of them there are.
streams = h.realstreams()
print(len(streams))


def _hellinger_tree():
    """Return a new HoeffdingTree using the 'hellinger' split criterion."""
    return HoeffdingTree(split_criterion='hellinger')


def _des_sea(sampling, des):
    """Return a SEA over bagged Hoeffding trees.

    `sampling` is given both to the bagging (`oversampler`) and to the SEA
    (`oversampled`); `des` is passed to SEA unchanged.
    """
    bagging = StratifiedBagging(base_estimator=_hellinger_tree(),
                                random_state=42, oversampler=sampling)
    return SEA(base_estimator=bagging, oversampled=sampling, des=des)


# Online ensembles, each built with n_estimators=20 and its own tree.
ob = OnlineBagging(n_estimators=20, base_estimator=_hellinger_tree())
oob = OOB(n_estimators=20, base_estimator=_hellinger_tree())
uob = UOB(n_estimators=20, base_estimator=_hellinger_tree())

# SEA variants differing in sampling setting and `des` setting.
ros_knorau2 = _des_sea("ROS", "KNORAU2")
cnn_knorau2 = _des_sea("CNN", "KNORAU2")
ros_knorae2 = _des_sea("ROS", "KNORAE2")
from strlearn.evaluators import TestThenTrain from sklearn.naive_bayes import GaussianNB from strlearn.metrics import (balanced_accuracy_score, f1_score, geometric_mean_score_1, precision, recall, specificity) import sys from sklearn.base import clone from sklearn.tree import DecisionTreeClassifier from skmultiflow.trees import HoeffdingTree # Select streams and methods streams = h.realstreams2() print(len(streams)) rea = REA(base_classifier=StratifiedBagging( base_estimator=HoeffdingTree(split_criterion='hellinger'), random_state=42), number_of_classifiers=5) cds = LearnppCDS(base_classifier=StratifiedBagging( base_estimator=HoeffdingTree(split_criterion='hellinger'), random_state=42), number_of_classifiers=5) nie = LearnppNIE(base_classifier=StratifiedBagging( base_estimator=HoeffdingTree(split_criterion='hellinger'), random_state=42), number_of_classifiers=5) ouse = OUSE(base_classifier=StratifiedBagging( base_estimator=HoeffdingTree(split_criterion='hellinger'), random_state=42), number_of_classifiers=5) kmc = KMeanClustering(base_classifier=StratifiedBagging(