def conduct(self, file=None):
    """Evaluate the ensembles on one generated stream per random seed.

    For every seed in ``self._streams_random_seeds`` a ``StreamGenerator``
    is built, processed by ``self._evaluator`` together with
    ``self._ensembles``, and the evaluator's scores are copied into the
    matching slot of ``self._scores``. When ``self._gradual_drift`` is
    truthy the generator additionally receives
    ``concept_sigmoid_spacing=5``.

    Parameters
    ----------
    file : str, optional
        Destination passed to ``np.save`` for ``self._scores``. Nothing is
        written when it is ``None``.
    """
    for seed_index, seed in enumerate(self._streams_random_seeds):
        # Settings common to both drift variants.
        generator_kwargs = dict(
            n_chunks=self.n_chunks,
            chunk_size=500,
            n_drifts=5,
            weights=self._proportions,
            n_features=2,
            n_informative=2,
            n_redundant=0,
            n_repeated=0,
            random_state=seed,
        )
        # Only the gradual variant sets a sigmoid spacing; otherwise the
        # generator keeps its own default.
        if self._gradual_drift:
            generator_kwargs["concept_sigmoid_spacing"] = 5

        stream = StreamGenerator(**generator_kwargs)
        self._evaluator.process(stream, self._ensembles)
        self._scores[seed_index, :, :, :] = self._evaluator.scores[:, :, :]

    if file is None:
        return
    print("Saving file " + file)
    np.save(file, self._scores)
def streams(random_state):
    """Build the dictionary of synthetic streams for one random seed.

    A ``StreamGenerator`` is configured for every combination of the
    ``incremental`` flag, class distribution, label-noise level and concept
    sigmoid spacing listed below. The combination "incremental drift
    without a sigmoid spacing" is left out.

    Parameters
    ----------
    random_state
        Seed forwarded unchanged to every ``StreamGenerator``.

    Returns
    -------
    dict
        Maps ``str(stream)`` to the corresponding ``StreamGenerator``, in
        the order the combinations are enumerated.
    """
    from itertools import product

    # Variables
    # Alternative class distributions:
    # [[0.95, 0.05], [0.90, 0.10], [0.85, 0.15]]
    distributions = [[0.97, 0.03]]
    label_noises = [0.01, 0.03, 0.05]
    incremental = [False, True]
    ccs = [5, None]
    n_drifts = 1

    # Prepare streams
    generated = {}
    # product() enumerates the grid in the same order as four nested loops
    # (drift type outermost, spacing innermost).
    grid = product(incremental, distributions, label_noises, ccs)
    for drift_type, distribution, flip_y, spacing in grid:
        # Skip incremental drift when no sigmoid spacing is given; the check
        # runs before construction so no throw-away generator is built.
        if spacing is None and drift_type:
            continue
        stream = StreamGenerator(
            incremental=drift_type,
            weights=distribution,
            random_state=random_state,
            y_flip=flip_y,
            concept_sigmoid_spacing=spacing,
            n_drifts=n_drifts,
            chunk_size=250,
            n_chunks=200,
            n_clusters_per_class=1,
            n_features=8,
            n_informative=8,
            n_redundant=0,
            n_repeated=0,
        )
        generated[str(stream)] = stream

    return generated
def timestream(chunk_size):
    """Build the short stream(s) with a caller-chosen chunk size.

    The parameter grid below currently holds a single value per axis, so
    the result contains one non-incremental stream with five chunks. The
    grid structure is kept so further values can be added; the combination
    "incremental drift without a sigmoid spacing" is left out.

    Parameters
    ----------
    chunk_size
        Forwarded to ``StreamGenerator`` as ``chunk_size``.

    Returns
    -------
    dict
        Maps ``str(stream)`` to the corresponding ``StreamGenerator``.
    """
    from itertools import product

    # Variables
    distributions = [[0.80, 0.20]]
    label_noises = [0.01]
    incremental = [False]
    ccs = [None]
    n_drifts = 1

    # Prepare streams
    generated = {}
    # product() enumerates the grid in the same order as four nested loops
    # (drift type outermost, spacing innermost).
    grid = product(incremental, distributions, label_noises, ccs)
    for drift_type, distribution, flip_y, spacing in grid:
        # Skip incremental drift when no sigmoid spacing is given; the check
        # runs before construction so no throw-away generator is built.
        if spacing is None and drift_type:
            continue
        stream = StreamGenerator(
            incremental=drift_type,
            weights=distribution,
            random_state=1994,  # fixed seed: every call uses the same one
            y_flip=flip_y,
            concept_sigmoid_spacing=spacing,
            n_drifts=n_drifts,
            chunk_size=chunk_size,
            n_chunks=5,
            n_clusters_per_class=1,
            n_features=8,
            n_informative=8,
            n_redundant=0,
            n_repeated=0,
        )
        generated[str(stream)] = stream

    return generated
import numpy as np from tqdm import tqdm from strlearn.evaluators import TestThenTrain mcargs = { "n_classes": 2, "n_chunks": 250, "chunk_size": 400, "n_features": 10, } streams = { "incremental1": StreamGenerator(**mcargs, n_drifts=1, concept_sigmoid_spacing=5, incremental=True, weights=[0.8, 0.2], random_state=14), "incremental2": StreamGenerator(**mcargs, n_drifts=1, concept_sigmoid_spacing=5, incremental=True, weights=[0.8, 0.2], random_state=67), "incremental3": StreamGenerator(**mcargs, n_drifts=1, concept_sigmoid_spacing=5, incremental=True, weights=[0.8, 0.2],
import numpy as np from sklearn.naive_bayes import GaussianNB import matplotlib.pyplot as plt from strlearn.streams import StreamGenerator from strlearn.evaluators import TestThenTrain from strlearn.ensembles import SEA, OnlineBagging ,OOB, UOB from strlearn.metrics import recall, specificity, balanced_accuracy_score stream = StreamGenerator(n_classes=2, n_chunks=200, chunk_size=400, n_features=10, n_drifts=1, weights=[0.8, 0.2], random_state=95), clfs = [ SEA(GaussianNB(), n_estimators=5), OnlineBagging(GaussianNB(), n_estimators=5), OOB(GaussianNB(), n_estimators=5), UOB(GaussianNB(), n_estimators=5), ] clf_names = [ "SEA", "OB", "OOB", "UOB", ] metrics = [
streams = {}

# Fill `scores` from the saved result files: one .npy file per
# (classifier, stream) pair, indexed by classifier, seed, drift setting,
# class distribution and label-noise level.
for i, clf in enumerate(clfs):
    for j, random_state in enumerate(random_states):
        # Each drift setting is a (spacing, drift_type) pair.
        for k, (spacing, drift_type) in enumerate(drifttype):
            # Incremental drift without a sigmoid spacing has no results to
            # load; its slots in `scores` are left untouched. drift_type is
            # expected to be a bool (it is passed as `incremental=`).
            if spacing is None and drift_type:
                continue
            for l, distribution in enumerate(distributions):
                for m, flip_y in enumerate(label_noises):
                    # The generator is only needed for its string form,
                    # which is the result file's name.
                    stream = StreamGenerator(
                        incremental=drift_type,
                        weights=distribution,
                        random_state=random_state,
                        y_flip=flip_y,
                        concept_sigmoid_spacing=spacing,
                        n_drifts=1,
                        n_chunks=200,
                        chunk_size=250,
                        n_clusters_per_class=1,
                        n_features=8,
                        n_informative=8,
                        n_redundant=0,
                        n_repeated=0,
                    )
                    results = np.load(
                        "results/experiment1_%s/%s.npy" % (clf, stream)
                    )
                    scores[i, j, k, l, m] = results

scores_metrics = scores
#ewaluator z odpowiednia metryka evaluator = TestThenTrain(metrics=accuracy_score) #przygotowanie pliku z wynikami f_sudden = open("wynikidryfnagly.csv", "a") f_gradual = open("wynikidryfgradualny.csv", "a") f_incremental = open("wynikidryfinkrementalny", "a") #usrednianie po n_chunks i wypisywanie wynikow, osobne pliki dla kazdego dryfu #wyniki sa zapisywane w jednej kolumnie -> #ciag: srednia_str1_clf1, srednia_str1_clf2, srednia_str1_clf3, srednia_str2_clf1, ... for rnd_st in range(10, 110, 10): #rnd_st przechowuje aktualna wartosc random_state #wypisywanie wynikow dla dryfu naglego dla wszystkich klasyfikatorow str_sudden = StreamGenerator(n_drifts=1, random_state=rnd_st) evaluator.process(str_sudden, clfs) array2d = evaluator.scores.reshape(249, 3) resultsmean = np.mean(array2d, axis=0) np.savetxt(f_sudden, resultsmean, delimiter=",", fmt='%0.3f') #wypisywanie wynikow dla dryfu gradualnego dla wszystkich klasyfikatorow str_gradual = StreamGenerator(n_drifts=1, concept_sigmoid_spacing=5, random_state=rnd_st) evaluator.process(str_gradual, clfs) array2d = evaluator.scores.reshape(249, 3) resultsmean = np.mean(array2d, axis=0) np.savetxt(f_gradual, resultsmean, delimiter=",", fmt='%0.3f') #wypisywanie wynikow dla dryfu inkremetalnego dla wszystkich klasyfikatorow
"n_chunks": 100, "chunk_size": 500, "random_state": 5, "n_features": 2, "n_informative": 2, "n_redundant": 0, "n_repeated": 0, "n_features": 2, "n_clusters_per_class": 1, } streams = { "9_first": StreamGenerator(n_drifts=3, concept_sigmoid_spacing=5, reocurring=True, incremental=True, weights=(2, 5, 0.9), **mcargs), "9_second": StreamGenerator(n_drifts=1, concept_sigmoid_spacing=5, **mcargs), } for stream_name in streams: print(stream_name) stream = streams[stream_name] checkpoints = np.linspace(0, stream.n_chunks - 1, 8).astype(int) fig = plt.figure(constrained_layout=True, figsize=(8, 6)) gs = GridSpec(5, len(checkpoints), figure=fig)
mcargs = { "n_classes": 3, "n_chunks": 100, "chunk_size": 500, "random_state": 105, "n_features": 2, "n_informative": 2, "n_redundant": 0, "n_repeated": 0, "n_features": 2, "n_clusters_per_class": 1, } streams = { "0_stationary": StreamGenerator(**mcargs), "1_sudden": StreamGenerator(n_drifts=1, **mcargs), "2_gradual": StreamGenerator(n_drifts=1, concept_sigmoid_spacing=5, **mcargs), "3_incremental": StreamGenerator( n_drifts=1, concept_sigmoid_spacing=5, incremental=True, **mcargs ), "4_reocurring": StreamGenerator( n_drifts=2, concept_sigmoid_spacing=5, reocurring=True, **mcargs ), "5_nonreocurring": StreamGenerator( n_drifts=2, concept_sigmoid_spacing=5, reocurring=False, **mcargs ), } mcargs.update({"n_classes": 2, "random_state": 5}) streams.update(
from sklearn.naive_bayes import GaussianNB from strlearn.metrics import recall, specificity, balanced_accuracy_score import numpy as np from tqdm import tqdm from strlearn.evaluators import TestThenTrain mcargs = { "n_classes": 2, "n_chunks": 250, "chunk_size": 400, "n_features": 10, } streams = { "sudden1": StreamGenerator(**mcargs, n_drifts=1, weights=[0.9, 0.1], random_state=14), "sudden2": StreamGenerator(**mcargs, n_drifts=1, weights=[0.9, 0.1], random_state=67), "sudden3": StreamGenerator(**mcargs, n_drifts=1, weights=[0.9, 0.1], random_state=95), "sudden4": StreamGenerator(**mcargs, n_drifts=1, weights=[0.9, 0.1], random_state=234), "sudden5": StreamGenerator(**mcargs, n_drifts=1, weights=[0.9, 0.1], random_state=876), "sudden6": StreamGenerator(**mcargs, n_drifts=1, weights=[0.9, 0.1], random_state=1410),
""" =================================================================== The simplest experiment example with one classifier and two metrics =================================================================== Lorem impsum. """ from sklearn.naive_bayes import GaussianNB clf = GaussianNB() from strlearn.streams import StreamGenerator stream = StreamGenerator(n_chunks=30, n_drifts=1) from sklearn.metrics import accuracy_score from strlearn.metrics import recall metrics = [accuracy_score, recall] from strlearn.evaluators import TestThenTrain evaluator = TestThenTrain(metrics) evaluator.process(stream, clf) print(evaluator.scores.shape) evaluator.scores
number_of_classifiers=5) sea = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(), random_state=42), n_estimators=5, metric=roc_auc_score) ros_knorau2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(), random_state=42, oversampler="ROS"), oversampled="ROS", des="KNORAU2") stream = StreamGenerator(n_chunks=250, chunk_size=200, random_state=1410, n_drifts=1, weights=[0.9, 0.1]) eval = TestThenTrain(metrics=(geometric_mean_score_1)) eval.process(stream, [kmc]) value = np.squeeze(eval.scores[0]) val = gaussian_filter1d(value, sigma=3, mode="nearest") plt.plot(val) plt.savefig("zzz") # print(kmc.new_auc) # print(kmc.auc_array) # print(kmc.worst) # print(eval.scores)
"n_features": 2, "n_informative": 2, "n_redundant": 0, "n_repeated": 0, } #stream = sl.streams.StreamGenerator(**concept_kwargs) #stream1 = sl.streams.StreamGenerator(**concept_kwargs) cm = LinearSegmentedColormap.from_list("lokomotiv", colors=[(0.3, 0.7, 0.3), (0.7, 0.3, 0.3)]) chunks_plotted = np.linspace(0, n_chunks - 1, 8).astype(int) stream = StreamGenerator(n_chunks=100, n_drifts=1) stream1 = StreamGenerator(n_chunks=100, n_drifts=1) plot_stream(stream1, "stationary", "Stationary stream") from sklearn.metrics import accuracy_score from strlearn.metrics import precision metrics = [accuracy_score, precision] from strlearn.evaluators import TestThenTrain evaluator = TestThenTrain(metrics) evaluator.process(stream, clf) plt.figure(figsize=(6, 3))
mcargs = { "n_classes": 3, "n_chunks": 100, "chunk_size": 500, "random_state": 105, "n_features": 2, "n_informative": 2, "n_redundant": 0, "n_repeated": 0, "n_features": 2, "n_clusters_per_class": 1, } streams = { "0_stationary": StreamGenerator(**mcargs), "1_sudden": StreamGenerator(n_drifts=1, **mcargs), "2_gradual": StreamGenerator(n_drifts=1, concept_sigmoid_spacing=5, **mcargs), "3_incremental": StreamGenerator(n_drifts=1, concept_sigmoid_spacing=5, incremental=True, **mcargs), "4_reocurring": StreamGenerator(n_drifts=2, concept_sigmoid_spacing=5, reocurring=True, **mcargs), "5_nonreocurring":
import matplotlib.pyplot as plt
import numpy as np
from sklearn.naive_bayes import GaussianNB
from strlearn.ensembles import OOB, SEA, UOB, OnlineBagging
from strlearn.evaluators import TestThenTrain
from strlearn.metrics import balanced_accuracy_score, recall, specificity
from strlearn.streams import StreamGenerator

# Two-class stream with an 80/20 class ratio and one incremental drift
# (sigmoid spacing 5).
stream = StreamGenerator(
    n_classes=2,
    n_chunks=200,
    chunk_size=400,
    n_features=10,
    concept_sigmoid_spacing=5,
    incremental=True,
    n_drifts=1,
    weights=[0.8, 0.2],
    random_state=95,
)

# One ensemble per method, each wrapping its own GaussianNB instance with
# five estimators; the order matches `clf_names` below.
clfs = [
    ensemble(GaussianNB(), n_estimators=5)
    for ensemble in (SEA, OnlineBagging, OOB, UOB)
]

# Display names, in the same order as `clfs`.
clf_names = ["SEA", "OB", "OOB", "UOB"]