def conduct(self, file=None):
    """Evaluate the ensembles on one generated stream per random seed.

    For every seed in ``self._streams_random_seeds`` a ``StreamGenerator`` is
    built and handed, together with ``self._ensembles``, to
    ``self._evaluator``; the evaluator's scores are then copied into
    ``self._scores`` at the seed's index.

    When ``file`` is not ``None`` the collected scores are saved to it with
    ``np.save`` after all seeds have been processed.
    """
    for seed_idx, seed in enumerate(self._streams_random_seeds):
        # The gradual variant differs from the other one only by this argument.
        drift_kwargs = (
            {"concept_sigmoid_spacing": 5} if self._gradual_drift else {}
        )
        generated = StreamGenerator(
            n_chunks=self.n_chunks,
            chunk_size=500,
            n_drifts=5,
            weights=self._proportions,
            n_features=2,
            n_informative=2,
            n_redundant=0,
            n_repeated=0,
            random_state=seed,
            **drift_kwargs,
        )

        self._evaluator.process(generated, self._ensembles)
        self._scores[seed_idx, :, :, :] = self._evaluator.scores[:, :, :]

    if file is None:
        return

    print("Saving file " + file)
    np.save(file, self._scores)
Пример #2
0
def streams(random_state):
    """Build the benchmark streams for a single random seed.

    One ``StreamGenerator`` is created for every combination of drift type,
    class distribution, label noise and sigmoid spacing listed below, except
    for incremental drift without sigmoid spacing, which is left out.

    Parameters
    ----------
    random_state
        Passed unchanged as ``random_state`` to every ``StreamGenerator``.

    Returns
    -------
    dict
        The generated streams, keyed by ``str(stream)``.
    """
    # Variables
    # Alternative distributions: [[0.95, 0.05], [0.90, 0.10], [0.85, 0.15]]
    distributions = [[0.97, 0.03]]
    label_noises = [0.01, 0.03, 0.05]
    incremental = [False, True]
    ccs = [5, None]
    n_drifts = 1

    # Prepare streams
    generated = {}
    for drift_type in incremental:
        for distribution in distributions:
            for flip_y in label_noises:
                for spacing in ccs:
                    # Decide before constructing anything: this combination
                    # is never added to the result.
                    if spacing is None and drift_type:
                        continue
                    stream = StreamGenerator(
                        incremental=drift_type,
                        weights=distribution,
                        random_state=random_state,
                        y_flip=flip_y,
                        concept_sigmoid_spacing=spacing,
                        n_drifts=n_drifts,
                        chunk_size=250,
                        n_chunks=200,
                        n_clusters_per_class=1,
                        n_features=8,
                        n_informative=8,
                        n_redundant=0,
                        n_repeated=0,
                    )
                    generated[str(stream)] = stream

    return generated
Пример #3
0
def timestream(chunk_size):
    """Build the short stream(s) used for timing with a given chunk size.

    One ``StreamGenerator`` is created for every combination of the option
    lists below, except for incremental drift without sigmoid spacing, which
    is left out.  With the lists as written this is a single five-chunk stream
    with a fixed ``random_state`` of 1994.

    Parameters
    ----------
    chunk_size
        Passed unchanged as ``chunk_size`` to every ``StreamGenerator``.

    Returns
    -------
    dict
        The generated streams, keyed by ``str(stream)``.
    """
    # Variables
    distributions = [[0.80, 0.20]]
    label_noises = [0.01]
    incremental = [False]
    ccs = [None]
    n_drifts = 1

    # Prepare streams
    generated = {}
    for drift_type in incremental:
        for distribution in distributions:
            for flip_y in label_noises:
                for spacing in ccs:
                    # Decide before constructing anything: this combination
                    # is never added to the result.
                    if spacing is None and drift_type:
                        continue
                    stream = StreamGenerator(
                        incremental=drift_type,
                        weights=distribution,
                        random_state=1994,
                        y_flip=flip_y,
                        concept_sigmoid_spacing=spacing,
                        n_drifts=n_drifts,
                        chunk_size=chunk_size,
                        n_chunks=5,
                        n_clusters_per_class=1,
                        n_features=8,
                        n_informative=8,
                        n_redundant=0,
                        n_repeated=0,
                    )
                    generated[str(stream)] = stream

    return generated
Пример #4
0
import numpy as np
from tqdm import tqdm
from strlearn.evaluators import TestThenTrain

# Keyword arguments unpacked into every StreamGenerator call below.
mcargs = dict(n_classes=2, n_chunks=250, chunk_size=400, n_features=10)

streams = {
    "incremental1":
    StreamGenerator(**mcargs,
                    n_drifts=1,
                    concept_sigmoid_spacing=5,
                    incremental=True,
                    weights=[0.8, 0.2],
                    random_state=14),
    "incremental2":
    StreamGenerator(**mcargs,
                    n_drifts=1,
                    concept_sigmoid_spacing=5,
                    incremental=True,
                    weights=[0.8, 0.2],
                    random_state=67),
    "incremental3":
    StreamGenerator(**mcargs,
                    n_drifts=1,
                    concept_sigmoid_spacing=5,
                    incremental=True,
                    weights=[0.8, 0.2],
Пример #5
0
import numpy as np
from sklearn.naive_bayes import GaussianNB
import matplotlib.pyplot as plt
from strlearn.streams import StreamGenerator
from strlearn.evaluators import TestThenTrain
from strlearn.ensembles import SEA, OnlineBagging ,OOB, UOB
from strlearn.metrics import recall, specificity, balanced_accuracy_score

# Two-class stream with class weights 0.8 / 0.2 and a single drift.
# The call must not be followed by a comma: that would bind ``stream`` to a
# one-element tuple instead of the generator itself.
stream = StreamGenerator(
    n_classes=2,
    n_chunks=200,
    chunk_size=400,
    n_features=10,
    n_drifts=1,
    weights=[0.8, 0.2],
    random_state=95,
)

# One five-member ensemble of each type, all built on Gaussian naive Bayes.
clfs = [
    ensemble(GaussianNB(), n_estimators=5)
    for ensemble in (SEA, OnlineBagging, OOB, UOB)
]

# Display names, listed in the same order as ``clfs``.
clf_names = ["SEA", "OB", "OOB", "UOB"]

metrics = [
Пример #6
0
streams = {}
# Fill ``scores`` with the saved results of every experiment configuration.
# Index order: classifier, random state, drift type, distribution, label noise.
for i, clf in enumerate(clfs):
    for j, random_state in enumerate(random_states):
        # Each drift-type entry is a (sigmoid spacing, incremental flag) pair.
        for k, (spacing, drift_type) in enumerate(drifttype):
            # Incremental drift without sigmoid spacing is skipped, so there is
            # no need to build its streams at all.
            if spacing is None and drift_type:
                continue
            for l, distribution in enumerate(distributions):
                for m, flip_y in enumerate(label_noises):
                    stream = StreamGenerator(
                        incremental=drift_type,
                        weights=distribution,
                        random_state=random_state,
                        y_flip=flip_y,
                        concept_sigmoid_spacing=spacing,
                        n_drifts=1,
                        n_chunks=200,
                        chunk_size=250,
                        n_clusters_per_class=1,
                        n_features=8,
                        n_informative=8,
                        n_redundant=0,
                        n_repeated=0,
                    )
                    # The result file is named after str(stream), so the stream
                    # must be configured exactly as when the results were saved.
                    results = np.load(
                        "results/experiment1_%s/%s.npy" % (clf, stream)
                    )
                    scores[i, j, k, l, m] = results
# Second name for the same array (no copy is made).
scores_metrics = scores
Пример #7
0
# Evaluator with the chosen metric.
evaluator = TestThenTrain(metrics=accuracy_score)

# Prepare the result files (opened in append mode, so repeated runs add rows).
# NOTE(review): no close() is visible in this part of the script - confirm the
# handles are closed further down.
f_sudden = open("wynikidryfnagly.csv", "a")
f_gradual = open("wynikidryfgradualny.csv", "a")
# NOTE(review): unlike the other two, this file name has no ".csv" extension -
# confirm that is intended.
f_incremental = open("wynikidryfinkrementalny", "a")

# Average the scores over the chunks and write them out, one file per drift type.
# The results are written as a single column, in the order:
# mean_stream1_clf1, mean_stream1_clf2, mean_stream1_clf3, mean_stream2_clf1, ...
for rnd_st in range(10, 110,
                    10):  # rnd_st holds the current random_state value
    # Write the results for sudden drift for all classifiers.
    str_sudden = StreamGenerator(n_drifts=1, random_state=rnd_st)
    evaluator.process(str_sudden, clfs)
    # NOTE(review): the hard-coded (249, 3) presumably stands for 249 scored
    # chunks and 3 classifiers. reshape() keeps the element order, so if
    # evaluator.scores is laid out as (classifier, chunk, metric) the columns
    # would mix classifiers - confirm against the evaluator's score layout.
    array2d = evaluator.scores.reshape(249, 3)
    resultsmean = np.mean(array2d, axis=0)
    np.savetxt(f_sudden, resultsmean, delimiter=",", fmt='%0.3f')

    # Write the results for gradual drift for all classifiers.
    str_gradual = StreamGenerator(n_drifts=1,
                                  concept_sigmoid_spacing=5,
                                  random_state=rnd_st)
    evaluator.process(str_gradual, clfs)
    # Same reshape-and-average step as for the sudden-drift stream above.
    array2d = evaluator.scores.reshape(249, 3)
    resultsmean = np.mean(array2d, axis=0)
    np.savetxt(f_gradual, resultsmean, delimiter=",", fmt='%0.3f')

    # Write the results for incremental drift for all classifiers.
Пример #8
0
    "n_chunks": 100,
    "chunk_size": 500,
    "random_state": 5,
    "n_features": 2,
    "n_informative": 2,
    "n_redundant": 0,
    "n_repeated": 0,
    "n_features": 2,
    "n_clusters_per_class": 1,
}

# Streams to visualise, keyed by the name printed for each of them below.
# Both receive the shared generator settings from ``mcargs``.
streams = {
    "9_first":
    # NOTE(review): the three-element ``weights`` presumably selects the
    # generator's dynamically changing class imbalance - confirm against the
    # stream-learn documentation for the installed version.
    StreamGenerator(n_drifts=3,
                    concept_sigmoid_spacing=5,
                    reocurring=True,
                    incremental=True,
                    weights=(2, 5, 0.9),
                    **mcargs),
    "9_second":
    StreamGenerator(n_drifts=1, concept_sigmoid_spacing=5, **mcargs),
}

for stream_name in streams:
    print(stream_name)
    stream = streams[stream_name]

    # Eight evenly spaced chunk indices from the first to the last chunk.
    checkpoints = np.linspace(0, stream.n_chunks - 1, 8).astype(int)

    fig = plt.figure(constrained_layout=True, figsize=(8, 6))

    # Grid of five rows with one column per checkpoint.
    gs = GridSpec(5, len(checkpoints), figure=fig)
Пример #9
0
# Generator settings unpacked into every StreamGenerator call below.
# Each key appears exactly once; a repeated key in a dict literal is silently
# overwritten by its last occurrence.
mcargs = {
    "n_classes": 3,
    "n_chunks": 100,
    "chunk_size": 500,
    "random_state": 105,
    "n_features": 2,
    "n_informative": 2,
    "n_redundant": 0,
    "n_repeated": 0,
    "n_clusters_per_class": 1,
}

# Drift-specific keyword arguments of each example stream; every stream is
# additionally given the shared settings from ``mcargs``.
_drift_settings = {
    "0_stationary": {},
    "1_sudden": {"n_drifts": 1},
    "2_gradual": {"n_drifts": 1, "concept_sigmoid_spacing": 5},
    "3_incremental": {
        "n_drifts": 1,
        "concept_sigmoid_spacing": 5,
        "incremental": True,
    },
    "4_reocurring": {
        "n_drifts": 2,
        "concept_sigmoid_spacing": 5,
        "reocurring": True,
    },
    "5_nonreocurring": {
        "n_drifts": 2,
        "concept_sigmoid_spacing": 5,
        "reocurring": False,
    },
}

streams = {
    name: StreamGenerator(**settings, **mcargs)
    for name, settings in _drift_settings.items()
}

# Switch the shared settings to two classes and another seed for what follows.
mcargs.update(n_classes=2, random_state=5)
streams.update(
Пример #10
0
from sklearn.naive_bayes import GaussianNB
from strlearn.metrics import recall, specificity, balanced_accuracy_score
import numpy as np
from tqdm import tqdm
from strlearn.evaluators import TestThenTrain

# Keyword arguments unpacked into every StreamGenerator call below.
mcargs = dict(
    n_classes=2,
    n_chunks=250,
    chunk_size=400,
    n_features=10,
)

streams = {
    "sudden1":
    StreamGenerator(**mcargs, n_drifts=1, weights=[0.9, 0.1], random_state=14),
    "sudden2":
    StreamGenerator(**mcargs, n_drifts=1, weights=[0.9, 0.1], random_state=67),
    "sudden3":
    StreamGenerator(**mcargs, n_drifts=1, weights=[0.9, 0.1], random_state=95),
    "sudden4":
    StreamGenerator(**mcargs, n_drifts=1, weights=[0.9, 0.1],
                    random_state=234),
    "sudden5":
    StreamGenerator(**mcargs, n_drifts=1, weights=[0.9, 0.1],
                    random_state=876),
    "sudden6":
    StreamGenerator(**mcargs,
                    n_drifts=1,
                    weights=[0.9, 0.1],
                    random_state=1410),
Пример #11
0
"""
===================================================================
The simplest experiment example with one classifier and two metrics
===================================================================

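A single Gaussian naive Bayes classifier is processed on a generated data
stream with the Test-Then-Train evaluator, scoring accuracy and recall.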

"""

# Imports are placed next to the step that needs them, so the example reads
# as a step-by-step walkthrough.

# Step 1: the classifier to evaluate.
from sklearn.naive_bayes import GaussianNB

clf = GaussianNB()

# Step 2: a generated stream of 30 chunks containing one drift.
from strlearn.streams import StreamGenerator

stream = StreamGenerator(n_chunks=30, n_drifts=1)

# Step 3: the metrics to compute.
from sklearn.metrics import accuracy_score
from strlearn.metrics import recall

metrics = [accuracy_score, recall]

# Step 4: the evaluator, configured with both metrics.
from strlearn.evaluators import TestThenTrain

evaluator = TestThenTrain(metrics)

# Step 5: run the classifier over the stream.
evaluator.process(stream, clf)

print(evaluator.scores.shape)
# Bare expression: it has no effect when run as a plain script; presumably
# kept so a notebook or gallery renderer displays the array - confirm.
evaluator.scores
Пример #12
0
                      number_of_classifiers=5)

# SEA ensemble of five members over stratified bagging of Gaussian naive
# Bayes, configured with ROC AUC as its metric.
_plain_bagging = StratifiedBagging(base_estimator=GaussianNB(), random_state=42)
sea = SEA(base_estimator=_plain_bagging, n_estimators=5, metric=roc_auc_score)

# SEA variant whose bagging uses the "ROS" oversampler, with "KNORAU2" passed
# as the ``des`` option.
_ros_bagging = StratifiedBagging(
    base_estimator=GaussianNB(),
    random_state=42,
    oversampler="ROS",
)
ros_knorau2 = SEA(base_estimator=_ros_bagging, oversampled="ROS", des="KNORAU2")

# Stream with class weights 0.9 / 0.1 and a single drift.
stream = StreamGenerator(
    n_chunks=250,
    chunk_size=200,
    random_state=1410,
    n_drifts=1,
    weights=[0.9, 0.1],
)
# Named ``evaluator`` so the built-in ``eval`` stays usable in this module.
evaluator = TestThenTrain(metrics=geometric_mean_score_1)

evaluator.process(stream, [kmc])
# First entry of the scores array (one classifier was passed), with
# singleton axes dropped.
value = np.squeeze(evaluator.scores[0])
# Smooth the score curve before plotting it.
val = gaussian_filter1d(value, sigma=3, mode="nearest")
plt.plot(val)
plt.savefig("zzz")

# print(kmc.new_auc)
# print(kmc.auc_array)
# print(kmc.worst)

# print(evaluator.scores)
Пример #13
0
    "n_features": 2,
    "n_informative": 2,
    "n_redundant": 0,
    "n_repeated": 0,
}

#stream = sl.streams.StreamGenerator(**concept_kwargs)
#stream1 = sl.streams.StreamGenerator(**concept_kwargs)

# Two-colour map running from a green tone to a red tone.
cm = LinearSegmentedColormap.from_list("lokomotiv",
                                       colors=[(0.3, 0.7, 0.3),
                                               (0.7, 0.3, 0.3)])

# Eight evenly spaced chunk indices from the first to the last chunk.
chunks_plotted = np.linspace(0, n_chunks - 1, 8).astype(int)

# Two identically configured generators: ``stream1`` is plotted, ``stream`` is
# evaluated below. Presumably they are separate so that plotting does not
# consume the stream used for evaluation - confirm.
stream = StreamGenerator(n_chunks=100, n_drifts=1)
stream1 = StreamGenerator(n_chunks=100, n_drifts=1)

# NOTE(review): the labels say "stationary" although the stream is created
# with n_drifts=1 - confirm which one is intended.
plot_stream(stream1, "stationary", "Stationary stream")

# Metrics computed by the evaluator.
from sklearn.metrics import accuracy_score
from strlearn.metrics import precision
metrics = [accuracy_score, precision]

from strlearn.evaluators import TestThenTrain
evaluator = TestThenTrain(metrics)

# ``clf`` is defined outside this part of the script.
evaluator.process(stream, clf)

plt.figure(figsize=(6, 3))
Пример #14
0
# Generator settings unpacked into every StreamGenerator call below.
# Each key appears exactly once; a repeated key in a dict literal is silently
# overwritten by its last occurrence.
mcargs = {
    "n_classes": 3,
    "n_chunks": 100,
    "chunk_size": 500,
    "random_state": 105,
    "n_features": 2,
    "n_informative": 2,
    "n_redundant": 0,
    "n_repeated": 0,
    "n_clusters_per_class": 1,
}

streams = {
    "0_stationary":
    StreamGenerator(**mcargs),
    "1_sudden":
    StreamGenerator(n_drifts=1, **mcargs),
    "2_gradual":
    StreamGenerator(n_drifts=1, concept_sigmoid_spacing=5, **mcargs),
    "3_incremental":
    StreamGenerator(n_drifts=1,
                    concept_sigmoid_spacing=5,
                    incremental=True,
                    **mcargs),
    "4_reocurring":
    StreamGenerator(n_drifts=2,
                    concept_sigmoid_spacing=5,
                    reocurring=True,
                    **mcargs),
    "5_nonreocurring":
Пример #15
0
import numpy as np
from sklearn.naive_bayes import GaussianNB
import matplotlib.pyplot as plt
from strlearn.streams import StreamGenerator
from strlearn.evaluators import TestThenTrain
from strlearn.ensembles import SEA, OnlineBagging ,OOB, UOB
from strlearn.metrics import recall, specificity, balanced_accuracy_score

# Two-class stream with class weights 0.8 / 0.2 and one incremental drift.
stream = StreamGenerator(
    n_classes=2,
    n_chunks=200,
    chunk_size=400,
    n_features=10,
    concept_sigmoid_spacing=5,
    incremental=True,
    n_drifts=1,
    weights=[0.8, 0.2],
    random_state=95,
)

# One five-member ensemble of each type, all built on Gaussian naive Bayes.
clfs = [
    ensemble(GaussianNB(), n_estimators=5)
    for ensemble in (SEA, OnlineBagging, OOB, UOB)
]

# Display names, listed in the same order as ``clfs``.
clf_names = ["SEA", "OB", "OOB", "UOB"]