示例#1
0
def demo():
    """ _test_knn

    This demo tests the KNNClassifier on a file stream, which gives
    instances coming from a SEA generator.

    The test computes the performance of the KNNClassifier as well as
    the time to create the structure and classify max_samples (5000 by
    default) instances.

    """
    stream = FileStream("https://raw.githubusercontent.com/scikit-multiflow/streaming-datasets/"
                        "master/sea_big.csv")

    # Pre-train on the first 200 samples before the timed evaluation starts.
    train = 200
    X, y = stream.next_sample(train)

    start = timer()
    knn = KNNClassifier(n_neighbors=8, max_window_size=2000, leaf_size=40)
    knn.partial_fit(X, y)

    n_samples = 0
    max_samples = 5000
    my_corrects = 0

    # Prediction-only loop: the model is NOT updated after pre-training;
    # we only count how many of the next max_samples predictions are correct.
    while n_samples < max_samples:
        X, y = stream.next_sample()
        my_pred = knn.predict(X)
        if y[0] == my_pred[0]:
            my_corrects += 1
        n_samples += 1

    end = timer()

    print('Evaluation time: ' + str(end-start))
    print(str(n_samples) + ' samples analyzed.')
    print('My performance: ' + str(my_corrects/n_samples))
def test_leverage_bagging_me():
    """Check the 'leveraging_bag_me' variant of LeveragingBaggingClassifier
    against a fixed, seeded sequence of expected predictions."""
    stream = SEAGenerator(classification_function=1,
                          noise_percentage=0.067,
                          random_state=112)
    knn = KNNClassifier(n_neighbors=8, leaf_size=40, max_window_size=2000)

    # leveraging_bag_me
    learner = LeveragingBaggingClassifier(
        base_estimator=knn,
        n_estimators=3,
        random_state=112,
        leverage_algorithm='leveraging_bag_me')

    y_expected = np.asarray([
        0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1,
        1, 0
    ],
                            dtype=int)  # np.int was removed in NumPy 1.24; builtin int is the supported spelling

    run_prequential_supervised(stream,
                               learner,
                               max_samples=2000,
                               n_wait=40,
                               y_expected=y_expected)
示例#3
0
    def __init__(self,
                 base_estimator=KNNClassifier(),
                 n_estimators=10,
                 w=6,
                 delta=0.002,
                 enable_code_matrix=False,
                 leverage_algorithm='leveraging_bag',
                 random_state=None):
        """Initialize the leveraging-bagging ensemble.

        Parameters
        ----------
        base_estimator : classifier
            Prototype model for the ensemble members.
            NOTE(review): a mutable default instance is shared across calls;
            presumably each member is cloned in ``__configure()`` — confirm.
        n_estimators : int
            Number of ensemble members.
        w : int
            Weight parameter (presumably the Poisson(w) sampling rate used by
            leveraging bagging — confirm against the training code).
        delta : float
            ADWIN drift-detector confidence parameter.
        enable_code_matrix : bool
            Whether to enable the output-code matrix.
        leverage_algorithm : str
            Must be one of ``self._LEVERAGE_ALGORITHMS``; validated below.
        random_state : int, RandomState instance or None
            Seed/source of randomness for reproducibility.

        Raises
        ------
        ValueError
            If ``leverage_algorithm`` is not a recognized option.
        """
        super().__init__()
        # default values
        self.ensemble = None
        self.adwin_ensemble = None
        self.n_detected_changes = None
        self.matrix_codes = None
        self.classes = None
        self.init_matrix_codes = None
        self._random_state = None   # This is the actual random_state object used internally
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.enable_code_matrix = enable_code_matrix
        self.w = w
        self.delta = delta
        # Validate before storing so an invalid option fails fast.
        if leverage_algorithm not in self._LEVERAGE_ALGORITHMS:
            raise ValueError("Invalid option for leverage_algorithm: '{}'\n"
                             "Valid options are: {}".format(leverage_algorithm,
                                                            self._LEVERAGE_ALGORITHMS))
        self.leverage_algorithm = leverage_algorithm
        self.random_state = random_state
        # Name-mangled private setup; builds the ensemble from the settings above.
        self.__configure()
示例#4
0
    def retrain(self, labeled_tweets: list):
        """Rebuild the category, sentiment and relevance classifiers from scratch.

        Collects the set of category labels present in ``labeled_tweets``
        (excluding sentiment and relevance labels), re-creates the three
        models, and fits them on the TF-IDF-vectorized tweet texts.
        """
        # Category labels: everything that is neither a sentiment label nor
        # the relevance ("Irrelevant") label.
        labels = set()
        for tweet in labeled_tweets:
            if "labels" in tweet and len(tweet["labels"]) > 0:
                labels.update(
                    label for label in tweet["labels"]
                    if not (label in self.labels_sent or label in self.labels_relevance))
        self.labels = list(labels)
        assert "Irrelevant" not in self.labels, "Something went wrong"

        # Fresh, untrained models: multi-label chain for categories,
        # KNN for relevance and sentiment.
        self.lcc = ClassifierChain(SGDClassifier(max_iter=100, loss='log', random_state=1))
        self.clrel = KNNClassifier()
        self.clsent = KNNClassifier()

        X, y, ys, yr = [], [], [], []
        for tweet in labeled_tweets:
            if "labels" in tweet and len(tweet["labels"]) > 0:
                X.append(tweet["tweet"])
                y.append(self._labels2array(tweet["labels"]))
                # Sentiment target: the single active sentiment label, or
                # NEUTRAL when zero or several are set.
                active_sent = [label for label, v in tweet["labels"].items()
                               if label in self.labels_sent and v]
                if len(active_sent) == 1:
                    ys.append(self.labels_sent[active_sent[0]])
                else:
                    ys.append(self.labels_sent["NEUTRAL"])
                # Relevance target: 1 iff the "Irrelevant" flag is set truthy.
                if self.labels_relevance[0] in tweet["labels"] and tweet["labels"][self.labels_relevance[0]]:
                    yr.append(1)
                else:
                    yr.append(0)

        X = np.array(self.tokenizer.transform(X).todense())
        y = np.array(y)
        ys = np.array(ys)
        yr = np.array(yr)
        self.clsent.fit(X, ys)
        print("Trained Sentiment Classifier")
        self.clrel.fit(X, yr)
        print("Trained Relevance Classifier")
        # Drop samples with no active category label before fitting the chain.
        X2, y2 = [], []
        for Xe, ye in zip(X, y):
            if ye.sum() > 0:
                X2.append(Xe)
                y2.append(ye)
        X = np.array(X2)
        y = np.array(y2)
        self.lcc.fit(X, y)
        print("Trained Categorical Classifier")  # fixed typo: "Catecorical"
def LeverageBagging(base_estimator=KNNClassifier(), n_estimators=10, w=6, delta=0.002, enable_code_matrix=False,
                    leverage_algorithm='leveraging_bag', random_state=None):     # pragma: no cover
    """Deprecated alias for :class:`LeverageBaggingClassifier`.

    Emits a ``FutureWarning`` and forwards all arguments unchanged.
    """
    warnings.warn("'LeverageBagging' has been renamed to 'LeverageBaggingClassifier' in v0.5.0.\n"
                  "The old name will be removed in v0.7.0", category=FutureWarning)
    return LeverageBaggingClassifier(base_estimator=base_estimator, n_estimators=n_estimators,
                                     w=w, delta=delta,
                                     enable_code_matrix=enable_code_matrix,
                                     leverage_algorithm=leverage_algorithm,
                                     random_state=random_state)
示例#6
0
    def __init__(self, texts: list):
        """Fit the TF-IDF tokenizer on ``texts`` and create untrained models.

        Parameters
        ----------
        texts : list
            Corpus of raw text documents used to fit the vectorizer.
        """
        self.tokenizer = TfidfVectorizer()
        self.tokenizer.fit(texts)

        # Sentiment label -> class index. (The original code first built a
        # one-hot-vector version of this mapping and immediately overwrote
        # it; that dead assignment has been removed.)
        self.labels_sent = {"POSITIVE": 0, "NEUTRAL": 1,
                            "NEGATIVE": 2}
        # Class index -> sentiment flag dict, used to decode predictions.
        self.reverse_sent = {0: {"POSITIVE": True, "NEUTRAL": False,
                                 "NEGATIVE": False},
                             1: {"POSITIVE": False, "NEUTRAL": True,
                                 "NEGATIVE": False},
                             2: {"POSITIVE": False, "NEUTRAL": False,
                                 "NEGATIVE": True}}

        self.labels_relevance = ["Irrelevant"]
        self.labels = []
        self.lcc = ClassifierChain(SGDClassifier(max_iter=100, loss='log', random_state=1))
        self.clrel = KNNClassifier()
        self.clsent = KNNClassifier()
def test_leverage_bagging():
    """Prequential test of LeveragingBaggingClassifier on a seeded SEA stream.

    Verifies the prediction sequence, accuracy, return types and the
    ``get_info()`` string against fixed expected values.
    """
    stream = SEAGenerator(classification_function=1,
                          noise_percentage=0.067,
                          random_state=112)
    knn = KNNClassifier(n_neighbors=8,
                        leaf_size=40,
                        max_window_size=2000)
    learner = LeveragingBaggingClassifier(base_estimator=knn,
                                          n_estimators=3,
                                          random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            # The class list is only needed on the first partial_fit call.
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 0, 0, 0, 0, 1, 0, 1,
                            0, 0, 1, 0, 1, 1, 1, 0, 1, 0,
                            0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
                            1, 1, 1, 1, 0, 0, 1, 1, 1, 0,
                            0, 1, 1, 0, 1, 0, 0, 1, 1]
    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(predictions == expected_predictions)

    expected_performance = 0.8571428571428571
    assert np.isclose(expected_performance, performance)

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "LeveragingBaggingClassifier(base_estimator=KNNClassifier(leaf_size=40, " \
                    "max_window_size=2000, metric='euclidean', n_neighbors=8), " \
                    "delta=0.002, enable_code_matrix=False, leverage_algorithm='leveraging_bag'," \
                    " n_estimators=3, random_state=112, w=6)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
示例#8
0
def demo():
    """ _test_leverage_bagging

    This demo tests the LeverageBaggingClassifier on a file stream, which gives
    instances coming from a SEA generator.

    The test computes the performance of the LeverageBaggingClassifier as well
    as the time to create the structure and classify max_samples (2000 by default)
    instances.

    """
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    warnings.filterwarnings("ignore", ".*Passing 1d.*")
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=1)

    clf = LeverageBaggingClassifier(base_estimator=KNNClassifier(
        n_neighbors=8, max_window_size=2000, leaf_size=30),
                                    n_estimators=1,
                                    random_state=1)
    sample_count = 0
    correctly_classified = 0
    max_samples = 2000
    train_size = 200
    first = True
    if train_size > 0:
        X, y = stream.next_sample(train_size)
        clf.partial_fit(X, y, classes=stream.target_values)
        first = False

    # Progress step: one log line per 5% of the evaluation.
    # Integer arithmetic fixes the original float division, which logged
    # '5.0%' instead of '5%'.
    progress_step = max_samples // 20
    logging.info('%s%%', 0.0)
    while sample_count < max_samples:
        if (sample_count + 1) % progress_step == 0:
            logging.info('%s%%',
                         str((sample_count // progress_step + 1) * 5))
        X, y = stream.next_sample(2)
        my_pred = clf.predict(X)
        if first:
            clf.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            clf.partial_fit(X, y)

        if my_pred is not None:
            if y[0] == my_pred[0]:
                correctly_classified += 1

        sample_count += 1

    print(str(sample_count) + ' samples analyzed.')
    print('My performance: ' + str(correctly_classified / sample_count))
    print(clf.get_info())
def test_data_stream(test_path):
    """Smoke-test EvaluatePrequential over a DataStream with five learners.

    Runs three KNN variants, a Hoeffding tree and Naive Bayes, and checks
    that the evaluation produces one result and one measurement per model.
    """
    test_file = os.path.join(test_path, 'data/data_n30000.csv')
    raw_data = pd.read_csv(test_file)
    stream = DataStream(raw_data, name='Test')
    normal_knn_learner = KNNClassifier(
        n_neighbors=8,
        max_window_size=2000,
        leaf_size=40,
    )
    weighted_knn_learner = WeightedKNNClassifier(n_neighbors=8,
                                                 max_window_size=2000,
                                                 leaf_size=40)
    standardize_knn_learner = KNNClassifier(n_neighbors=8,
                                            max_window_size=2000,
                                            leaf_size=40,
                                            standardize=True)
    # Columns 15..end are treated as nominal attributes by the tree.
    nominal_attr_idx = [x for x in range(15, len(stream.feature_names))]

    hoeffding_learner = HoeffdingTreeClassifier(
        nominal_attributes=nominal_attr_idx)
    nb_learner = NaiveBayes()

    models = [
        normal_knn_learner,
        weighted_knn_learner,
        standardize_knn_learner,
        hoeffding_learner,
        nb_learner,
    ]

    metrics = ['accuracy', 'kappa_m', 'kappa_t', 'recall']
    output_file = os.path.join(test_path, 'data/kkn_output.csv')
    evaluator = EvaluatePrequential(metrics=metrics, output_file=output_file)

    # Evaluate
    result = evaluator.evaluate(stream=stream, model=models)
    mean_performance, current_performance = evaluator.get_measurements()
    # Real assertions instead of the original placeholder `assert 1 == 1`:
    # one trained model and one measurement entry per learner, plus the
    # output file must have been written.
    assert len(result) == len(models)
    assert len(mean_performance) == len(models)
    assert len(current_performance) == len(models)
    assert os.path.isfile(output_file)
def test_oza_bagging():
    """Prequential test of OzaBaggingClassifier on a seeded SEA stream.

    Verifies the prediction sequence, accuracy, return types and the
    ``get_info()`` string against fixed expected values.
    """
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    knn = KNNClassifier(n_neighbors=8, leaf_size=40, max_window_size=2000)
    learner = OzaBaggingClassifier(base_estimator=knn,
                                   n_estimators=3,
                                   random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            # The class list is only needed on the first partial_fit call.
            learner.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]
    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(predictions == expected_predictions)

    expected_performance = 0.8979591836734694
    assert np.isclose(expected_performance, performance)

    expected_correct_predictions = 44
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OzaBaggingClassifier(base_estimator=KNNClassifier(leaf_size=40, " \
                    "max_window_size=2000, metric='euclidean', n_neighbors=8), " \
                    "n_estimators=3, random_state=112)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
示例#11
0
class LabelPredict:
    """Multi-label tweet classifier combining three models:

    - ``lcc``: a classifier chain over SGD for free-form category labels,
    - ``clsent``: a KNN classifier for sentiment (POSITIVE/NEUTRAL/NEGATIVE),
    - ``clrel``: a KNN classifier for the binary "Irrelevant" flag.
    """

    def __init__(self, texts: list):
        """Fit the TF-IDF tokenizer on ``texts`` and create untrained models."""
        self.tokenizer = TfidfVectorizer()
        self.tokenizer.fit(texts)

        # Sentiment label -> class index. (The original code first built a
        # one-hot-vector version of this mapping and immediately overwrote
        # it; that dead assignment has been removed.)
        self.labels_sent = {"POSITIVE": 0, "NEUTRAL": 1,
                            "NEGATIVE": 2}
        # Class index -> sentiment flag dict, used to decode predictions.
        self.reverse_sent = {0: {"POSITIVE": True, "NEUTRAL": False,
                                 "NEGATIVE": False},
                             1: {"POSITIVE": False, "NEUTRAL": True,
                                 "NEGATIVE": False},
                             2: {"POSITIVE": False, "NEUTRAL": False,
                                 "NEGATIVE": True}}

        self.labels_relevance = ["Irrelevant"]
        self.labels = []
        self.lcc = ClassifierChain(SGDClassifier(max_iter=100, loss='log', random_state=1))
        self.clrel = KNNClassifier()
        self.clsent = KNNClassifier()

    def _labels2array(self, labeldict: dict):
        """Encode ``labeldict`` as a 0/1 vector ordered like ``self.labels``.

        A label counts as active only when its value compares equal to True
        (strict ``== True`` check, deliberately kept from the original).
        """
        target = []
        for label in self.labels:
            if label in labeldict and labeldict[label] == True:  # noqa: E712
                target.append(1)
            else:
                target.append(0)
        return np.array(target)

    def retrain(self, labeled_tweets: list):
        """Rebuild and refit all three classifiers from ``labeled_tweets``."""
        # Category labels: everything that is neither a sentiment label nor
        # the relevance ("Irrelevant") label.
        labels = set()
        for tweet in labeled_tweets:
            if "labels" in tweet and len(tweet["labels"]) > 0:
                labels.update(
                    label for label in tweet["labels"]
                    if not (label in self.labels_sent or label in self.labels_relevance))
        self.labels = list(labels)
        assert "Irrelevant" not in self.labels, "Something went wrong"

        # Fresh, untrained models.
        self.lcc = ClassifierChain(SGDClassifier(max_iter=100, loss='log', random_state=1))
        self.clrel = KNNClassifier()
        self.clsent = KNNClassifier()

        X, y, ys, yr = [], [], [], []
        for tweet in labeled_tweets:
            if "labels" in tweet and len(tweet["labels"]) > 0:
                X.append(tweet["tweet"])
                y.append(self._labels2array(tweet["labels"]))
                # Sentiment target: the single active sentiment label, or
                # NEUTRAL when zero or several are set.
                active_sent = [label for label, v in tweet["labels"].items()
                               if label in self.labels_sent and v]
                if len(active_sent) == 1:
                    ys.append(self.labels_sent[active_sent[0]])
                else:
                    ys.append(self.labels_sent["NEUTRAL"])
                # Relevance target: 1 iff the "Irrelevant" flag is set truthy.
                if self.labels_relevance[0] in tweet["labels"] and tweet["labels"][self.labels_relevance[0]]:
                    yr.append(1)
                else:
                    yr.append(0)

        X = np.array(self.tokenizer.transform(X).todense())
        y = np.array(y)
        ys = np.array(ys)
        yr = np.array(yr)
        self.clsent.fit(X, ys)
        print("Trained Sentiment Classifier")
        self.clrel.fit(X, yr)
        print("Trained Relevance Classifier")
        # Drop samples with no active category label before fitting the chain.
        X2, y2 = [], []
        for Xe, ye in zip(X, y):
            if ye.sum() > 0:
                X2.append(Xe)
                y2.append(ye)
        X = np.array(X2)
        y = np.array(y2)
        self.lcc.fit(X, y)
        print("Trained Categorical Classifier")  # fixed typo: "Catecorical"

    def predict(self, text: str):
        """Predict category, sentiment and relevance labels for ``text``.

        Returns a dict mapping each known label to a bool.
        """
        X = np.array(self.tokenizer.transform([text]).todense()).reshape((1, -1))
        predicted = self.lcc.predict(X)
        labels_add = {label: bool(value) for label, value in zip(self.labels, predicted.flatten())}
        # Decode the sentiment class index into the three sentiment flags.
        sent_pred = self.clsent.predict(X)
        labels_add.update(self.reverse_sent[sent_pred.flatten()[0]])

        assert "POSITIVE" in labels_add, "Klassifikation nicht eindeutig"

        # Relevance: the binary KNN decides the "Irrelevant" flag.
        if self.clrel.predict(X) == np.array([1]):
            labels_add[self.labels_relevance[0]] = True
        else:
            labels_add[self.labels_relevance[0]] = False
        return labels_add

    def train_item(self, tweet):
        """Incrementally train all models on one labeled tweet.

        Returns False when the tweet carries an unknown category label
        (the caller should ``retrain``), True otherwise.
        """
        text = tweet["tweet"]
        labeldict = tweet["labels"]
        for label in labeldict:
            if label not in self.labels and label not in self.labels_relevance and label not in self.labels_sent:
                print("RETRAIN!")
                return False
        y = self._labels2array(labeldict).reshape((1, -1))
        X = np.array(self.tokenizer.transform([text]).todense()).reshape((1, -1))

        # Sentiment target: single active label or NEUTRAL fallback.
        active_sent = [label for label, v in labeldict.items() if label in self.labels_sent and v]
        if len(active_sent) == 1:
            ys = self.labels_sent[active_sent[0]]
        else:
            ys = self.labels_sent["NEUTRAL"]
        ys = np.array([ys])
        if self.labels_relevance[0] in labeldict and labeldict[self.labels_relevance[0]]:
            yr = np.array([1])
        else:
            yr = np.array([0])
        # Only update a model when its target carries signal, matching the
        # original behavior (e.g. NEUTRAL sentiment == index 0 is skipped...
        # wait: NEUTRAL is index 1, POSITIVE index 0 is skipped by ys.sum()).
        if y.sum() > 0:
            self.lcc.partial_fit(X, y)
        if yr.sum() > 0:
            self.clrel.partial_fit(X, yr)
        if ys.sum() > 0:
            self.clsent.partial_fit(X, ys)
        return True
示例#12
0
def flow_detection_classifier(classifier, stream):
    """Run a prequential evaluation of ``classifier`` on ``stream``.

    Returns the evaluator so callers can inspect its state afterwards.
    """
    prequential = EvaluatePrequential(show_plot=True, pretrain_size=2000, max_samples=50000)
    prequential.evaluate(stream=stream, model=classifier)
    return prequential


def make_stream(path, classifier):
    """Evaluate ``classifier`` on the CSV at ``path`` and return the evaluator's label array."""
    file_stream = FileStream(path)
    finished_evaluator = flow_detection_classifier(classifier, file_stream)
    return finished_evaluator.stream.y


# Streams based on classifiers: run one prequential evaluation per
# configuration below and collect the resulting label streams.
# NOTE(review): PATH is presumably defined earlier in the file — not visible here.
# KNNClassifier
make_stream(PATH, KNNClassifier(n_neighbors=5, max_window_size=1000, leaf_size=30))
make_stream(PATH, KNNClassifier(n_neighbors=8, max_window_size=2000, leaf_size=40))
#
# # HoeffdingTreeClassifier
make_stream(PATH, HoeffdingTreeClassifier(memory_estimate_period=1000000, grace_period=200, leaf_prediction='nba'))
make_stream(PATH, HoeffdingTreeClassifier(memory_estimate_period=2000000, grace_period=300, leaf_prediction='mc'))
#
# # AdditiveExpertEnsembleClassifier
make_stream(PATH, AdditiveExpertEnsembleClassifier(n_estimators=5, beta=0.8, gamma=0.1, pruning='weakest'))
make_stream(PATH, AdditiveExpertEnsembleClassifier(n_estimators=8, beta=0.9, gamma=0.3, pruning='oldest'))
#
# # VeryFastDecisionRulesClassifier
make_stream(PATH, VeryFastDecisionRulesClassifier(grace_period=200, tie_threshold=0.05, max_rules=20))
make_stream(PATH, VeryFastDecisionRulesClassifier(grace_period=300, tie_threshold=0.1, max_rules=30))
#
# # AdaptiveRandomForestClassifier
def test_knn():
    """Prequential test of KNNClassifier on a seeded SEA stream.

    Checks the streaming prediction sequence, the ``get_info()`` string
    (before and after reset), and batch fit/predict behavior against fixed
    expected values.
    """
    stream = SEAGenerator(random_state=1)

    learner = KNNClassifier(n_neighbors=8, max_window_size=2000, leaf_size=40)
    cnt = 0
    max_samples = 5000
    predictions = array('i')
    correct_predictions = 0
    wait_samples = 100
    X_batch = []
    y_batch = []

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])
    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(predictions == expected_predictions)

    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    expected_info = "KNNClassifier(leaf_size=40, max_window_size=2000, " \
                    "metric='euclidean', n_neighbors=8)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    # reset() must not change the hyperparameter description.
    learner.reset()
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    # Batch API: fit on the first 4500 samples, predict on a held-out slice.
    X_batch = np.array(X_batch)
    y_batch = np.array(y_batch)
    learner.fit(X_batch[:4500], y_batch[:4500], classes=[0, 1])
    predictions = learner.predict(X_batch[4501:4550])

    expected_predictions = array('i', [
        1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,
        1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        0
    ])
    assert np.all(predictions == expected_predictions)

    correct_predictions = sum(predictions == y_batch[4501:4550])
    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
示例#14
0
# One ConceptDriftStream per seed; the drift is centered DRIFT_BORDER + 2000
# samples in with width DRIFT_WIDTH.
# NOTE(review): DRIFT_WIDTH, DRIFT_BORDER and randomStates are presumably
# defined earlier in the file — not visible here.
driftStreams = [
    ConceptDriftStream(width=DRIFT_WIDTH,
                       position=DRIFT_BORDER + 2000,
                       random_state=i) for i in randomStates
]

## EXPERIMENT 1
adwin_param = [0.002, 0.005, 0.01]
ddm_param = [3, 5, 7]
ks_param1 = [100, 150, 200]
ks_param2 = [30, 50, 100]
ph_param1 = [25, 50, 75]
ph_param2 = [0.005, 0.01, 0.02]

knn = KNNClassifier()

stream = driftStreams[0]

for i in range(0, 3):
    trainX, trainY = stream.next_sample(2000)
    knn.partial_fit(trainX, trainY)

    adwin = ADWIN(delta=adwin_param[i])
    ddm = DDM(out_control_level=ddm_param[i])
    kswin1 = KSWIN(window_size=ks_param1[i])
    # kswin2 = KSWIN(stat_size=ks_param2[i])
    ph1 = PageHinkley(threshold=ph_param1[i])
    ph2 = PageHinkley(delta=ph_param2[i])

    adwin_results = []
    metrics=['mean_square_error', 'mean_absolute_error'])

eval1.evaluate(stream=dstream, model=model_hatr)

#############################################################################################

# Applying KNN Classifier on the synthetic data stream
from skmultiflow.lazy import KNNClassifier
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.data.file_stream import FileStream
import pandas as pd
import numpy as np

# Build a stream from a local CSV and evaluate a KNN classifier on it.
dstream = FileStream('data_stream.csv')
# NOTE(review): prepare_for_use() was deprecated in scikit-multiflow 0.5.0
# and later removed; this call should be dropped on newer versions — confirm
# which library version this script targets.
dstream.prepare_for_use()
knn_class = KNNClassifier(n_neighbors=10, max_window_size=1000)

# Prequential Evaluation
evaluate1 = EvaluatePrequential(show_plot=False,
                                pretrain_size=1000,
                                max_samples=10000,
                                metrics=['accuracy'])
# Run the evaluation
evaluate1.evaluate(stream=dstream, model=knn_class)

###################################################

# Applying KNN ADWIN Classifier on the synthetic data stream
from skmultiflow.lazy import KNNADWINClassifier
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.data.sea_generator import SEAGenerator
示例#16
0
def KNN(n_neighbors=5, max_window_size=1000, leaf_size=30):     # pragma: no cover
    """Deprecated alias for :class:`KNNClassifier`.

    Emits a ``FutureWarning`` and forwards all arguments unchanged.
    """
    warnings.warn("'KNN' has been renamed to 'KNNClassifier' in v0.5.0.\n"
                  "The old name will be removed in v0.7.0", category=FutureWarning)
    return KNNClassifier(n_neighbors=n_neighbors,
                         max_window_size=max_window_size,
                         leaf_size=leaf_size)
示例#17
0
# Pre-fetch of samples left disabled in the original script:
#X, y = stream.next_sample(5000)

# NOTE(review): `stream` and `MyKNNClassifier` are presumably defined earlier
# in the file — not visible here.
metrics = ['accuracy', 'kappa', 'kappa_m', 'kappa_t', 'running_time', 'model_size']
evaluator = EvaluatePrequential(max_samples = 30000, n_wait = 100, show_plot = True, metrics = metrics)

# Evaluate the custom KNN variant on its own first.
my_knn = MyKNNClassifier(standardize = True, weighted_vote = False)
evaluator.evaluate(stream = stream, model = [my_knn], model_names = ['My_KNN'])
cm = evaluator.get_mean_measurements(0).confusion_matrix
print("Recall per class")
# Per-class recall = diagonal entry / column sum; guard against empty columns.
for i in range(cm.n_classes):
    recall = cm.data[(i,i)]/cm.sum_col[i] \
    if cm.sum_col[i] != 0 else 'Ill-defined'
    print("Class {}: {}".format(i, recall))

#All the methods that we need to test
knn = KNNClassifier()
ht = HoeffdingTreeClassifier(leaf_prediction = 'mc')
htnb = HoeffdingTreeClassifier(leaf_prediction = 'nb')
nb = NaiveBayes()
hoef = HoeffdingTreeClassifier()

#Evaluating all methods together
evaluator.evaluate(stream = stream, model = [knn, ht, htnb, nb, hoef], model_names = ['KNN', 'HTMC', 'HTNB', 'NB', 'HT'])
cm = evaluator.get_mean_measurements(0).confusion_matrix

# Recall report for model index 0 only (same pattern as above).
print("Recall per class")
for i in range(cm.n_classes):
    recall = cm.data[(i,i)]/cm.sum_col[i] \
    if cm.sum_col[i] != 0 else 'Ill-defined'
    print("Class {}: {}".format(i, recall))