def test_naive_bayes(test_path):
    """Regression test for ``NaiveBayes`` on a seeded ``SEAGenerator`` stream.

    The learner is trained incrementally on 5000 samples; every 100th sample
    (except the very first) is predicted *before* being learned, and the
    predictions / class probabilities are compared with stored references.
    Afterwards the learner is reset, batch-fitted on the first 4500 samples
    and scored on the tail of the collected data.

    Parameters
    ----------
    test_path : str
        Directory that contains ``data_naive_bayes_proba.npy``.
    """
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    learner = NaiveBayes()

    max_samples = 5000
    wait_samples = 100
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples (prequential: predict first, then train).
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=stream.target_values)

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 1, 0, 0, 1,
        1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
        0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
        1, 1, 0, 1, 0, 0, 1, 1, 1,
    ])
    # Comparing two ``array`` objects already yields a single bool, so no
    # NumPy reduction is needed (``np.alltrue`` no longer exists in NumPy 2).
    assert y_pred == expected_predictions

    test_file = os.path.join(test_path, 'data_naive_bayes_proba.npy')
    y_proba_expected = np.load(test_file)
    assert np.allclose(y_proba, y_proba_expected)

    expected_info = 'NaiveBayes: nominal attributes: [] - '
    assert learner.get_info() == expected_info

    learner.reset()
    learner.fit(X=np.array(X_batch[:4500]), y=np.array(y_batch[:4500]))

    # NOTE: the hold-out slice starts at 4501, so sample 4500 is in neither
    # split. The pinned score below is tied to exactly this slice; keep both
    # in sync if the split is ever changed.
    expected_score = 0.9378757515030061
    actual_score = learner.score(X=np.array(X_batch[4501:]),
                                 y=np.array(y_batch[4501:]))
    assert np.isclose(expected_score, actual_score)

    assert 'estimator' == learner.get_class_type()

    # ``X`` is the last sample drawn from the stream.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
class LDDDSDA(BaseDistributionDetector):
    """Batch drift detector that compares local densities of two windows.

    NOTE(review): judging by the identifiers this appears to implement
    LDD-DSDA (local drift degree + density synchronized drift adaptation,
    Liu et al., 2017) -- confirm against the project documentation.

    Samples are collected into a training window; once a buffer of
    ``batch_size`` new samples is full, :meth:`ldd_dis` compares the two
    windows, a new training window is derived, and a fresh learner is fitted
    on it.

    Parameters
    ----------
    batch_size : int, default 100
        Number of buffered samples that triggers a window comparison; also
        the target size of the training window after a detected change.
    train_size : int, default 100
        Number of samples collected before the learner is fitted for the
        first time.
    rho : float, default 0.1
        Fraction of the merged window used as neighbourhood size.
    alpha : float, default 0.05
        Tail probability used for the decrease / increase thresholds.
    base_learner : object, optional
        Initial learner; must provide ``partial_fit`` and ``predict``.
        When omitted, a new ``NaiveBayes`` is created for this instance.
    """

    def __init__(self, batch_size=100, train_size=100, rho=0.1, alpha=0.05, base_learner=None):
        super().__init__()
        self.w = batch_size
        # A default instance in the signature would be created once and
        # shared by every detector, so the default is built per instance.
        self.l = NaiveBayes() if base_learner is None else base_learner
        self.n = train_size
        self.alpha = alpha
        self.rho = rho
        self.trained = False
        self.d_train_X, self.d_train_y = [], []
        self.d_buffer_X, self.d_buffer_y = [], []
        self.reset()

    def reset(self):
        """Reset the detector by delegating to the base class.

        The collected windows and the learner are left untouched.
        """
        super().reset()

    def add_element(self, X, y):
        """Feed one labelled sample to the detector.

        Parameters
        ----------
        X : array-like
            Feature values of the sample.
        y : array-like or scalar
            Target of the sample.
        """
        # A change flagged by the previous comparison is cleared first.
        if self.in_concept_change:
            self.reset()

        X, y = np.asarray(X), np.asarray(y)

        # Phase 1: gather the initial training window and fit once it is full.
        if not self.trained and len(self.d_train_X) < self.n:
            self.d_train_X.append(X)
            self.d_train_y.append(y)
            if len(self.d_train_X) == self.n:
                self.l.partial_fit(np.asarray(self.d_train_X), np.asarray(self.d_train_y))
                self.trained = True
            return

        # Phase 2: top the training window up to a full batch.
        if len(self.d_train_X) < self.w:
            self.d_train_X.append(X)
            self.d_train_y.append(y)
            return

        # Phase 3: buffer incoming samples until a full batch is available.
        self.d_buffer_X.append(X)
        self.d_buffer_y.append(y)
        if len(self.d_buffer_X) < self.w:
            return

        train_X, train_y = self.ldd_dis(
            np.asarray(self.d_train_X), np.asarray(self.d_train_y),
            np.asarray(self.d_buffer_X), np.asarray(self.d_buffer_y))

        # NOTE(review): the learner is always replaced by a NaiveBayes here,
        # even when a different ``base_learner`` was supplied -- confirm
        # whether that is intended.
        self.l = NaiveBayes()
        self.l.fit(train_X, train_y)

        self.d_train_X = train_X.tolist()
        self.d_train_y = train_y.tolist()
        self.d_buffer_X = []
        self.d_buffer_y = []

    def predict(self, X):
        """Return the current learner's predictions for ``X``."""
        return self.l.predict(X)

    @staticmethod
    def _density_delta(neighbours, own, other):
        """Return ``|neighbours & other| / |neighbours & own| - 1``."""
        return len(neighbours & other) / len(neighbours & own) - 1

    def ldd_dis(self, d1_X, d1_y, d2_X, d2_y):
        """Compare two windows and build the next training window.

        Parameters
        ----------
        d1_X, d1_y : numpy.ndarray
            Samples and targets of the current training window.
        d2_X, d2_y : numpy.ndarray
            Samples and targets of the newly buffered window.

        Returns
        -------
        (numpy.ndarray, numpy.ndarray)
            ``(d1_X, d1_y)`` unchanged when no point of window 1 is labelled
            "decreasing" and no point of window 2 is labelled "increasing";
            otherwise a re-sampled selection from the merged windows, in
            which case ``self.in_concept_change`` is set to ``True``.
        """
        n1 = d1_X.shape[0]
        d = np.append(d1_X, d2_X, axis=0)
        d_y = np.append(d1_y, d2_y, axis=0)
        n_total = d.shape[0]

        # Neighbourhood size, kept inside [1, n_total] so the query is valid
        # for very small windows or extreme ``rho`` values.
        k = min(n_total, max(1, int(n_total * self.rho)))
        kdtree = KDTree(d)
        # One batch query instead of one query per point.
        d_knn = [set(row) for row in kdtree.query(X=d, k=k, return_distance=False)]
        # With duplicated points a tie can push a point out of its own
        # neighbourhood; it is added back so the denominator in
        # ``_density_delta`` is never zero.
        for i, neighbours in enumerate(d_knn):
            neighbours.add(i)

        # Null distribution: the same statistic with window membership
        # assigned at random.
        indexes = np.arange(n_total)
        np.random.shuffle(indexes)
        perm_d1 = set(indexes[:n1])
        perm_d2 = set(indexes[n1:])
        deltas = []
        for pos, idx in enumerate(indexes):
            if pos < n1:
                deltas.append(self._density_delta(d_knn[idx], perm_d1, perm_d2))
            else:
                deltas.append(self._density_delta(d_knn[idx], perm_d2, perm_d1))

        delta_std = np.std(deltas, ddof=1)
        # The decrease threshold is the lower tail and the increase threshold
        # the upper tail. With the two swapped, the "stable" branch below
        # could never be reached.
        theta_dec = stats.norm.ppf(self.alpha, 0, delta_std)
        theta_inc = stats.norm.ppf(1 - self.alpha, 0, delta_std)

        # Label every point using the real window membership.
        real_d1 = set(range(n1))
        real_d2 = set(range(n1, n_total))
        d1_dec, d1_sta, d1_inc = [], [], []
        d2_dec, d2_sta, d2_inc = [], [], []
        for i in range(n_total):
            if i < n1:
                delta = self._density_delta(d_knn[i], real_d1, real_d2)
                dec, sta, inc = d1_dec, d1_sta, d1_inc
            else:
                delta = self._density_delta(d_knn[i], real_d2, real_d1)
                dec, sta, inc = d2_dec, d2_sta, d2_inc

            if delta < theta_dec:
                dec.append(i)
            elif delta > theta_inc:
                inc.append(i)
            else:
                sta.append(i)

        if not d1_dec and not d2_inc:
            return d1_X, d1_y

        self.in_concept_change = True

        # Smallest window-1 / window-2 ratio over the non-empty groups; each
        # window-2 group is extended with that share of its window-1
        # counterpart.
        ratios = []
        if d2_dec:
            ratios.append(len(d1_inc) / len(d2_dec))
        if d2_sta:
            ratios.append(len(d1_sta) / len(d2_sta))
        if d2_inc:
            ratios.append(len(d1_dec) / len(d2_inc))
        ratio = min(ratios)
        d2_dec += d1_inc[:int(ratio * len(d2_dec))]
        d2_sta += d1_sta[:int(ratio * len(d2_sta))]
        d2_inc += d1_dec[:int(ratio * len(d2_inc))]

        # Scale every group towards a total of ``self.w`` samples.
        aux_indexes = d2_inc + d2_sta + d2_dec
        r = self.w / len(aux_indexes)
        d2_dec = d2_dec[:int(len(d2_dec) * r)]
        d2_sta = d2_sta[:int(len(d2_sta) * r)]
        # Slice ``d2_inc`` itself, matching the two groups above.
        d2_inc = d2_inc[:int(len(d2_inc) * r)]
        aux_indexes = d2_inc + d2_sta + d2_dec
        return d[aux_indexes], d_y[aux_indexes]