Пример #1
0
    def concept_drift_detection(self, X, Y):
        """Feed one batch to the drift detectors and report whether any fired.

        While ``self.init_drift_detection`` is set, the detector list
        ``self.cdd`` is (re)built: one detector per column of ``X`` for the
        "KS" and "ADWIN" settings, one per entry of ``self.classes_`` for
        "DIST". The flag is cleared on every call.

        For "DIST", one value per class is computed with
        ``self.calcDistances`` from that class' prototypes and the rows of
        ``X`` labelled with that class, and pushed to the matching detector.
        For every other setting each value of each column of ``X`` is pushed
        to that column's detector.

        Returns ``self.drift_detected``, which is True when at least one
        detector reported a change during this call.
        """
        if self.init_drift_detection:
            if self.drift_detector == "KS":
                self.cdd = [KSWIN(alpha=self.confidence, w_size=self.window_size) for _ in X.T]
            elif self.drift_detector == "ADWIN":
                self.cdd = [ADWIN(delta=self.confidence) for _ in X.T]
            elif self.drift_detector == "DIST":
                self.cdd = [KSWIN(self.confidence, w_size=self.window_size) for _ in self.classes_]
        self.init_drift_detection = False
        self.drift_detected = False

        if self.drift_detector != "DIST":
            # Column-wise mode: stream every value through its column's detector.
            for column, detector in zip(X.T, self.cdd):
                for value in column:
                    detector.add_element(value)
                    if detector.detected_change():
                        self.drift_detected = True
            return self.drift_detected

        try:
            prototypes_per_class = [self.w_[self.c_w_ == label] for label in self.classes_]
            distance_per_class = {
                label: self.calcDistances(prototypes, X[Y == label])
                for label, prototypes in zip(self.classes_, prototypes_per_class)
            }
            for distance, detector in zip(distance_per_class.values(), self.cdd):
                detector.add_element(distance)
                if detector.detected_change():
                    self.drift_detected = True
        except Exception:
            # Best effort: a batch that lacks some label makes the distance
            # computation fail (e.g. ValueError on a zero-size reduction).
            print("Warning: Current Batch does not contain all labels!")

        return self.drift_detected
Пример #2
0
    def test_kswin(self):
        """Stream SEA data through KSWIN and expect at least ten detections.

        For each of 10000 iterations the value at index ``[0][0][0]`` of
        ``stream.next_sample(10)`` is fed to the detector. On every detection
        the iteration number and the mean of the values seen since the
        previous detection are printed.
        """
        detector = KSWIN(alpha=0.001)
        stream = SEAGenerator(
            classification_function=2,
            random_state=112,
            balance_classes=False,
            noise_percentage=0.28,
        )
        stream.prepare_for_use()
        stream.restart()

        detections = []
        values_since_detection = []

        print("\n--------------------\n")
        for step in range(10000):
            value = stream.next_sample(10)[0][0][0]
            values_since_detection.append(value)
            detector.add_element(value)
            if not detector.detected_change():
                continue

            print("\rIteration {}".format(step))
            print("\r KSWINReject Null Hyptheses")
            print(np.mean(values_since_detection))
            values_since_detection = []
            detections.append(step)

        n_detections = len(detections)
        print("----- Number of detections: " + str(n_detections) + " -----")
        self.assertGreaterEqual(n_detections, 10)
Пример #3
0
    def concept_drift_detection(self, X, Y):
        """Push a batch through the drift detector(s) and report drift.

        On the initialising call ``self.cdd`` is created according to
        ``self.drift_detector``: a list with one KSWIN / ADWIN / DDM / EDDM
        detector per column of ``X``, or a single KSVEC detector sized by
        ``X.shape[1]``.

        With "KSVEC" the whole batch is handed to the detector at once.
        Otherwise each value of each column goes to that column's detector,
        and ``self.n_detections`` is incremented for every reported change.

        Returns ``self.drift_detected`` (True if any change was reported).
        """
        if self.init_drift_detection:
            if self.drift_detector == "KSWIN":
                self.cdd = [
                    KSWIN(w_size=100, stat_size=30, alpha=self.confidence)
                    for _ in X.T
                ]
            elif self.drift_detector == "ADWIN":
                self.cdd = [ADWIN() for _ in X.T]
            elif self.drift_detector == "DDM":
                self.cdd = [DDM() for _ in X.T]
            elif self.drift_detector == "EDDM":
                self.cdd = [EDDM() for _ in X.T]
            elif self.drift_detector == "KSVEC":
                self.cdd = KSVEC(vec_size=X.shape[1])
            self.init_drift_detection = False
        self.drift_detected = False

        if self.drift_detector == "KSVEC":
            # Vector detector: consumes the batch as a whole.
            self.cdd.add_element(X)
            if self.cdd.detected_change():
                self.drift_detected = True
            return self.drift_detected

        for column, detector in zip(X.T, self.cdd):
            for value in column:
                detector.add_element(value)
                if detector.detected_change():
                    self.drift_detected = True
                    self.n_detections += 1

        return self.drift_detected
Пример #4
0
    def concept_drift_detection(self, X, Y):
        """Run the per-column drift detectors over a batch.

        On the initialising call one detector of the kind named by
        ``self.drift_detector`` ("KSWIN", "ADWIN", "DDM" or "EDDM") is
        created per column of ``X`` and stored in ``self.cdd``.

        Every value of every column is then fed to that column's detector;
        each reported change sets ``self.drift_detected`` and increments
        ``self.n_detections``.

        Returns ``self.drift_detected``.
        """
        if self.init_drift_detection:
            # Factories are wrapped in lambdas so a detector class is only
            # looked up when its name is actually selected.
            factories = {
                "KSWIN": lambda: KSWIN(w_size=100, stat_size=30, alpha=self.confidence),
                "ADWIN": lambda: ADWIN(),
                "DDM": lambda: DDM(),
                "EDDM": lambda: EDDM(),
            }
            build = factories.get(self.drift_detector)
            if build is not None:
                self.cdd = [build() for _ in X.T]
            self.init_drift_detection = False
        self.drift_detected = False

        for column, detector in zip(X.T, self.cdd):
            for value in column:
                detector.add_element(value)
                if not detector.detected_change():
                    continue
                self.drift_detected = True
                self.n_detections += 1

        return self.drift_detected


# if name=="__main__":
#     from skmultiflow import
Пример #5
0
    def concept_drift_detection(self, X, Y):
        """Check a batch for drift with one detector per column of ``X``.

        While ``self.init_drift_detection`` is set, ``self.cdd`` is built
        according to ``self.drift_handling``: KSWIN detectors for 'KS',
        ADWIN detectors for 'ADWIN', both parameterised with
        ``self.confidence``. The flag is cleared on every call.

        Returns ``self.drift_detected``, True when any detector reported a
        change for any value of the batch.
        """
        if self.init_drift_detection:
            if self.drift_handling == 'KS':
                self.cdd = [KSWIN(alpha=self.confidence) for _ in X.T]
            elif self.drift_handling == 'ADWIN':
                self.cdd = [ADWIN(delta=self.confidence) for _ in X.T]
        self.init_drift_detection = False
        self.drift_detected = False

        # Flatten (column, detector) pairs into a single stream of
        # (detector, value) updates, visited in column-major order.
        updates = (
            (detector, value)
            for column, detector in zip(X.T, self.cdd)
            for value in column
        )
        for detector, value in updates:
            detector.add_element(value)
            if detector.detected_change():
                self.drift_detected = True

        return self.drift_detected
Пример #6
0
 def test_data(self):
     """A KSWIN built with ``data="st"`` must expose ``window`` as a list."""
     self.assertIsInstance(KSWIN(data="st").window, list)
Пример #7
0
 def test_alpha(self):
     """KSWIN raises ValueError for alpha -0.1 and 1.1 and accepts 0.5."""
     for invalid_alpha in (-0.1, 1.1):
         with self.assertRaises(ValueError):
             KSWIN(alpha=invalid_alpha)
     # A valid alpha must construct without raising.
     KSWIN(alpha=0.5)
Пример #8
0
drift_detected_high = []
drift_detected_low = []

stream = ReoccuringDriftStream(
    SEAGenerator(classification_function=0),
    SEAGenerator(classification_function=2),
    position=2000,
    width=1000,
    pause=1000,
)

stream.prepare_for_use()

# Draw the first sample so that stream.current_sample_x is populated.
stream.next_sample()

# Init KSWIN for every dimension: 10000 detectors for the high-dimensional
# input and 1000 for the low-dimensional one (presumably the projected
# data, matching n_components below).
kswin_high = [
    KSWIN(alpha=1e-5, w_size=300, stat_size=30, data=None)
    for _ in range(10000)
]
kswin_low = [
    KSWIN(alpha=1e-5, w_size=300, stat_size=30, data=None)
    for _ in range(1000)
]

# Calc amount of random features we need to pad a sample to 10000 values.
n_rand_dims = 10000 - stream.current_sample_x.size

sparse_transformer_li = SparseRandomProjection(
    n_components=1000,
    density='auto',
)

# Pad the current sample with random 0/1 features, then shape it as one row.
current_sample_x = np.append(
    stream.current_sample_x,
    np.random.randint(2, size=n_rand_dims),
)
current_sample_x = current_sample_x.reshape(1, stream.n_features + n_rand_dims)
# Create projection matrix
Пример #9
0
def _append_to_drift_log(text, path='drifts.txt'):
    """Append ``text`` to the drift log at ``path``.

    The ``with`` block closes the file handle even if the write raises.
    """
    with open(path, 'a+') as log_file:
        log_file.write(text)


def _tile_features(sample_x, copies):
    """Concatenate ``copies`` repetitions of ``sample_x`` along axis 1.

    Starts from an empty ``(1, 0)`` array, so ``sample_x`` has to be 2-D
    with a single row. With ``copies == 0`` the empty array is returned.
    """
    tiled = np.array([[]])
    for _ in range(copies):
        tiled = np.concatenate((tiled, sample_x), axis=1)
    return tiled


def _timed(func, *args):
    """Call ``func(*args)`` and return ``(result, elapsed_seconds)``."""
    started = time.time()
    result = func(*args)
    return result, time.time() - started


def _drifts_from_labelled(detector, data, labels):
    """Return the sample indices at which a label-aware detector fired.

    The detector is fed via ``add_element(value=..., label=...)`` and is
    considered to have fired only when ``change_detected is True``.
    """
    drifts = []
    for index, (value, label) in enumerate(zip(data, labels)):
        detector.add_element(value=value, label=label)
        if detector.change_detected is True:
            drifts.append(index)
    return drifts


def _drifts_from_features(detectors, data):
    """Return the sample indices at which any per-feature detector fired.

    Feature ``j`` of every sample goes to ``detectors[j]``. All features are
    always fed (no short-circuit) so every detector sees the full stream.
    """
    drifts = []
    for index, sample in enumerate(data):
        fired = False
        for j, value in enumerate(sample):
            detectors[j].add_element(value)
            if detectors[j].detected_change():
                fired = True
        if fired:
            drifts.append(index)
    return drifts


def _drifts_from_errors(detector, predictions):
    """Return the sample indices at which an error-stream detector fired."""
    drifts = []
    for index, outcome in enumerate(predictions):
        detector.add_element(outcome)
        if detector.detected_change():
            drifts.append(index)
    return drifts


def _score_and_report(drifts, elapsed, n_samples, totals,
                      log_label, print_label, log_sep=' at '):
    """Score one detector run, add it to ``totals`` and report it.

    ``confusion_matrix_stats(drifts, n_samples)`` supplies the F1 score and
    the tp/fp/tn/fn counts, which are added to the ``totals`` dict in place.
    The result is printed and one line is appended to the drift log;
    ``log_sep`` is the text between the elapsed time and the drift list.
    """
    f1, tp, fp, tn, fn = confusion_matrix_stats(drifts, n_samples)
    totals['tp'] += tp
    totals['tn'] += tn
    totals['fp'] += fp
    totals['fn'] += fn

    print(f'F1-Score: {f1}')
    print(f'{tp} true positives, {fp} false positives')
    print(f'{tn} true negatives, {fn} false negatives')

    _append_to_drift_log(
        f'{log_label} detected {len(drifts)} drifts in {elapsed}{log_sep}{drifts}\n\n')
    print(f'{print_label} took {elapsed} sec and detected {len(drifts)} drifts\n')


def main():
    """Benchmark six drift detectors on every stream in ``streams``.

    For each stream, ``RANGE`` samples are collected. Each sample's features
    are repeated ``multiply`` times along the feature axis, and a NaiveBayes
    model trained incrementally yields a 0/1 outcome per sample (0 when its
    prediction equals the label, 1 otherwise).

    Kernel-SWMEBWIN and SWMEBWIN consume (sample, label) pairs, ADWIN and
    KSWIN run one detector per feature, and EDDM and DDM consume the 0/1
    outcomes. Each run is timed, scored with ``confusion_matrix_stats``,
    printed and appended to ``drifts.txt``. Finally the confusion counts
    summed over all streams are printed per detector together with
    ``calc_f1`` of those sums.
    """
    RANGE = 1000000
    DIM = 50

    # Listed in the order used for the final summary.
    summary_order = ('K-SWMEBWIN', 'SWMEBWIN', 'KSWIN', 'ADWIN', 'DDM', 'EDDM')
    overall = {name: {'tp': 0, 'tn': 0, 'fp': 0, 'fn': 0} for name in summary_order}

    for stream in streams:
        print(stream.name)
        _append_to_drift_log(f'**{stream.name}**\n\n')

        stream.prepare_for_use()
        stream.next_sample()

        ddm = DDM(min_num_instances=30)
        eddm = EDDM()
        swmebwin = SWMEBWIN(classes=stream.target_values, w_size=80, epsilon=0.05)
        # NOTE: gamma maybe 1.0 / stream.current_sample_x.shape[1]
        k_swmebwin = Kernel_SWMEBWIN(classes=stream.target_values, w_size=80, epsilon=0.05)

        n_rand_dims = DIM - stream.current_sample_x.size
        multiply = n_rand_dims // stream.current_sample_x.size

        # Pre-train on the first sample, tiled exactly like every later
        # sample so the model sees a consistent feature width from the start.
        bayes = NaiveBayes()
        bayes.partial_fit(_tile_features(stream.current_sample_x, multiply),
                          list(stream.current_sample_y.ravel()))

        adwin = []
        kswin = []
        for _ in range(DIM):
            adwin.append(ADWIN(delta=0.002))
            kswin.append(KSWIN(w_size=300, stat_size=30, alpha=0.0001))

        # Collect the tiled samples, labels and prequential 0/1 outcomes
        # (predict first, then train on the same sample).
        data = []
        labels = []
        predictions = []
        for _ in range(RANGE):
            current_sample_x = _tile_features(stream.current_sample_x, multiply)
            label = stream.current_sample_y.ravel()[0]
            data.append(current_sample_x.ravel())
            labels.append(label)
            predictions.append(0 if bayes.predict(current_sample_x) == label else 1)
            bayes.partial_fit(current_sample_x, list(stream.current_sample_y.ravel()))
            stream.next_sample()

        # Kernel SWMEBWIN
        drifts, elapsed = _timed(_drifts_from_labelled, k_swmebwin, data, labels)
        _score_and_report(drifts, elapsed, RANGE, overall['K-SWMEBWIN'],
                          'K-SWMEB', 'K-SW-MEBWIN', log_sep=' ')

        # SWMEBWIN
        drifts, elapsed = _timed(_drifts_from_labelled, swmebwin, data, labels)
        _score_and_report(drifts, elapsed, RANGE, overall['SWMEBWIN'],
                          'SWMEB', 'SW-MEBWIN', log_sep=' ')

        # ADWIN
        drifts, elapsed = _timed(_drifts_from_features, adwin, data)
        _score_and_report(drifts, elapsed, RANGE, overall['ADWIN'], 'ADWIN', 'ADWIN')

        # KSWIN
        drifts, elapsed = _timed(_drifts_from_features, kswin, data)
        _score_and_report(drifts, elapsed, RANGE, overall['KSWIN'], 'KSWIN', 'KSWIN')

        # EDDM
        drifts, elapsed = _timed(_drifts_from_errors, eddm, predictions)
        _score_and_report(drifts, elapsed, RANGE, overall['EDDM'], 'EDDM', 'EDDM')

        # DDM
        drifts, elapsed = _timed(_drifts_from_errors, ddm, predictions)
        _score_and_report(drifts, elapsed, RANGE, overall['DDM'], 'DDM', 'DDM')

    # OVERALL STATISTICS
    for name in summary_order:
        counts = overall[name]
        tp, fp, tn, fn = counts['tp'], counts['fp'], counts['tn'], counts['fn']
        print(50 * '-')
        print(f'{name}\n')
        print(f'Overall F1: {calc_f1(tp, fp, tn, fn)}')
        print(f'{tp} true positives, {fp} false positives')
        print(f'{tn} true negatives, {fn} false negatives')
        print(50 * '-')