Exemplo n.º 1
0
 def find_threshold(self, train, val, seed=None):
     """Grid-search the classifier threshold on a validation split.

     A linear SVC is fitted on ``train``. Every candidate threshold in
     ``np.arange(0.2, 0.7, 0.05)`` is then assigned to ``self.threshold``,
     the two classifier layers are run on ``val`` and the resulting
     prediction is scored.

     Args:
         train: Training data handed to ``pc.join_list_df``. Each joined
             row is split into features (all items but the last) and a
             label (the last item).
         val: Validation data in the same layout as ``train``.
         seed: Optional seed for the ``random`` module. ``None`` leaves
             the generator untouched.

     Returns:
         The candidate threshold with the highest recorded score (the
         first one on ties).

     Note:
         ``self.threshold`` is left at the last candidate tried, not at
         the returned value.
     """
     # Compare against None so that a seed of 0 is honoured as well.
     if seed is not None:
         random.seed(seed)
     candidates = np.arange(0.2, 0.7, 0.05)

     # Fit the SVC on the training split.
     training_set = pc.join_list_df(train)
     X_train = [row[:-1] for row in training_set]
     y_train = [row[-1] for row in training_set]
     svc = SVC(kernel='linear',
               random_state=SEED,
               probability=True,
               class_weight='balanced' if self.class_weight else None)
     svc.fit(X_train, y_train)

     val_set = pc.join_list_df(val)
     X_val = [row[:-1] for row in val_set]
     y_val = [row[-1] for row in val_set]
     border_amount = len(y_val) * 0.3

     scores = []
     for candidate in candidates:
         # The threshold is set before the layers run; presumably they read it.
         self.threshold = candidate
         first_layer = self.first_layer_classifier(X_val, svc)
         prediction = self.second_layer_classifier(X_val, first_layer, svc)
         # Sum of the confusion-matrix diagonal = number of correct predictions.
         correct = np.sum(confusion_matrix(y_val, prediction).diagonal()) * 1.0
         # NOTE(review): this divides by len(val) (the length of the `val`
         # argument, not of y_val) and compares against 0.3 * len(y_val).
         # Confirm that this is the intended gate.
         if correct / len(val) > border_amount:
             scores.append(accuracy_score(y_val, prediction))
         else:
             scores.append(0.0)
     return candidates[np.argmax(scores)]
Exemplo n.º 2
0
 def train(self,training_set_list):
     """Derive the phase pair (if not already set) and the threshold.

     The joined training data is wrapped in a DataFrame that reuses the
     column labels of the first element of ``training_set_list``. When
     ``self.p1`` is ``<= -1`` the two last entries returned by
     ``get_sorted_phases`` (each plus one) become ``self.p1`` and
     ``self.p2``. ``self.threshold`` is then computed by ``find_threshold``.
     """
     header = training_set_list[0].columns
     merged = pd.DataFrame(pc.join_list_df(training_set_list))
     merged.columns = header
     if self.p1 <= -1:
         phases = get_sorted_phases(merged)
         self.p1, self.p2 = phases[-2] + 1, phases[-1] + 1
     self.threshold = find_threshold(training_set_list, self.p1, self.p2)
Exemplo n.º 3
0
 def find_threshold(self, train, val, seed=None):
     """Grid-search the first-layer threshold on a validation split.

     A linear SVC is fitted on ``train``. Combination coefficients are
     obtained from ``pc.combination_before_after`` on the validation data
     and stored in ``self.coeff``. Every candidate threshold in
     ``np.arange(0.0, 1.0, 0.1)`` is then assigned to ``self.threshold``
     and the merged first/second layer prediction is scored on ``val``.

     Args:
         train: Training data handed to ``pc.join_list_df``. Each joined
             row is split into features (all items but the last) and a
             label (the last item).
         val: Validation data in the same layout as ``train``.
         seed: Optional seed for the ``random`` module. ``None`` leaves
             the generator untouched.

     Returns:
         The candidate threshold with the highest recorded score (the
         first one on ties).

     Note:
         ``self.threshold`` is left at the last candidate tried, not at
         the returned value.
     """
     # Compare against None so that a seed of 0 is honoured as well.
     if seed is not None:
         random.seed(seed)
     candidates = np.arange(0.0, 1.0, 0.1)

     # Fit the SVC on the training split.
     training_set = pc.join_list_df(train)
     X_train = [row[:-1] for row in training_set]
     y_train = [row[-1] for row in training_set]
     class_weight = get_proportion(y_train)
     svc = SVC(kernel='linear',
               random_state=SEED,
               probability=True,
               class_weight=class_weight if self.class_weight else None)
     svc.fit(X_train, y_train)

     val_set = pc.join_list_df(val)
     X_val = [row[:-1] for row in val_set]
     y_val = [row[-1] for row in val_set]
     border_amount = len(y_val) * 0.3
     _, coeff = pc.combination_before_after(X_val, y_val, svc)
     self.coeff = coeff

     # The second-layer call takes no threshold-dependent argument, so it
     # is evaluated once instead of once per candidate.
     second_layer = pc.second_layer_combination_test(X_val, coeff, svc)

     scores = []
     for candidate in candidates:
         # The threshold is set before the first layer runs; presumably it reads it.
         self.threshold = candidate
         first_layer = self.first_layer_classifier(X_val, svc)
         # A first-layer output of -1 is replaced by the second-layer output.
         prediction = [
             fallback if decided == -1 else decided
             for decided, fallback in zip(first_layer, second_layer)
         ]
         # Sum of the confusion-matrix diagonal = number of correct predictions.
         correct = np.sum(confusion_matrix(y_val, prediction).diagonal()) * 1.0
         # NOTE(review): this divides by len(val) (the length of the `val`
         # argument, not of y_val) and compares against 0.3 * len(y_val).
         # Confirm that this is the intended gate.
         if correct / len(val) > border_amount:
             scores.append(accuracy_score(y_val, prediction))
         else:
             scores.append(0.0)
     return candidates[np.argmax(scores)]
Exemplo n.º 4
0
    def train(self, training_set_list):
        """Pick a threshold on a train/validation split, then fit on all data.

        ``pc.split_df_list`` splits ``training_set_list`` (the ``0.2``
        argument is presumably the validation fraction -- confirm against
        the helper). ``find_threshold`` is run on that split and its result
        is stored in ``self.t``. A linear SVC is then fitted on the complete
        ``training_set_list`` and stored in ``self.svm``.

        Args:
            training_set_list: Training data handed to ``pc.split_df_list``
                and ``pc.join_list_df``. Each joined row is split into
                features (all items but the last) and a label (the last item).
        """
        train, val = pc.split_df_list(training_set_list, 0.2, SEED)

        # Find the threshold using the train and validation sets.
        # NOTE(review): the result is stored in self.t, while find_threshold
        # itself assigns self.threshold on every iteration. Confirm which
        # attribute the prediction code reads.
        self.t = self.find_threshold(train, val, SEED)

        # Train the SVM with all the data.
        training_set = pc.join_list_df(training_set_list)
        X_train = [row[:-1] for row in training_set]
        y_train = [row[-1] for row in training_set]
        svc = SVC(kernel='linear',
                  random_state=SEED,
                  probability=True,
                  class_weight='balanced' if self.class_weight else None)
        svc.fit(X_train, y_train)
        self.svm = svc
Exemplo n.º 5
0
 def train(self, training_set_list):
     """Fit a linear SVC on the joined training data and keep it in ``self.svm``.

     Each joined row is split into features (all items but the last) and a
     label (the last item). When ``self.class_weight`` is truthy the result
     of ``get_proportion`` on the labels is passed as ``class_weight``;
     otherwise ``None`` is passed.
     """
     rows = pc.join_list_df(training_set_list)
     features = [row[:-1] for row in rows]
     labels = [row[-1] for row in rows]
     proportions = get_proportion(labels)
     model = SVC(kernel='linear',
                 random_state=SEED,
                 probability=True,
                 class_weight=proportions if self.class_weight else None)
     model.fit(features, labels)
     self.svm = model
Exemplo n.º 6
0
 def train(self,training_set_list):
     """Store the last sorted phase, plus one, in ``self.biggest_phase``.

     The joined training data is wrapped in a DataFrame that reuses the
     column labels of the first element of ``training_set_list`` before it
     is handed to ``get_sorted_phases``.
     """
     header = training_set_list[0].columns
     merged = pd.DataFrame(pc.join_list_df(training_set_list))
     merged.columns = header
     phases = get_sorted_phases(merged)
     self.biggest_phase = phases[-1] + 1