Example #1
0
def process_by_ml_name(ml):
    from dataset_loader import DataSetLoader
    from sklearn import cross_validation
    print 'start ', ml
    loader = DataSetLoader()
    x, y = loader.loadData()[DataSetLoader.dataset_name[0]]
    score_lst = []    
    for ml in ml:
        print 'start cross val'
        scores = cross_validation.cross_val_score(ml, x, y, cv=5)
        print 'end cross val'
        score_lst.append(scores.mean())
    return score_lst      
Example #2
0
File: cmp.py  Project: chaluemwut/cmpml
 def load_dataset(self):
     """Load every configured dataset via DataSetLoader and return the mapping."""
     # Delegate entirely to the project loader; no caching happens here.
     return DataSetLoader().loadData()
Example #3
0
 def process(self):
     ml_lst = self.gen_ml_lst()
     dataset_lst = self.load_dataset()
     result = {}
         
     ml_value = ml_lst['svm']
     self.log_debug.info('*************************************** ' + self.dataset_name)
     all_data = []
     self.log_debug.info('***** start ' + self.dataset_name)
     if self.dataset_name == 'shuttle':
         d_loader = DataSetLoader()
         data_value = d_loader.svm_shuttle('data/statlog/shuttle.data')['shuttle']
         x_data = data_value[0]
         y_data = data_value[1]
         if is_run_missing:
             print 'before************** ', x_data[0], y_data
             x_data, y_data = self.svm_uni_remove(x_data, y_data)
             print 'after****************', x_data[0], y_data
     elif self.dataset_name == 'segment':
         d_loader = DataSetLoader()
         data_value = d_loader.load_segment()
         x_data = data_value[0]
         y_data = data_value[1]              
     else:
         data_value = dataset_lst[self.dataset_name]
         x_data = data_value[0]
         y_data = data_value[1]
         if is_run_missing:
             print 'before************** ', x_data[0], y_data
             x_data, y_data = self.remove_by_chi2_process(x_data, y_data)
             print 'after****************', x_data[0], y_data
     datasets_data_lst = []
     ml = None 
     for d_size in self.data_size:
         self.log_debug.info('***** start size ' + str(d_size))
         ran_num = random.randint(1, 100)
         x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=d_size, random_state=ran_num)
         print 'x train ', x_train
         self.log_debug.info('********* start cross validation')
         ml = self.cross_validation(ml_value, x_train, y_train)
         self.log_kernel.info('************* kernel : ' + str(ml.kernel) + " | degree : " + str(ml.degree))
         self.log_debug.info('************* end cross validation')
         acc_lst = []
         f1_lst = []
         time_pred = []
         total_ins = []
         precision_lst = []
         recall_lst = []
         for i in range(0, self.loop):
             self.log_debug.info('loop {} size {} data set {} ml {}'.format(i, d_size, self.dataset_name, 'svm'))
             ran_num = random.randint(1, 10000)
             x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=d_size, random_state=ran_num)
             try:
                 ml_c = copy.deepcopy(ml)
                 ml_c.fit(x_train, y_train)
                 start = time.time()
                 y_pred = ml_c.predict(x_test)
             except Exception as e:
                 self.log_error.info(str(e))
             total_time = time.time() - start
             acc = accuracy_score(y_test, y_pred)
             print 'y_test ', y_test
             print 'y_pred ', y_pred
             fsc = f1_score(y_test, y_pred)
             acc_lst.append(acc)
             f1_lst.append(fsc)
             time_pred.append(total_time)
             total_ins.append(len(y_test))
             pre_score = precision_score(y_test, y_pred)
             recall = recall_score(y_test, y_pred)
             precision_lst.append(pre_score)
             recall_lst.append(recall)
             self.log_debug.info('------------- end loop -----')
         datasets_data_lst.append(np.mean(acc_lst))
         datasets_data_lst.append(float("{:.5f}".format(np.mean(f1_lst))))
         datasets_data_lst.append(np.mean(time_pred))
         datasets_data_lst.append(np.mean(total_ins))
         self.log.info('---------------------------------------------') 
         self.log.info('data size ' + str(d_size) + ' data set ' + self.dataset_name) 
         self.log.info(acc_lst)
         self.log.info(f1_lst)
         self.log.info(time_pred)
         self.log.info(total_ins)
         self.log.info('---------------------------------------------')
         self.log_debug.info('*********** end size')
     self.log.info('ml type ' + str(ml.kernel))                
     all_data.append(datasets_data_lst)
     self.log_debug.info('******* end data set')
     self.result[self.dataset_name] = all_data
     self.log_debug.info('************ end ml')
     file_name = '{}_svm_result_{}.obj'.format(self.dataset_name, self.loop)
     pickle.dump(self.result, open(file_name, 'wb'))
     self.report_all(result)
Example #4
0
File: svm.py  Project: chaluemwut/suml
        y_pred = self.predict(X)
        average_score = (accuracy_score(y, y_pred) + f1_score(y, y_pred)) / 2.0
        return average_score
    
    def predict(self, x):
        f_result = open(self.path_test_data, 'w')
        self.__write_data_file(f_result, x, [0] * len(x))
        f_result.close()
        create_predict = libsvm_path + '/svm-predict' + ' {} {} {}'.format(self.path_test_data,
                                                                    self.path_model_result,
                                                                    self.path_result)
        print create_predict
        os.system(create_predict)
        return self.__read_result()

    def get_params(self, deep=True):
        """Return the estimator's parameters (scikit-learn compatibility)."""
        # Only the kernel id is exposed; *deep* is accepted for API parity.
        return dict(kernel=self.kernel)

    def set_params(self, **parameters):
        """Set estimator attributes from keyword arguments.

        Each keyword becomes an attribute on the instance.  Returns
        ``self`` so calls can be chained, matching the scikit-learn
        estimator contract for ``set_params`` (required e.g. by clone()).
        """
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        # BUGFIX: scikit-learn requires set_params to return the estimator;
        # the original implicitly returned None.
        return self
            
if __name__ == '__main__':
    # Smoke test: fit a linear-kernel wrapper on 25% of the 'heart' dataset.
    from dataset_loader import DataSetLoader
    from sklearn.cross_validation import train_test_split
    features, labels = DataSetLoader().loadData()['heart']
    split = train_test_split(features, labels, test_size=0.75, random_state=42)
    x_train, x_test, y_train, y_test = split
    classifier = LibSVMWrapper(kernel=0)
    classifier.fit(x_train, y_train)