Exemplo n.º 1
0
    def _build_data(self):
        """Load the two CSV datasets, preprocess them and split train/test.

        Reads the files named by ``self.my_config.normal_data_path`` and
        ``self.my_config.characteristic_data_path``, passes their raw values
        through ``data_preprocess``, builds the ``(X, Y)`` pair with
        ``get_full_train_data`` and then calls ``train_test_split`` with
        ``test_size=0.4``.

        Results are stored on the instance as ``self.X``, ``self.X_test``,
        ``self.Y`` and ``self.Y_test`` (in the order returned by
        ``train_test_split``).
        """
        # Load both CSV files and keep only the underlying value arrays.
        normal_values = pd.read_csv(self.my_config.normal_data_path).values
        characteristic_values = pd.read_csv(
            self.my_config.characteristic_data_path
        ).values

        # Preprocess the two arrays together.
        normal_values, characteristic_values = data_preprocess(
            normal_values, characteristic_values
        )

        # Store the full dataset first; X / Y are then overwritten by the split.
        self.X, self.Y = get_full_train_data(normal_values, characteristic_values)
        split_parts = train_test_split(self.X, self.Y, test_size=0.4)
        self.X, self.X_test, self.Y, self.Y_test = split_parts
Exemplo n.º 2
0
    def build_data(self):
        """Load the two CSV datasets, preprocess them and split text/editor data.

        Each configured path (``self.config.normal_data_path`` and
        ``self.config.characteristic_data_path``) is prefixed with ``'.'``
        before being handed to ``pd.read_csv``. The raw values go through
        ``data_preprocess`` and ``get_full_train_data``; the resulting
        ``self.X`` is then passed to ``split_textEditor_data``.

        Sets ``self.X``, ``self.Y``, ``self.X_text`` and ``self.X_editor``.
        """
        # Load both CSV files; the '.' prefix is prepended to the configured path.
        normal_values = pd.read_csv('.' + self.config.normal_data_path).values
        characteristic_values = pd.read_csv(
            '.' + self.config.characteristic_data_path
        ).values

        # Preprocess the two arrays together, then build the full (X, Y) pair.
        normal_values, characteristic_values = data_preprocess(
            normal_values, characteristic_values
        )
        self.X, self.Y = get_full_train_data(normal_values, characteristic_values)

        # NOTE: no train/test split happens here; an earlier train_test_split
        # call (test_size=0.15) was left disabled in this method.

        text_part, editor_part = split_textEditor_data(self.X)
        self.X_text = text_part
        self.X_editor = editor_part
Exemplo n.º 3
0
    def build(self):
        """Load the two CSV datasets, preprocess them and split train/test.

        Reads ``self.config.normal_data_path`` and
        ``self.config.characteristic_data_path``, runs the raw values through
        ``data_preprocess`` and ``get_full_train_data``, then calls
        ``train_test_split`` with ``test_size=0.3``.

        Sets ``self.X``, ``self.test_X_final``, ``self.Y`` and
        ``self.test_Y_final`` (in the order returned by ``train_test_split``)
        and asserts that ``self.X`` and ``self.Y`` have equal length.
        """
        # Load both CSV files and keep only the underlying value arrays.
        normal_values = pd.read_csv(self.config.normal_data_path).values
        characteristic_values = pd.read_csv(
            self.config.characteristic_data_path
        ).values

        # Preprocess the two arrays together.
        normal_values, characteristic_values = data_preprocess(
            normal_values, characteristic_values
        )

        # Store the full dataset first; X / Y are then overwritten by the split.
        self.X, self.Y = get_full_train_data(normal_values, characteristic_values)
        split_parts = train_test_split(self.X, self.Y, test_size=0.3)
        self.X, self.test_X_final, self.Y, self.test_Y_final = split_parts

        # Sanity check: the retained X and Y must stay aligned.
        assert len(self.X) == len(self.Y)
Exemplo n.º 4
0
    def _build_data(self):
        """Load the two CSV datasets, preprocess them and prepare model inputs.

        Reads ``self.config.normal_data_path`` and
        ``self.config.characteristic_data_path`` and runs the raw values
        through ``data_preprocess``. What happens next depends on
        ``self.config.use_cross_validation``:

        * truthy  -- ``self.X`` / ``self.Y`` are set from
          ``get_full_train_data``;
        * falsy   -- ``self.train`` / ``self.test`` are set from
          ``get_train_and_test_data`` and then separated by
          ``split_data_label`` into ``self.x_train``, ``self.y_train``,
          ``self.x_test`` and ``self.y_test``.
        """
        # Load both CSV files and keep only the underlying value arrays.
        normal_values = pd.read_csv(self.config.normal_data_path).values
        characteristic_values = pd.read_csv(
            self.config.characteristic_data_path
        ).values

        # Preprocess the two arrays together.
        normal_values, characteristic_values = data_preprocess(
            normal_values, characteristic_values
        )

        if not self.config.use_cross_validation:
            # Obtain the train and test sets.
            self.train, self.test = get_train_and_test_data(
                normal_values, characteristic_values
            )

            # Separate the labels from each set.
            labelled_parts = split_data_label(self.train, self.test)
            self.x_train, self.y_train, self.x_test, self.y_test = labelled_parts
        else:
            self.X, self.Y = get_full_train_data(
                normal_values, characteristic_values
            )