Example #1
0
 def data_process(self, sep='\t'):
     """
     Prepare train/valid/test data and build the batch generators.

     Converts any non-CSV input files to CSV first, derives the label
     vocabulary from the training set, and falls back to splitting the
     training data (or an empty list) when valid/test paths are absent.

     :param sep: field separator used when converting raw files to CSV.
     :return: None (results are stored on ``self``).
     """
     def _ensure_csv(path):
         # Convert to CSV only when the path does not already point at one.
         return path if '.csv' in path else data2csv(path, sep)

     self.train_data_path = _ensure_csv(self.train_data_path)
     self.index2label, self.label2index, self.labels, train_data = data_preprocess(
         self.train_data_path)
     self.num_classes = len(self.index2label)

     if self.valid_data_path:
         self.valid_data_path = _ensure_csv(self.valid_data_path)
         _, _, _, valid_data = data_preprocess(self.valid_data_path)
     else:
         # No explicit validation set: carve one out of the training data.
         train_data, valid_data = split(train_data, self.split)

     if self.test_data_path:
         self.test_data_path = _ensure_csv(self.test_data_path)
         _, _, _, test_data = data_preprocess(self.test_data_path)
     else:
         test_data = []

     # Build one generator per split with identical construction arguments.
     for attr, dataset in (('train_generator', train_data),
                           ('valid_generator', valid_data),
                           ('test_generator', test_data)):
         setattr(self, attr, Data_Generator(dataset, self.label2index,
                                            self.tokenizer, self.batch_size,
                                            self.max_len))
Example #2
0
 def data_process(self, sep='\t'):
     """
     Load JSON-formatted train/valid/test data and build the batch generators.

     Derives the label vocabulary from the training set; when no validation
     path is given, splits the training data instead, and when no test path
     is given, uses an empty test set.

     :param sep: field separator (kept for interface compatibility with the
                 CSV-based variant; unused by the JSON loaders).
     :return: None (results are stored on ``self``).
     """
     self.index2label, self.label2index, self.labels, train_data = json_data_process(
         self.train_data_path)
     self.num_classes = len(self.index2label)
     if self.valid_data_path:
         _, _, _, valid_data = json_data_process(self.valid_data_path)
     else:
         # No explicit validation set: carve one out of the training data.
         train_data, valid_data = split(train_data, self.split)
     if self.test_data_path:
         _, _, _, test_data = json_data_process(self.test_data_path)
     else:
         test_data = []
     self.train_generator = datagenerator(train_data, self.label2index,
                                          self.tokenizer, self.batch_size,
                                          self.max_len)
     self.valid_generator = datagenerator(valid_data, self.label2index,
                                          self.tokenizer, self.batch_size,
                                          self.max_len)
     self.test_generator = datagenerator(test_data, self.label2index,
                                         self.tokenizer, self.batch_size,
                                         self.max_len)
Example #3
0
 def data_process(self):
     """
     Prepare train/valid/test data, label maps, and the training generator.

     Falls back to splitting the training data when no validation path is
     given, and to an empty test set when no test path is given.
     ``num_classes`` is ``len(labels) * 2 + 1`` (BIO-style tag count:
     B-/I- per label plus O — presumably; verify against the model head).

     :return: None (results are stored on ``self``).
     """
     # NOTE: ``data_process`` here resolves to the module-level function,
     # not this method (no ``self.`` prefix).
     labels, train_data = data_process(self.train_data_path)
     if self.valid_data_path:
         _, self.valid_data = data_process(self.valid_data_path)
     else:
         # No explicit validation set: carve one out of the training data.
         train_data, self.valid_data = split(train_data, self.split)
     if self.test_data_path:
         _, self.test_data = data_process(self.test_data_path)
     else:
         # Bug fix: previously ``self.test_data`` was left unset when no
         # test path was provided, causing AttributeError on later access.
         self.test_data = []
     self.index2label = dict(enumerate(labels))
     self.label2index = {j: i for i, j in self.index2label.items()}
     self.num_classes = len(labels) * 2 + 1
     self.labels = labels
     self.train_generator = Data_Generator(train_data, self.batch_size,
                                           self.tokenizer, self.label2index,
                                           self.max_len)
     logger.info('data process done')