Exemplo n.º 1
0
 def load_data_from_file1(self, bag_path='bag4000.txt'):
     """Load the bag of words from a file and restrict the count table to it.

     Reads one word per line from ``bag_path``, keeps only the words that
     also occur among ``self.df_count.columns`` (the table returned by
     ``pretreatment.count_words_in_label``), and narrows that table to
     exactly those columns.

     The surviving words keep their first-seen order from the file, so
     ``self.bag`` and the columns of ``self.df_count`` share one order that
     is reproducible across runs. Building both from separate set
     iterations, as was done before, gave an arbitrary order that could
     change between interpreter runs.

     :param bag_path: path of the UTF-8 bag-of-words file, one word per
         line. Defaults to ``'bag4000.txt'``.
     :return: None. Sets ``self.bag``, ``self.set_bag``, ``self.df_count``
         and ``self.count``.
     """
     print('Load BOW from file.')
     with open(bag_path, encoding='utf8') as f:
         self.bag = [line.strip() for line in f]
     # Reported size is the raw line count, before filtering/de-duplication.
     print('Get bag. Size:', len(self.bag))
     self.df_count, self.count = pretreatment.count_words_in_label(
         self.sizeofdata)
     # NOTE(review): df_count is presumably a pandas DataFrame whose columns
     # are words -- confirm against pretreatment.count_words_in_label.
     known_columns = set(self.df_count.columns)
     # dict.fromkeys removes duplicate words while preserving file order.
     self.bag = [word for word in dict.fromkeys(self.bag)
                 if word in known_columns]
     self.set_bag = set(self.bag)
     # Index with the ordered list so column order matches self.bag.
     self.df_count = self.df_count[self.bag]
Exemplo n.º 2
0
 def load_data_from_file2(self, bag_path='bag4000.txt'):
     """Load the bag of words from a file and restrict the count table to it.

     Reads one word per line from ``bag_path``, keeps only the words that
     also occur among ``self.df_count.columns`` (the table returned by
     ``pretreatment.count_words_in_label``), and narrows that table to
     exactly those columns.

     The surviving words keep their first-seen order from the file, so
     ``self.bag`` and the columns of ``self.df_count`` share one order that
     is reproducible across runs. Building both from separate set
     iterations, as was done before, gave an arbitrary order that could
     change between interpreter runs.

     :param bag_path: path of the UTF-8 bag-of-words file, one word per
         line. Defaults to ``'bag4000.txt'``.
     :return: None. Sets ``self.bag``, ``self.set_bag``, ``self.df_count``
         and ``self.count``.
     """
     print('加载词袋...')
     with open(bag_path, encoding='utf8') as f:
         self.bag = [line.strip() for line in f]
     # Reported size is the raw line count, before filtering/de-duplication.
     print('词袋大小:', len(self.bag))
     self.df_count, self.count = pretreatment.count_words_in_label(
         self.sizeofdata)
     # NOTE(review): df_count is presumably a pandas DataFrame whose columns
     # are words -- confirm against pretreatment.count_words_in_label.
     known_columns = set(self.df_count.columns)
     # dict.fromkeys removes duplicate words while preserving file order.
     self.bag = [word for word in dict.fromkeys(self.bag)
                 if word in known_columns]
     self.set_bag = set(self.bag)
     # Dimensionality reduction: keep only the per-label occurrence counts
     # of the feature words, in the same order as self.bag.
     self.df_count = self.df_count[self.bag]
Exemplo n.º 3
0
 def load_train_data(self):
     """Populate the training state by running the pretreatment pipeline.

     Calls ``pretreatment.pre_treat`` to obtain the bag of words, the count
     and the idf values, then replaces the count table with the one from
     ``pretreatment.count_words_in_label`` narrowed to the bag's words.

     :return: None. Sets ``self.df_count``, ``self.bag``, ``self.count``,
         ``self.idf`` and ``self.set_bag``.
     """
     print('Load train data...')
     treated = pretreatment.pre_treat(
         count=self.sizeofdata, sizeOfBOW=4000)
     self.df_count, self.bag, self.count, self.idf = treated
     # The table from pre_treat is superseded by the per-label counts; the
     # second value returned by count_words_in_label is not needed here.
     label_counts = pretreatment.count_words_in_label(self.sizeofdata)
     self.df_count, _ = label_counts
     self.set_bag = set(self.bag)
     # Keep only the columns that belong to the bag, in bag order.
     bag_columns = self.bag
     self.df_count = self.df_count[bag_columns]
     bag_size = len(self.bag)
     print('size of bag:', bag_size)
     print('Bag get.')