Пример #1
0
 def documents_with_features(self):
     """Build the per-document feature counts needed for TF-IDF.

     Delegates to ``ParallelData.process``, giving it an ``item_func`` that
     wraps ``self.model.tags_model__extract_features(item)`` in a
     ``Counter``. Per the original author's note, most of the running time
     is spent in ``posseg.cut``.

     Returns:
         Whatever ``ParallelData.process`` returns for these arguments.
     """
     # The parenthesised single-argument form prints the same text under the
     # Python 2 print statement and is also valid Python 3 syntax.
     print("\ntfidf需要的文档列表")
     return ParallelData.process(
         self.model,
         'dict',
         # Passed positionally; presumably the cache filename -- confirm
         # against the signature of ParallelData.process.
         self.model.pickle_path('documents_with_features'),
         item_func=lambda item1: Counter(
             self.model.tags_model__extract_features(item1)),
     )
Пример #2
0
 def documents_with_segments(self):
     """Build per-document token counts from plain word segmentation.

     Each item is turned into a ``Counter`` over
     ``jieba_parse(item.item_content)``; the work is delegated to
     ``ParallelData.process``, with the cache file located through
     ``self.model.pickle_path('documents_with_segments')``.

     Returns:
         Whatever ``ParallelData.process`` returns for these arguments.
     """
     model = self.model
     cache_path = model.pickle_path('documents_with_segments')
     return ParallelData.process(
         model, 'dict',
         cache_filename=cache_path,
         item_func=lambda doc: Counter(jieba_parse(doc.item_content)))
Пример #3
0
 def documents_with_features(self):
     """Return the document feature counts that the TF-IDF step needs.

     The per-item work is ``Counter(self.model.tags_model__extract_features(
     item))``, executed through ``ParallelData.process``. The original
     author notes that nearly all of the time is spent in ``posseg.cut``.

     Returns:
         Whatever ``ParallelData.process`` returns for these arguments.
     """
     # print(...) with one argument produces the same output as the Python 2
     # print statement, and unlike the statement it also parses on Python 3.
     print("\ntfidf需要的文档列表")
     return ParallelData.process(
         self.model,
         'dict',
         # Positional third argument; presumably the cache filename --
         # TODO confirm against ParallelData.process.
         self.model.pickle_path('documents_with_features'),
         item_func=lambda item1: Counter(
             self.model.tags_model__extract_features(item1)),
     )
Пример #4
0
 def documents_with_segments(self):
     """Count the segmented tokens of every document (segmentation only).

     Calls ``ParallelData.process`` with an ``item_func`` that runs
     ``jieba_parse`` on the item's ``item_content`` and wraps the result in
     a ``Counter``. The ``cache_filename`` argument is obtained from
     ``self.model.pickle_path('documents_with_segments')``.

     Returns:
         Whatever ``ParallelData.process`` returns for these arguments.
     """
     source = self.model
     pickle_file = source.pickle_path('documents_with_segments')
     return ParallelData.process(
         source,
         'dict',
         cache_filename=pickle_file,
         item_func=lambda entry: Counter(jieba_parse(entry.item_content)),
     )