def documents_with_features(self):
    """Produce per-item feature counts (the printed banner calls this the
    document list needed by tfidf).

    Prints a banner, then delegates to ``ParallelData.process`` with
    ``self.model``, the literal ``'dict'``, and the path returned by
    ``self.model.pickle_path('documents_with_features')``.  Each item is
    converted to a ``Counter`` over the result of
    ``self.model.tags_model__extract_features(item)``.

    Original author's note: nearly all of the running time is spent in
    ``posseg.cut``.

    Returns:
        Whatever ``ParallelData.process`` returns for these arguments.
    """
    def count_features(item1):
        # Tally how often each extracted feature occurs for this item.
        return Counter(self.model.tags_model__extract_features(item1))

    # Banner text is runtime output and is reproduced verbatim.
    print("\ntfidf需要的文档列表")
    cache_path = self.model.pickle_path('documents_with_features')
    return ParallelData.process(
        self.model, 'dict', cache_path, item_func=count_features)
def documents_with_segments(self):
    """Produce per-item token counts from plain word segmentation.

    Delegates to ``ParallelData.process`` with ``self.model``, the literal
    ``'dict'``, and ``cache_filename`` set to the path returned by
    ``self.model.pickle_path('documents_with_segments')``.  Each item is
    converted to a ``Counter`` over ``jieba_parse(item.item_content)``.

    Returns:
        Whatever ``ParallelData.process`` returns for these arguments.
    """
    def count_segments(item1):
        # Tally the tokens jieba_parse yields for this item's content.
        return Counter(jieba_parse(item1.item_content))

    cache_path = self.model.pickle_path('documents_with_segments')
    return ParallelData.process(
        self.model,
        'dict',
        cache_filename=cache_path,
        item_func=count_segments,
    )
def documents_with_features(self):
    """Return feature counts per item; the banner printed below describes
    the result as the document list that tfidf needs.

    The work is handed to ``ParallelData.process``, which receives
    ``self.model``, the literal ``'dict'``, the path from
    ``self.model.pickle_path('documents_with_features')`` and an
    ``item_func`` that wraps
    ``self.model.tags_model__extract_features(item)`` in a ``Counter``.

    Note carried over from the original author: basically all of the time
    is spent in ``posseg.cut``.
    """
    # Runtime banner; the text must stay exactly as it is.
    print("\ntfidf需要的文档列表")

    pickle_file = self.model.pickle_path('documents_with_features')
    positional = (self.model, 'dict', pickle_file)
    return ParallelData.process(
        *positional,
        item_func=lambda item1: Counter(
            self.model.tags_model__extract_features(item1))
    )
def documents_with_segments(self):
    """Return token counts per item using word segmentation only.

    The work is handed to ``ParallelData.process``, which receives
    ``self.model``, the literal ``'dict'``, a ``cache_filename`` taken from
    ``self.model.pickle_path('documents_with_segments')`` and an
    ``item_func`` that wraps ``jieba_parse(item.item_content)`` in a
    ``Counter``.
    """
    options = {
        'cache_filename': self.model.pickle_path('documents_with_segments'),
        # One Counter of segmented tokens per item.
        'item_func': lambda item1: Counter(jieba_parse(item1.item_content)),
    }
    return ParallelData.process(self.model, 'dict', **options)