class Extractor(object): """中文文摘提取器""" def __init__(self, stop_words_file = None): super(Extractor, self).__init__() self.keyword_extraction = KeywordExtraction(stop_words_file=stop_words_file) self.sentence_extraction = SentenceExtraction(stop_words_file=stop_words_file) def keyword_train(self, text, num=10): self.keyword_extraction.train(text=text, window=2, lower=False, speech_tag_filter=True) keyword_res = self.keyword_extraction.get_keywords(num=num, word_min_len=2) keyphrase_res = self.keyword_extraction.get_keyphrases(keywords_num=20, min_occur_num=2) return keyword_res,keyphrase_res def sentence_train(self,text,sentences_percent='10%',sim_func='Standard'): self.sentence_extraction.train(text=text, lower=True, speech_tag_filter=True,source='all_filters',sim_func=sim_func) abstract = self.sentence_extraction.get_key_sentences(sentences_percent=sentences_percent) return abstract
def __init__(self, stop_words_file = None): super(Extractor, self).__init__() self.keyword_extraction = KeywordExtraction(stop_words_file=stop_words_file) self.sentence_extraction = SentenceExtraction(stop_words_file=stop_words_file)