def key_word_test():
    """Run keyword extraction and the three summarizers on a sample file.

    Reads ``key_test.txt`` as UTF-8, passes its contents to
    ``extract_keywords`` and then prints the results of ``summarize1``,
    ``summarize2`` and ``summarize3``, in that order.  The extracted
    keywords themselves are not printed.
    """
    sample_path = 'key_test.txt'
    with codecs.open(sample_path, 'r', 'utf-8') as sample:
        content = sample.read()

    # The keyword list is computed but deliberately left unprinted; the call
    # is kept so that extraction is still exercised on the sample text.
    extracted = extract_keywords(content)

    print(summarize1(content))
    print(summarize2(content))
    print(summarize3(content))
def infosum():
    """Print keywords and summaries for the note file named by ``testnote``.

    The note is read as UTF-8 and cleaned by removing
    ``YYYY-MM-DD HH:MM:SS`` timestamps, bare ``YYYY-MM-DD`` dates, Markdown
    images (``![alt](url)``) and Markdown links (``[text](url)``).  The
    function then prints, in order: the top 15 tags returned by
    ``jieba.analyse.extract_tags``, the results of ``summarize1``,
    ``summarize2`` and ``summarize3``, the length of the cleaned text, and
    the length of the ``summarize2`` result.

    Returns:
        None.  All output goes to stdout.

    NOTE(review): this file contains a second, later definition of
    ``infosum``; at import time the later definition is the one that stays
    bound to the name.
    """
    print('loading data')

    # Use a context manager so the handle is closed even if read() raises.
    with codecs.open(testnote, 'r', 'utf-8') as note_file:
        text = note_file.read()

    # Full timestamps must go first; otherwise the date-only pattern would
    # strip the date part and leave a dangling " HH:MM:SS" behind.
    text = re.sub(r'\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d', '', text)
    text = re.sub(r'\d\d\d\d-\d\d-\d\d', '', text)

    # Brackets and parentheses are escaped so they match literally.  Left
    # unescaped, ``[.*]`` is a character class ("." or "*") and the pattern
    # would erase everything from the first period/asterisk to end of line.
    # Images are removed before links so the leading "!" is consumed too.
    text = re.sub(r'!\[[^\]\n]*\]\([^)\n]*\)', '', text)
    text = re.sub(r'\[[^\]\n]*\]\([^)\n]*\)', '', text)

    print('keywords:')
    tags = jieba.analyse.extract_tags(text, topK=15)
    print(','.join(tags))

    print('sum1:')
    print(summarize1(text))

    print('sum2:')
    # Kept in a variable so the length report below reuses this result
    # instead of summarizing the same text a second time.
    second_summary = summarize2(text)
    print(second_summary)

    print('sum3:')
    print(summarize3(text))

    print('length:' + str(len(text)))
    print('sum:' + str(len(second_summary)))
def infosum():
    """Print keywords and summaries for the note file named by ``testnote``.

    The note is read as UTF-8 and cleaned by removing
    ``YYYY-MM-DD HH:MM:SS`` timestamps, bare ``YYYY-MM-DD`` dates, Markdown
    images (``![alt](url)``) and Markdown links (``[text](url)``).  The
    function then prints, in order: the top 15 tags returned by
    ``jieba.analyse.extract_tags``, the results of ``summarize1``,
    ``summarize2`` and ``summarize3``, the length of the cleaned text, and
    the length of the ``summarize2`` result.

    Returns:
        None.  All output goes to stdout.

    NOTE(review): an earlier definition of ``infosum`` also exists in this
    file; this later definition is the one that stays bound to the name
    after import.
    """
    print('loading data')

    # Use a context manager so the handle is closed even if read() raises.
    with codecs.open(testnote, 'r', 'utf-8') as note_file:
        text = note_file.read()

    # Full timestamps must go first; otherwise the date-only pattern would
    # strip the date part and leave a dangling " HH:MM:SS" behind.
    text = re.sub(r'\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d', '', text)
    text = re.sub(r'\d\d\d\d-\d\d-\d\d', '', text)

    # Brackets and parentheses are escaped so they match literally.  Left
    # unescaped, ``[.*]`` is a character class ("." or "*") and the pattern
    # would erase everything from the first period/asterisk to end of line.
    # Images are removed before links so the leading "!" is consumed too.
    text = re.sub(r'!\[[^\]\n]*\]\([^)\n]*\)', '', text)
    text = re.sub(r'\[[^\]\n]*\]\([^)\n]*\)', '', text)

    print('keywords:')
    tags = jieba.analyse.extract_tags(text, topK=15)
    print(','.join(tags))

    print('sum1:')
    print(summarize1(text))

    print('sum2:')
    # Kept in a variable so the length report below reuses this result
    # instead of summarizing the same text a second time.
    second_summary = summarize2(text)
    print(second_summary)

    print('sum3:')
    print(summarize3(text))

    print('length:' + str(len(text)))
    print('sum:' + str(len(second_summary)))