Пример #1
0
def key_word_test():
    """Read ``key_test.txt`` as UTF-8 and print the three summaries of it.

    Keywords are extracted from the file contents as well, but the result
    is not printed; only the output of ``summarize1``, ``summarize2`` and
    ``summarize3`` is written to stdout, in that order.
    """
    source_path = 'key_test.txt'
    with codecs.open(source_path, 'r', 'utf-8') as handle:
        content = handle.read()

    # The keyword list is currently unused; the call is retained because
    # extract_keywords may have side effects not visible from here.
    keywords = extract_keywords(content)

    # Run each summarizer in turn and print its result immediately.
    for summarizer in (summarize1, summarize2, summarize3):
        print(summarizer(content))
Пример #2
0
def infosum():
    """Print keywords and three summaries for the note at ``testnote``.

    Reads the UTF-8 file named by the module-level ``testnote``, removes
    timestamps, dates and Markdown image/link markup from the text, then
    prints up to 15 tags from ``jieba.analyse.extract_tags``, the results of
    ``summarize1``/``summarize2``/``summarize3``, the length of the cleaned
    text and the length of the second summary.
    """
    print('loading data')
    # The context manager closes the file even if reading raises.
    with codecs.open(testnote, 'r', 'utf-8') as handle:
        text = handle.read()

    # Remove full timestamps before bare dates so no time part is left over.
    text = re.sub(r'\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d', '', text)
    text = re.sub(r'\d\d\d\d-\d\d-\d\d', '', text)
    # Brackets and parentheses must be escaped: an unescaped "[.*]" is a
    # character class matching a single "." or "*", which made the old
    # patterns delete everything from the first period to the end of a line.
    # Images go first so an image nested inside a link is removed cleanly.
    text = re.sub(r'!\[[^\]]*\]\([^)]*\)', '', text)
    text = re.sub(r'\[[^\]]*\]\([^)]*\)', '', text)

    print('keywords:')
    tags = jieba.analyse.extract_tags(text, topK=15)
    print(','.join(tags))
    print('sum1:')
    print(summarize1(text))
    print('sum2:')
    # Computed once and reused for the length report below.
    second_summary = summarize2(text)
    print(second_summary)
    print('sum3:')
    print(summarize3(text))
    print('length:' + str(len(text)))
    print('sum:' + str(len(second_summary)))
Пример #3
0
def infosum():
    """Print keywords and three summaries for the note at ``testnote``.

    Reads the UTF-8 file named by the module-level ``testnote``, removes
    timestamps, dates and Markdown image/link markup from the text, then
    prints up to 15 tags from ``jieba.analyse.extract_tags``, the results of
    ``summarize1``/``summarize2``/``summarize3``, the length of the cleaned
    text and the length of the second summary.
    """
    print('loading data')
    # The context manager closes the file even if reading raises.
    with codecs.open(testnote, 'r', 'utf-8') as handle:
        text = handle.read()

    # Remove full timestamps before bare dates so no time part is left over.
    text = re.sub(r'\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d', '', text)
    text = re.sub(r'\d\d\d\d-\d\d-\d\d', '', text)
    # Brackets and parentheses must be escaped: an unescaped "[.*]" is a
    # character class matching a single "." or "*", which made the old
    # patterns delete everything from the first period to the end of a line.
    # Images go first so an image nested inside a link is removed cleanly.
    text = re.sub(r'!\[[^\]]*\]\([^)]*\)', '', text)
    text = re.sub(r'\[[^\]]*\]\([^)]*\)', '', text)

    print('keywords:')
    tags = jieba.analyse.extract_tags(text, topK=15)
    print(','.join(tags))
    print('sum1:')
    print(summarize1(text))
    print('sum2:')
    # Computed once and reused for the length report below.
    second_summary = summarize2(text)
    print(second_summary)
    print('sum3:')
    print(summarize3(text))
    print('length:' + str(len(text)))
    print('sum:' + str(len(second_summary)))