def main():
    """Print an extractive summary of ``data/01.txt``.

    Prompts for a compression ratio, ranks the sentences of the text with
    ``compute_sentences_weigths`` and prints the best-ranked ones joined
    together in the order they appear in the original text.

    Raises:
        ValueError: if the entered ratio cannot be parsed as a number.
    """
    # Read the compression ratio and parse it immediately, so invalid input
    # fails before any of the (more expensive) text processing is done.
    ratio = float(raw_input("Please enter the compressed ratio: "))
    title, paragraghs = load_data("data/01.txt")

    # Record the position of every sentence in the original text.
    sentences = []
    for paragragh in paragraghs:
        sentences.extend(sentence_cut(paragragh, punctuation_list='!!。'))
    # A repeated sentence keeps the index of its last occurrence.
    sentences_with_indices = dict(zip(sentences, range(len(sentences))))

    # Extract the keywords and rank the sentences by weight (best first).
    keywords = get_key_words(title, paragraghs)
    key_sentences = compute_sentences_weigths(keywords, paragraghs)

    # Work out how many sentences to keep for the requested ratio.  The count
    # is clamped to [0, len(key_sentences)]: a negative value would otherwise
    # make the slice below count from the end of the ranking and silently
    # pick the wrong sentences.
    topK = int(len(key_sentences) * ratio)
    topK = max(0, min(topK, len(key_sentences)))

    # Put the selected sentences back into original-text order and print them.
    selected = sorted(key_sentences[:topK],
                      key=lambda sentence: sentences_with_indices[sentence])
    summary = ''.join(selected)
    print(summary)
def get_key_sentences(content_weights, content, p_weight=1.2, s_bias=1, s_weight = 1.2):
    """Register every sentence of ``content`` in ``content_weights``.

    ``content`` is split with ``sentence_cut``.  Each resulting sentence gets
    a fresh entry ``{'weight': 0, 'p_weight': ..., 's_weight': ...}``.  The
    first and last ``s_bias`` sentences receive ``s_weight``; all sentences
    in between receive a sentence multiplier of ``1``.

    Args:
        content_weights: dict mapping sentence -> entry; updated in place.
        content: the text to split into sentences.
        p_weight: paragraph multiplier stored on every entry.
        s_bias: how many sentences at each end count as "edge" sentences.
            Values below zero are treated as zero, and values larger than
            the number of sentences make every sentence an edge sentence.
        s_weight: sentence multiplier stored on edge sentences.

    Returns:
        The same ``content_weights`` dict that was passed in.
    """
    sentences = sentence_cut(content, punctuation_list='!!。')
    total = len(sentences)

    # Clamp the edge width so short (or empty) inputs cannot index past the
    # ends of the list.
    edge = max(0, min(s_bias, total))
    # Explicit end index instead of a negative one: ``sentences[0:-0]`` is
    # empty, which used to drop every sentence when s_bias was 0.
    middle_end = max(edge, total - edge)

    # Edge sentences first ...
    for sentence in sentences[:edge] + sentences[middle_end:]:
        content_weights[sentence] = {'weight': 0, 'p_weight': p_weight, 's_weight': s_weight}
    # ... then the middle ones, so a sentence whose text occurs in both
    # places ends up with the middle multiplier, as before.
    for sentence in sentences[edge:middle_end]:
        content_weights[sentence] = {'weight': 0, 'p_weight': p_weight, 's_weight': 1}
    return content_weights
def compute_sentences_weigths(keywords, paragraphs, p_bias=1, p_weight=1.2, s_bias=1, s_weight = 1.2):
    """Rank the sentences of ``paragraphs`` by keyword-based weight.

    Every sentence is registered through ``get_key_sentences``.  Sentences
    from the first and last ``p_bias`` paragraphs carry the paragraph
    multiplier ``p_weight``; sentences from the paragraphs in between carry
    ``1``.  A sentence's score is the sum of the values of all keywords it
    contains, times its paragraph and sentence multipliers, divided by the
    number of pieces ``sentence_cut`` yields for it with the clause
    punctuation list.

    Args:
        keywords: dict mapping keyword -> numeric value.
        paragraphs: sequence of paragraph strings.
        p_bias: how many paragraphs at each end count as "edge" paragraphs.
            Values below zero are treated as zero, and values larger than
            the number of paragraphs make every paragraph an edge paragraph.
        p_weight: multiplier for sentences of edge paragraphs.
        s_bias: forwarded to ``get_key_sentences``.
        s_weight: forwarded to ``get_key_sentences``.

    Returns:
        A list of sentences ordered from highest to lowest score.
    """
    total = len(paragraphs)
    # Clamp so that fewer than p_bias paragraphs (or none) cannot raise
    # IndexError below.
    edge = max(0, min(p_bias, total))
    # Explicit end index: ``paragraphs[0:-0]`` is empty, which used to skip
    # every paragraph when p_bias was 0.
    middle_end = max(edge, total - edge)

    content_weights = {}
    # Edge paragraphs, visited in the same first/last interleaved order as
    # before.
    for i in range(edge):
        for paragraph in (paragraphs[i], paragraphs[total - 1 - i]):
            content_weights = get_key_sentences(content_weights, paragraph, p_weight=p_weight,
                                                s_bias=s_bias, s_weight=s_weight)
    # Middle paragraphs get a neutral paragraph multiplier.
    for paragraph in paragraphs[edge:middle_end]:
        content_weights = get_key_sentences(content_weights, paragraph, p_weight=1,
                                            s_bias=s_bias, s_weight=s_weight)

    scores = {}
    for sentence, entry in content_weights.items():
        keyword_total = entry['weight']
        for word, value in keywords.items():
            if word in sentence:
                keyword_total += value
        inner_num = len(sentence_cut(sentence, punctuation_list=',;,::;… '))
        # Guard against a zero clause count and force true division: under
        # Python 2 an all-integer expression would otherwise be truncated.
        divisor = float(max(inner_num, 1))
        scores[sentence] = keyword_total * entry['p_weight'] * entry['s_weight'] / divisor

    # Highest score first; only the sentences themselves are returned.
    return sorted(scores, key=scores.get, reverse=True)
# Example no. 4
# 0
def main():
    """Print a TextRank-based summary of ``data/01.txt``.

    Reads the GBK-encoded file, splits every non-blank line into sentences,
    scores the text units returned by ``get_keywords`` with ``textrank`` and
    prints what ``summaly`` builds for the ratio entered by the user.
    """
    sentences = []
    with codecs.open("data/01.txt", 'r', encoding='GBK') as source:
        for raw_line in source.readlines():
            stripped = raw_line.strip()
            if not stripped:
                # Blank lines carry no sentences.
                continue
            sentences.extend(sentence_cut(stripped, punctuation_list='。!!'))

    words, text = get_keywords(sentences, ['ns', 'nr', 'n'])
    weights = construct_matrix(words)

    # Start every unit with a score of 1, then run 100 iterations with a
    # damping factor of 0.85.
    num = len(text)
    initial_scores = np.ones((1, num))
    damping = 0.85
    iterations = 100
    ranked = textrank(initial_scores, iterations, damping, weights)
    tr = ranked.tolist()[0]

    # Ask how much of the text to keep and build the summary accordingly.
    ratio = float(raw_input("Please enter the compressed ratio: "))
    keep = int(num * ratio)
    print(summaly(text, tr, keep))
def main():
    """Summarise ``data/01.txt`` with TextRank and print the result.

    The GBK-encoded file is read line by line; each non-empty stripped line
    is cut into sentences.  ``get_keywords`` and ``construct_matrix`` prepare
    the input for ``textrank``, and ``summaly`` produces the final text for
    the compression ratio typed in by the user.
    """
    with codecs.open("data/01.txt", 'r', encoding='GBK') as reader:
        sentences = []
        for entry in reader.readlines():
            entry = entry.strip()
            if entry:
                pieces = sentence_cut(entry, punctuation_list='。!!')
                sentences.extend(pieces)

    words, text = get_keywords(sentences, ['ns', 'nr', 'n'])
    num = len(text)
    weights = construct_matrix(words)

    # Uniform starting scores, damping factor 0.85, 100 iterations.
    start_tr = np.ones((1, num))
    score_rows = textrank(start_tr, 100, 0.85, weights).tolist()
    tr = score_rows[0]

    # The number of units to keep is the requested share of the text.
    answer = raw_input("Please enter the compressed ratio: ")
    ratio = float(answer)
    topK = int(num * ratio)
    summary = summaly(text, tr, topK)
    print(summary)