Exemplo n.º 1
0
def summarize3(txt, cuttor=None):
    """Summarize ``txt`` by keeping the sentences whose keyword score is
    more than half a standard deviation above the mean score.

    Args:
        txt: raw text to summarize.
        cuttor: optional pre-configured Cuttor; when omitted a default one
            is created that also tokenizes digit runs and ASCII words.

    Returns:
        The selected sentences joined by ', ' and terminated with '.'.
    """
    # A plain local import replaces the old __import__("numpy", ...) hack;
    # numpy is only needed inside this function, so the import stays local.
    import numpy

    if cuttor:
        tmp_cuttor = cuttor
    else:
        tmp_cuttor = Cuttor()
        # Treat runs of digits or ASCII letters as single tokens.
        tmp_cuttor.set_stage1_regex(
            re.compile(r'(\d+)|([a-zA-Z]+)', re.I | re.U))

    # Keep only the sentences the cutter flags as wanted.
    sentences = [s for s, need in tmp_cuttor.cut_to_sentence(txt) if need]
    normalized_sentences = [s.lower() for s in sentences]

    top_n_words = extract_keywords(txt, N, tmp_cuttor)
    scored_sentences = __score_sentences(normalized_sentences, top_n_words,
                                         tmp_cuttor)
    # Build the score list once instead of twice.
    scores = [score for (_, score) in scored_sentences]
    avg = numpy.mean(scores)
    std = numpy.std(scores)
    # Keep sentences scoring clearly above average.
    mean_scored = [(sent_idx, score) for (sent_idx, score) in scored_sentences
                   if score > avg + 0.5 * std]
    mean_scored_summary = [sentences[idx] for (idx, score) in mean_scored]
    return ', '.join(mean_scored_summary) + '.'
Exemplo n.º 2
0
def summarize1(original_text, summary_size=8, cuttor=None):
    """Summarize ``original_text`` by picking, in document order, up to
    ``summary_size`` sentences that each contain one of the top keywords.

    Args:
        original_text: raw text to summarize.
        summary_size: maximum number of sentences in the summary.
        cuttor: optional pre-configured Cuttor; when omitted a default one
            is created that also tokenizes digit runs and ASCII words.

    Returns:
        The selected sentences joined by ', ' and terminated with '.'.
    """
    if cuttor:
        tmp_cuttor = cuttor
    else:
        tmp_cuttor = Cuttor()
        tmp_cuttor.set_stage1_regex(
            re.compile(r'(\d+)|([a-zA-Z]+)', re.I | re.U))

    # Bug fix: use the configured tmp_cuttor here — the original passed the
    # raw `cuttor` argument, which is None when no cuttor was supplied.
    words_sorted = extract_keywords(original_text, 16, tmp_cuttor)
    summary_set = set()
    sentences = [s for s, need in tmp_cuttor.cut_to_sentence(original_text)
                 if need]

    for word in words_sorted:
        matching_sentence = __search_word(sentences, word)
        # `<>` was Python 2 syntax; `!=` is the portable spelling.
        if matching_sentence != '':
            summary_set.add(matching_sentence)
            if len(summary_set) >= summary_size:
                break

    # Emit the chosen sentences in their original document order.
    summary = [s for s in sentences if s in summary_set]
    return ', '.join(summary) + '.'
Exemplo n.º 3
0
def summarize1(original_text, summary_size=8, cuttor=None):
    """Summarize ``original_text`` by picking, in document order, up to
    ``summary_size`` sentences that each contain one of the top keywords.

    Args:
        original_text: raw text to summarize.
        summary_size: maximum number of sentences in the summary.
        cuttor: optional pre-configured Cuttor; when omitted a default one
            is created that also tokenizes digit runs and ASCII words.

    Returns:
        The selected sentences joined by ', ' and terminated with '.'.
    """
    if cuttor:
        tmp_cuttor = cuttor
    else:
        tmp_cuttor = Cuttor()
        tmp_cuttor.set_stage1_regex(
            re.compile(r'(\d+)|([a-zA-Z]+)', re.I | re.U))

    # Bug fix: use the configured tmp_cuttor here — the original passed the
    # raw `cuttor` argument, which is None when no cuttor was supplied.
    words_sorted = extract_keywords(original_text, 16, tmp_cuttor)
    summary_set = set()
    sentences = [s for s, need in tmp_cuttor.cut_to_sentence(original_text)
                 if need]

    for word in words_sorted:
        matching_sentence = __search_word(sentences, word)
        # `<>` was Python 2 syntax; `!=` is the portable spelling.
        if matching_sentence != '':
            summary_set.add(matching_sentence)
            if len(summary_set) >= summary_size:
                break

    # Emit the chosen sentences in their original document order.
    summary = [s for s in sentences if s in summary_set]
    return ', '.join(summary) + '.'
Exemplo n.º 4
0
def summarize3(txt, cuttor=None):
    """Summarize ``txt`` by keeping the sentences whose keyword score is
    more than half a standard deviation above the mean score.

    Args:
        txt: raw text to summarize.
        cuttor: optional pre-configured Cuttor; when omitted a default one
            is created that also tokenizes digit runs and ASCII words.

    Returns:
        The selected sentences joined by ', ' and terminated with '.'.
    """
    # A plain local import replaces the old __import__("numpy", ...) hack;
    # numpy is only needed inside this function, so the import stays local.
    import numpy

    if cuttor:
        tmp_cuttor = cuttor
    else:
        tmp_cuttor = Cuttor()
        # Treat runs of digits or ASCII letters as single tokens.
        tmp_cuttor.set_stage1_regex(
            re.compile(r'(\d+)|([a-zA-Z]+)', re.I | re.U))

    # Keep only the sentences the cutter flags as wanted.
    sentences = [s for s, need in tmp_cuttor.cut_to_sentence(txt) if need]
    normalized_sentences = [s.lower() for s in sentences]

    top_n_words = extract_keywords(txt, N, tmp_cuttor)
    scored_sentences = __score_sentences(normalized_sentences, top_n_words,
                                         tmp_cuttor)
    # Build the score list once instead of twice.
    scores = [score for (_, score) in scored_sentences]
    avg = numpy.mean(scores)
    std = numpy.std(scores)
    # Keep sentences scoring clearly above average.
    mean_scored = [(sent_idx, score) for (sent_idx, score) in scored_sentences
                   if score > avg + 0.5 * std]
    mean_scored_summary = [sentences[idx] for (idx, score) in mean_scored]
    return ', '.join(mean_scored_summary) + '.'
Exemplo n.º 5
0
def summarize2(txt, cuttor=None):
    """Summarize ``txt`` with the TOP_SENTENCES best-scored sentences,
    restored to document order and joined by ', ' with a trailing '.'.
    """
    chosen_cuttor = cuttor
    if not chosen_cuttor:
        chosen_cuttor = Cuttor()
        chosen_cuttor.set_stage1_regex(
            re.compile('(\d+)|([a-zA-Z]+)', re.I | re.U))

    # Collect only the sentences the cutter flags as wanted.
    sentences = [sent for sent, wanted in chosen_cuttor.cut_to_sentence(txt)
                 if wanted]
    lowered = [sent.lower() for sent in sentences]

    keywords = extract_keywords(txt, N_2, chosen_cuttor)
    scored = __score_sentences(lowered, keywords, chosen_cuttor)

    # Take the highest scores, then restore original sentence order.
    best = sorted(scored, key=lambda item: item[1])[-TOP_SENTENCES:]
    best.sort(key=lambda item: item[0])
    return ', '.join(sentences[idx] for idx, _ in best) + '.'
Exemplo n.º 6
0
def summarize3(txt, cuttor=None):
    """Summarize ``txt`` by keeping every sentence whose score exceeds the
    mean by more than half a standard deviation (stats from _mean_std,
    no numpy dependency).
    """
    chosen_cuttor = cuttor
    if not chosen_cuttor:
        chosen_cuttor = Cuttor()
        chosen_cuttor.set_stage1_regex(
            re.compile('(\d+)|([a-zA-Z]+)', re.I | re.U))

    # Collect only the sentences the cutter flags as wanted.
    sentences = [sent for sent, wanted in chosen_cuttor.cut_to_sentence(txt)
                 if wanted]
    lowered = [sent.lower() for sent in sentences]

    keywords = extract_keywords(txt, N_3, chosen_cuttor)
    scored = __score_sentences(lowered, keywords, chosen_cuttor)
    avg, std = _mean_std([score for _, score in scored])

    # Keep sentences scoring clearly above average, in document order.
    threshold = avg + 0.5 * std
    picked = [sentences[idx] for idx, score in scored if score > threshold]
    return ', '.join(picked) + '.'