def test_cue_3():
    """Check which five sentences ``cue_method`` returns for a larger document.

    The document is built from three sentence groups (the middle one empty)
    and the expected result lists the surviving sentences in the order they
    appear in the input.
    """
    first_group = (
        "ba "*10,
        "bb "*10,
        " sa"*8 + " bb"*10,
        "bb bc ba",
    )
    empty_group = ()
    last_group = (
        "babbbc "*10,
        "na nb nc nd sa" + " bc"*10,
        " ba n"*10,
    )
    doc = build_document(first_group, empty_group, last_group)

    edmundson = EdmundsonSummarizer()
    edmundson.bonus_words = ("ba", "bb", "bc",)
    edmundson.stigma_words = ("sa", "sb", "sc",)

    summary = edmundson.cue_method(doc, 5)

    expected = [
        ("ba "*10).strip(),
        ("bb "*10).strip(),
        "bb bc ba",
        "na nb nc nd sa bc bc bc bc bc bc bc bc bc bc",
        ("ba n "*10).strip(),
    ]
    assert [to_unicode(sentence) for sentence in summary] == expected
def test_key_empty():
    """``key_method`` on a document built without sentences returns nothing."""
    edmundson = EdmundsonSummarizer()
    edmundson.bonus_words = ("ba", "bb", "bc",)

    empty_doc = build_document()
    summary = edmundson.key_method(empty_doc, 10)

    assert [to_unicode(sentence) for sentence in summary] == []
def build_edmundson(parser, language):
    """Create an ``EdmundsonSummarizer`` configured for ``language``.

    The summarizer is given a ``Stemmer`` for the language, the language's
    stop words as null words, and the parser's ``significant_words`` and
    ``stigma_words`` as bonus and stigma words respectively.
    """
    stemmer = Stemmer(language)
    edmundson = EdmundsonSummarizer(stemmer)

    edmundson.null_words = get_stop_words(language)
    edmundson.bonus_words = parser.significant_words
    edmundson.stigma_words = parser.stigma_words

    return edmundson
def test_bonus_words_property():
    """``bonus_words`` starts as an empty frozenset and reflects assigned words."""
    edmundson = EdmundsonSummarizer()
    assert edmundson.bonus_words == frozenset()

    words = ("word", "another", "and", "some", "next",)
    expected = frozenset(words)

    edmundson.bonus_words = words
    assert edmundson.bonus_words == expected
def test_cue_1():
    """Asking ``cue_method`` for 10 sentences of a one-sentence document gives one."""
    only_group = ("ba bb bc bb unknown ľščťžýáíé sb sc sb",)
    doc = build_document(only_group)

    edmundson = EdmundsonSummarizer()
    edmundson.bonus_words = ("ba", "bb", "bc",)
    edmundson.stigma_words = ("sa", "sb", "sc",)

    summary = edmundson.cue_method(doc, 10)

    assert len(summary) == 1
def test_key_1():
    """``key_method`` with a limit of 1 returns the sentence containing "bonus"."""
    first_group = ("wa wb wc wd", "I like music",)
    second_group = ("This is test sentence with some extra words and bonus",)
    doc = build_document(first_group, second_group)

    edmundson = EdmundsonSummarizer()
    edmundson.bonus_words = ("ba", "bb", "bc", "bonus",)

    summary = edmundson.key_method(doc, 1)

    expected = [
        "This is test sentence with some extra words and bonus",
    ]
    assert [to_unicode(sentence) for sentence in summary] == expected
def test_key_2():
    """``key_method`` with a limit of 2 on a three-sentence document."""
    first_group = ("Om nom nom nom nom", "Sure I summarize it, with bonus",)
    second_group = ("This is bonus test sentence with some extra words and bonus",)
    doc = build_document(first_group, second_group)

    edmundson = EdmundsonSummarizer()
    edmundson.bonus_words = ("nom", "bonus",)

    summary = edmundson.key_method(doc, 2)

    expected = [
        "Om nom nom nom nom",
        "This is bonus test sentence with some extra words and bonus",
    ]
    assert [to_unicode(sentence) for sentence in summary] == expected
def test_cue_letters_case():
    """``cue_method`` result when bonus words and text differ in letter case."""
    first_group = ("X X X", "x x x x",)
    second_group = ("w w w", "W W W W",)
    doc = build_document(first_group, second_group)

    edmundson = EdmundsonSummarizer()
    edmundson.bonus_words = ("X", "w",)
    edmundson.stigma_words = ("stigma",)

    summary = edmundson.cue_method(doc, 2)

    expected = [
        "x x x x",
        "W W W W",
    ]
    assert [to_unicode(sentence) for sentence in summary] == expected
def test_mixed_cue_key():
    """Summarize with cue and key weights of 1 and title/location weights of 0.

    The document text is assembled line by line so that each heading and
    sentence sits on its own line and an empty line separates the two parts.
    The expected sentences below can only be produced as separate sentences
    if the input keeps that line structure.

    NOTE(review): assumes ``build_document_from_string`` splits its input on
    line breaks, treats lines starting with "# " as headings and an empty
    line as a paragraph break - confirm against the helper.
    """
    document_text = "\n".join((
        "# This is cool heading",
        "Because I am sentence I like words",
        "And because I am string I like characters",
        "",
        "# blank and heading",
        "This is next paragraph because of blank line above",
        "Here is the winner because contains words like cool and heading",
    ))
    document = build_document_from_string(document_text)

    summarizer = EdmundsonSummarizer(cue_weight=1, key_weight=1, title_weight=0, location_weight=0)
    summarizer.bonus_words = ("cool", "heading", "sentence", "words", "like", "because")
    summarizer.stigma_words = ("this", "is", "I", "am", "and",)

    sentences = summarizer(document, 2)

    assert list(map(to_unicode, sentences)) == [
        "Because I am sentence I like words",
        "Here is the winner because contains words like cool and heading",
    ]
def test_key_3():
    """``key_method`` with its default weight and then with ``weight=0``."""
    first_group = ("wa", "wa wa", "wa wa wa", "wa wa wa wa", "wa Wa Wa Wa wa",)
    second_group = ("x X x X",)
    doc = build_document(first_group, second_group)

    edmundson = EdmundsonSummarizer()
    edmundson.bonus_words = ("wa", "X",)

    # Default weight.
    summary = edmundson.key_method(doc, 3)
    expected_default = [
        "wa wa wa",
        "wa wa wa wa",
        "wa Wa Wa Wa wa",
    ]
    assert [to_unicode(sentence) for sentence in summary] == expected_default

    # Explicit zero weight changes which three sentences are returned.
    summary = edmundson.key_method(doc, 3, weight=0)
    expected_zero_weight = [
        "wa wa wa wa",
        "wa Wa Wa Wa wa",
        "x X x X",
    ]
    assert [to_unicode(sentence) for sentence in summary] == expected_zero_weight
def test_cue_2():
    """``cue_method`` on a two-sentence document with limits of 10 and 1."""
    first_group = ("ba bb bc bb unknown ľščťžýáíé sb sc sb",)
    second_group = ("Pepek likes spinach",)
    doc = build_document(first_group, second_group)

    edmundson = EdmundsonSummarizer()
    edmundson.bonus_words = ("ba", "bb", "bc",)
    edmundson.stigma_words = ("sa", "sb", "sc",)

    # A limit above the sentence count returns both sentences.
    summary = edmundson.cue_method(doc, 10)
    expected_both = [
        "ba bb bc bb unknown ľščťžýáíé sb sc sb",
        "Pepek likes spinach",
    ]
    assert [to_unicode(sentence) for sentence in summary] == expected_both

    # A limit of one keeps only the first sentence.
    summary = edmundson.cue_method(doc, 1)
    expected_single = [
        "ba bb bc bb unknown ľščťžýáíé sb sc sb",
    ]
    assert [to_unicode(sentence) for sentence in summary] == expected_single
def test_cue_with_no_stigma_words():
    """``cue_method`` raises ``ValueError`` when only bonus words are set."""
    edmundson = EdmundsonSummarizer()
    edmundson.bonus_words = ("great", "very", "beautiful",)

    with pytest.raises(ValueError):
        empty_doc = build_document()
        edmundson.cue_method(empty_doc, 10)