示例#1
0
def test_single_sentence():
    """A document with one sentence produces a summary of length one."""
    summarizer = TextRankSummarizer()
    summarizer.stop_words = ("I", "am")
    single_sentence_document = build_document(("I am one sentence", ))

    # Ask for more sentences (10) than the document contains.
    summary = summarizer(single_sentence_document, 10)

    assert len(summary) == 1
示例#2
0
def test_two_sentences():
    """Both sentences of a two-sentence document are returned in document order."""
    first_text = "I am that 1. sentence"
    second_text = "And I am 2. winning prize"
    document = build_document((first_text, second_text))

    summarizer = TextRankSummarizer()
    summarizer.stop_words = ("I", "am", "and", "that")

    # Ask for more sentences (10) than the document contains.
    summary = summarizer(document, 10)

    assert len(summary) == 2
    assert to_unicode(summary[0]) == first_text
    assert to_unicode(summary[1]) == second_text
示例#3
0
def test_rating_with_zero_or_single_words_in_sentences(sentences,
                                                       expected_ratings):
    """
    Edge case: sentences that contain a single word or no words at all.

    Ensures that rating does not break for such input and that the first two
    sentences receive the expected ratings.
    """
    document = build_document(sentences)
    rater = TextRankSummarizer()

    actual = rater.rate_sentences(document)

    # Ratings are floats, so compare each of them approximately.
    wanted = {}
    wanted[document.sentences[0]] = pytest.approx(expected_ratings[0])
    wanted[document.sentences[1]] = pytest.approx(expected_ratings[1])
    assert actual == wanted
示例#4
0
def test_sentences_rating():
    """Check the ratings of three overlapping sentences and that they sum to one."""
    texts = [
        "a c e g",
        "a b c d e f g",
        "b d f",
    ]
    expected_scores = (
        0.29714368215098025,
        0.42683373199392705,
        0.2760223553913001,
    )
    document = build_document(texts)
    summarizer = TextRankSummarizer()

    ratings = summarizer.rate_sentences(document)

    # Ratings are floats, so compare each of them approximately.
    expected = {
        document.sentences[position]: pytest.approx(score)
        for position, score in enumerate(expected_scores)
    }
    assert ratings == expected
    assert pytest.approx(sum(ratings.values())) == 1
示例#5
0
def test_numpy_not_installed():
    """
    The summarizer raises ``ValueError`` when the module's ``numpy`` is ``None``.

    The ``numpy`` attribute of ``text_rank_module`` is temporarily replaced by
    ``None`` and is always restored afterwards, even when the assertion fails,
    so that a failure here cannot leak the patched state into other tests.
    """
    summarizer = TextRankSummarizer()

    original_numpy = text_rank_module.numpy
    text_rank_module.numpy = None
    try:
        with pytest.raises(ValueError):
            summarizer(build_document(), 10)
    finally:
        # Restore the module-level attribute no matter how the check ended.
        text_rank_module.numpy = original_numpy
示例#6
0
def test_stop_words_correctly_removed():
    """
    Stop words are removed from sentences regardless of letter case.

    Sentences made only of stop words (in any casing) yield an empty result,
    while the remaining sentences yield their words in lower case.
    """
    summarizer = TextRankSummarizer()
    summarizer.stop_words = ["stop", "Halt", "SHUT", "HmMm"]

    only_stop_words_a = (
        "stop halt shut hmmm",
        "Stop Halt Shut Hmmm",
    )
    only_stop_words_b = (
        "StOp HaLt ShUt HmMm",
        "STOP HALT SHUT HMMM",
    )
    relevant = (
        "Some relevant sentence",
        "Some moRe releVant sentEnce",
    )
    document = build_document(only_stop_words_a, only_stop_words_b, relevant)
    sentences = document.sentences

    # (index of the sentence in the document, expected words)
    cases = (
        (0, []),
        (1, []),
        (2, []),
        (3, []),
        (4, ["some", "relevant", "sentence"]),
        (5, ["some", "more", "relevant", "sentence"]),
    )
    for index, expected in cases:
        returned = summarizer._to_words_set(sentences[index])
        assert expected == returned
示例#7
0
def test_empty_document():
    """Summarizing a document built without any sentences gives an empty result."""
    empty_document = build_document()
    stemmer = Stemmer("english")
    summarizer = TextRankSummarizer(stemmer)

    summary = summarizer(empty_document, 10)

    assert len(summary) == 0
示例#8
0
def build_text_rank(parser, language):
    """
    Create a ``TextRankSummarizer`` configured for ``language``.

    The summarizer gets a ``Stemmer`` for the language and the stop words
    returned by ``get_stop_words``. ``parser`` is accepted but not used here.
    """
    stemmer = Stemmer(language)
    text_rank = TextRankSummarizer(stemmer)
    text_rank.stop_words = get_stop_words(language)
    return text_rank