Пример #1
0
    def test_get_all_sentences(self, samp: Sample) -> None:
        """Test `Summarizer.get_all_sentences`

        The sample's sentences are sorted by their `index` attribute and
        compared, position by position, with the sentences returned for the
        sample body and title (using `SampleSentence.equals`).

        Arguments:
            samp {Sample} -- sample data
        """
        summarizer = Summarizer()
        sample_sentences = samp.sentences  # type: typing.List[SampleSentence]

        wanted = sorted(sample_sentences, key=lambda sent: sent.index)
        actual = summarizer.get_all_sentences(samp.body, samp.title)

        # Counts must agree before the pairwise comparison below is meaningful.
        actual_count = len(actual)
        wanted_count = len(wanted)
        assert (actual_count == wanted_count), assert_ex(
            'summary result count',
            actual_count,
            wanted_count,
            hint=samp.name)

        for position, got in enumerate(actual):
            want = wanted[position]  # type: SampleSentence
            is_match = want.equals(got)

            assert is_match, assert_ex(
                'summary',
                got,
                want,
                hint='{!r}: {!r}'.format(position, snip(got.text)))
Пример #2
0
def test_summarize(sample_name: str, limit: int) -> None:
    """Test `summarize` in text subpackage

    Loads the named sample, then compares the received summary sentences
    with the expected ones: first by count, then item by item.

    Arguments:
        sample_name {str} -- name of data source
        limit {int} -- number of sentences to return
    """
    samp = Sample(TEXT_PATH, sample_name)
    title = samp.title
    text = samp.body

    expecteds = _get_expected_sentences(samp, limit)
    receiveds = _get_received_sentences(title, text, limit)

    # Report the count that is actually being compared (len(expecteds));
    # the requested limit is kept as a hint because the two may differ.
    assert (len(receiveds) == len(expecteds)), assert_ex(
        'summary sentence count',
        len(receiveds),
        len(expecteds),
        hint='limit: {!r}'.format(limit))

    for i, received in enumerate(receiveds):
        expected = expecteds[i]

        assert (received == expected), assert_ex(
            'summary [text at index]',
            received,
            expected,
            hint=[snip(received), i])
Пример #3
0
    def test_get_sentence(self, sample: Sample,
                          sentence: SampleSentence) -> None:
        """Test `Summarizer.get_sentence`

        Scores a single sample sentence and compares the resulting
        `total_score` with the score stored on the sample sentence.

        Arguments:
            sample {Sample} -- sample data
            sentence {SampleSentence} -- individual sentence from sample
        """
        summarizer = Summarizer()

        # Inputs for `get_sentence`, all taken from the sample data.
        words_in_title = sample.title_words
        keywords = self._get_top_keywords(sample.keywords)
        keyword_words = pluck_keyword_words(keywords)
        sentence_text = sentence.text
        sentence_index = sentence.index
        sentence_count = len(sample.sentences)

        want = sentence.total_score
        scored = summarizer.get_sentence(
            sentence_text,
            sentence_index,
            sentence_count,
            words_in_title,
            keywords,
            keyword_words)
        got = scored.total_score

        assert kinda.eq(got, want), assert_ex(
            'sentence score',
            got,
            want,
            hint='{}: {!r}'.format(sample.name, snip(sentence_text)))
Пример #4
0
def test_parse_config(
        path_dict: typing.Dict[str, typing.Any],
        expected: typing.Any) -> None:
    """Parse a nominal configuration

    The config path is built from the builtin root and default idiom,
    overridden by whatever `path_dict` supplies. Any exception raised while
    parsing is handed to `check_exception` and its result is compared with
    `expected` instead.

    Arguments:
        path_dict {typing.Dict[str, typing.Any]} --
            arguments for Parser initialization
        expected {typing.Any} -- idiom config or exception
    """
    # Defaults first; entries from `path_dict` take precedence.
    location = {'root': BUILTIN, 'idiom': DEFAULT_IDIOM, **path_dict}
    config_file = get_config_path(**location)

    try:
        parsed = parse_config(config_file)
        ideal, language, stop_words = parsed
        # Only the number of stop words is compared, not the words.
        received = (ideal, language, len(stop_words))

    except Exception as err:  # pylint: disable=broad-except
        received = check_exception(err, expected)

    is_match = (received == expected)
    assert is_match, assert_ex('parse config', received, expected)
Пример #5
0
def test_score_frequency(sample: Sample, sentence: SampleSentence) -> None:
    """Test `Summarizer` sentence scoring by keyword frequency

    Both `score_by_dbs` and `score_by_sbs` are computed for the sentence's
    stems and compared, within `COMPOSITE_TOLERANCE`, with the scores stored
    on the sample sentence.

    Arguments:
        sample {Sample} -- sample data
        sentence {SampleSentence} -- individual sentence from sample
    """
    summarizer = Summarizer()
    stems = summarizer.parser.get_all_stems(sentence.text)
    keywords = summarizer.get_top_keywords(sample.body)
    keyword_words = pluck_keyword_words(keywords)

    # Both scores are computed before any assertion runs.
    wanted_density = sentence.dbs_score
    density = score_by_dbs(stems, keywords, keyword_words)
    wanted_summation = sentence.sbs_score
    summation = score_by_sbs(stems, keywords, keyword_words)

    checks = [
        ('density score', wanted_density, density),
        ('summation score', wanted_summation, summation),
    ]

    for label, want, got in checks:
        assert kinda.eq(got, want, COMPOSITE_TOLERANCE), assert_ex(
            label, got, want)
Пример #6
0
def test_get_stop_words(
        spec: typing.Dict[str, typing.Any],
        expected: int) -> None:
    """Test `get_stop_words` for ParserConfig

    Every word listed under the spec's `user` key must appear in the
    result. When that holds, a negative `expected` requires more than
    `abs(expected)` words, otherwise the count must equal `expected`.

    Arguments:
        spec {typing.Dict[str, typing.Any]} -- nominal configuration
        expected {int} -- number of expected words (-ish)
    """
    wrapped_spec = {'stop_words': spec}
    user_words = spec.get('user', [])

    received = list(get_stop_words(wrapped_spec, 'english'))
    count = len(received)

    if user_words and any(word not in received for word in user_words):
        # At least one user-supplied word is absent from the result.
        passed = False

    elif expected < 0:
        passed = (count > abs(expected))

    else:
        passed = (count == expected)

    assert passed, assert_ex('get stop words', received, expected)
Пример #7
0
def test__float_len(item_list: typing.Sized, expected: float):
    """Test _float_len in summarizer subpackage

    Arguments:
        item_list {typing.Sized} -- a sized iterable
        expected {float} -- value `_float_len` should return for `item_list`
    """
    received = _float_len(item_list)
    is_match = (received == expected)

    assert is_match, assert_ex(
        'float length',
        received,
        expected)
Пример #8
0
def test_get_top_keyword_threshold(keywords: SampleKeywordList,
                                   expected: float) -> None:
    """Test `get_top_keyword_threshold`

    Arguments:
        keywords {SampleKeywordList} -- sample keywords
        expected {float} -- minimum count
    """
    received = get_top_keyword_threshold(keywords)
    is_match = (received == expected)

    assert is_match, assert_ex(
        'top keyword frequency >=',
        received,
        expected)
Пример #9
0
    def test_get_all_words(self, samp: Sample) -> None:
        """Test `Parser.get_all_words`

        Each word produced for the sample body must be contained in the
        sample's `compare_words`.

        Arguments:
            samp {Sample} -- sample data
        """
        parser = Parser()
        known_words = samp.compare_words

        for word in parser.get_all_words(samp.body):
            is_known = word in known_words

            assert is_known, assert_ex('all words', word, None)
Пример #10
0
    def test_get_key_stems(self, samp: Sample) -> None:
        """Test `Parser.get_key_stems`

        Compares the sorted key stems parsed from the sample body with the
        sorted expected keywords derived from the sample.

        Arguments:
            samp {Sample} -- sample data
        """
        parser = Parser(idiom=samp.idiom)

        expected = sorted(self._get_expected_keywords(samp.keywords))
        received = sorted(parser.get_key_stems(samp.body))

        # `assert_ex` is called as (description, received, expected)
        # everywhere else; keep that order so the failure message does not
        # label the two values the wrong way round.
        assert (received == expected), assert_ex(
            'keyword list', received, expected)
Пример #11
0
def test_remove_punctuations(samp: Sample) -> None:
    """Test `remove_punctuations` for Parser

    The sample body, after punctuation removal, must equal the sample's
    `remove_punctuations` value.

    Arguments:
        samp {Sample} -- sample data
    """
    want = samp.remove_punctuations
    got = remove_punctuations(samp.body)
    is_match = (got == want)

    # repr() keeps whitespace differences visible in the failure message.
    assert is_match, assert_ex('punctuation removal', repr(got), repr(want))
Пример #12
0
    def test_get_top_keywords(self, samp: Sample) -> None:
        """Test `Summarizer.get_top_keywords`

        The sorted top keywords computed from the sample body must match the
        sorted top keywords derived from the sample, in count and item by
        item.

        Arguments:
            samp {Sample} -- sample data
        """
        summarizer = Summarizer()

        wanted = sorted(self._get_top_keywords(samp.keywords))
        wanted_count = len(wanted)

        actual = sorted(summarizer.get_top_keywords(samp.body))
        actual_count = len(actual)

        assert (actual_count == wanted_count), assert_ex(
            'top keywords count',
            actual_count,
            wanted_count)

        # Counts are equal at this point, so pairing covers every item.
        for got, want in zip(actual, wanted):
            assert (got == want), assert_ex('top keyword', got, want)
Пример #13
0
    def test_get_keywords(self, samp: Sample) -> None:
        """Test `Parser.get_keywords`

        Every word found on either side must be present on both sides, and
        its received score must be approximately equal to the expected one.

        Arguments:
            samp {Sample} -- sample data
        """
        exp_words, exp_scores = self._get_sample_keyword_data(samp)
        rcv_words, rcv_scores = self._get_keyword_result(samp.body)

        every_word = set(exp_words + rcv_words)

        for word in every_word:
            on_both_sides = (word in exp_words) and (word in rcv_words)
            assert on_both_sides, assert_ex(
                'word list mismatch', rcv_words, exp_words)

            want = exp_scores[word]
            got = rcv_scores[word]
            scores_agree = kinda.eq(got, want)
            assert scores_agree, assert_ex(
                'bad keyword score', got, want, hint=word)
Пример #14
0
def test_get_config_path(root: str, idiom: str, expected: Path) -> None:
    """Test `get_config_path` for ParserConfig

    Arguments:
        root {str} -- root directory of idiom config
        idiom {str} -- basename of idiom config
        expected {Path} -- path `get_config_path` should return
    """
    received = get_config_path(root, idiom)
    is_match = (received == expected)

    assert is_match, assert_ex('config path', received, expected)
Пример #15
0
def test_score_body_sentences(samp: Sample) -> None:
    """Test `score_body_sentences` for text subpackage

    The `total_score` of each scored sentence must be approximately equal to
    the `total_score` of the sample sentence at the same position.

    Arguments:
        samp {Sample} -- sample data
    """
    scored_sentences = score_body_sentences(samp.body, samp.title)

    for position, scored in enumerate(scored_sentences):
        want = samp.sentences[position].total_score
        got = scored.total_score
        scores_agree = kinda.eq(got, want)

        assert scores_agree, assert_ex(
            'sentence score', got, want, hint=snip(scored.text))
Пример #16
0
    def test_score_by_length(self, samp: Sample) -> None:
        """Test `Summarizer.score_by_length`

        Only the first sentence of the sample is scored; the result must be
        approximately equal to the sample's `length_score`.

        Arguments:
            samp {Sample} -- sample data
        """
        summarizer = Summarizer(idiom=samp.idiom)
        first_sentence = samp.sentences[0]
        words = summarizer.parser.get_all_words(first_sentence.text)

        want = samp.length_score
        got = summarizer.score_by_length(words)
        scores_agree = kinda.eq(got, want)

        assert scores_agree, assert_ex(
            'sentence score',
            got,
            want,
            hint=' '.join(words))
Пример #17
0
    def test_split_words(self, samp: Sample) -> None:
        """Test `Parser.split_words`

        The words split from the sample body must equal the sample's
        `split_words` value.

        Arguments:
            samp {Sample} -- sample data
        """
        parser = Parser()
        text = samp.body

        expected = samp.split_words
        received = parser.split_words(text)

        # `assert_ex` is called as (description, received, expected)
        # everywhere else; keep that order so the failure message does not
        # label the two values the wrong way round.
        assert (received == expected), assert_ex(
            'word split',
            received,
            expected,
            hint=samp.name)
Пример #18
0
def test_score_by_title(samp: Sample) -> None:
    """Test `score_by_title`

    Scores the words of the sample's first sentence against the key words of
    its title; the result must be approximately equal to the sample's
    `title_score`.

    Arguments:
        samp {Sample} -- sample data
    """
    summarizer = Summarizer(idiom=samp.idiom)
    parser = summarizer.parser
    title_words = parser.get_key_words(samp.title)
    sentence_words = parser.get_all_words(samp.sentences[0].text)

    want = samp.title_score
    got = score_by_title(title_words, sentence_words)
    scores_agree = kinda.eq(got, want)

    # The hint starts with an empty item so both word lists are shown on
    # their own lines.
    assert scores_agree, assert_ex(
        'title score',
        got,
        want,
        hint='\n'.join(['', repr(title_words), repr(sentence_words)]))
Пример #19
0
def test_load_idiom(
        kwargs: typing.Dict[str, typing.Any],
        expected: typing.Tuple[int, str, int]) -> None:
    """Test `load_idiom` for ParserConfig

    On success the loaded idiom is checked with `compare_loaded_idiom`.
    If loading raises `PermissionError`, `FileNotFoundError` or
    `ValueError`, the test passes only when `check_exception` returns a
    non-None value for that error.

    Arguments:
        kwargs {typing.Dict[str, typing.Any]} -- kwargs passed to `load_idiom`
        expected {typing.Tuple[int, str, int]} -- expected data
    """
    # Bound up front so the failure message can always be built, even when
    # `load_idiom` raises before a result is assigned.
    received = None  # type: typing.Any
    test = False

    try:
        received = load_idiom(**kwargs)
        test = compare_loaded_idiom(received, expected)

    except (PermissionError, FileNotFoundError, ValueError) as err:
        # Show the raised error as the received value on failure.
        received = err
        test = check_exception(err, expected) is not None

    assert test, assert_ex('config', received, expected)
Пример #20
0
    def test_split_sentences(self, samp: Sample) -> None:
        """Test `Parser.split_sentences`

        The expected value is the sample's `split_sentences` attribute; when
        reading it raises `AttributeError`, the texts of the sample's
        sentences are used instead.

        Arguments:
            samp {Sample} -- sample data
        """
        parser = Parser(idiom=samp.idiom)

        try:
            want = samp.split_sentences

        except AttributeError:
            # Fall back to the text of each sample sentence.
            want = [sentence.text for sentence in samp.sentences]

        got = parser.split_sentences(samp.body)
        is_match = (got == want)

        assert is_match, assert_ex('sentence split', got, want)
Пример #21
0
def test_get_slice_length(
        nominal: typing.Any,
        total: int,
        expected: typing.Any) -> None:
    """Test `get_slice_length` in text subpackage

    A `ValueError` raised by `get_slice_length` is handed to
    `check_exception`, whose result is then compared with `expected`.

    Arguments:
        nominal {typing.Any} --
            exact number (int) or percentage (0 < nominal < 1)
        total {int} -- number of items to slice from
        expected {typing.Any} -- expected Exception/number of items
    """
    try:
        received = get_slice_length(nominal, total)

    except ValueError as err:
        received = check_exception(err, expected)

    is_match = (expected == received)

    assert is_match, assert_ex(
        'slice length',
        received,
        expected,
        hint='nominal: {!r}'.format(nominal))
Пример #22
0
    def test___init__(
            self,
            root: str,
            idiom: str,
            expected: typing.Tuple[int, str, int]) -> None:
        """Test `ParserConfig` initialization

        Arguments:
            root {str} -- root directory of idiom config
            idiom {str} -- basename of idiom config
            expected {typing.Tuple[int, str, int]} --
                [ideal words, NLTK language, stop word count]
        """
        config = ParserConfig(root, idiom)

        # Only the number of stop words is compared, not the words.
        ideal_length = config.ideal_sentence_length
        language = config.language
        stop_word_count = len(config.stop_words)
        received = (ideal_length, language, stop_word_count)

        is_match = (received == expected)
        assert is_match, assert_ex('parser config', received, expected)