Example #1
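A unittest test case that mocks EagerFileFetcher with create_autospec and verifies that EagerTokenizer.get_token_generator lowercases tokens, skips whitespace-only chunks, and is exhausted after the last sentence.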
import unittest
from unittest.mock import create_autospec

from hamcrest import assert_that, calling, contains, raises

# EagerFileFetcher and EagerTokenizer are the classes under test;
# their import path is project-specific and omitted here.


class TestTokenizer(unittest.TestCase):
    def setUp(self):
        # An autospec'd mock rejects any call that the real fetcher's
        # interface does not support.
        self.data_fetcher_mock = create_autospec(EagerFileFetcher)
        self.sut = EagerTokenizer(self.data_fetcher_mock)

    def test_should_tokenize_sentences_well(self):
        # Given: a leading size chunk, two labelled sentences, and a
        # whitespace-only chunk that should be skipped.
        self.data_fetcher_mock.get_chunk_generator.return_value = (
            x for x in ["2 5", "1 3 Ala ma kota\n", "      \n", "1 3 Kot ma Ale\n"]
        )

        # When:
        token_generator = self.sut.get_token_generator()

        # Then: tokens come back lowercased, prefixed with a label flag,
        # and the generator is exhausted after the second sentence.
        assert_that(next(token_generator), contains(True, "ala", "ma", "kota"))
        assert_that(next(token_generator), contains(True, "kot", "ma", "ale"))
        assert_that(calling(next).with_args(token_generator), raises(StopIteration))
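
The real EagerTokenizer is not shown on this page. A minimal sketch that would satisfy the test above, assuming the first chunk ("2 5") is a size header and the leading field of each sentence is a 1/0 label flag, might look like this:

from typing import Iterator


class EagerTokenizer:
    """Hypothetical minimal implementation consistent with the test above."""

    def __init__(self, data_fetcher):
        self._data_fetcher = data_fetcher

    def get_token_generator(self) -> Iterator[tuple]:
        chunks = self._data_fetcher.get_chunk_generator()
        next(chunks, None)  # assumed: the first chunk is a size header
        for line in chunks:
            fields = line.split()
            if not fields:
                continue  # whitespace-only chunks carry no sentence
            label, _word_count, *words = fields
            yield (label == "1", *(w.lower() for w in words))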