def test_tfidf_transform(self):
    """Check that ``tfidf.TfIdf`` yields the expected scores for a tiny corpus.

    Three ``(uri, line)`` pairs are fed through ``tfidf.TfIdf()``; each
    ``(word, (uri, tfidf))`` output element is flattened to
    ``(word, uri, tfidf)`` and compared with ``EXPECTED_RESULTS``.
    """
    # NOTE(review): another definition with this same name appears later in
    # the file; if both live in the same class, the later one replaces this
    # one -- confirm which is intended.

    def _flatten(element):
        # Explicit unpacking replaces the tuple-parameter lambda
        # ``lambda (word, (uri, tfidf)): ...``, which is a SyntaxError on
        # Python 3 (PEP 3113).
        word, (uri, score) = element
        return word, uri, score

    # Leaving the ``with`` block runs the pipeline. Previously the pipeline
    # was only constructed, so nothing executed and nothing was verified.
    with TestPipeline() as p:
        uri_to_line = p | 'create sample' >> beam.Create(
            [('1.txt', 'abc def ghi'),
             ('2.txt', 'abc def'),
             ('3.txt', 'abc')])
        result = (
            uri_to_line
            | tfidf.TfIdf()
            | beam.Map(_flatten))
        # assert_that adds a verification step to the pipeline; it is
        # evaluated when the pipeline runs on leaving the ``with`` block.
        assert_that(result, equal_to(EXPECTED_RESULTS))
def test_tfidf_transform(self):
    """Check that ``tfidf.TfIdf`` yields the expected scores for a tiny corpus.

    Three ``(uri, line)`` pairs are fed through ``tfidf.TfIdf()``; each
    ``(word, (uri, tfidf))`` output element is flattened to
    ``(word, uri, tfidf)`` and compared with ``EXPECTED_RESULTS``.
    """

    def _flatten(element):
        # Explicit unpacking replaces the tuple-parameter lambda
        # ``lambda (word, (uri, tfidf)): ...``, which is a SyntaxError on
        # Python 3 (PEP 3113).
        word, (uri, score) = element
        return word, uri, score

    with TestPipeline() as p:
        uri_to_line = p | 'create sample' >> beam.Create(
            [('1.txt', 'abc def ghi'),
             ('2.txt', 'abc def'),
             ('3.txt', 'abc')])
        result = (
            uri_to_line
            | tfidf.TfIdf()
            | beam.Map(_flatten))
        # assert_that adds a verification step to the pipeline; it is
        # evaluated when the pipeline runs on leaving the ``with`` block.
        assert_that(result, equal_to(EXPECTED_RESULTS))