Пример #1
0
 def _verify_data(self, pcol, init_size, data_size):
     """Attach read, verification and cleanup stages to ``pcol``.

     The elements of ``pcol`` are fed to ``ReadAllFromParquet``. Two
     independent checks are then built on the records that come out:

     * the ``'number'`` field of every record is summed globally and the
       total is handed to ``TestParquetIT._sum_verifier``;
     * records are keyed by ``'name'``, counted per key, and each
       ``(name, count)`` result is handed to
       ``TestParquetIT._count_verifier``.

     Finally the outputs of both checks are flattened together with
     ``pcol`` itself, reshuffled, and every resulting element is passed
     to ``FileSystems.delete``.

     NOTE(review): presumably ``pcol`` holds file paths and the verifiers
     emit no elements on success, so only the paths reach the cleanup
     step -- confirm against the verifier implementations.

     Args:
         pcol: PCollection consumed by ``ReadAllFromParquet`` and later
             by the cleanup step.
         init_size: forwarded unchanged to both verifiers.
         data_size: forwarded unchanged to both verifiers.
     """
     records = pcol | 'read' >> ReadAllFromParquet()

     # Check 1: global sum of the 'number' field.
     numbers = records | 'get_number' >> Map(lambda rec: rec['number'])
     total = numbers | 'sum_globally' >> CombineGlobally(sum)
     sum_check = total | 'validate_number' >> FlatMap(
         lambda result: TestParquetIT._sum_verifier(
             init_size, data_size, result))

     # Check 2: number of records per 'name'.
     name_number_pairs = records | 'make_pair' >> Map(
         lambda rec: (rec['name'], rec['number']))
     per_name_counts = name_number_pairs | 'count_per_key' >> Count.PerKey()
     count_check = per_name_counts | 'validate_name' >> FlatMap(
         lambda result: TestParquetIT._count_verifier(
             init_size, data_size, result))

     # Cleanup consumes both checks' outputs as well as the original input.
     merged = (sum_check, count_check, pcol) | 'flatten' >> Flatten()
     reshuffled = merged | 'reshuffle' >> Reshuffle()
     _ = reshuffled | 'cleanup' >> Map(
         lambda item: FileSystems.delete([item]))
Пример #2
0
 def test_aggregator_empty_input(self):
     """Applying ``CombineGlobally(max).without_defaults()`` to an empty
     list must compare equal to an empty list."""
     max_without_defaults = CombineGlobally(max).without_defaults()
     actual = [] | max_without_defaults
     self.assertEqual(actual, [])