Пример #1
0
    def test_empty_stream(self):
        """Grouping an empty stream must record no spills and yield nothing."""
        grouper = GroupByStatement(
            max_num_files=10,
            max_hashmap_entries=1000,
            request_id="test_empty_stream",
        )

        # NOTE(review): the leading 0 is presumably the number of generated
        # items (hence "empty stream") -- confirm against the iterator class.
        empty_stream = IncrementalKeyValueIterator(0, 1, 0)
        grouped = grouper.groupBy(empty_stream)

        self.assertEqual(grouper.spills, 0)
        self.assertEqual(grouped.hasNext(), False)
Пример #2
0
    def test_stream_spills_on_disk(self):
        """Group with max_hashmap_entries=300 and expect exactly 4 spills.

        The grouped result is then checked against an untouched copy of the
        input via ``compare_outputs``.
        """
        grouper = GroupByStatement(
            max_num_files=4,
            max_hashmap_entries=300,
            request_id="test_stream_spills_on_disk",
        )

        source_stream = IncrementalKeyValueIterator(1000, 10, 7)
        # Independent copy taken before groupBy sees the stream, so the
        # comparison below has its own input to walk.
        reference_stream = copy.deepcopy(source_stream)

        grouped = grouper.groupBy(source_stream)

        self.assertEqual(grouper.spills, 4)
        self.compare_outputs(reference_stream, grouped)
Пример #3
0
    def test_low_memory(self):
        """Group with only ``max_memory=1024`` configured.

        No file or hashmap limits are passed explicitly, so the spill count,
        merge-stage count and file count are asserted as bounds rather than
        exact values. The grouped result is then checked against an
        untouched copy of the input via ``compare_outputs``.
        """
        g = GroupByStatement(max_memory=1024,
                             request_id="test_low_memory")

        data = IncrementalKeyValueIterator(1000, 10, 7)
        # Independent copy taken before groupBy sees the stream.
        data_copy = copy.deepcopy(data)

        result_iterator = g.groupBy(data)

        # Comparison-specific assertions report both operands on failure,
        # whereas assertTrue(a > b) only reports "False is not true".
        self.assertGreater(g.spills, 0)
        self.assertGreater(g.num_merge_stages, 0)
        self.assertLessEqual(g._num_files, 1000)

        self.compare_outputs(data_copy, result_iterator)
Пример #4
0
    def test_large_stream(self):
        """Group a large stream with max_num_files=100, max_hashmap_entries=10000.

        Expects exactly 20 spills and ``_num_files`` equal to 20, then checks
        the grouped result against an untouched copy of the input.
        """
        grouper = GroupByStatement(
            max_num_files=100,
            max_hashmap_entries=10000,
            request_id="test_large_stream",
        )

        source_stream = IncrementalKeyValueIterator(200000, 10, 7, 3, 2)
        # Independent copy taken before groupBy sees the stream.
        reference_stream = copy.deepcopy(source_stream)

        grouped = grouper.groupBy(source_stream)

        self.assertEqual(grouper.spills, 20)
        self.assertEqual(grouper._num_files, 20)

        self.compare_outputs(reference_stream, grouped)
Пример #5
0
    def test_stream_spills_on_disk_and_file_merges_required(self):
        """Group with max_num_files=2 and max_hashmap_entries=100.

        Expects exactly 10 spills, 3 merge stages and ``_num_files`` equal
        to 2, then checks the grouped result against an untouched copy of
        the input.
        """
        grouper = GroupByStatement(
            max_num_files=2,
            max_hashmap_entries=100,
            request_id="test_stream_spills_on_disk_and_file_merges_required",
        )

        source_stream = IncrementalKeyValueIterator(1000, 10, 7)
        # Independent copy taken before groupBy sees the stream.
        reference_stream = copy.deepcopy(source_stream)

        grouped = grouper.groupBy(source_stream)

        self.assertEqual(grouper.spills, 10)
        self.assertEqual(grouper.num_merge_stages, 3)
        self.assertEqual(grouper._num_files, 2)

        self.compare_outputs(reference_stream, grouped)
Пример #6
0
    def test_consecutive_calls(self):
        """Reuse one GroupByStatement for ten groupBy calls and check cleanup.

        All ten calls are issued first. Only afterwards is each result
        iterator fully consumed, and the value read from ``_request_id``
        right after the corresponding call must then not name an existing
        directory.
        """
        g = GroupByStatement(max_num_files=2,
                             max_hashmap_entries=1)

        # One (request id, result iterator) pair per groupBy call.
        calls = []

        for _ in range(10):
            data = IncrementalKeyValueIterator(10, 3, 3)
            result_iterator = g.groupBy(data)
            # Read the id only after groupBy has run, as it is captured
            # per call rather than passed to the constructor.
            calls.append((g._request_id, result_iterator))

        for request_id, result_iterator in calls:
            # Exhaust iterator
            for _key, _value in result_iterator:
                pass
            self.assertFalse(os.path.isdir(request_id))