Пример #1
0
 def test_biggest_batch_first(self):
     """Check the first two batches yielded when ``biggest_batch_first`` is enabled.

     The generator is built with dynamic padding turned on; only the first
     element of each of the two leading batches is inspected.
     """
     options = {'dynamic_padding': True, 'biggest_batch_first': True}
     settings = Params(options)
     data_generator = DataGenerator(self.text_trainer, settings)
     batch_stream = data_generator.create_generator(IndexedDataset(self.instances))
     # Pull both batches before asserting anything, as a single step.
     leading_batch = next(batch_stream)
     following_batch = next(batch_stream)
     assert_array_equal(leading_batch[0], numpy.asarray([[3]]))
     assert_array_equal(following_batch[0], numpy.asarray([[1], [0], [4]]))
Пример #2
0
 def test_biggest_batch_first(self):
     """Check the first two batches yielded when ``biggest_batch_first`` is enabled.

     Padding noise is set to 0.0 and dynamic padding is turned on.  The first
     element of each batch is converted with ``self.as_list`` before comparing.
     """
     options = {'padding_noise': 0.0, 'dynamic_padding': True, 'biggest_batch_first': True}
     settings = Params(options)
     data_generator = DataGenerator(self.text_trainer, settings)
     batch_stream = data_generator.create_generator(IndexedDataset(self.instances))
     # Pull both batches before asserting anything, as a single step.
     leading_batch = next(batch_stream)
     following_batch = next(batch_stream)
     assert self.as_list(leading_batch[0]) == [3]
     assert self.as_list(following_batch[0]) == [1, 0, 4]
Пример #3
0
 def test_instances_are_sorted_by_sorting_keys(self):
     """Check the contents of one epoch of batches with dynamic padding and no noise.

     Four batches are expected.  They are ordered by ``batch[0][0]`` before
     being compared, so the checks do not depend on the order in which the
     generator yielded them.
     """
     options = {'dynamic_padding': True, 'padding_noise': 0.0}
     settings = Params(options)
     data_generator = DataGenerator(self.text_trainer, settings)
     batch_stream = data_generator.create_generator(IndexedDataset(self.instances))
     assert data_generator.last_num_batches == 4
     epoch_batches = sorted([next(batch_stream) for _ in range(4)],
                            key=lambda batch: batch[0][0])
     expected_batches = [[1, 0, 4], [3], [6, 7, 2], [8, 9, 5]]
     for batch, expected in zip(epoch_batches, expected_batches):
         assert self.as_list(batch[0]) == expected
Пример #4
0
 def test_instances_are_sorted_by_sorting_keys(self):
     """Check the contents of one epoch of batches produced with dynamic padding.

     Four batches are expected.  They are ordered by ``batch[0][0]`` before
     being compared, so the checks do not depend on the order in which the
     generator yielded them.
     """
     settings = Params({'dynamic_padding': True})
     data_generator = DataGenerator(self.text_trainer, settings)
     batch_stream = data_generator.create_generator(IndexedDataset(self.instances))
     assert data_generator.last_num_batches == 4
     epoch_batches = sorted([next(batch_stream) for _ in range(4)],
                            key=lambda batch: batch[0][0])
     # Each expected batch is a column: one single-element row per entry.
     expected_columns = [
         [[1], [0], [4]],
         [[3]],
         [[6], [7], [2]],
         [[8], [9], [5]],
     ]
     for batch, expected in zip(epoch_batches, expected_columns):
         assert_array_equal(batch[0], numpy.asarray(expected))
Пример #5
0
 def test_adaptive_grouping(self):
     """Check one epoch of batches when ``adaptive_batch_sizes`` is enabled.

     Uses an ``adaptive_memory_usage_constant`` of 130 with padding noise set
     to 0.0.  Four batches are expected; they are ordered by ``batch[0][0]``
     before being compared.
     """
     options = {
         'padding_noise': 0.0,
         'dynamic_padding': True,
         'adaptive_batch_sizes': True,
         'adaptive_memory_usage_constant': 130,
     }
     settings = Params(options)
     data_generator = DataGenerator(self.text_trainer, settings)
     batch_stream = data_generator.create_generator(IndexedDataset(self.instances))
     assert data_generator.last_num_batches == 4
     epoch_batches = sorted([next(batch_stream) for _ in range(4)],
                            key=lambda batch: batch[0][0])
     expected_batches = [[0, 4], [3], [7, 2, 1], [8, 9, 5, 6]]
     for batch, expected in zip(epoch_batches, expected_batches):
         assert self.as_list(batch[0]) == expected
Пример #6
0
 def test_batches_are_consistent_with_no_repermuting(self):
     """Two consecutive epochs must contain the same batches when re-sorting is off.

     The generator is configured with ``sort_every_epoch`` set to False and
     padding noise 0.0.  Each epoch (four batches) is ordered by
     ``batch[0][0]`` and converted with ``self.as_list`` before the two are
     compared.
     """
     options = {'padding_noise': 0.0, 'sort_every_epoch': False, 'dynamic_padding': True}
     settings = Params(options)
     data_generator = DataGenerator(self.text_trainer, settings)
     batch_stream = data_generator.create_generator(IndexedDataset(self.instances))
     assert data_generator.last_num_batches == 4

     # Draw both epochs from the same stream, back to back.
     epoch_one = [next(batch_stream) for _ in range(4)]
     epoch_two = [next(batch_stream) for _ in range(4)]

     def first_entry(batch):
         return batch[0][0]

     epoch_one = sorted(epoch_one, key=first_entry)
     epoch_two = sorted(epoch_two, key=first_entry)
     epoch_one_lists = [self.as_list(batch[0]) for batch in epoch_one]
     epoch_two_lists = [self.as_list(batch[0]) for batch in epoch_two]
     assert epoch_one_lists == epoch_two_lists
Пример #7
0
 def test_adaptive_grouping(self):
     """Check one epoch of batches when ``adaptive_batch_sizes`` is enabled.

     Uses an ``adaptive_memory_usage_constant`` of 130.  Four batches are
     expected; they are ordered by ``batch[0][0]`` before being compared.
     """
     options = {
         'dynamic_padding': True,
         'adaptive_batch_sizes': True,
         'adaptive_memory_usage_constant': 130,
     }
     settings = Params(options)
     data_generator = DataGenerator(self.text_trainer, settings)
     batch_stream = data_generator.create_generator(IndexedDataset(self.instances))
     assert data_generator.last_num_batches == 4
     epoch_batches = sorted([next(batch_stream) for _ in range(4)],
                            key=lambda batch: batch[0][0])
     # Each expected batch is a column: one single-element row per entry.
     expected_columns = [
         [[0], [4]],
         [[3]],
         [[7], [2], [1]],
         [[8], [9], [5], [6]],
     ]
     for batch, expected in zip(epoch_batches, expected_columns):
         assert_array_equal(batch[0], numpy.asarray(expected))
Пример #8
0
 def test_sort_every_batch_actually_adds_noise_every_batch(self):
     """Two consecutive epochs must differ when re-sorting with padding noise is on.

     The generator is configured with ``sort_every_epoch`` set to True and a
     padding noise of 0.8.  Two epochs' worth of batches (four each) are
     drawn, ordered by ``batch[0][0]``, converted with ``self.as_list``, and
     then required to be different from each other.
     """
     options = {'padding_noise': 0.8, 'sort_every_epoch': True, 'dynamic_padding': True}
     settings = Params(options)
     data_generator = DataGenerator(self.text_trainer, settings)
     batch_stream = data_generator.create_generator(IndexedDataset(self.instances))
     assert data_generator.last_num_batches == 4

     # Draw both epochs from the same stream, back to back.
     epoch_one = [next(batch_stream) for _ in range(4)]
     epoch_two = [next(batch_stream) for _ in range(4)]

     def first_entry(batch):
         return batch[0][0]

     epoch_one = sorted(epoch_one, key=first_entry)
     epoch_two = sorted(epoch_two, key=first_entry)
     epoch_one_lists = [self.as_list(batch[0]) for batch in epoch_one]
     epoch_two_lists = [self.as_list(batch[0]) for batch in epoch_two]
     assert epoch_one_lists != epoch_two_lists
Пример #9
0
 def test_maximum_batch_size_is_actually_a_maximum(self):
     """Check one epoch of batches when ``maximum_batch_size`` caps adaptive batching.

     The generator uses adaptive batch sizes with an
     ``adaptive_memory_usage_constant`` of 50, a ``maximum_batch_size`` of 2
     and padding noise 0.0.  Seven batches are expected, none holding more
     than two entries.
     """
     params = Params({
         'padding_noise': 0.0,
         'dynamic_padding': True,
         'adaptive_batch_sizes': True,
         'adaptive_memory_usage_constant': 50,
         'maximum_batch_size': 2,
     })
     generator = DataGenerator(self.text_trainer, params)
     batches = generator.create_generator(IndexedDataset(self.instances))
     assert generator.last_num_batches == 7
     one_epoch_arrays = [next(batches) for _ in range(7)]
     # Order by the leading entry so the comparison does not depend on the
     # order in which the generator yielded the batches.
     one_epoch_arrays.sort(key=lambda x: x[0][0])
     # Compare the whole epoch at once: on failure the assertion shows every
     # batch, which is what the former debugging print() was there for.
     actual_batches = [self.as_list(x[0]) for x in one_epoch_arrays]
     assert actual_batches == [[0], [2, 1], [3], [4], [5, 6], [7], [8, 9]]