def test_serializable(self):
    """Measured device parameters must survive a JSON round trip."""
    params = test_device_matmul(
        max_batch=5,
        max_hidden=9,
        max_classes=17,
        repeats=1,
        device='CPU:0',
        dtype='float32')
    # json.dumps raises TypeError if any value is not JSON-serializable.
    json.dumps(params)
def test_normal(self):
    """Interpolated cost varies smoothly with batch size (3 vs. 4)."""
    params = test_device_matmul(
        max_batch=4,
        max_hidden=8,
        max_classes=128,
        repeats=1,
        device='CPU:0',
        dtype='float32')
    cost_fn = interpolate_matmul_cost(params)
    cost_small = cost_fn(3, 8, 16)
    cost_large = cost_fn(4, 8, 16)
    # Neighboring batch sizes should have nearly identical estimated cost.
    self.assertAlmostEqual(cost_small, cost_large, places=1)
def test_failure(self):
    """Out-of-range sizes must raise ValueError from the interpolator.

    NOTE: this was named `tes_failure`, which the test runner never
    discovers (missing `test` prefix), so these assertions silently
    never executed — renamed so the test actually runs.
    """
    device_params = test_device_matmul(
        max_batch=4,
        max_hidden=8,
        max_classes=128,
        repeats=1,
        device='GPU:0',
        dtype='float32')
    approx_cost = interpolate_matmul_cost(device_params)
    # hidden_size=9 exceeds the measured max_hidden=8 above.
    with self.assertRaises(ValueError):
        approx_cost(4, 9, 16)
    with self.assertRaises(ValueError):
        approx_cost(1, 9, 1)
def test_normal_cpu(self):
    """CPU measurement grid covers the expected batch/hidden/class sizes."""
    params = test_device_matmul(
        max_batch=5,
        max_hidden=9,
        max_classes=17,
        repeats=1,
        device='CPU:0',
        dtype='float32')
    expected_batches = [1, 2, 3, 4, 5]
    expected_hiddens = [1, 2, 3, 4, 5, 7, 8, 9]
    expected_classes = [1, 2, 3, 4, 5, 7, 8, 9, 15, 16, 17]
    self.assertListEqual(expected_batches, params['batch_sizes'])
    self.assertListEqual(expected_hiddens, params['hidden_sizes'])
    self.assertListEqual(expected_classes, params['class_sizes'])
    # One cost entry per (batch, hidden, class) grid point.
    self.assertLen(params['cost_values'], 5 * 8 * 11)
def test_normalize_to_worst_adaptive(self):
    """estimate_best_splits returns one speedup/split per (batch, head) pair."""
    params = test_device_matmul(
        max_batch=8,
        max_hidden=25,
        max_classes=110,
        repeats=1,
        device='CPU:0',
        dtype='float32')
    vocab = build_zipf_vocab(120)
    batch_sizes, head_sizes, speed_ups, best_splits = estimate_best_splits(
        params, vocab, num_tails=2, hidden_size=24, factor=2)
    self.assertListEqual([1, 2, 3, 4, 5, 7, 8], batch_sizes)
    self.assertListEqual([14, 22, 30], head_sizes)
    # 7 batch sizes x 3 head sizes -> 21 entries each.
    self.assertLen(speed_ups, 7 * 3)
    self.assertLen(best_splits, 7 * 3)
def test_normal_gpu(self):
    """GPU measurement grid covers the expected batch/hidden/class sizes."""
    if not tf.test.is_gpu_available(cuda_only=True):
        self.skipTest('No GPU available')
    params = test_device_matmul(
        max_batch=32,
        max_hidden=8,
        max_classes=16,
        repeats=1,
        device='GPU:0',
        dtype='float32')
    self.assertListEqual(
        [1, 2, 3, 4, 5, 7, 8, 9, 15, 16, 17, 23, 24, 25, 31, 32],
        params['batch_sizes'])
    self.assertListEqual([1, 2, 3, 4, 5, 7, 8], params['hidden_sizes'])
    self.assertListEqual(
        [1, 2, 3, 4, 5, 7, 8, 9, 15, 16], params['class_sizes'])
    # One cost entry per (batch, hidden, class) grid point.
    self.assertLen(params['cost_values'], 16 * 7 * 10)
def test_normal(self):
    """adaptive_split_cost runs cleanly on a Zipf-distributed vocabulary.

    NOTE(review): another `test_normal` appears earlier in the file; if
    both live in the same TestCase this one shadows it — confirm they
    belong to different classes.
    """
    params = test_device_matmul(
        max_batch=8,
        max_hidden=25,
        max_classes=30,
        repeats=1,
        device='CPU:0',
        dtype='float32')
    cost_fn = interpolate_matmul_cost(params)
    vocab = build_zipf_vocab(30)
    frequencies = np.array([count for _, count in vocab.most_common()])
    probabilities = frequencies / np.sum(frequencies)
    cumulative = np.cumsum(probabilities)
    adaptive_split_cost(
        cost_fn, cumulative, [2, 9, 19],
        batch_size=6, hidden_size=24, factor=2)