def testMatMulBroadcast(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      in0 = array_ops.placeholder(np.float16, shape=[1024])
      in0_bcast = gen_array_ops.broadcast_to(in0, shape=[1024, 1024])
      in1 = array_ops.placeholder(np.float16, shape=[1024, 1024])

      with variable_scope.variable_scope("vs", use_resource=True):
        weights = variable_scope.get_variable(
            "x",
            dtype=np.float16,
            shape=[1024, 1024],
            initializer=init_ops.constant_initializer(0.0))

      mm1 = math_ops.matmul(in0_bcast, weights, name="mm1")
      mm2 = math_ops.matmul(in1, mm1, name="mm2")

    report = ReportJSON(self, sess)
    tu.move_variable_initialization_to_cpu()
    sess.run(variables.global_variables_initializer())

    report.reset()
    sess.run(mm2, {in0: np.zeros(in0.shape), in1: np.zeros(in1.shape)})
    report.parse_log()

    report.assert_total_tile_memory(112509300)
    report.assert_max_tile_memory(100438)

    # Only the matmul dot compute sets and copies should appear; the
    # broadcast of in0 does not produce a compute set of its own.
    ok = ['__seed*', 'host-exchange-local-copy-', 'mm1/dot*', 'Copy_']
    report.assert_all_compute_sets_and_list(ok)
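# Note on the memory assertions above: ReportJSON parses the Poplar
# compilation report and, as we understand it, assert_total_tile_memory
# checks the summed per-tile memory while assert_max_tile_memory checks the
# single largest tile. The exact figures are build-specific and will need
# updating if the convolution/matmul planner or library versions change.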
def testGroupNormalizeInference(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

      with variable_scope.variable_scope("vs", use_resource=True):
        y = convolutional.conv2d(
            x,
            2,
            1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer())
        gamma = constant_op.constant([0.5, 0.5], np.float32)
        beta = constant_op.constant([0.5, 0.5], np.float32)
        mean = constant_op.constant([0.5, 0.5], np.float32)
        inv_std_dev = constant_op.constant([0.5, 0.5], np.float32)
        y = gen_popnn_ops.popnn_group_norm_inference(
            inputs=y,
            gamma=gamma,
            beta=beta,
            mean=mean,
            inv_std_dev=inv_std_dev,
            data_format="NHWC",
            epsilon=0.0015,
            num_groups=2)
        y = convolutional.conv2d(
            y,
            2,
            1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer())
        y = gen_popnn_ops.popnn_group_norm_inference(
            inputs=y,
            gamma=gamma,
            beta=beta,
            mean=mean,
            inv_std_dev=inv_std_dev,
            data_format="NHWC",
            epsilon=0.0015,
            num_groups=2)

    report = ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())

    report.reset()

    sess.run(y, {x: np.zeros([1, 4, 4, 2])})

    report.parse_log()

    # Would fail if there were two group norms in the graph - the second
    # instance should reuse the compute sets of the first.
    ok = [
        '__seed*', 'Copy_',
        'vs/conv2d/Conv2D/convolution.*/Conv_1x1/Convolve',
        'vs/PopnnGroupNormInference/group-norm-inference*/'
    ]
    report.assert_all_compute_sets_and_list(ok)
def testMultiIpu(self):
  with self.session() as sess:

    def my_graph(pa, pb, pc):
      with ops.device("/device:IPU:0"):
        with ipu.scopes.ipu_shard(0):
          o1 = pa + pb
        with ipu.scopes.ipu_shard(1):
          o2 = pa + pc
        out = o1 + o2
        return [out]

    with ops.device('cpu'):
      pa = array_ops.placeholder(np.float32, [2], name="a")
      pb = array_ops.placeholder(np.float32, [2], name="b")
      pc = array_ops.placeholder(np.float32, [2], name="c")

    report = ReportJSON(self, sess, device_count_override=2)

    out = ipu.ipu_compiler.compile(my_graph, [pa, pb, pc])

    report.reset()

    fd = {pa: [1., 1.], pb: [0., 1.], pc: [1., 5.]}
    result = sess.run(out, fd)
    self.assertAllClose(result[0], [3., 8.])

    report.parse_log()

    # Both shards should appear in the tensor map: shard 0 starts at tile 0
    # and shard 1 at tile 1216, the first tile of the second IPU (a Mk1 IPU
    # has 1216 tiles).
    tm = report.get_tensor_map()
    mods = tm.computation_names()
    self.assertEqual(len(mods), 1)

    tiles = tm.tile_ids(mods[0])
    self.assertEqual(len(tiles), 2)
    self.assertEqual(tiles, set((0, 1216)))

    ok = [
        '__seed*',
        'add*/add*/Add',
        'switchControlBroadcast2/*OnTileCopy',
        'Copy_XLA_Args*/arg0.1_to_/ipu-inter-copy*/OnTileCopy',
        'Copy_/ipu-inter-copy_to_/ipu-inter-copy*/OnTileCopy',
    ]
    report.assert_all_compute_sets_and_list(ok)
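# In the test above, ReportJSON(self, sess, device_count_override=2) asks the
# test harness to attach two IPUs. Outside the harness, the equivalent system
# configuration would look roughly like the sketch below - a hypothetical
# helper (not part of the original tests), assuming the TF1 ipu.utils API.
def _configure_two_ipus():
  # Build a config with profiling enabled so compilation reports are
  # available, request any two available IPUs, then apply the config.
  cfg = ipu.utils.create_ipu_config(profiling=True)
  cfg = ipu.utils.auto_select_ipus(cfg, 2)
  ipu.utils.configure_ipu_system(cfg)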
def testBatchNormalizeInferenceDontMatchDifferentTypes(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

      with variable_scope.variable_scope("vs", use_resource=True):
        y = convolutional.conv2d(
            x,
            2,
            1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer())
        y = layers_norm.batch_normalization(y, fused=True)
        y = math_ops.cast(y, np.float16)
        y = convolutional.conv2d(
            y,
            2,
            1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer())
        y = layers_norm.batch_normalization(y, fused=True)

    report = ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())

    report.reset()

    sess.run(y, {x: np.zeros([1, 4, 4, 2])})

    report.parse_log()

    # The cast means the two batch norms operate on different types, so they
    # must not be matched: both appear below, along with both convolutions.
    ok = [
        '__seed*', 'Copy_', 'vs/conv2d/Conv2D/convolution.*/Conv_1x1',
        'vs/batch_normalization/FusedBatchNorm*/batch-norm-inference.*/',
        'vs/Cast/convert.*/Cast',
        'vs/conv2d_1/Conv2D/convolution.*/Conv_1x1',
        'vs/batch_normalization_1/FusedBatchNorm*/batch-norm-inference.*/'
    ]
    report.assert_all_compute_sets_and_list(ok)
def testNormCacheConstants(self):
  with self.session() as sess:

    def model(x, y, z):
      scale = gen_array_ops.broadcast_to(z, shape=[65536])
      offset = scale
      b_mean, b_var = nn.moments(x, [0, 1, 2], name='moments')
      a = nn.fused_batch_norm(x,
                              scale,
                              offset,
                              b_mean,
                              b_var,
                              1e-3,
                              is_training=False,
                              name="a")
      b = nn.fused_batch_norm(y,
                              scale,
                              offset,
                              b_mean,
                              b_var,
                              1e-3,
                              is_training=False,
                              name="b")
      return a[0] + b[0]

    with ops.device('cpu'):
      x = array_ops.placeholder(np.float16, [1, 1, 1, 65536], name="x")
      y = array_ops.placeholder(np.float16, [1, 1, 1, 65536], name="y")
      z = array_ops.placeholder(np.float16, shape=[1])

    with ops.device("/device:IPU:0"):
      res = ipu_compiler.compile(model, inputs=[x, y, z])

    report = ReportJSON(self, sess)
    tu.move_variable_initialization_to_cpu()
    sess.run(variables.global_variables_initializer())

    report.reset()

    r = sess.run(res, {
        x: np.ones(x.shape),
        y: np.ones(y.shape),
        z: [1.0]
    })
    self.assertAllClose(r[0], np.full(r[0].shape, 2))

    report.parse_log()

    report.assert_total_tile_memory(1634674)
    report.assert_max_tile_memory(1551)

    # Would fail if there were two batch norms in the graph: only the 'a'
    # instance appears below, so 'b' reused its compute sets.
    ok = [
        '__seed*',
        'host-exchange-local-copy',
        'Copy_',
        'moments/SquaredDifference/multiply',
        'a/batch-norm-inference',
        'add/add*/Add',
    ]
    report.assert_all_compute_sets_and_list(ok)
def testGroupNormsMatchFwdBwd(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

      with variable_scope.variable_scope("vs", use_resource=True):
        y = convolutional.conv2d(
            x,
            2,
            1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer(),
            name='conv1')
        gamma = constant_op.constant([0.5, 0.5], np.float32)
        beta = constant_op.constant([0.5, 0.5], np.float32)
        y, _, _ = gen_popnn_ops.popnn_group_norm_training(
            inputs=y,
            gamma=gamma,
            beta=beta,
            data_format="NHWC",
            epsilon=0.0015,
            num_groups=2)
        y = convolutional.conv2d(
            y,
            2,
            1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer(),
            name='conv2')
        y, _, _ = gen_popnn_ops.popnn_group_norm_training(
            inputs=y,
            gamma=gamma,
            beta=beta,
            data_format="NHWC",
            epsilon=0.0015,
            num_groups=2)
        y = convolutional.conv2d(
            y,
            2,
            1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer(),
            name='conv3')
        y, _, _ = gen_popnn_ops.popnn_group_norm_training(
            inputs=y,
            gamma=gamma,
            beta=beta,
            data_format="NHWC",
            epsilon=0.0015,
            num_groups=2)

      loss = math_ops.reduce_sum(y)
      optimizer = gradient_descent.GradientDescentOptimizer(0.1)
      train = optimizer.minimize(loss)

    report = ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())

    report.reset()

    sess.run([train, loss], {x: np.zeros([1, 4, 4, 2])})

    report.parse_log()

    # One GN for forwards and one GN for grad
    # pylint: disable=line-too-long
    ok = [
        '__seed*',
        'Copy_',
        'vs/conv1/Conv2D/convolution*/Conv_1x1/Convolve',
        'vs/PopnnGroupNormTraining/group-norm-training*/Norm',
        'vs/PopnnGroupNormTraining/group-norm-training*/iStdDev',
        'vs/PopnnGroupNormTraining/group-norm-training*/Whiten',
        'Sum/reduce.*/*/Reduce',
        'gradients/vs/PopnnGroupNormTraining_2_grad/PopnnGroupNormGrad/group-norm-grad*/',
        'gradients/vs/conv*/Conv2D_grad/Conv2DBackpropFilter/fusion.*',
        'gradients/vs/conv*/Conv2D_grad/Conv2DBackpropInput/fusion/*Transpose',
    ]
    # pylint: enable=line-too-long
    report.assert_all_compute_sets_and_list(ok)
def testBatchNormsMatchFwdBwd(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

      with variable_scope.variable_scope("vs", use_resource=True):
        y = convolutional.conv2d(
            x,
            2,
            1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer(),
            name='conv1')
        y = layers_norm.batch_normalization(y, fused=True, training=True)
        y = convolutional.conv2d(
            y,
            2,
            1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer(),
            name='conv2')
        y = layers_norm.batch_normalization(y, fused=True, training=True)
        y = convolutional.conv2d(
            y,
            2,
            1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer(),
            name='conv3')
        y = layers_norm.batch_normalization(y, fused=True, training=True)

      loss = math_ops.reduce_sum(y)
      optimizer = gradient_descent.GradientDescentOptimizer(0.1)
      train = optimizer.minimize(loss)

    report = ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())

    report.reset()

    sess.run([train, loss], {x: np.zeros([1, 4, 4, 2])})

    report.parse_log()

    # One BN for forwards and one BN for grad
    # (note that we don't cache gradient application)
    # pylint: disable=line-too-long
    ok = [
        '__seed*',
        'Copy*',
        'vs/conv1/Conv2D/convolution.*/Conv_1x1',
        'vs/batch_normalization/FusedBatchNorm*/batch-norm-training.*/',
        'Sum/reduce.*/ReduceOnTile/InToIntermediateNoExchange/Reduce',
        'Sum/reduce.*/ReduceFinalStage/IntermediateToOutput/Reduce',
        'gradients/vs/batch_normalization_2/FusedBatchNorm*_grad/FusedBatchNormGrad*/batch-norm-grad.*/',
        'GradientDescent/update_vs/batch_normalization/',
        'GradientDescent/update_vs/batch_normalization_1/',
        'GradientDescent/update_vs/batch_normalization_2/',
        'gradients/vs/conv*/Conv2D_grad/Conv2DBackpropFilter/fusion.*/AddTo',
        'gradients/vs/conv*/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Conv_4x4',
        'gradients/vs/conv*/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Transpose',
        'gradients/vs/conv*/Conv2D_grad/Conv2DBackpropInput/fusion/*Transpose',
    ]
    # pylint: enable=line-too-long
    report.assert_all_compute_sets_and_list(ok)
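# For reference, a sketch of the imports these tests rely on, which would sit
# at the top of the file. Module paths are inferred from the symbols used
# above and follow the TF1 Graphcore port; exact locations may differ between
# releases.
import numpy as np

from tensorflow.compiler.plugin.poplar.ops import gen_popnn_ops
from tensorflow.compiler.plugin.poplar.tests import test_utils as tu
from tensorflow.compiler.plugin.poplar.tests.test_utils import ReportJSON
from tensorflow.python import ipu
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import ops
from tensorflow.python.ipu import ipu_compiler
from tensorflow.python.layers import convolutional
from tensorflow.python.layers import normalization as layers_norm
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
from tensorflow.python.training import gradient_descent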