Exemplo n.º 1
0
    def testMatMulBroadcast(self):
        """Run two chained matmuls fed by a broadcast input and check the report.

        A [1024] float16 placeholder is broadcast to [1024, 1024] and multiplied
        by a zero-initialised resource variable ("mm1"); the result is then
        multiplied by a second [1024, 1024] placeholder ("mm2"). After one run
        the test asserts the reported total/max tile memory and the list of
        compute-set name patterns.
        """
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                vec = array_ops.placeholder(np.float16, shape=[1024])
                vec_bcast = gen_array_ops.broadcast_to(vec,
                                                       shape=[1024, 1024])
                mat = array_ops.placeholder(np.float16, shape=[1024, 1024])

                with variable_scope.variable_scope("vs", use_resource=True):
                    weights = variable_scope.get_variable(
                        "x",
                        dtype=np.float16,
                        shape=[1024, 1024],
                        initializer=init_ops.constant_initializer(0.0))

                first_product = math_ops.matmul(vec_bcast,
                                                weights,
                                                name="mm1")
                second_product = math_ops.matmul(mat,
                                                 first_product,
                                                 name="mm2")

            report = ReportJSON(self, sess)
            tu.move_variable_initialization_to_cpu()
            sess.run(variables.global_variables_initializer())

            # Reset the report after initialisation and before the measured run.
            report.reset()
            feed = {vec: np.zeros(vec.shape), mat: np.zeros(mat.shape)}
            sess.run(second_product, feed)
            report.parse_log()

            report.assert_total_tile_memory(112509300)
            report.assert_max_tile_memory(100438)

            expected_compute_sets = [
                '__seed*',
                'host-exchange-local-copy-',
                'mm1/dot*',
                'Copy_',
            ]
            report.assert_all_compute_sets_and_list(expected_compute_sets)
Exemplo n.º 2
0
    def testGroupNormalizeInference(self):
        """Build conv -> group norm -> conv -> group norm and check compute sets.

        Both group-norm inference ops share the same constant gamma, beta, mean
        and inverse standard deviation. The expected list names only the first
        convolution and the first group norm.
        """
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                inp = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

                with variable_scope.variable_scope("vs", use_resource=True):

                    def conv_1x1(inputs):
                        # 2 filters, kernel size 1, all-ones kernel, no bias.
                        return convolutional.conv2d(
                            inputs,
                            2,
                            1,
                            use_bias=False,
                            kernel_initializer=init_ops.ones_initializer())

                    net = conv_1x1(inp)

                    half = [0.5, 0.5]
                    gamma = constant_op.constant(half, np.float32)
                    beta = constant_op.constant(half, np.float32)
                    mean = constant_op.constant(half, np.float32)
                    inv_std_dev = constant_op.constant(half, np.float32)

                    def group_norm(inputs):
                        return gen_popnn_ops.popnn_group_norm_inference(
                            inputs=inputs,
                            gamma=gamma,
                            beta=beta,
                            mean=mean,
                            inv_std_dev=inv_std_dev,
                            data_format="NHWC",
                            epsilon=0.0015,
                            num_groups=2)

                    net = group_norm(net)
                    net = conv_1x1(net)
                    net = group_norm(net)

            report = ReportJSON(self, sess)
            sess.run(variables.global_variables_initializer())

            report.reset()
            sess.run(net, {inp: np.zeros([1, 4, 4, 2])})
            report.parse_log()

            # Would fail if there were two group norms in the graph
            expected_compute_sets = [
                '__seed*',
                'Copy_',
                'vs/conv2d/Conv2D/convolution.*/Conv_1x1/Convolve',
                'vs/PopnnGroupNormInference/group-norm-inference*/',
            ]
            report.assert_all_compute_sets_and_list(expected_compute_sets)
Exemplo n.º 3
0
    def testMultiIpu(self):
        """Compile a graph split over two IPU shards and check result and tiles.

        Shard 0 computes pa + pb; shard 1 computes pa + pc and the final sum.
        The test checks the numeric result, that the tensor map reports a
        single computation using exactly tiles 0 and 1216, and the list of
        compute-set name patterns.
        """
        with self.session() as sess:

            def my_graph(pa, pb, pc):
                with ops.device("/device:IPU:0"):
                    with ipu.scopes.ipu_shard(0):
                        left = pa + pb

                    with ipu.scopes.ipu_shard(1):
                        right = pa + pc
                        total = left + right

                return [total]

            with ops.device('cpu'):
                pa = array_ops.placeholder(np.float32, [2], name="a")
                pb = array_ops.placeholder(np.float32, [2], name="b")
                pc = array_ops.placeholder(np.float32, [2], name="c")

            # The report is created with a device count override of 2.
            report = ReportJSON(self, sess, device_count_override=2)
            compiled = ipu.ipu_compiler.compile(my_graph, [pa, pb, pc])

            report.reset()

            feed = {pa: [1., 1.], pb: [0., 1.], pc: [1., 5.]}
            outputs = sess.run(compiled, feed)
            self.assertAllClose(outputs[0], [3., 8.])

            report.parse_log()
            tensor_map = report.get_tensor_map()
            computations = tensor_map.computation_names()
            self.assertEqual(len(computations), 1)

            used_tiles = tensor_map.tile_ids(computations[0])
            self.assertEqual(len(used_tiles), 2)
            self.assertEqual(used_tiles, {0, 1216})

            expected_compute_sets = [
                '__seed*',
                'add*/add*/Add',
                'switchControlBroadcast2/*OnTileCopy',
                'Copy_XLA_Args*/arg0.1_to_/ipu-inter-copy*/OnTileCopy',
                'Copy_/ipu-inter-copy_to_/ipu-inter-copy*/OnTileCopy',
            ]
            report.assert_all_compute_sets_and_list(expected_compute_sets)
Exemplo n.º 4
0
    def testBatchNormalizeInferenceDontMatchDifferentTypes(self):
        """Check compute sets when two batch norms run on different dtypes.

        The graph is conv -> batch norm (float32 input) -> cast to float16 ->
        conv -> batch norm. The expected list contains separate entries for
        both convolutions and both batch norms.
        """
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                inp = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

                with variable_scope.variable_scope("vs", use_resource=True):

                    def conv_1x1(inputs):
                        # 2 filters, kernel size 1, all-ones kernel, no bias.
                        return convolutional.conv2d(
                            inputs,
                            2,
                            1,
                            use_bias=False,
                            kernel_initializer=init_ops.ones_initializer())

                    net = conv_1x1(inp)
                    net = layers_norm.batch_normalization(net, fused=True)
                    net = math_ops.cast(net, np.float16)
                    net = conv_1x1(net)
                    net = layers_norm.batch_normalization(net, fused=True)

            report = ReportJSON(self, sess)
            sess.run(variables.global_variables_initializer())

            report.reset()
            sess.run(net, {inp: np.zeros([1, 4, 4, 2])})
            report.parse_log()

            # Matches two convolutions
            expected_compute_sets = [
                '__seed*',
                'Copy_',
                'vs/conv2d/Conv2D/convolution.*/Conv_1x1',
                'vs/batch_normalization/FusedBatchNorm*/batch-norm-inference.*/',
                'vs/Cast/convert.*/Cast',
                'vs/conv2d_1/Conv2D/convolution.*/Conv_1x1',
                'vs/batch_normalization_1/FusedBatchNorm*/batch-norm-inference.*/',
            ]
            report.assert_all_compute_sets_and_list(expected_compute_sets)
Exemplo n.º 5
0
    def testNormCacheConstants(self):
        """Run two inference batch norms sharing scale/offset/moments.

        Both fused batch norms use the same broadcast scale (also used as the
        offset) and the moments of ``x``. The test checks the summed output,
        the reported total/max tile memory and the compute-set name patterns;
        the expected list names only the "a" batch norm.
        """
        with self.session() as sess:

            def model(x, y, z):
                scale = gen_array_ops.broadcast_to(z, shape=[65536])
                # The same tensor serves as both scale and offset.
                offset = scale
                b_mean, b_var = nn.moments(x, [0, 1, 2], name='moments')

                def batch_norm(inputs, name):
                    return nn.fused_batch_norm(inputs,
                                               scale,
                                               offset,
                                               b_mean,
                                               b_var,
                                               1e-3,
                                               is_training=False,
                                               name=name)

                norm_a = batch_norm(x, "a")
                norm_b = batch_norm(y, "b")

                return norm_a[0] + norm_b[0]

            with ops.device('cpu'):
                x = array_ops.placeholder(np.float16, [1, 1, 1, 65536],
                                          name="x")
                y = array_ops.placeholder(np.float16, [1, 1, 1, 65536],
                                          name="y")
                z = array_ops.placeholder(np.float16, shape=[1])

            with ops.device("/device:IPU:0"):
                compiled = ipu_compiler.compile(model, inputs=[x, y, z])

            report = ReportJSON(self, sess)
            tu.move_variable_initialization_to_cpu()
            sess.run(variables.global_variables_initializer())

            report.reset()

            feed = {
                x: np.ones(x.shape),
                y: np.ones(y.shape),
                z: [1.0],
            }
            outputs = sess.run(compiled, feed)
            self.assertAllClose(outputs[0], np.full(outputs[0].shape, 2))

            report.parse_log()

            report.assert_total_tile_memory(1634674)
            report.assert_max_tile_memory(1551)

            # Would fail if there were two batch norms in the graph
            expected_compute_sets = [
                '__seed*',
                'host-exchange-local-copy',
                'Copy_',
                'moments/SquaredDifference/multiply',
                'a/batch-norm-inference',
                'add/add*/Add',
            ]
            report.assert_all_compute_sets_and_list(expected_compute_sets)
Exemplo n.º 6
0
    def testGroupNormsMatchFwdBwd(self):
        """Train three conv + group-norm stages and check the compute sets.

        Each stage is a 1x1 convolution followed by a group-norm training op
        using shared constant gamma/beta. The summed output is minimised with
        gradient descent for one step. The expected list names a single
        forward group norm and a single group-norm gradient.
        """
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                inp = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

                with variable_scope.variable_scope("vs", use_resource=True):

                    def conv_1x1(inputs, name):
                        # 2 filters, kernel size 1, all-ones kernel, no bias.
                        return convolutional.conv2d(
                            inputs,
                            2,
                            1,
                            use_bias=False,
                            kernel_initializer=init_ops.ones_initializer(),
                            name=name)

                    net = conv_1x1(inp, 'conv1')

                    gamma = constant_op.constant([0.5, 0.5], np.float32)
                    beta = constant_op.constant([0.5, 0.5], np.float32)

                    def group_norm(inputs):
                        # Only the first of the three outputs is used.
                        normalised, _, _ = (
                            gen_popnn_ops.popnn_group_norm_training(
                                inputs=inputs,
                                gamma=gamma,
                                beta=beta,
                                data_format="NHWC",
                                epsilon=0.0015,
                                num_groups=2))
                        return normalised

                    net = group_norm(net)
                    for conv_name in ('conv2', 'conv3'):
                        net = conv_1x1(net, conv_name)
                        net = group_norm(net)

                loss = math_ops.reduce_sum(net)
                optimizer = gradient_descent.GradientDescentOptimizer(0.1)
                train = optimizer.minimize(loss)

            report = ReportJSON(self, sess)
            sess.run(variables.global_variables_initializer())

            report.reset()
            sess.run([train, loss], {inp: np.zeros([1, 4, 4, 2])})
            report.parse_log()

            # One GN for forwards and one GN for grad
            # pylint: disable=line-too-long
            expected_compute_sets = [
                '__seed*',
                'Copy_',
                'vs/conv1/Conv2D/convolution*/Conv_1x1/Convolve',
                'vs/PopnnGroupNormTraining/group-norm-training*/Norm',
                'vs/PopnnGroupNormTraining/group-norm-training*/iStdDev',
                'vs/PopnnGroupNormTraining/group-norm-training*/Whiten',
                'Sum/reduce.*/*/Reduce',
                'gradients/vs/PopnnGroupNormTraining_2_grad/PopnnGroupNormGrad/group-norm-grad*/',
                'gradients/vs/conv*/Conv2D_grad/Conv2DBackpropFilter/fusion.*',
                'gradients/vs/conv*/Conv2D_grad/Conv2DBackpropInput/fusion/*Transpose',
            ]
            # pylint: enable=line-too-long
            report.assert_all_compute_sets_and_list(expected_compute_sets)
Exemplo n.º 7
0
    def testBatchNormsMatchFwdBwd(self):
        """Train three conv + batch-norm stages and check the compute sets.

        Each stage is a 1x1 convolution followed by a fused batch norm in
        training mode. The summed output is minimised with gradient descent
        for one step. The expected list names a single forward batch norm and
        a single batch-norm gradient, plus one update entry per batch norm.
        """
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                inp = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

                with variable_scope.variable_scope("vs", use_resource=True):
                    net = inp
                    for conv_name in ('conv1', 'conv2', 'conv3'):
                        # 2 filters, kernel size 1, all-ones kernel, no bias.
                        net = convolutional.conv2d(
                            net,
                            2,
                            1,
                            use_bias=False,
                            kernel_initializer=init_ops.ones_initializer(),
                            name=conv_name)
                        net = layers_norm.batch_normalization(net,
                                                              fused=True,
                                                              training=True)

                loss = math_ops.reduce_sum(net)
                optimizer = gradient_descent.GradientDescentOptimizer(0.1)
                train = optimizer.minimize(loss)

            report = ReportJSON(self, sess)
            sess.run(variables.global_variables_initializer())

            report.reset()
            sess.run([train, loss], {inp: np.zeros([1, 4, 4, 2])})
            report.parse_log()

            # One BN for forwards and one BN for grad
            # (note that we don't cache gradient application)
            # pylint: disable=line-too-long
            expected_compute_sets = [
                '__seed*',
                'Copy*',
                'vs/conv1/Conv2D/convolution.*/Conv_1x1',
                'vs/batch_normalization/FusedBatchNorm*/batch-norm-training.*/',
                'Sum/reduce.*/ReduceOnTile/InToIntermediateNoExchange/Reduce',
                'Sum/reduce.*/ReduceFinalStage/IntermediateToOutput/Reduce',
                'gradients/vs/batch_normalization_2/FusedBatchNorm*_grad/FusedBatchNormGrad*/batch-norm-grad.*/',
                'GradientDescent/update_vs/batch_normalization/',
                'GradientDescent/update_vs/batch_normalization_1/',
                'GradientDescent/update_vs/batch_normalization_2/',
                'gradients/vs/conv*/Conv2D_grad/Conv2DBackpropFilter/fusion.*/AddTo',
                'gradients/vs/conv*/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Conv_4x4',
                'gradients/vs/conv*/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Transpose',
                'gradients/vs/conv*/Conv2D_grad/Conv2DBackpropInput/fusion/*Transpose',
            ]
            # pylint: enable=line-too-long
            report.assert_all_compute_sets_and_list(expected_compute_sets)