def testCreateMulticloneWithPS(self):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(0)
            tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
            tf_labels = tf.constant(self._labels, dtype=tf.float32)

            model_fn = BatchNormClassifier
            clone_args = (tf_inputs, tf_labels)
            deploy_config = model_deploy.DeploymentConfig(num_clones=2,
                                                          num_ps_tasks=2)

            self.assertEqual(framework.get_variables(), [])
            clones = model_deploy.create_clones(deploy_config, model_fn,
                                                clone_args)
            self.assertEqual(len(framework.get_variables()), 5)
            for i, v in enumerate(framework.get_variables()):
                t = i % 2
                self.assertDeviceEqual(v.device,
                                       '/job:ps/task:%d/device:CPU:0' % t)
                self.assertDeviceEqual(v.device, v.value().device)
            self.assertEqual(len(clones), 2)
            for i, clone in enumerate(clones):
                self.assertEqual(
                    clone.outputs.op.name,
                    'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)
                self.assertEqual(clone.scope, 'clone_%d/' % i)
                self.assertDeviceEqual(clone.device,
                                       '/job:worker/device:GPU:%d' % i)
    def testCreateOnecloneWithPS(self):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(0)
            tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
            tf_labels = tf.constant(self._labels, dtype=tf.float32)

            model_fn = BatchNormClassifier
            model_args = (tf_inputs, tf_labels)
            deploy_config = model_deploy.DeploymentConfig(num_clones=1,
                                                          num_ps_tasks=1)

            self.assertEqual(framework.get_variables(), [])
            clones = model_deploy.create_clones(deploy_config, model_fn,
                                                model_args)
            self.assertEqual(len(framework.get_variables()), 5)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            self.assertEqual(len(update_ops), 2)

            optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, optimizer)
            self.assertEqual(len(grads_and_vars),
                             len(tf.trainable_variables()))
            self.assertEqual(total_loss.op.name, 'total_loss')
            for g, v in grads_and_vars:
                self.assertDeviceEqual(g.device, '/job:worker/device:GPU:0')
                self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')
    def testCreateOnecloneWithPS(self):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(0)
            tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
            tf_labels = tf.constant(self._labels, dtype=tf.float32)

            model_fn = BatchNormClassifier
            clone_args = (tf_inputs, tf_labels)
            deploy_config = model_deploy.DeploymentConfig(num_clones=1,
                                                          num_ps_tasks=1)

            self.assertEqual(framework.get_variables(), [])
            clones = model_deploy.create_clones(deploy_config, model_fn,
                                                clone_args)
            self.assertEqual(len(clones), 1)
            clone = clones[0]
            self.assertEqual(clone.outputs.op.name,
                             'BatchNormClassifier/fully_connected/Sigmoid')
            self.assertDeviceEqual(clone.device, '/job:worker/device:GPU:0')
            self.assertEqual(clone.scope, '')
            self.assertEqual(len(framework.get_variables()), 5)
            for v in framework.get_variables():
                self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')
                self.assertDeviceEqual(v.device, v.value().device)
    def testCreateMulticlone(self):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(0)
            tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
            tf_labels = tf.constant(self._labels, dtype=tf.float32)

            model_fn = BatchNormClassifier
            clone_args = (tf_inputs, tf_labels)
            num_clones = 4
            deploy_config = model_deploy.DeploymentConfig(
                num_clones=num_clones)

            self.assertEqual(framework.get_variables(), [])
            clones = model_deploy.create_clones(deploy_config, model_fn,
                                                clone_args)
            self.assertEqual(len(framework.get_variables()), 5)
            for v in framework.get_variables():
                self.assertDeviceEqual(v.device, 'CPU:0')
                self.assertDeviceEqual(v.value().device, 'CPU:0')
            self.assertEqual(len(clones), num_clones)
            for i, clone in enumerate(clones):
                self.assertEqual(
                    clone.outputs.op.name,
                    'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                               clone.scope)
                self.assertEqual(len(update_ops), 2)
                self.assertEqual(clone.scope, 'clone_%d/' % i)
                self.assertDeviceEqual(clone.device, 'GPU:%d' % i)
    def testCreateLogisticClassifier(self):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(0)
            tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
            tf_labels = tf.constant(self._labels, dtype=tf.float32)

            model_fn = LogisticClassifier
            clone_args = (tf_inputs, tf_labels)
            deploy_config = model_deploy.DeploymentConfig(num_clones=1)

            self.assertEqual(framework.get_variables(), [])
            clones = model_deploy.create_clones(deploy_config, model_fn,
                                                clone_args)
            clone = clones[0]
            self.assertEqual(len(framework.get_variables()), 2)
            for v in framework.get_variables():
                self.assertDeviceEqual(v.device, 'CPU:0')
                self.assertDeviceEqual(v.value().device, 'CPU:0')
            self.assertEqual(clone.outputs.op.name,
                             'LogisticClassifier/fully_connected/Sigmoid')
            self.assertEqual(clone.scope, '')
            self.assertDeviceEqual(clone.device, 'GPU:0')
            self.assertEqual(len(tf.losses.get_losses()), 1)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            self.assertEqual(update_ops, [])
    def testCPUonly(self):
        deploy_config = model_deploy.DeploymentConfig(clone_on_cpu=True)

        self.assertEqual(deploy_config.caching_device(), None)
        self.assertDeviceEqual(deploy_config.clone_device(0), 'CPU:0')
        self.assertEqual(deploy_config.clone_scope(0), '')
        self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')
        self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')
        self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')
    def testDefaults(self):
        deploy_config = model_deploy.DeploymentConfig()

        self.assertEqual(framework.get_variables(), [])
        self.assertEqual(deploy_config.caching_device(), None)
        self.assertDeviceEqual(deploy_config.clone_device(0), 'GPU:0')
        self.assertEqual(deploy_config.clone_scope(0), '')
        self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')
        self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')
        self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')
    def testMultiGPU(self):
        deploy_config = model_deploy.DeploymentConfig(num_clones=2)

        self.assertEqual(deploy_config.caching_device(), None)
        self.assertDeviceEqual(deploy_config.clone_device(0), 'GPU:0')
        self.assertDeviceEqual(deploy_config.clone_device(1), 'GPU:1')
        self.assertEqual(deploy_config.clone_scope(0), 'clone_0')
        self.assertEqual(deploy_config.clone_scope(1), 'clone_1')
        self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')
        self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')
        self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')
    def testReplicasPS(self):
        deploy_config = model_deploy.DeploymentConfig(num_replicas=2,
                                                      num_ps_tasks=2)

        self.assertDeviceEqual(deploy_config.clone_device(0),
                               '/job:worker/device:GPU:0')
        self.assertEqual(deploy_config.clone_scope(0), '')
        self.assertDeviceEqual(deploy_config.optimizer_device(),
                               '/job:worker/device:CPU:0')
        self.assertDeviceEqual(deploy_config.inputs_device(),
                               '/job:worker/device:CPU:0')
    def testLocalTrainOp(self):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(0)
            tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
            tf_labels = tf.constant(self._labels, dtype=tf.float32)

            model_fn = BatchNormClassifier
            model_args = (tf_inputs, tf_labels)
            deploy_config = model_deploy.DeploymentConfig(num_clones=2,
                                                          clone_on_cpu=True)

            optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)

            self.assertEqual(framework.get_variables(), [])
            model = model_deploy.deploy(deploy_config,
                                        model_fn,
                                        model_args,
                                        optimizer=optimizer)

            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            self.assertEqual(len(update_ops), 4)
            self.assertEqual(len(model.clones), 2)
            self.assertEqual(model.total_loss.op.name, 'total_loss')
            self.assertEqual(model.summary_op.op.name, 'summary_op/summary_op')
            self.assertEqual(model.train_op.op.name, 'train_op')

            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                moving_mean = framework.get_variables_by_name('moving_mean')[0]
                moving_variance = framework.get_variables_by_name(
                    'moving_variance')[0]
                initial_loss = sess.run(model.total_loss)
                initial_mean, initial_variance = sess.run(
                    [moving_mean, moving_variance])
                self.assertAllClose(initial_mean, [0.0, 0.0, 0.0, 0.0])
                self.assertAllClose(initial_variance, [1.0, 1.0, 1.0, 1.0])
                for _ in range(10):
                    sess.run(model.train_op)
                final_loss = sess.run(model.total_loss)
                self.assertLess(final_loss, initial_loss / 5.0)

                final_mean, final_variance = sess.run(
                    [moving_mean, moving_variance])
                expected_mean = np.array([0.125, 0.25, 0.375, 0.25])
                expected_var = np.array([0.109375, 0.1875, 0.234375, 0.1875])
                expected_var = self._addBesselsCorrection(16, expected_var)
                self.assertAllClose(final_mean, expected_mean)
                self.assertAllClose(final_variance, expected_var)
    def testMultiGPUPS(self):
        deploy_config = model_deploy.DeploymentConfig(num_clones=2,
                                                      num_ps_tasks=1)

        self.assertEqual(deploy_config.caching_device()(tf.no_op()), '')
        self.assertDeviceEqual(deploy_config.clone_device(0),
                               '/job:worker/device:GPU:0')
        self.assertDeviceEqual(deploy_config.clone_device(1),
                               '/job:worker/device:GPU:1')
        self.assertEqual(deploy_config.clone_scope(0), 'clone_0')
        self.assertEqual(deploy_config.clone_scope(1), 'clone_1')
        self.assertDeviceEqual(deploy_config.optimizer_device(),
                               '/job:worker/device:CPU:0')
        self.assertDeviceEqual(deploy_config.inputs_device(),
                               '/job:worker/device:CPU:0')
    def testNoSummariesOnGPUForEvals(self):
        with tf.Graph().as_default():
            deploy_config = model_deploy.DeploymentConfig(num_clones=2)

            # clone function creates a fully_connected layer with a regularizer loss.
            def ModelFn():
                inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
                reg = layers.l2_regularizer(0.001)
                layers.fully_connected(inputs, 30, weights_regularizer=reg)

            # No optimizer here, it's an eval.
            model = model_deploy.deploy(deploy_config, ModelFn)
            # The model summary op should have a few summary inputs and all of them
            # should be on the CPU.
            self.assertTrue(model.summary_op.op.inputs)
            for inp in model.summary_op.op.inputs:
                self.assertEqual('/device:CPU:0', inp.device)
    def testVariablesPS(self):
        deploy_config = model_deploy.DeploymentConfig(num_ps_tasks=2)

        with tf.device(deploy_config.variables_device()):
            a = tf.Variable(0)
            b = tf.Variable(0)
            c = tf.no_op()
            d = framework.variable(
                'a', [], caching_device=deploy_config.caching_device())

        self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0')
        self.assertDeviceEqual(a.device, a.value().device)
        self.assertDeviceEqual(b.device, '/job:ps/task:1/device:CPU:0')
        self.assertDeviceEqual(b.device, b.value().device)
        self.assertDeviceEqual(c.device, '')
        self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0')
        self.assertDeviceEqual(d.value().device, '')
    def testPS(self):
        deploy_config = model_deploy.DeploymentConfig(num_clones=1,
                                                      num_ps_tasks=1)

        self.assertDeviceEqual(deploy_config.clone_device(0),
                               '/job:worker/device:GPU:0')
        self.assertEqual(deploy_config.clone_scope(0), '')
        self.assertDeviceEqual(deploy_config.optimizer_device(),
                               '/job:worker/device:CPU:0')
        self.assertDeviceEqual(deploy_config.inputs_device(),
                               '/job:worker/device:CPU:0')
        with tf.device(deploy_config.variables_device()):
            a = tf.Variable(0)
            b = tf.Variable(0)
            c = tf.no_op()
            d = framework.variable(
                'a', [], caching_device=deploy_config.caching_device())
        self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0')
        self.assertDeviceEqual(a.device, a.value().device)
        self.assertDeviceEqual(b.device, '/job:ps/task:0/device:CPU:0')
        self.assertDeviceEqual(b.device, b.value().device)
        self.assertDeviceEqual(c.device, '')
        self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0')
        self.assertDeviceEqual(d.value().device, '')