Example #1
  def testMirroredStratParaAsync(self):
    """Tests RNG/MirrorStrategy interaction #3.

    The user can create n independent RNGs outside strategy.scope(), where n
    is the number of replicas, and give one to each replica. The replicas can
    thus get different random-number streams.
    """
    shape = [3, 4]
    dtype = dtypes.int32
    gens = random.get_global_generator().split(count=2)
    devices = ["/cpu:0", test_util.gpu_device_name()]
    strat = MirroredStrategy(devices=devices)
    # Use `PerReplica` to specify which `gen` is sent to which replica
    gens = dist_values.PerReplica(
        device_map=dist_values.ReplicaDeviceMap(devices),
        values=[[g] for g in gens])
    with strat.scope():
      def f(gen):
        t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t = array_ops.stack([t1, t2])
        return t
      results = strat.extended.call_for_each_replica(
          fn=f, args=gens)
      values = results.values
      self.assertAllEqual(2, len(values))
      self.assertAllDifferent(values)
Example #2
    def testMirroredVarAsFunctionArg(self):
        """Tests that RNG with MirroredVariable can be used as tf.function's arg.
    """
        shape = [3, 4]
        dtype = dtypes.int32
        strat = MirroredStrategy(
            devices=["/cpu:0", test_util.gpu_device_name()])
        with strat.scope():
            gen = random.Generator.from_seed(1234)

            @def_function.function
            def f(gen):
                t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
                t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
                t = array_ops.stack([t1, t2])
                return t

            def g():
                return f(gen)

            for _ in range(2):
                results = strat.extended.call_for_each_replica(fn=g)
                values = results.values
                self.assertAllEqual(2, len(values))
                self.assertAllEqual(values[0], values[1])
Example #3
  def testMirroredStratUnseedSync(self):
    """Tests RNG/MirrorStrategy interaction #2c.

    If the RNG created in situation #2 is unseeded, the replicas' random-number
    streams are still the same.

    If the RNG created in situation #2b is unseeded, the replicas' random-number
    streams will be different. We can't test this for now because the op
    'NonDeterministicInts' is not implemented on GPU yet.
    """
    shape = [3, 4]
    dtype = dtypes.int32
    strat = MirroredStrategy(devices=["/cpu:0", test_util.gpu_device_name()])
    # TODO(wangpeng): support calling `random.Generator()` inside `f` (i.e.
    #   inside `call_for_each_replica` so that each replica can get a
    #   different random-number stream. The only obstacle is that op
    #   'NonDeterministicInts' is not implemented on GPU.)
    with strat.scope():
      gen = random.Generator()
      def f():
        t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t = array_ops.stack([t1, t2])
        return t
      results = strat.extended.call_for_each_replica(fn=f)
      values = results.values
      self.assertAllEqual(2, len(values))
      self.assertAllEqual(values[0], values[1])
Example #4
    def testTrain(self):
        if "sycl" in test_util.gpu_device_name().lower():
            return

        batch_size = 20
        sequence_length = 35
        with tf.Graph().as_default(), tf.device(tf.test.gpu_device_name()):
            inputs_ph = tf.placeholder(tf.int64, [sequence_length, batch_size],
                                       "inputs")
            labels_ph = tf.placeholder(tf.int64, [sequence_length, batch_size],
                                       "labels")

            inputs = np.ones(inputs_ph.shape.as_list(), dtype=np.int64)
            labels = np.ones(labels_ph.shape.as_list(), dtype=np.int64)

            model = rnn_ptb.test_model(tf.test.is_gpu_available())
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
            loss = rnn_ptb.loss_fn(model, inputs_ph, labels_ph, training=True)
            grads = rnn_ptb.clip_gradients(optimizer.compute_gradients(loss),
                                           0.25)
            train_op = optimizer.apply_gradients(grads)

            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                sess.run(train_op,
                         feed_dict={
                             inputs_ph: inputs,
                             labels_ph: labels
                         })
                sess.run([train_op, loss],
                         feed_dict={
                             inputs_ph: inputs,
                             labels_ph: labels
                         })
Example #5
    def testGPUSameAsOldRandomOps(self):
        """Tests that the generated numbers are the same as the old random_ops.py.

        The GPU version.
        """
        seed1, seed2 = 79, 25
        with ops.device(test_util.gpu_device_name()):
            random.reset_global_generator([0, seed2, seed1])
        shape = constant_op.constant([4, 7])
        dtype = dtypes.float64

        @def_function.function
        def old():
            with ops.device(test_util.gpu_device_name()):
                return gen_random_ops.random_standard_normal(shape,
                                                             dtype=dtype,
                                                             seed=seed1,
                                                             seed2=seed2)

        def new():
            with ops.device(test_util.gpu_device_name()):
                return random.get_global_generator().standard_normal(
                    shape, dtype=dtype)

        for _ in range(100):
            self.assertAllEqual(old(), new())
Example #6
    def testMirroredStratParaSyncWithinFun(self):
        """Tests RNG/MirrorStrategy interaction #2b.

        If the RNG creation is within `f` in situation #2, the replicas'
        random-number streams are still the same. Note that whether the RNG
        creation is within strategy.scope() or not doesn't affect the result in
        this case (putting it inside strategy.scope() will cause unnecessary
        mirror creation and waste memory though).
        """
        shape = [3, 4]
        dtype = dtypes.int32
        strat = MirroredStrategy(
            devices=["/cpu:0", test_util.gpu_device_name()])

        def f():
            gen = random.Generator.from_seed(1234)
            t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
            t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
            t = array_ops.stack([t1, t2])
            return t

        results = strat.extended.call_for_each_replica(fn=f)
        values = results.values
        self.assertAllEqual(2, len(values))
        self.assertAllEqual(values[0], values[1])
Example #7
  def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
    if not test.is_gpu_available():
      # Can't perform this test w/o a GPU
      return

    with self.test_session(use_gpu=True) as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        x = array_ops.zeros([1, 1, 3])
        cell = rnn_cell_impl.DeviceWrapper(
            rnn_cell_impl.GRUCell(3), test_util.gpu_device_name())
        with ops.device("/cpu:0"):
          outputs, _ = rnn.dynamic_rnn(
              cell=cell, inputs=x, dtype=dtypes.float32)
        run_metadata = config_pb2.RunMetadata()
        opts = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)

        sess.run([variables_lib.global_variables_initializer()])
        _ = sess.run(outputs, options=opts, run_metadata=run_metadata)

      step_stats = run_metadata.step_stats
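      # The ordering of dev_stats entries is not fixed, so locate the GPU (or SYCL) entry by device name.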
      ix = 0 if (("gpu"  in step_stats.dev_stats[0].device) or
                 ("sycl" in step_stats.dev_stats[0].device)) else 1
      gpu_stats = step_stats.dev_stats[ix].node_stats
      cpu_stats = step_stats.dev_stats[1 - ix].node_stats
      self.assertFalse([s for s in cpu_stats if "gru_cell" in s.node_name])
      self.assertTrue([s for s in gpu_stats if "gru_cell" in s.node_name])
Example #8
    def testRemoteFunctionGPUCPU(self):
        if not test_util.is_gpu_available():
            self.skipTest("No GPU available")

        gpu_target = "/job:localhost/replica:0/task:0" + test_util.gpu_device_name(
        )

        @function.Defun(dtypes.float32, dtypes.float32)
        def _remote_fn(a, b):
            return math_ops.multiply(a, b)

        with ops.device(gpu_target):
            a = variables.Variable(2, dtype=dtypes.float32)
            b = variables.Variable(3, dtype=dtypes.float32)

        with ops.device(gpu_target):
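            # remote_call itself is placed on the GPU, but it executes _remote_fn on the CPU target below.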
            remote_op = functional_ops.remote_call(
                args=[a, b],
                Tout=[dtypes.float32],
                f=_remote_fn,
                target="/job:localhost/replica:0/task:0/cpu:0")[0] + 3.0

        with self.test_session() as sess:
            sess.run(variables.global_variables_initializer())
            mul = sess.run(remote_op)
            self.assertEqual(mul, 9.0)
Example #9
    def _benchmark_apply(self, label, model):
        if "sycl" in test_util.gpu_device_name().lower():
            return

        num_iters = 100
        num_warmup = 10
        dataset = tf.data.Dataset.from_tensors(
            tf.ones([PTBBenchmark.SEQ_LEN, PTBBenchmark.BATCH_SIZE],
                    dtype=tf.int64)).repeat(num_iters + num_warmup)
        inputs = dataset.make_one_shot_iterator().get_next()

        with tf.device(tf.test.gpu_device_name()):
            outputs = model(inputs, training=True)

            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
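                # Warm-up iterations run before timing so one-time setup costs are excluded.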
                for _ in range(num_warmup):
                    sess.run(outputs)
                gc.collect()

                start = time.time()
                for _ in range(num_iters):
                    sess.run(outputs)
                self._report(label, start, num_iters,
                             tf.test.gpu_device_name(),
                             PTBBenchmark.BATCH_SIZE)
Example #10
    def _benchmark_train(self, label, model):
        if "sycl" in test_util.gpu_device_name().lower():
            return

        num_iters = 100
        num_warmup = 10
        dataset = tf.data.Dataset.from_tensors(
            tf.ones([PTBBenchmark.SEQ_LEN, PTBBenchmark.BATCH_SIZE],
                    dtype=tf.int64)).repeat(num_iters + num_warmup)
        # inputs and labels have the same shape
        dataset = tf.data.Dataset.zip((dataset, dataset))
        (inputs, labels) = dataset.make_one_shot_iterator().get_next()

        with tf.device(tf.test.gpu_device_name()):
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
            loss = rnn_ptb.loss_fn(model, inputs, labels, training=True)
            grads = rnn_ptb.clip_gradients(optimizer.compute_gradients(loss),
                                           0.25)
            train_op = optimizer.apply_gradients(grads)

            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                for _ in range(num_warmup):
                    sess.run(train_op)
                gc.collect()
                start = time.time()
                for _ in range(num_iters):
                    sess.run(train_op)
                self._report(label, start, num_iters,
                             tf.test.gpu_device_name(),
                             PTBBenchmark.BATCH_SIZE)
Example #11
    def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
        if not test.is_gpu_available():
            # Can't perform this test w/o a GPU
            return

        with self.test_session(use_gpu=True) as sess:
            with variable_scope.variable_scope(
                    "root", initializer=init_ops.constant_initializer(0.5)):
                x = array_ops.zeros([1, 1, 3])
                cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3),
                                                   test_util.gpu_device_name())
                with ops.device("/cpu:0"):
                    outputs, _ = rnn.dynamic_rnn(cell=cell,
                                                 inputs=x,
                                                 dtype=dtypes.float32)
                run_metadata = config_pb2.RunMetadata()
                opts = config_pb2.RunOptions(
                    trace_level=config_pb2.RunOptions.FULL_TRACE)

                sess.run([variables_lib.global_variables_initializer()])
                _ = sess.run(outputs, options=opts, run_metadata=run_metadata)

            step_stats = run_metadata.step_stats
            ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or
                       ("sycl" in step_stats.dev_stats[0].device)) else 1
            gpu_stats = step_stats.dev_stats[ix].node_stats
            cpu_stats = step_stats.dev_stats[1 - ix].node_stats
            self.assertFalse(
                [s for s in cpu_stats if "gru_cell" in s.node_name])
            self.assertTrue(
                [s for s in gpu_stats if "gru_cell" in s.node_name])
Example #12
    def testDifferentDeviceCPUGPU(self):
        if not test_util.is_gpu_available():
            self.skipTest("No GPU available")

        gpu_name = test_util.gpu_device_name()
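        # gpu_device_name() returns a name like "/device:GPU:0", which is appended to the task path.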
        self._prefetch_fn_helper_one_shot(
            "cpu_gpu", "/job:localhost/replica:0/task:0/cpu:0",
            "/job:localhost/replica:0/task:0" + gpu_name)
Example #13
 def testColocateGradients(self):
     with ops.Graph().as_default() as g:
         w = constant(1.0, shape=[1, 1])
         x = constant(1.0, shape=[1, 2])
         with g.device(test_util.gpu_device_name()):
             wx = math_ops.matmul(w, x)
         gw = gradients.gradients(wx, [w],
                                  colocate_gradients_with_ops=True)[0]
     self.assertEqual(gw.op.colocation_groups(), wx.op.colocation_groups())
Example #14
def _available_devices():
  devices = ["cpu"]
  # gpu_device_name() returns an empty string when no GPU is available.
  if test_util.gpu_device_name():
    devices.append("gpu")

  if has_tpu():
    devices.append("tpu")

  return tuple(devices)
Example #15
def _available_devices():
  devices = ["cpu"]
  # gpu_device_name() returns an empty string when no GPU is available.
  if test_util.gpu_device_name():
    devices.append("gpu")

  if has_tpu():
    devices.append("tpu")

  return tuple(devices)
Example #16
 def testCrossDeviceSplit(self):
   """Tests that a CPU RNG can split into RNGs on GPU.
   """
   with ops.device("/device:CPU:0"):
     gen = random.Generator(seed=1234)  # gen is on CPU
     self.assertRegex("CPU", gen.state.device)
   with ops.device(test_util.gpu_device_name()):
     gens = gen.split(count=10)  # gens are on GPU
     self.assertRegex("GPU", gens[0].state.device)
Example #17
 def _testGpu(self, x):
   device = test_util.gpu_device_name()
   if device:
     np_ans = np.array(x)
     with context.device(device):
       tf_ans = ops.convert_to_tensor(x).numpy()
     if np_ans.dtype in [np.float32, np.float64, np.complex64, np.complex128]:
       self.assertAllClose(np_ans, tf_ans)
     else:
       self.assertAllEqual(np_ans, tf_ans)
Example #18
 def testGPUEqualsCPU(self, dtype):
   """Tests that GPU and CPU generate the same integer outputs."""
   seed = 1234
   shape = [315, 49]
   with ops.device("/device:CPU:0"):
     cpu = random.Generator.from_seed(seed).uniform_full_int(
         shape=shape, dtype=dtype)
   with ops.device(test_util.gpu_device_name()):
     gpu = random.Generator.from_seed(seed).uniform_full_int(
         shape=shape, dtype=dtype)
   self.assertAllEqual(cpu, gpu)
Example #19
 def testGPUEqualsCPU(self, dtype):
     """Tests that GPU and CPU generate the same integer outputs."""
     seed = 1234
     shape = [315, 49]
     with ops.device("/device:CPU:0"):
         cpu = random.Generator.from_seed(seed).uniform_full_int(
             shape=shape, dtype=dtype)
     with ops.device(test_util.gpu_device_name()):
         gpu = random.Generator.from_seed(seed).uniform_full_int(
             shape=shape, dtype=dtype)
     self.assertAllEqual(cpu, gpu)
Example #20
    def testSupportDevices(self):
        gpu_type = test_util.gpu_device_type()
        gpu_name = test_util.gpu_device_name()
        with ops.Graph().as_default() as g:
            a = random_ops.random_uniform(shape=(2, 3))
            b = random_ops.random_uniform(shape=(2, 3))
            c = a + b
            dims = math_ops.range(0, array_ops.rank(c), 1)
            d = math_ops.reduce_sum(a, axis=dims)
            train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
            train_op.append(d)
            mg = meta_graph.create_meta_graph_def(graph=g)
            grappler_item = item.Item(mg)

            device_properties = device_properties_pb2.DeviceProperties(
                type=gpu_type, frequency=1000, num_cores=60)
            named_gpu = device_properties_pb2.NamedDevice(
                properties=device_properties, name=gpu_name)
            device_properties = device_properties_pb2.DeviceProperties(
                type='CPU', frequency=3000, num_cores=6)
            named_cpu = device_properties_pb2.NamedDevice(
                properties=device_properties, name='/CPU:0')
            virtual_cluster = cluster.Cluster(devices=[named_cpu, named_gpu])
            supported_dev = virtual_cluster.GetSupportedDevices(grappler_item)
            self.assertEqual(supported_dev['add'], ['/CPU:0', gpu_name])
            self.assertEqual(supported_dev['Sum'], ['/CPU:0', gpu_name])
            self.assertEqual(supported_dev['range'], ['/CPU:0', gpu_name])

            real_cluster = cluster.Cluster()
            supported_dev = real_cluster.GetSupportedDevices(grappler_item)
            if test.is_gpu_available():
                self.assertEqual(supported_dev['add'], [
                    '/job:localhost/replica:0/task:0/device:CPU:0',
                    '/job:localhost/replica:0/task:0' + gpu_name
                ])
                self.assertEqual(supported_dev['Sum'], [
                    '/job:localhost/replica:0/task:0/device:CPU:0',
                    '/job:localhost/replica:0/task:0' + gpu_name
                ])
                # The axis tensor must reside on the host
                self.assertEqual(
                    supported_dev['range'],
                    ['/job:localhost/replica:0/task:0/device:CPU:0'])
            else:
                self.assertEqual(
                    supported_dev['add'],
                    ['/job:localhost/replica:0/task:0/device:CPU:0'])
Example #21
    def testPrefetchToDeviceGpu(self):
        if not test_util.is_gpu_available():
            self.skipTest("No GPU available")

        host_dataset = dataset_ops.Dataset.range(10)
        gpu_name = test_util.gpu_device_name()
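        # prefetch_to_device stages elements of the host dataset onto the GPU ahead of time.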
        device_dataset = host_dataset.apply(
            prefetching_ops.prefetch_to_device(gpu_name))

        iterator = device_dataset.make_one_shot_iterator()
        next_element = iterator.get_next()

        with self.test_session() as sess:
            for i in range(10):
                self.assertEqual(i, sess.run(next_element))
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(next_element)
Example #22
 def testCPUGPUCopy(self):
   if not context.num_gpus():
     return
   t = constant_op.constant([1.0, 2.0])
   l = list_ops.tensor_list_from_tensor(t, element_shape=scalar_shape())
   gpu_name = test_util.gpu_device_name()
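    # Running identity() under the GPU device copies the TensorList from CPU to GPU.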
   with context.device(gpu_name):
     l_gpu = array_ops.identity(l)
     self.assertAllEqual(
         self.evaluate(
             list_ops.tensor_list_pop_back(
                 l_gpu, element_dtype=dtypes.float32)[1]), 2.0)
   l_cpu = array_ops.identity(l_gpu)
   self.assertAllEqual(
       self.evaluate(
           list_ops.tensor_list_pop_back(
               l_cpu, element_dtype=dtypes.float32)[1]), 2.0)
Example #23
    def testColocateGradientsWithGateGradients(self):
        if not test_util.is_gpu_available():
            self.skipTest("No GPU available")
        with ops.Graph().as_default() as g:
            with g.device("/device:CPU:0"):
                x = constant(1.0, shape=[1, 1])
                y = constant(1.0, shape=[1, 1])
                s = x + y
            with g.device(test_util.gpu_device_name()):
                z = math_ops.reduce_sum(s)

            gz_x = gradients.gradients(z, [x],
                                       colocate_gradients_with_ops=True,
                                       gate_gradients=True)[0]
            with session.Session():
                # Make sure the placer doesn't complain.
                gz_x.eval()
Example #24
  def testMirroredStratParaSync(self):
    """Tests RNG/MirrorStrategy interaction #2.

    If an RNG is created inside strategy.scope(), each replica gets a
    mirror of this RNG. If they access their RNGs in the same
    manner, their random-number streams are the same.
    """
    shape = [3, 4]
    dtype = dtypes.int32
    strat = MirroredStrategy(devices=["/cpu:0", test_util.gpu_device_name()])
    with strat.scope():
      gen = random.Generator(seed=1234)
      def f():
        t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t = array_ops.stack([t1, t2])
        return t
      results = strat.extended.call_for_each_replica(fn=f)
      values = results.values
      self.assertAllEqual(2, len(values))
      self.assertAllEqual(values[0], values[1])
Example #25
  def testMirroredStratSeq(self):
    """Tests RNG/MirrorStrategy interaction #1.

    If an RNG is created outside strategy.scope(), all replicas will access the
    same RNG object, and accesses are serialized.
    """
    shape = [3, 4]
    dtype = dtypes.int32
    gen = random.Generator(seed=1234)
    strat = MirroredStrategy(devices=["/cpu:0", test_util.gpu_device_name()])
    with strat.scope():
      def f():
        t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t = array_ops.stack([t1, t2])
        return t
      results = strat.extended.call_for_each_replica(
          fn=f)
      values = results.values
      self.assertAllEqual(2, len(values))
      self.assertAllDifferent(values)
Example #26
  def testMirroredStratParaSyncWithinFun(self):
    """Tests RNG/MirrorStrategy interaction #2b.

    If the RNG creation is within `f` in situation #2, the replicas'
    random-number streams are still the same. Note that whether the RNG creation
    is within strategy.scope() or not doesn't affect the result in this case
    (putting it inside strategy.scope() will cause unnecessary mirror creation
    and waste memory though).
    """
    shape = [3, 4]
    dtype = dtypes.int32
    strat = MirroredStrategy(devices=["/cpu:0", test_util.gpu_device_name()])
    def f():
      gen = random.Generator(seed=1234)
      t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
      t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
      t = array_ops.stack([t1, t2])
      return t
    results = strat.extended.call_for_each_replica(fn=f)
    values = results.values
    self.assertAllEqual(2, len(values))
    self.assertAllEqual(values[0], values[1])
Example #27
 def testGetSetGPU(self):
   if not context.num_gpus():
     return
   gpu_name = test_util.gpu_device_name()
   with context.device(gpu_name):
     self.testGetSetItem()
Example #28
  def testSameAsOldRandomOpsGPU(self):
    """Tests that the generated numbers are the same as the old random_ops.py.

    The GPU version.
    """
    self._sameAsOldRandomOps(test_util.gpu_device_name(), GPU_FLOATS)
Example #29
 def testStackGPU(self):
   if not context.num_gpus():
     return
   gpu_name = test_util.gpu_device_name()
   with context.device(gpu_name):
     self.testStack()
Example #30
 def testFromTensorGPU(self):
   if not context.num_gpus():
     return
   gpu_name = test_util.gpu_device_name()
   with context.device(gpu_name):
     self.testTensorListFromTensor()