def disabled_testSmallIntegerOpsForcedToCPU(self):
  """Checks which device an add runs on when requested under a GPU scope.

  Three cases are exercised: small int64 constants, larger int64 tensors, and
  small float32 constants. The test is skipped when no GPU is available.
  """
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')

  cpu_device = '/job:localhost/replica:0/task:0/device:CPU:0'
  gpu_device = '/job:localhost/replica:0/task:0/device:GPU:0'

  lhs = constant_op.constant((1, 2, 3, 4, 5), dtype=dtypes.int64)
  rhs = constant_op.constant((2, 3, 4, 5, 6), dtype=dtypes.int64)
  with context.device('gpu:0'):
    total = lhs + rhs
  # Op forced to CPU since all constants are integers and small.
  self.assertEqual(total.device, cpu_device)

  lhs = array_ops.zeros((8, 10), dtype=dtypes.int64)
  rhs = array_ops.ones((8, 10), dtype=dtypes.int64)
  with context.device('gpu:0'):
    total = lhs + rhs
  # Op not forced to CPU since the tensors are larger than 64 elements.
  self.assertEqual(total.device, gpu_device)

  lhs = constant_op.constant((1, 2, 3, 4, 5), dtype=dtypes.float32)
  rhs = constant_op.constant((2, 3, 4, 5, 6), dtype=dtypes.float32)
  with context.device('gpu:0'):
    total = lhs + rhs
  # Op not forced to CPU since the constants are not integers.
  self.assertEqual(total.device, gpu_device)
def testResourceTensorPlacement(self):
  """Reads a GPU-created resource variable from inside a CPU device scope.

  Skipped when no GPU is available, because the variable is created under
  'gpu:0'.
  """
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')

  with context.device('gpu:0'):
    v = resource_variable_ops.ResourceVariable(1.0)
  with context.device('cpu:0'):
    # Check that even though we specified the cpu device we'll run the read op
    # in the device where the handle is.
    self.assertAllEqual(
        gen_resource_variable_ops.read_variable_op(v.handle, v.dtype), 1.0)
def testGenericSummary(self):
  """Writes a generic summary tagged "x" and checks the tag of the last event."""
  with context.device(self._test_device):
    value = constant_op.constant(1337.0)
    # The metadata constant is created under an explicit CPU scope.
    with context.device("cpu:0"):
      summary_metadata = constant_op.constant("foo")
    self._writer.generic("x", value, summary_metadata)
  last_event = self._readLastEvent()
  first_value = last_event.summary.value[0]
  self.assertEqual("x", first_value.tag)
def testReEntrant(self):
  """Checks that device scope objects can be re-entered and nested.

  The same `cpu` and `gpu` scope objects are entered several times; after each
  enter/exit the current device must end with the expected suffix. Skipped
  when no GPU is available, because the test enters a 'gpu:0' scope.
  """
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')

  cpu = context.device('cpu:0')
  gpu = context.device('gpu:0')
  with cpu:
    with gpu:
      with gpu:
        self.assertEndsWith(current_device(), 'GPU:0')
      # Leaving the inner gpu scope falls back to the outer gpu scope.
      self.assertEndsWith(current_device(), 'GPU:0')
    self.assertEndsWith(current_device(), 'CPU:0')
    with gpu:
      self.assertEndsWith(current_device(), 'GPU:0')
def run_eager_mode(self, **kwargs):
  """Runs the wrapped function `f` under the requested device placement.

  `force_gpu`, `use_gpu` and `f` come from the enclosing scope.

  Args:
    **kwargs: Keyword arguments forwarded unchanged to `f` in every branch.
  """
  if force_gpu:
    # Fall back to the default GPU device name when none is reported.
    gpu_name = gpu_device_name() or "/device:GPU:0"
    with context.device(gpu_name):
      # Forward kwargs here too; previously they were silently dropped in
      # this branch only.
      f(self, **kwargs)
  elif use_gpu:
    # TODO(xpan): Support softplacement and gpu by default when available.
    f(self, **kwargs)
  else:
    with context.device("/device:CPU:0"):
      f(self, **kwargs)
def testDevicePlacementEnforcesConsistency(self):
  """Checks that exiting device scopes out of order raises RuntimeError.

  Scopes are entered and exited manually through `__enter__`/`__exit__` so the
  exit order can be violated on purpose. Skipped when no GPU is available,
  because the test enters a 'gpu:0' scope.
  """
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')

  cpu = context.device('cpu:0')
  gpu = context.device('gpu:0')
  cpu.__enter__()
  self.assertEndsWith(current_device(), 'CPU:0')
  gpu.__enter__()
  self.assertEndsWith(current_device(), 'GPU:0')
  # Exiting the outer (cpu) scope while the gpu scope is still active must
  # fail and leave the current device untouched.
  with self.assertRaisesRegexp(
      RuntimeError, 'Exiting device scope without proper scope nesting'):
    cpu.__exit__()
  self.assertEndsWith(current_device(), 'GPU:0')
  gpu.__exit__()
  self.assertEndsWith(current_device(), 'CPU:0')
def testReEntrant(self):
  """Checks that device scope objects can be re-entered and nested.

  Skipped when no GPU is available.
  """
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')

  cpu_scope = context.device('cpu:0')
  gpu_scope = context.device('gpu:0')
  with cpu_scope:
    with gpu_scope:
      with gpu_scope:
        self.assertEndsWith(current_device(), 'GPU:0')
      # Back in the outer gpu scope.
      self.assertEndsWith(current_device(), 'GPU:0')
    # Back in the cpu scope.
    self.assertEndsWith(current_device(), 'CPU:0')
    with gpu_scope:
      self.assertEndsWith(current_device(), 'GPU:0')
def testImageSummary(self):
  """Writes a summary tagged "image1" and inspects the last event."""
  with context.device(self._test_device):
    values = constant_op.constant([[10.0, 20.0], [-20.0, -10.0]])
    # NOTE(review): this writes a *histogram* although the test is named for
    # image summaries and asserts on the `image` field below; it looks like
    # the writer's image method was intended -- confirm and fix.
    self._writer.histogram("image1", values)
  last_event = self._readLastEvent()
  first_value = last_event.summary.value[0]
  self.assertEqual("image1", first_value.tag)
  self.assertTrue(first_value.image)
def testHistogramSummary(self):
  """Writes a histogram summary tagged "y" and inspects the last event."""
  with context.device(self._test_device):
    samples = constant_op.constant([1.0, 3.0, 3.0, 7.0])
    self._writer.histogram("y", samples)
  last_event = self._readLastEvent()
  first_value = last_event.summary.value[0]
  self.assertEqual("y", first_value.tag)
  self.assertTrue(first_value.histo)
def testScalarSummary(self):
  """Writes a scalar summary tagged "x" and checks its tag and value."""
  with context.device(self._test_device):
    x = constant_op.constant(1337.0)
    self._writer.scalar("x", x)
  event = self._readLastEvent()
  # assertEqual, not assertTrue: assertTrue("x", tag) only checks that "x" is
  # truthy and treats the tag as the failure message, so it could never fail.
  self.assertEqual("x", event.summary.value[0].tag)
  self.assertEqual(1337.0, event.summary.value[0].simple_value)
def benchmark_defun_matmul_100_by_784_GPU(self):
  """Runs `_benchmark_defun_matmul` on `self._m_100_by_784.gpu()`.

  Does nothing when `context.num_gpus()` is falsy.
  """
  if context.num_gpus():
    with context.device(GPU):
      gpu_matrix = self._m_100_by_784.gpu()
      self._benchmark_defun_matmul(
          gpu_matrix,
          transpose_b=True,
          num_iters=self._num_iters_100_by_784)
def benchmark_read_variable_op_with_tape_2_by_2_GPU(self):
  """Runs `_benchmark_read_variable_with_tape` on a variable built on the GPU.

  Does nothing when `context.num_gpus()` is falsy.
  """
  if context.num_gpus():
    with context.device(GPU):
      initial_value = self._m_2_by_2.gpu()
      variable = resource_variable_ops.ResourceVariable(initial_value)
      self._benchmark_read_variable_with_tape(
          variable, num_iters=self._num_iters_2_by_2)
def _update_global_step_tensor(self):
  """Returns the global-step tensor, syncing it first if it is stale.

  Returns:
    `self._global_step_tensor` when the dirty flag is clear; otherwise the
    result of assigning `self._global_step` to it (the flag is cleared first).
  """
  with context.device(self._CPU_DEVICE):
    if not self._global_step_dirty:
      return self._global_step_tensor
    # The Python-side counter changed: clear the flag and push the new value.
    self._global_step_dirty = False
    return state_ops.assign(self._global_step_tensor, self._global_step)
def benchmark_defun_matmul_2_by_2_GPU(self):
  """Runs `_benchmark_defun_matmul` on `self._m_2_by_2.gpu()`.

  Does nothing when `context.num_gpus()` is falsy.
  """
  if context.num_gpus():
    with context.device(GPU):
      gpu_matrix = self._m_2_by_2.gpu()
      self._benchmark_defun_matmul(
          gpu_matrix,
          transpose_b=False,
          num_iters=self._num_iters_2_by_2)
def fn(x):
  """Computes (x + 2.0) under a '/gpu:0' scope, adds 3.0, returns a CPU tensor."""
  with context.device('/gpu:0'):
    two = tensor.Tensor(2.0)
    x_on_gpu = x.as_gpu_tensor()
    partial_sum = math_ops.add(x_on_gpu, two)
  # TODO(apassos): remove as_cpu_tensor below by making TensorVSPace aware
  # of devices.
  total = math_ops.add(partial_sum, tensor.Tensor(3.0))
  return total.as_cpu_tensor()
def __init__(self, logdir, max_queue=10, flush_secs=120, filename_suffix=""):
  """Summary writer for TensorBoard, compatible with eager execution.

  If necessary, multiple instances of `SummaryWriter` can be created, with
  distinct `logdir`s and `name`s. Each `SummaryWriter` instance will retain
  its independent `global_step` counter and data writing destination.

  Example:
  ```python
  writer = tfe.SummaryWriter("my_model")

  # ... Code that sets up the model and data batches ...

  for _ in xrange(train_iters):
    loss = model.train_batch(batch)
    writer.scalar("loss", loss)
    writer.step()
  ```

  Args:
    logdir: Directory in which summary files will be written.
    max_queue: Number of summary items to buffer before flushing to
      filesystem. If 0, summaries will be flushed immediately.
    flush_secs: Number of seconds between forced commits to disk.
    filename_suffix: Suffix of the event protobuf files in which the summary
      data are stored.

  Raises:
    ValueError: If this constructor is called not under eager execution.
  """
  # TODO(apassos, ashankar): Make this class and the underlying
  # contrib.summary_ops compatible with graph model and remove this check.
  if not context.in_eager_mode():
    raise ValueError(
        "Use of SummaryWriter is currently supported only with eager "
        "execution enabled. File an issue at "
        "https://github.com/tensorflow/tensorflow/issues/new to express "
        "interest in fixing this.")

  # TODO(cais): Consider adding name keyword argument, which if None or empty,
  # will register the global global_step that training_util.get_global_step()
  # can find.
  with context.device(self._CPU_DEVICE):
    # A random hex name keeps each writer's variable and resource distinct.
    self._name = uuid.uuid4().hex
    self._global_step = 0
    step_variable_name = "global_step/summary_writer/" + self._name
    self._global_step_tensor = variable_scope.get_variable(
        step_variable_name,
        shape=[],
        dtype=dtypes.int64,
        initializer=init_ops.zeros_initializer())
    self._global_step_dirty = False

    self._resource = gen_summary_ops.summary_writer(shared_name=self._name)
    gen_summary_ops.create_summary_file_writer(
        self._resource, logdir, max_queue, flush_secs, filename_suffix)
    # Delete the resource when this object is deleted
    self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
        handle=self._resource, handle_device=self._CPU_DEVICE)
def audio(self, name, tensor, sample_rate, max_outputs, family=None):
  """Write an audio summary.

  Args:
    name: A name for the generated node. Will also serve as a series name in
      TensorBoard.
    tensor: A 3-D `float32` `Tensor` of shape `[batch_size, frames, channels]`
      or a 2-D `float32` `Tensor` of shape `[batch_size, frames]`, or
      compatible value type.
    sample_rate: A Scalar `float32` `Tensor` indicating the sample rate of the
      signal in hertz.
    max_outputs: Max number of batch elements to generate audio for.
    family: Optional; if provided, used as the prefix of the summary tag name,
      which controls the tab name used for display on Tensorboard.
  """
  with context.device(self._CPU_DEVICE), summary_op_util.summary_scope(
      name, family, values=[tensor]) as (tag, scope):
    writer_resource = self._resource
    step_tensor = self._update_global_step_tensor()
    audio_tensor = _maybe_cpu(tensor)
    rate_tensor = _maybe_cpu(sample_rate)
    gen_summary_ops.write_audio_summary(
        writer_resource,
        step_tensor,
        tag,
        audio_tensor,
        sample_rate=rate_tensor,
        max_outputs=max_outputs,
        name=scope)
def fn(x):
  """Computes (x + 2.0) under a '/gpu:0' scope, adds 3.0, returns `.cpu()`."""
  with context.device('/gpu:0'):
    two = constant_op.constant(2.0)
    x_on_gpu = x.gpu()
    partial_sum = math_ops.add(x_on_gpu, two)
  # TODO(apassos): remove cpu below by making TensorVSPace aware
  # of devices.
  total = math_ops.add(partial_sum, constant_op.constant(3.0))
  return total.cpu()
def _testCpu(self, x):
  """Converts `x` to a tensor under a CPU scope and compares it to NumPy.

  Float and complex dtypes are compared with `assertAllClose`; every other
  dtype with `assertAllEqual`.
  """
  expected = np.array(x)
  with context.device("/device:CPU:0"):
    actual = ops.convert_to_tensor(x).numpy()
  inexact_dtypes = (np.float32, np.float64, np.complex64, np.complex128)
  if expected.dtype in inexact_dtypes:
    compare = self.assertAllClose
  else:
    compare = self.assertAllEqual
  compare(expected, actual)
def setUp(self):
  """Picks the test device, makes a temp log dir and builds the writer."""
  super(SummaryWriterTest, self).setUp()
  if context.num_gpus():
    self._test_device = "gpu:0"
  else:
    self._test_device = "cpu:0"
  self._tmp_logdir = tempfile.mkdtemp()
  with context.device(self._test_device):
    # Use max_queue=0 so that summaries are immediately flushed to filesystem,
    # making testing easier.
    self._writer = summary_writer.SummaryWriter(
        self._tmp_logdir, max_queue=0)
def benchmark_defun_matmul_forward_backward_2_by_2_CPU_async(self):
  """Runs `_benchmark_defun_matmul_forward_backward` on CPU in ASYNC mode."""
  with context.device(CPU):
    cpu_matrix = self._m_2_by_2.cpu()
    self._benchmark_defun_matmul_forward_backward(
        cpu_matrix,
        transpose_b=False,
        num_iters=self._num_iters_2_by_2,
        execution_mode=context.ASYNC)
def benchmark_tf_matmul_100_by_784_CPU_async(self):
  """Runs `_benchmark_tf_matmul` on `self._m_100_by_784.cpu()` in ASYNC mode."""
  with context.device(CPU):
    cpu_matrix = self._m_100_by_784.cpu()
    self._benchmark_tf_matmul(
        cpu_matrix,
        transpose_b=True,
        num_iters=self._num_iters_100_by_784,
        execution_mode=context.ASYNC)
def testAudioSummary(self):
  """Writes an audio summary and checks the last event's `audio` field."""
  with context.device(self._test_device):
    waveform = constant_op.constant(
        np.random.rand(3, 10, 2), dtype=dtypes.float32)
    sample_rate = constant_op.constant(44100.0, dtype=dtypes.float32)
    num_outputs = 1
    self._writer.audio("audio1", waveform, sample_rate, num_outputs)
  last_event = self._readLastEvent()
  self.assertTrue(last_event.summary.value[0].audio)
def testTensorPlacement(self):
  """Adds two constants that were both placed on the GPU and checks the sum.

  Skipped when no GPU is available, because both inputs are placed on the
  GPU (`.gpu()` and a 'gpu:0' device scope).
  """
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')

  x = constant_op.constant(1.).gpu()
  with context.device('gpu:0'):
    y = constant_op.constant(2.)
  # Add would fail if y were not on GPU
  result = execute(
      b'Add', 1, inputs=[x, y],
      attrs=('T', x.dtype.as_datatype_enum))[0].cpu().numpy()
  self.assertEqual(3, result)
def testGlobalStep(self):
  """Checks that `step()` advances the writer's `global_step` by one."""
  with context.device(self._test_device):
    start = self._writer.global_step
    self._writer.step()
    # Read twice: reading the step must not itself change it.
    self.assertEqual(start + 1, self._writer.global_step)
    self.assertEqual(start + 1, self._writer.global_step)
    for _ in range(2):
      self._writer.step()
    self.assertEqual(start + 3, self._writer.global_step)
def benchmark_tf_matmul_100_by_784_GPU_async(self):
  """Runs `_benchmark_tf_matmul` on `self._m_100_by_784.gpu()` in ASYNC mode.

  Does nothing when `context.num_gpus()` is falsy.
  """
  if context.num_gpus():
    with context.device(GPU):
      gpu_matrix = self._m_100_by_784.gpu()
      self._benchmark_tf_matmul(
          gpu_matrix,
          transpose_b=True,
          num_iters=self._num_iters_100_by_784,
          execution_mode=context.ASYNC)
def testTensorCopyGPU2CPU2GPU(self):
  """Checks the gradient of a function that moves its inputs with `.cpu()`.

  Skipped when no GPU is available, because the inputs are created under a
  '/gpu:0' device scope.
  """
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')

  def f(a, b):
    return a.cpu() + b.cpu()

  with context.device('/gpu:0'):
    a = constant_op.constant(1.0)
    b = constant_op.constant(2.0)

  # Gradient with respect to argument 0 (`a`) only.
  grad = backprop.gradients_function(f, [0])(a, b)[0]
  self.assertAllEqual(grad, 1.0)
def benchmark_tf_matmul_2_by_2_GPU_async(self):
  """Runs `_benchmark_tf_matmul` on `self._m_2_by_2.gpu()` in ASYNC mode.

  Does nothing when `context.num_gpus()` is falsy.
  """
  if context.num_gpus():
    with context.device(GPU):
      gpu_matrix = self._m_2_by_2.gpu()
      self._benchmark_tf_matmul(
          gpu_matrix,
          transpose_b=False,
          num_iters=self._num_iters_2_by_2,
          execution_mode=context.ASYNC)
def _benchmarkFunctionWithResourceInputs(self, num_resources, num_iters):
  """Benchmarks a `def_function.function` that sums resource variables.

  Args:
    num_resources: Number of `ResourceVariable`s to create from `self._m_2`.
    num_iters: Iteration count passed to `self._run`.
  """

  @def_function.function
  def add_all(*args):
    return math_ops.add_n(*args)

  with context.device(CPU):
    resources = [
        resource_variable_ops.ResourceVariable(self._m_2)
        for _ in range(num_resources)
    ]

    def call_add_all():
      # The whole list is passed as one positional argument.
      return add_all(resources)

    self._run(call_add_all, num_iters)
def _testGpu(self, x):
  """Converts `x` to a tensor under the GPU device and compares it to NumPy.

  Does nothing when `test_util.gpu_device_name()` returns a falsy value.
  Float and complex dtypes are compared with `assertAllClose`; every other
  dtype with `assertAllEqual`.
  """
  device = test_util.gpu_device_name()
  if not device:
    return
  expected = np.array(x)
  with context.device(device):
    actual = ops.convert_to_tensor(x).numpy()
  inexact_dtypes = (np.float32, np.float64, np.complex64, np.complex128)
  if expected.dtype in inexact_dtypes:
    compare = self.assertAllClose
  else:
    compare = self.assertAllEqual
  compare(expected, actual)
def testInt32CPUDefault(self):
  """Adds two integer constants under a '/gpu:0' scope and expects 3.

  Skipped when no GPU is available.
  """
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')
  with context.device('/gpu:0'):
    one = constant_op.constant(1)
    two = constant_op.constant(2)
    total = one + two
  self.assertAllEqual(total, 3)
def benchmark_defun_matmul_2_by_2_with_signature_CPU(self):
  """Runs `_benchmark_defun_matmul_with_signature` on `self._m_2_by_2.cpu()`."""
  with context.device(CPU):
    cpu_matrix = self._m_2_by_2.cpu()
    self._benchmark_defun_matmul_with_signature(
        cpu_matrix, num_iters=self._num_iters_2_by_2)
def benchmark_tfe_py_fastpath_execute_matmul_100_by_784_CPU(self):
  """Runs `_benchmark_tfe_py_fastpath_execute_matmul` on the CPU matrix."""
  with context.device(CPU):
    cpu_matrix = self._m_100_by_784.cpu()
    self._benchmark_tfe_py_fastpath_execute_matmul(
        cpu_matrix,
        transpose_b=True,
        num_iters=self._num_iters_100_by_784)
def _benchmark_tf_zeros(self, shape, dtype, device=CPU):
  """Benchmarks `array_ops.zeros(shape, dtype)` under `device`.

  Args:
    shape: Shape argument passed to `array_ops.zeros`.
    dtype: Dtype argument passed to `array_ops.zeros`.
    device: Device scope to run under; defaults to `CPU`.
  """
  with context.device(device):

    def make_zeros():
      return array_ops.zeros(shape, dtype)

    self._run(make_zeros, 3000)
def benchmark_read_variable_op_2_by_2_GPU(self):
  """Runs `_benchmark_read_variable` on a variable built on the GPU.

  Does nothing when `context.num_gpus()` is falsy.
  """
  if context.num_gpus():
    with context.device(GPU):
      initial_value = self._m_2_by_2.gpu()
      variable = resource_variable_ops.ResourceVariable(initial_value)
      self._benchmark_read_variable(
          variable, num_iters=self._num_iters_2_by_2)
def testGPUInt64(self):
  """Creates an int64 resource variable under 'gpu:0' and reads it back.

  Returns early (without skipping) when no GPU is available.
  """
  if context.context().num_gpus():
    with context.eager_mode():
      with context.device("gpu:0"):
        variable = resource_variable_ops.ResourceVariable(
            1, dtype=dtypes.int64)
        self.assertAllEqual(1, variable.numpy())
def testStackGPU(self):
  """Re-runs `testStack` under a 'gpu:0' scope; no-op without a GPU."""
  if context.num_gpus():
    with context.device("gpu:0"):
      self.testStack()
def _benchmark_tf_tensordot(self, device=CPU, execution_mode=None):
  """Benchmarks `math_ops.tensordot` on two 2x2 all-ones tensors.

  Args:
    device: Device scope to run under; defaults to `CPU`.
    execution_mode: Forwarded to `self._run`.
  """
  with context.device(device):
    lhs = array_ops.ones((2, 2))
    rhs = array_ops.ones((2, 2))

    def contract():
      return math_ops.tensordot(lhs, rhs, [[1], [0]])

    self._run(contract, 30000, execution_mode=execution_mode)
def benchmark_tf_multiply_op_GPU(self):
  """Runs `_benchmark_tf_multiply_op` on `self._m_2.gpu()`.

  Does nothing when `context.num_gpus()` is falsy.
  """
  if context.num_gpus():
    with context.device(GPU):
      gpu_matrix = self._m_2.gpu()
      self._benchmark_tf_multiply_op(gpu_matrix, 30000)
def benchmark_tf_gradient_function_no_op(self):
  """Benchmarks `backprop.gradients_function` applied to a pass-through."""
  with context.device(CPU):
    identity_input = gen_array_ops.identity(self._m_2)

    def run_gradient():
      # The gradient function is rebuilt on every call, as in the timed body.
      pass_through_grad = backprop.gradients_function(lambda x: x, [0])
      return pass_through_grad(identity_input)

    self._run(run_gradient, 30000)
def f():
  """Reads the enclosing-scope variable `v` under a 'gpu:0' scope."""
  with context.device('gpu:0'):
    value = v.read_value()
    return value
def testInvalidDevice(self):
  """Expects ValueError when using the malformed device string 'pu:0'."""
  with self.assertRaises(ValueError), context.device('pu:0'):
    _ = constant_op.constant(1)
def benchmark_tfe_py_execute_matmul_2_by_2_CPU(self):
  """Runs `_benchmark_tfe_py_execute_matmul` on `self._m_2_by_2.cpu()`."""
  with context.device(CPU):
    cpu_matrix = self._m_2_by_2.cpu()
    self._benchmark_tfe_py_execute_matmul(
        cpu_matrix,
        transpose_b=False,
        num_iters=self._num_iters_2_by_2)
def testIdentityOnVariable(self):
  """Applies `array_ops.identity` to a boolean variable created on the GPU.

  Skipped when no GPU is available.
  """
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')
  with context.device('/gpu:0'):
    variable = resource_variable_ops.ResourceVariable(True)
  # identity is applied outside the GPU scope.
  passed_through = array_ops.identity(variable)
  self.assertAllEqual(True, passed_through)
def testPushPopGPU(self, max_num_elements):
  """Runs `_testPushPop(max_num_elements)` under 'gpu:0'; no-op without a GPU."""
  if context.num_gpus():
    with context.device("gpu:0"):
      self._testPushPop(max_num_elements)
def testFromTensorGPU(self):
  """Re-runs `testTensorListFromTensor` under 'gpu:0'; no-op without a GPU."""
  if context.num_gpus():
    with context.device("gpu:0"):
      self.testTensorListFromTensor()
def testInt32CPUDefault(self):
  """Adds two integer `tensor.Tensor`s under '/gpu:0' and expects 3.

  Skipped when no GPU is available.
  """
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')
  with context.device('/gpu:0'):
    one = tensor.Tensor(1)
    two = tensor.Tensor(2)
    total = one + two
  self.assertEqual(total.numpy(), 3)
def _zeros(shape, dtype):
  """Builds a zero-filled tensor via `array_ops.fill`.

  Args:
    shape: Shape value; wrapped into an int32 `tensor.Tensor` under "cpu:0".
    dtype: Dtype of the zero fill value.

  Returns:
    The result of `array_ops.fill` with the shape tensor and a zero scalar.
  """
  with context.device("cpu:0"):
    shape_tensor = tensor.Tensor(shape, dtype=dtypes.int32)
  zero = tensor.Tensor(0, dtype=dtype)
  return array_ops.fill(shape_tensor, zero)
def f(a, b):
  """Adds `a` and `b` under a '/gpu:0' scope, then adds 3.0 to the CPU copy."""
  with context.device('/gpu:0'):
    a_on_gpu = a.as_gpu_tensor(0)
    b_on_gpu = b.as_gpu_tensor(0)
    gpu_sum = math_ops.add(a_on_gpu, b_on_gpu)
  cpu_sum = gpu_sum.as_cpu_tensor()
  return math_ops.add(cpu_sum, constant_op.constant(3.0))
def benchmark_defun_matmul_100_by_784_CPU(self):
  """Runs `_benchmark_defun_matmul` on `self._m_100_by_784.cpu()`."""
  with context.device(CPU):
    cpu_matrix = self._m_100_by_784.cpu()
    self._benchmark_defun_matmul(
        cpu_matrix,
        transpose_b=True,
        num_iters=self._num_iters_100_by_784)
def f():
  """Watches the enclosing-scope variable `v` on the tape and reads it.

  Both steps run under a 'gpu:0' device scope.
  """
  with context.device('gpu:0'):
    tape.watch_variable(v)
    value = v.read_value()
    return value
def benchmark_defun_args_matmul_2_by_2_CPU(self):
  """Runs `_benchmark_defun_args_matmul` on `self._m_2_by_2.cpu()`."""
  with context.device(CPU):
    cpu_matrix = self._m_2_by_2.cpu()
    self._benchmark_defun_args_matmul(
        cpu_matrix, num_iters=self._num_iters_2_by_2)
def benchmark_defun_matmul_2_by_2_relaxed_shape_CPU(self):
  """Runs `_benchmark_defun_matmul_relaxed_shape` on `self._m_2_by_2.cpu()`."""
  with context.device(CPU):
    cpu_matrix = self._m_2_by_2.cpu()
    self._benchmark_defun_matmul_relaxed_shape(
        cpu_matrix, num_iters=self._num_iters_2_by_2)
def benchmark_tf_multiply_op_CPU(self):
  """Runs `_benchmark_tf_multiply_op` on `self._m_2.cpu()`."""
  with context.device(CPU):
    cpu_matrix = self._m_2.cpu()
    self._benchmark_tf_multiply_op(cpu_matrix, 30000)
def f(a, b):
  """Adds `a` and `b` under a '/gpu:0' scope, then adds 3.0 to the `.cpu()` copy."""
  with context.device('/gpu:0'):
    a_on_gpu = a.gpu(0)
    b_on_gpu = b.gpu(0)
    gpu_sum = math_ops.add(a_on_gpu, b_on_gpu)
  cpu_sum = gpu_sum.cpu()
  return math_ops.add(cpu_sum, constant_op.constant(3.0))
def benchmark_defun_matmul_forward_backward_2_by_2_CPU(self):
  """Runs `_benchmark_defun_matmul_forward_backward` on `self._m_2_by_2.cpu()`."""
  with context.device(CPU):
    cpu_matrix = self._m_2_by_2.cpu()
    self._benchmark_defun_matmul_forward_backward(
        cpu_matrix,
        transpose_b=False,
        num_iters=self._num_iters_2_by_2)
def testGetSetGPU(self):
  """Re-runs `testGetSetItem` under 'gpu:0'; no-op without a GPU."""
  if context.num_gpus():
    with context.device("gpu:0"):
      self.testGetSetItem()
def testPushPopGPU(self):
  """Re-runs `testPushPop` under 'gpu:0'; no-op without a GPU."""
  if context.num_gpus():
    with context.device("gpu:0"):
      self.testPushPop()
def benchmark_read_variable_op_with_tape_2_by_2_CPU(self):
  """Runs `_benchmark_read_variable_with_tape` on a variable built on the CPU."""
  with context.device(CPU):
    variable = resource_variable_ops.ResourceVariable(self._m_2_by_2)
    self._benchmark_read_variable_with_tape(
        variable, num_iters=self._num_iters_2_by_2)
def benchmark_defun_args_matmul_2_by_2_GPU(self):
  """Runs `_benchmark_defun_args_matmul` on `self._m_2_by_2.gpu()`.

  Does nothing when `context.num_gpus()` is falsy.
  """
  if context.num_gpus():
    with context.device(GPU):
      gpu_matrix = self._m_2_by_2.gpu()
      self._benchmark_defun_args_matmul(
          gpu_matrix, num_iters=self._num_iters_2_by_2)