def testApproximateEqual(self): for dtype in [np.float32, np.double]: x = dtype(1) y = dtype(1.00009) z = False with test_util.device(use_gpu=True): # Default tolerance is 0.00001 z_tf = self.evaluate(math_ops.approximate_equal(x, y)) self.assertAllEqual(z, z_tf) for dtype in [np.float32, np.double]: x = dtype(1) y = dtype(1.000009) z = True with test_util.device(use_gpu=True): # Default tolerance is 0.00001 z_tf = self.evaluate(math_ops.approximate_equal(x, y)) self.assertAllEqual(z, z_tf) for dtype in [np.float32, np.double]: x = np.array([[[[-1, 2.00009999], [-3, 4.01]]]], dtype=dtype) y = np.array([[[[-1.001, 2], [-3.00009, 4]]]], dtype=dtype) z = np.array([[[[False, True], [True, False]]]], dtype=np.bool) with test_util.device(use_gpu=True): z_tf = self.evaluate(math_ops.approximate_equal(x, y, tolerance=0.0001)) self.assertAllEqual(z, z_tf)
def testAgnosticUsage(self): """Graph/eager agnostic usage.""" # Does create garbage when executing eagerly due to ops.Graph() creation. num_training_steps = 10 checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): with ops.Graph().as_default(), self.test_session( graph=ops.get_default_graph()), test_util.device(use_gpu=True): model = MyModel() optimizer = adam.AdamOptimizer(0.001) root = util.Checkpoint( optimizer=optimizer, model=model, global_step=training_util.get_or_create_global_step()) checkpoint_path = checkpoint_management.latest_checkpoint( checkpoint_directory) status = root.restore(save_path=checkpoint_path) input_value = constant_op.constant([[3.]]) train_fn = functools.partial( optimizer.minimize, functools.partial(model, input_value), global_step=root.global_step) if not context.executing_eagerly(): train_fn = functools.partial(self.evaluate, train_fn()) status.initialize_or_restore() for _ in range(num_training_steps): train_fn() root.save(file_prefix=checkpoint_prefix) self.assertEqual((training_continuation + 1) * num_training_steps, self.evaluate(root.global_step)) self.assertEqual(training_continuation + 1, self.evaluate(root.save_counter))
def testAcceptsTensor(self): tensor = array_ops.ones([10, 10]) result = math_ops.scalar_mul(3, tensor) expected = array_ops.ones([10, 10]) * 3 with test_util.device(use_gpu=True): self.assertAllEqual(self.evaluate(expected), self.evaluate(result))
def testLoadFromNameBasedSaver(self): """Save a name-based checkpoint, load it using the object-based API.""" with test_util.device(use_gpu=True): save_path = self._write_name_based_checkpoint() root = self._initialized_model() self._set_sentinels(root) with self.assertRaises(AssertionError): self._check_sentinels(root) object_saver = util.TrackableSaver(graph_view.ObjectGraphView(root)) self._set_sentinels(root) status = object_saver.restore(save_path) if context.executing_eagerly(): self._check_sentinels(root) if context.executing_eagerly(): with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"): status.assert_consumed() else: # When graph building, we haven't read any keys, so we don't know # whether the restore will be complete. with self.assertRaisesRegexp(AssertionError, "not restored"): status.assert_consumed() status.run_restore_ops() self._check_sentinels(root) self._set_sentinels(root) status = object_saver.restore(save_path) status.initialize_or_restore() self._check_sentinels(root)
def testEagerSingleOutputFloat32(self): with test_util.device(use_gpu=True): a = array_ops.ones((3, 3), dtype=dtypes.float32) x = array_ops.ones((3, 1), dtype=dtypes.float32) output = script_ops.eager_py_func(matmul, inp=[a, x], Tout=dtypes.float32) ret = self.evaluate(output) self.assertAllClose(ret, [[3.0], [3.0], [3.0]])
def testBasicMemory(self): """Make sure arguments can be passed correctly.""" with test_util.device(use_gpu=False): a = constant_op.constant(10, name="a") b = constant_op.constant(20, name="b") c = math_ops.add_n([a, b], name="c") d = math_ops.add_n([b, c], name="d") train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) train_op.append(d) mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph()) report = cost_analyzer.GenerateMemoryReport(mg) # Print the report to make it easier to debug print("{}".format(report)) # Check the report self.assertTrue( "Peak usage for device /job:localhost/replica:0/task:0/device:CPU:0: " "16 bytes" in report) self.assertTrue(" a:0 uses 4 bytes" in report) self.assertTrue(" b:0 uses 4 bytes" in report) self.assertTrue(" c:0 uses 4 bytes" in report) self.assertTrue(" d:0 uses 4 bytes" in report)
def _compare(self, c, x, y, use_gpu): np_ans = np.where(c, x, y) with test_util.device(use_gpu=use_gpu): out = array_ops.where(c, x, y) tf_ans = self.evaluate(out) self.assertAllEqual(np_ans, tf_ans) self.assertShapeEqual(np_ans, out)
def testAgnosticUsage(self): """Graph/eager agnostic usage.""" # Does create garbage when executing eagerly due to ops.Graph() creation. num_training_steps = 10 checkpoint_directory = self.get_temp_dir() for training_continuation in range(3): with test_util.device(use_gpu=True): model = MyModel() optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( optimizer=optimizer, model=model, global_step=training_util.get_or_create_global_step()) manager = checkpoint_management.CheckpointManager( root, checkpoint_directory, max_to_keep=1) status = root.restore(save_path=manager.latest_checkpoint) input_value = constant_op.constant([[3.]]) train_fn = functools.partial( optimizer.minimize, functools.partial(model, input_value), global_step=root.global_step) if not context.executing_eagerly(): train_fn = functools.partial(self.evaluate, train_fn()) status.initialize_or_restore() for _ in range(num_training_steps): train_fn() manager.save() self.assertEqual((training_continuation + 1) * num_training_steps, self.evaluate(root.global_step)) self.assertEqual(training_continuation + 1, self.evaluate(root.save_counter))
def Test(self): np_val = np.matrix(a_np_) * np.matrix(b_np_) use_gpu = True if a_np_.dtype is np.float16 and ( not test_util.CudaSupportsHalfMatMulAndConv()): use_gpu = False print("Built without fp16 matmul support for Cuda, running test on CPU.") # Transpose and possibly conjugate a_np_ and b_np_ according to the # attributes such that tf.matmul(effective_a_np, effective_b_np, **kwargs) # results in a valid matrix multiplication and produces the same result as # np.matrix(a_np_) * np.matrix(b_np_) effective_a_np = _GetTransposedMatrices(a_np_, "a", kwargs_) effective_b_np = _GetTransposedMatrices(b_np_, "b", kwargs_) with self.cached_session() as sess, test_util.device(use_gpu): if use_static_shape_: a = constant_op.constant(effective_a_np) b = constant_op.constant(effective_b_np) res = math_ops.matmul(a, b, **kwargs_) tf_val = self.evaluate(res) else: a = array_ops.placeholder(a_np_.dtype) b = array_ops.placeholder(b_np_.dtype) res = math_ops.matmul(a, b, **kwargs_) tf_val = sess.run(res, feed_dict={a: effective_a_np, b: effective_b_np}) self.assertAllCloseAccordingToType( tf_val, np_val, float_rtol=2e-5, float_atol=2e-5, half_rtol=0.2, half_atol=0.2)
def _compareConj(self, cplx, use_gpu): np_ans = np.conj(cplx) with test_util.device(use_gpu=use_gpu): inx = ops.convert_to_tensor(cplx) tf_conj = math_ops.conj(inx) tf_ans = self.evaluate(tf_conj) self.assertAllEqual(np_ans, tf_ans) self.assertShapeEqual(np_ans, tf_conj)
def testAcceptsIndexedSlices(self): values = constant_op.constant([2, 3, 5, 7, 0, -1], shape=[3, 2]) indices = constant_op.constant([0, 2, 5]) x = math_ops.scalar_mul(-3, ops.IndexedSlices(values, indices)) with test_util.device(use_gpu=True): self.assertAllEqual(self.evaluate(x.values), [[-6, -9], [-15, -21], [0, 3]]) self.assertAllEqual(self.evaluate(x.indices), [0, 2, 5])
def testSmallEntropy(self): random_seed.set_random_seed(1618) with test_util.device(use_gpu=True): # A logit value of -10 corresponds to a probability of ~5e-5. logits = constant_op.constant([[-10., 10., -10.], [-10., -10., 10.]]) num_samples = 1000 samples = self.evaluate(random_ops.multinomial(logits, num_samples)) self.assertAllEqual([[1] * num_samples, [2] * num_samples], samples)
def testEagerArrayOutput(self): with test_util.device(use_gpu=True): a = array_ops.ones((3, 3), dtype=dtypes.float32) x = array_ops.ones((3, 1), dtype=dtypes.float32) output = script_ops.eager_py_func( lambda a, x: [matmul(a, x)], inp=[a, x], Tout=[dtypes.float32]) ret = self.evaluate(output) self.assertAllEqual(ret, [[[3.0], [3.0], [3.0]]])
def _not(self, x, use_gpu=False): np_ans = np.logical_not(x) with test_util.device(use_gpu=use_gpu): out = math_ops.logical_not(ops.convert_to_tensor(x)) tf_val = self.evaluate(out) self.assertEqual(out.dtype, dtypes_lib.bool) self.assertAllEqual(np_ans, tf_val) self.assertShapeEqual(np_ans, out)
def testSquaredDifference(self): for dtype in [np.int32, np.float16]: x = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype) y = np.array([-3, -2, -1], dtype=dtype) z = (x - y) * (x - y) with test_util.device(use_gpu=True): z_tf = self.evaluate(math_ops.squared_difference(x, y)) self.assertAllClose(z, z_tf)
def testComplexSquaredDifference(self): for dtype in [np.complex64, np.complex128]: x = np.array([[1 + 3j, 2 + 2j, 3 + 1j], [4 - 1j, 5 - 2j, 6 - 3j]], dtype=dtype) y = np.array([-3 + 1j, -2 + 2j, -1 + 3j], dtype=dtype) z = np.conj(x - y) * (x - y) with test_util.device(use_gpu=False): z_tf = self.evaluate(math_ops.squared_difference(x, y)) self.assertAllClose(z, z_tf)
def _testEmpty(self, x, dim, num, expected_shape): with test_util.device(use_gpu=True): tf_ans = array_ops.split(value=x, num_or_size_splits=num, axis=dim) out = self.evaluate(tf_ans) self.assertEqual(x.size, 0) self.assertEqual(len(out), num) for i in range(num): self.assertEqual(out[i].shape, expected_shape) self.assertEqual(expected_shape, tf_ans[i].get_shape())
def _compare(self, x, y, use_gpu): np_min, np_max = np.minimum(x, y), np.maximum(x, y) with test_util.device(use_gpu=use_gpu): inx = ops.convert_to_tensor(x) iny = ops.convert_to_tensor(y) omin, omax = math_ops.minimum(inx, iny), math_ops.maximum(inx, iny) tf_min, tf_max = self.evaluate([omin, omax]) self.assertAllEqual(np_min, tf_min) self.assertAllEqual(np_max, tf_max)
def _compare(self, c, x, y, use_gpu): np_ans = np.dstack( [x_i if c_i else y_i for c_i, x_i, y_i in zip(c, x, y)]).transpose( [2, 0, 1]) with test_util.device(use_gpu=use_gpu): out = array_ops.where(c, x, y) tf_ans = self.evaluate(out) self.assertAllEqual(np_ans, tf_ans) self.assertShapeEqual(np_ans, out)
def testRounding(self): x = np.arange(-5.0, 5.0, .25) for dtype in [np.float32, np.double, np.int32]: x_np = np.array(x, dtype=dtype) with test_util.device(use_gpu=True): x_tf = constant_op.constant(x_np, shape=x_np.shape) y_tf = math_ops.round(x_tf) y_tf_np = self.evaluate(y_tf) y_np = np.round(x_np) self.assertAllClose(y_tf_np, y_np, atol=1e-2)
def _compareAngle(self, cplx, use_gpu): np_angle = np.angle(cplx) with test_util.device(use_gpu=use_gpu): inx = ops.convert_to_tensor(cplx) tf_angle = math_ops.angle(inx) tf_angle_val = self.evaluate(tf_angle) self.assertAllClose(np_angle, tf_angle_val) self.assertShapeEqual(np_angle, tf_angle)
def testAcceptsRefs(self): if context.executing_eagerly(): var = resource_variable_ops.ResourceVariable(10, name="var") else: var = variables.Variable(10) result = math_ops.scalar_mul(3, var) init = variables.global_variables_initializer() with test_util.device(use_gpu=True): self.evaluate(init) self.assertEqual(30, self.evaluate(result))
def _compareBinary(self, x, y, np_func, tf_func, use_gpu=False): np_ans = np_func(x, y) with test_util.device(use_gpu=use_gpu): inx = ops.convert_to_tensor(x) iny = ops.convert_to_tensor(y) out = tf_func(inx, iny) tf_val = self.evaluate(out) self.assertEqual(out.dtype, dtypes_lib.bool) self.assertAllEqual(np_ans, tf_val) self.assertShapeEqual(np_ans, out)
def testEagerPyFuncInDefun(self): with test_util.device(use_gpu=True): def wrapper(): a = array_ops.ones((3, 3), dtype=dtypes.float32) x = array_ops.ones((3, 1), dtype=dtypes.float32) return script_ops.eager_py_func(matmul, inp=[a, x], Tout=dtypes.float32) wrapped = function.defun(wrapper) ret = self.evaluate(wrapped()) self.assertAllEqual(ret, [[3.0], [3.0], [3.0]])
def _testSpecialCasesVariable(self): inp = np.random.rand(4, 4).astype("f") with test_util.device(use_gpu=True): result = self.evaluate(array_ops.split(inp, [4], 0)) self.assertAllEqual(result[0], inp) result = self.evaluate(array_ops.split(inp, [-1, 3], 0)) self.assertAllEqual(result[0], inp[0:1, :]) self.assertAllEqual(result[1], inp[1:4, :])
def testReduceExplicitAxes(self): x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32) with test_util.device(use_gpu=True): for axis in (0, -2, (0, 0), (0, -2)): self.assertAllEqual(self.evaluate(math_ops.reduce_sum(x, axis=axis)), [5, 7, 9]) for axis in (1, -1, (1, 1), (1, -1)): self.assertAllEqual(self.evaluate(math_ops.reduce_sum(x, axis=axis)), [6, 15]) for axis in (None, (0, 1), (-1, -2), (-2, -1, 0, 1)): self.assertEqual(self.evaluate(math_ops.reduce_sum(x, axis=axis)), 21)
def _compareMake(self, real, imag, use_gpu): np_ans = real + (1j) * imag with test_util.device(use_gpu=use_gpu): real = ops.convert_to_tensor(real) imag = ops.convert_to_tensor(imag) tf_ans = math_ops.complex(real, imag) out = self.evaluate(tf_ans) self.assertAllEqual(np_ans, out) self.assertShapeEqual(np_ans, tf_ans)
def _compare(self, x, dim, num): np_ans = np.split(x, num, dim) with test_util.device(use_gpu=True): tf_ans = array_ops.split(value=x, num_or_size_splits=num, axis=dim) out = self.evaluate(tf_ans) self.assertEqual(num, len(np_ans)) self.assertEqual(num, len(np_ans)) self.assertEqual(num, len(out)) for i in range(num): self.assertAllEqual(np_ans[i], out[i]) self.assertShapeEqual(np_ans[i], tf_ans[i])
def testListOfScalarTensors(self): a = math_ops.to_int32(5) b = math_ops.to_int32(6) value = np.random.rand(11, 11) with test_util.device(use_gpu=True): result = self.evaluate(array_ops.split(value, [a, b])) self.assertAllEqual(result[0], value[0:5, :]) self.assertAllEqual(result[1], value[5:, :])
def testEagerReturnNone(self): with test_util.device(use_gpu=True): def no_return_value(): return output = script_ops.eager_py_func(no_return_value, inp=[], Tout=[]) ret = self.evaluate(output) if context.executing_eagerly(): self.assertEquals(len(ret), 0) else: self.assertIsNone(ret)
def _ConstructAndTestGradient(self, image_shape, kernel_shape, strides, rates, padding, use_gpu): """Verifies the gradients of the erosion function. Args: image_shape: Input shape, [batch, in_height, in_width, channels]. kernel_shape: Filter shape, [filter_height, filter_width, channels]. strides: Output strides, specified as [stride_height, stride_width]. rates: Atrous rates, specified as [rate_height, rate_width]. padding: Padding type. use_gpu: Whether we are running on GPU. """ assert image_shape[3] == kernel_shape[2] np.random.seed(1) # Make it reproducible. image = np.random.random_sample(image_shape).astype(np.float32) kernel = np.random.random_sample(kernel_shape).astype(np.float32) strides = [1] + strides + [1] rates = [1] + rates + [1] image_tensor = constant_op.constant(image, shape=image_shape, name="input") kernel_tensor = constant_op.constant(kernel, shape=kernel_shape, name="filter") def compute_erosion2d(image_tensor, kernel_tensor): return nn_ops.erosion2d(image_tensor, kernel_tensor, strides=strides, rates=rates, padding=padding, name="erosion2d") with test_util.device(use_gpu=use_gpu): with self.cached_session(): # Small delta is necessary for argmax to remain the same. err1 = gradient_checker_v2.max_error( *gradient_checker_v2.compute_gradient( lambda x: compute_erosion2d(x, kernel_tensor), [image_tensor])) err2 = gradient_checker_v2.max_error( *gradient_checker_v2.compute_gradient( lambda x: compute_erosion2d(image_tensor, x), [kernel_tensor])) err = max(err1, err2) print("Erosion gradient error = %f" % err) self.assertLess(err, 1e-4)
def _VariableRankTest(self, np_scatter, tf_scatter, vtype, itype, repeat_indices=False): np.random.seed(8) ref_shapes = [(3, 6), (3, 6), (3, 6, 9), (3, 6, 9), (3, 6, 9), (3, 6, 9)] indices_shapes = [(2, ), (2, 2), (2, ), (2, 2), (2, 3), (2, 3, 3)] with test_util.device(use_gpu=True): for ref_shape, indices_shape in zip(ref_shapes, indices_shapes): num_updates = indices_shape[0] ixdim = indices_shape[-1] indexable_area_shape = () for i in range(ixdim): indexable_area_shape += (ref_shape[i], ) all_indices = [ list(coord) for coord, _ in np.ndenumerate( np.empty(indexable_area_shape, vtype)) ] np.random.shuffle(all_indices) indices = np.array(all_indices[:num_updates]) if num_updates > 1 and repeat_indices: indices = indices[:num_updates // 2] for _ in range(num_updates - num_updates // 2): indices = np.append( indices, [indices[np.random.randint(num_updates // 2)]], axis=0) np.random.shuffle(indices) indices = _AsType(indices[:num_updates], itype) updates_shape = (num_updates, ) for i in range(ixdim, len(ref_shape)): updates_shape += (ref_shape[i], ) updates = _AsType(np.random.randn(*(updates_shape)), vtype) ref = _AsType(np.random.randn(*(ref_shape)), vtype) # Scatter via numpy new = ref.copy() np_scatter(new, indices, updates) # Scatter via tensorflow ref_var = variables.VariableV1(ref) self.evaluate(ref_var.initializer) self.evaluate(tf_scatter(ref_var, indices, updates)) # Compare self.assertAllClose(new, self.evaluate(ref_var))
def test_unified_lstm_output_on_multiple_kernel(self): input_shape = 10 rnn_state_size = 8 timestep = 4 batch = 100 x_train = np.random.random((batch, timestep, input_shape)) inputs = keras.layers.Input(shape=[timestep, input_shape], dtype=dtypes.float32) with test_util.device(use_gpu=False): layer = keras.layers.UnifiedLSTM(rnn_state_size) output = layer(inputs) cpu_model = keras.models.Model(inputs, output) weights = cpu_model.get_weights() y_1 = cpu_model.predict(x_train) with test_util.device(use_gpu=True): layer = keras.layers.UnifiedLSTM(rnn_state_size) output = layer(inputs) gpu_model = keras.models.Model(inputs, output) gpu_model.set_weights(weights) y_2 = gpu_model.predict(x_train) # Note that CuDNN uses 'sigmoid' as activation, so the unified LSTM uses # 'sigmoid' as default. Construct the canonical LSTM with sigmoid to achieve # the same output. with test_util.device(use_gpu=True): layer = keras.layers.LSTM(rnn_state_size, recurrent_activation='sigmoid') output = layer(inputs) canonical_model = keras.models.Model(inputs, output) # Remove the extra cudnn bias since canonical lstm will not use it. canonical_model.set_weights(weights[:3]) y_3 = canonical_model.predict(x_train) self.assertAllClose(y_1, y_2) self.assertAllClose(y_2, y_3)
def test_gru_v2_output_on_multiple_kernel(self): input_shape = 10 rnn_state_size = 8 timestep = 4 batch = 100 x_train = np.random.random((batch, timestep, input_shape)) inputs = keras.layers.Input(shape=[timestep, input_shape], dtype=dtypes.float32) with test_util.device(use_gpu=False): layer = rnn.GRU(rnn_state_size) output = layer(inputs) cpu_model = keras.models.Model(inputs, output) weights = cpu_model.get_weights() y_1 = cpu_model.predict(x_train) with test_util.device(use_gpu=True): layer = rnn.GRU(rnn_state_size) output = layer(inputs) gpu_model = keras.models.Model(inputs, output) gpu_model.set_weights(weights) y_2 = gpu_model.predict(x_train) # Note that CuDNN uses 'sigmoid' as activation, so the GRU V2 uses # 'sigmoid' as default. Construct the canonical GRU with sigmoid to achieve # the same output. with test_util.device(use_gpu=True): layer = rnn_v1.GRU(rnn_state_size, recurrent_activation='sigmoid', reset_after=True) output = layer(inputs) canonical_model = keras.models.Model(inputs, output) canonical_model.set_weights(weights) y_3 = canonical_model.predict(x_train) self.assertAllClose(y_1, y_2, rtol=1e-5, atol=1e-5) self.assertAllClose(y_2, y_3, rtol=1e-5, atol=1e-5)
def _compareRealImag(self, cplx, use_gpu): np_real, np_imag = np.real(cplx), np.imag(cplx) np_zeros = np_real * 0 with test_util.device(use_gpu=use_gpu): inx = ops.convert_to_tensor(cplx) tf_real = math_ops.real(inx) tf_imag = math_ops.imag(inx) tf_real_real = math_ops.real(tf_real) tf_imag_real = math_ops.imag(tf_real) self.assertAllEqual(np_real, self.evaluate(tf_real)) self.assertAllEqual(np_imag, self.evaluate(tf_imag)) self.assertAllEqual(np_real, self.evaluate(tf_real_real)) self.assertAllEqual(np_zeros, self.evaluate(tf_imag_real))
def _testGradientsSimpleVariable(self, dtype): inp = self._makeData((4, 4), dtype) with test_util.device(use_gpu=True): inp_tensor = ops.convert_to_tensor(inp) s = array_ops.split(inp_tensor, [1, 3], 1) inp_grads = [ self._makeData((4, 1), dtype), self._makeData((4, 3), dtype) ] grad_tensors = [constant_op.constant(x) for x in inp_grads] grad = gradients_impl.gradients(s, [inp_tensor], grad_tensors)[-1] result = self.evaluate(grad) self.assertAllEqual(result[:, 0:1], inp_grads[0]) self.assertAllEqual(result[:, 1:4], inp_grads[1])
def _testHugeNumberOfTensorsVariable(self, dtype): num_split = 1000 size_splits = np.random.randint(1, 3, num_split, dtype=np.int32) shape = [3, np.sum(size_splits)] split_dim = 1 inp = self._makeData(shape, dtype) with test_util.device(use_gpu=True): result = self.evaluate(array_ops.split(inp, size_splits, split_dim)) slices = [slice(0, x) for x in shape] offset = 0 for i in range(num_split): slices[split_dim] = slice(offset, offset + size_splits[i]) offset += size_splits[i] self.assertAllEqual(result[i], inp[slices])
def testSimpleResource(self): indices = constant_op.constant([[4], [3], [1], [7]], dtype=dtypes.int32) for dtype in (dtypes.int32, dtypes.float32): updates = constant_op.constant([9, 10, 11, 12], dtype=dtype) ref = resource_variable_ops.ResourceVariable( [0, 0, 0, 0, 0, 0, 0, 0], dtype=dtype) expected = np.array([0, 11, 0, 10, 9, 0, 0, 12]) scatter = state_ops.scatter_nd_update(ref, indices, updates) with test_util.device(use_gpu=True): self.evaluate(ref.initializer) self.evaluate(scatter) self.assertAllClose(ref, expected)
def testReduceExplicitAxes(self): x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32) with test_util.device(use_gpu=True): for axis in (0, -2): self.assertAllEqual( self.evaluate(math_ops.reduce_sum(x, axis=axis)), [5, 7, 9]) for axis in (1, -1): self.assertAllEqual( self.evaluate(math_ops.reduce_sum(x, axis=axis)), [6, 15]) for axis in (None, (0, 1), (1, 0), (-1, 0), (0, -1), (-2, 1), (1, -2), (-1, -2), (-2, -1)): self.assertEqual( self.evaluate(math_ops.reduce_sum(x, axis=axis)), 21)
def test_explicit_device_with_go_backward_and_mask(self): batch_size = 8 timestep = 7 masksteps = 5 units = 4 inputs = np.random.randn(batch_size, timestep, units).astype(np.float32) mask = np.ones((batch_size, timestep)).astype(np.bool) mask[:, masksteps:] = 0 # Test for V1 behavior. lstm_v1 = rnn_v1.LSTM(units, return_sequences=True, go_backwards=True) with test_util.device(use_gpu=True): outputs_masked_v1 = lstm_v1(inputs, mask=constant_op.constant(mask)) outputs_trimmed_v1 = lstm_v1(inputs[:, :masksteps]) self.assertAllClose(outputs_masked_v1[:, -masksteps:], outputs_trimmed_v1) # Test for V2 behavior. lstm = rnn.LSTM(units, return_sequences=True, go_backwards=True) with test_util.device(use_gpu=True): outputs_masked = lstm(inputs, mask=constant_op.constant(mask)) outputs_trimmed = lstm(inputs[:, :masksteps]) self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed)
def testSmallEntropy(self): random_seed.set_random_seed(1618) for output_dtype in [np.int32, np.int64]: with test_util.device(use_gpu=True): # A logit value of -10 corresponds to a probability of ~5e-5. logits = constant_op.constant([[-10., 10., -10.], [-10., -10., 10.]]) num_samples = 1000 samples = self.evaluate( random_ops.multinomial(logits, num_samples, output_dtype=output_dtype)) self.assertAllEqual([[1] * num_samples, [2] * num_samples], samples)
def ctc_loss_gpu(labels, logits, label_length, logit_length): with test_util.device(use_gpu=True): sparse_labels = ctc_ops.dense_labels_to_sparse( labels, label_length) with backprop.GradientTape() as t: t.watch(logits) loss = ctc_ops.ctc_loss_v3(labels=sparse_labels, logits=logits, label_length=label_length, logit_length=logit_length, blank_index=0) grad = t.gradient(loss, [logits]) return loss, grad
def testAxis0DefaultGPU(self): # tf.parallel_stack is only supported in graph mode. with ops.Graph().as_default(): with test_util.device(use_gpu=True): t = [ constant_op.constant([1, 2, 3]), constant_op.constant([4, 5, 6]) ] stacked = self.evaluate(array_ops.stack(t)) parallel_stacked = self.evaluate(array_ops.parallel_stack(t)) expected = np.array([[1, 2, 3], [4, 5, 6]]) self.assertAllEqual(stacked, expected) self.assertAllEqual(parallel_stacked, expected)
def test_gru_v2_feature_parity_with_canonical_gru(self): if test.is_built_with_rocm(): self.skipTest('Skipping the test as ROCm MIOpen does not ' 'support padded input yet.') input_shape = 10 rnn_state_size = 8 timestep = 4 batch = 20 (x_train, y_train), _ = testing_utils.get_test_data( train_samples=batch, test_samples=0, input_shape=(timestep, input_shape), num_classes=rnn_state_size, random_seed=random_seed.DEFAULT_GRAPH_SEED) y_train = np_utils.to_categorical(y_train, rnn_state_size) # For the last batch item of the test data, we filter out the last # timestep to simulate the variable length sequence and masking test. x_train[-2:, -1, :] = 0.0 y_train[-2:] = 0 inputs = keras.layers.Input(shape=[timestep, input_shape], dtype=dtypes.float32) masked_input = keras.layers.Masking()(inputs) gru_layer = rnn_v1.GRU(rnn_state_size, recurrent_activation='sigmoid', reset_after=True) output = gru_layer(masked_input) gru_model = keras.models.Model(inputs, output) weights = gru_model.get_weights() y_1 = gru_model.predict(x_train) gru_model.compile('rmsprop', 'mse') gru_model.fit(x_train, y_train) y_2 = gru_model.predict(x_train) with test_util.device(use_gpu=True): cudnn_layer = rnn.GRU(rnn_state_size, recurrent_activation='sigmoid', reset_after=True) cudnn_model = keras.models.Model(inputs, cudnn_layer(masked_input)) cudnn_model.set_weights(weights) y_3 = cudnn_model.predict(x_train) cudnn_model.compile('rmsprop', 'mse') cudnn_model.fit(x_train, y_train) y_4 = cudnn_model.predict(x_train) self.assertAllClose(y_1, y_3, rtol=2e-5, atol=2e-5) self.assertAllClose(y_2, y_4, rtol=2e-5, atol=2e-5)
def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, dilations, strides, padding, data_format, dtype, use_ve): """Verifies the output values of the convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [kernel_rows, kernel_cols, input_depth, output_depth]. dilations: Dilated rate: [col_dilation, row_dilation] strides: Stride: [col_stride, row_stride] padding: Padding type. data_format: Format of the data tensors. dtype: Data type for inputs and outputs. use_ve: True if the operations should be run on VE Returns: Symbolic tensor value that can be used to execute the computation """ total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s # Initializes the input tensor with array containing incrementing # numbers from 1. x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] with test_util.device(use_ve=use_ve, use_gpu=False): t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype) t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype) strides = [1] + strides + [1] dilations = [1] + dilations + [1] if data_format == "NCHW": t1 = test_util.NHWCToNCHW(t1) strides = test_util.NHWCToNCHW(strides) dilations = test_util.NHWCToNCHW(dilations) conv = nn_ops.conv2d(t1, t2, dilations=dilations, strides=strides, padding=padding, data_format=data_format) if data_format == "NCHW": conv = test_util.NCHWToNHWC(conv) return conv
def _testPad(self, np_inputs, paddings, mode, constant_values): np_val = self._npPad(np_inputs, paddings, mode=mode, constant_values=constant_values) for use_gpu in [True, False]: with test_util.device(use_gpu=use_gpu): tf_val = array_ops.pad(np_inputs, paddings, mode=mode, constant_values=constant_values) out = self.evaluate(tf_val) if np_inputs.dtype in [self._qint8, self._quint8, self._qint32]: # Cast quantized types back to their numpy equivalents. np_val = np_val.astype(np_inputs.dtype[0]) self.assertAllEqual(np_val, out) self.assertShapeEqual(np_val, tf_val)
def testWithDefun(self): num_training_steps = 2 checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): with ops.Graph().as_default(), self.test_session( graph=ops.get_default_graph()), test_util.device( use_gpu=True): model = MyModel() # Don't actually train so we can test variable values optimizer = adam.AdamOptimizer(0.) root = checkpointable_utils.Checkpoint( optimizer=optimizer, model=model, global_step=training_util.get_or_create_global_step()) checkpoint_path = core_saver.latest_checkpoint( checkpoint_directory) status = root.restore(save_path=checkpoint_path) def train_fn(): @function.defun def _call_model(x): return model(x) with backprop.GradientTape() as tape: loss = _call_model(constant_op.constant([[3.]])) gradients = tape.gradient(loss, model.variables) return optimizer.apply_gradients( zip(gradients, model.variables), global_step=root.global_step) if not context.executing_eagerly(): train_fn = functools.partial(self.evaluate, train_fn()) status.initialize_or_restore() for _ in range(num_training_steps): train_fn() if training_continuation > 0: status.assert_consumed() self.assertAllClose([[42.]], self.evaluate(model.variables[0])) else: self.evaluate(model.variables[0].assign([[42.]])) root.save(file_prefix=checkpoint_prefix) self.assertEqual( (training_continuation + 1) * num_training_steps, self.evaluate(root.global_step)) self.assertEqual(training_continuation + 1, self.evaluate(root.save_counter))
def _test01(self, dtype): with test_util.device(True): x1 = constant_op.constant([-3, -2, -1, 0, 1, 2], dtype=dtype) x2 = constant_op.constant([1, 1, 1, 1, 1, 1], dtype=dtype) x3 = constant_op.constant([-1, -1, -1, -1, -1, -1], dtype=dtype) relu1 = nn_ops.relu(x1) add1 = math_ops.add(relu1, x2) add2 = math_ops.add(relu1, x3) add3 = math_ops.add(add1, add2) grad = gradients_impl.gradients(add3, x1) y1 = array_ops.identity(grad) return (y1, )
def testConstParallelGPU(self): # tf.parallel_stack is only supported in graph mode. with ops.Graph().as_default(): np.random.seed(7) with test_util.device(use_gpu=True): for shape in (2, ), (3, ), (2, 3), (3, 2), (4, 3, 2): with self.subTest(shape=shape): data = self.randn(shape, np.float32) if len(shape) == 1: data_list = list(data) cl = array_ops.parallel_stack(data_list) self.assertAllEqual(cl, data) data = self.randn(shape, np.float32) c = array_ops.parallel_stack(data) self.assertAllEqual(c, data)
def test_unified_gru_feature_parity_with_canonical_gru(self): with context.eager_mode(): # Run this test under eager only due to b/120160788 for model.set_weights. input_shape = 10 rnn_state_size = 8 timestep = 4 batch = 20 (x_train, y_train), _ = testing_utils.get_test_data( train_samples=batch, test_samples=0, input_shape=(timestep, input_shape), num_classes=rnn_state_size) y_train = keras.utils.to_categorical(y_train, rnn_state_size) # For the last batch item of the test data, we filter out the last # timestep to simulate the variable length sequence and masking test. x_train[-2:, -1, :] = 0.0 y_train[-2:] = 0 inputs = keras.layers.Input(shape=[timestep, input_shape], dtype=dtypes.float32) masked_input = keras.layers.Masking()(inputs) gru_layer = rnn_v1.GRU(rnn_state_size, recurrent_activation='sigmoid', reset_after=True) output = gru_layer(masked_input) gru_model = keras.models.Model(inputs, output) weights = gru_model.get_weights() y_1 = gru_model.predict(x_train) gru_model.compile('rmsprop', 'mse') gru_model.fit(x_train, y_train) y_2 = gru_model.predict(x_train) with test_util.device(use_gpu=True): cudnn_layer = rnn.GRU(rnn_state_size, recurrent_activation='sigmoid', reset_after=True) cudnn_model = keras.models.Model(inputs, cudnn_layer(masked_input)) cudnn_model.set_weights(weights) y_3 = cudnn_model.predict(x_train) cudnn_model.compile('rmsprop', 'mse') cudnn_model.fit(x_train, y_train) y_4 = cudnn_model.predict(x_train) self.assertAllClose(y_1, y_3, rtol=2e-5, atol=2e-5) self.assertAllClose(y_2, y_4, rtol=2e-5, atol=2e-5)
def _compare(self, x, use_gpu): with test_util.device(use_gpu=use_gpu): inx = ops.convert_to_tensor(x) ofinite, oinf, onan = math_ops.is_finite(inx), math_ops.is_inf( inx), math_ops.is_nan(inx) tf_finite, tf_inf, tf_nan = self.evaluate([ofinite, oinf, onan]) if x.dtype == dtypes_lib.bfloat16.as_numpy_dtype: # Numpy will implicitly convert bfloat16 value to float16, so we cast to # float32 to avoid this. x = x.astype(np.float32) np_finite, np_inf, np_nan = np.isfinite(x), np.isinf(x), np.isnan(x) self.assertAllEqual(np_inf, tf_inf) self.assertAllEqual(np_nan, tf_nan) self.assertAllEqual(np_finite, tf_finite) self.assertShapeEqual(np_inf, oinf) self.assertShapeEqual(np_nan, onan) self.assertShapeEqual(np_finite, ofinite)
def testLoadFromNameBasedSaver(self): """Save a name-based checkpoint, load it using the object-based API.""" with test_util.device(use_gpu=True): save_path = self._write_name_based_checkpoint() root = self._initialized_model() self._set_sentinels(root) with self.assertRaises(AssertionError): self._check_sentinels(root) object_saver = checkpointable_utils.CheckpointableSaver(root) status = object_saver.restore(save_path) with self.assertRaises(AssertionError): status.assert_consumed() status.run_restore_ops() self._check_sentinels(root) self._set_sentinels(root) status.initialize_or_restore() self._check_sentinels(root)
def testEagerExceptionHandling(self): with test_util.device(use_gpu=True): self.verifyExceptionHandling( ValueError, errors.InvalidArgumentError, eager=True) self.verifyExceptionHandling( TypeError, errors.InvalidArgumentError, eager=True) self.verifyExceptionHandling( StopIteration, errors.OutOfRangeError, eager=True) self.verifyExceptionHandling( MemoryError, errors.ResourceExhaustedError, eager=True) self.verifyExceptionHandling( NotImplementedError, errors.UnimplementedError, eager=True) class WeirdError(Exception): pass self.verifyExceptionHandling(WeirdError, errors.UnknownError, eager=True)
def testLoadFromNameBasedSaver(self): """Save a name-based checkpoint, load it using the object-based API.""" with test_util.device(use_gpu=True): with self.test_session(): save_path = self._write_name_based_checkpoint() root = self._initialized_model() self._set_sentinels(root) with self.assertRaises(AssertionError): self._check_sentinels(root) object_saver = trackable_utils.TrackableSaver( graph_view.ObjectGraphView(root)) self._set_sentinels(root) status = object_saver.restore(save_path) if context.executing_eagerly(): self._check_sentinels(root) if context.executing_eagerly(): status.assert_consumed() status.assert_existing_objects_matched() status.assert_nontrivial_match() else: # When graph building, we haven't read any keys, so we don't know # whether the restore will be complete. with self.assertRaisesRegex(AssertionError, "not restored"): status.assert_consumed() with self.assertRaisesRegex(AssertionError, "not restored"): status.assert_existing_objects_matched() with self.assertRaisesRegex(AssertionError, "not restored"): status.assert_nontrivial_match() status.run_restore_ops() self._check_sentinels(root) self._set_sentinels(root) status = object_saver.restore(save_path) status.initialize_or_restore() status.assert_nontrivial_match() self._check_sentinels(root) # Check that there is no error when keys are missing from the name-based # checkpoint. root.not_in_name_checkpoint = resource_variable_ops.ResourceVariable( [1.]) status = object_saver.restore(save_path) with self.assertRaises(AssertionError): status.assert_existing_objects_matched()
def _test01(self, dtype): with test_util.device(True): x_val = [5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15] shape = [1, 1, 6, 2] x = constant_op.constant(x_val, dtype=dtype, shape=shape) scale = constant_op.constant([4, 5], dtype=float) offset = constant_op.constant([2, 3], dtype=float) batch_norm, batch_mean, batch_var = nn_impl.fused_batch_norm( x, scale, offset, is_training=True) relu = nn_ops.relu(batch_norm) grad = gradients_impl.gradients(relu, x) y1 = array_ops.identity(grad) return (y1, )
def testDegenerateVariable(self): inp = np.random.rand(4, 4).astype("f") with test_util.device(use_gpu=True): result = self.evaluate(array_ops.split(inp, [-1, 4], 0)) self.assertAllEqual(result[0], inp[0:0, :]) self.assertAllEqual(result[1], inp[0:4, :]) result = self.evaluate(array_ops.split(inp, [4, -1], 0)) self.assertAllEqual(result[0], inp[0:4, :]) self.assertAllEqual(result[1], inp[4:4, :]) result = self.evaluate(array_ops.split(inp, [-1, 4], 1)) self.assertAllEqual(result[0], inp[:, 0:0]) self.assertAllEqual(result[1], inp[:, 0:4]) result = self.evaluate(array_ops.split(inp, [4, -1], 1)) self.assertAllEqual(result[0], inp[:, 0:4]) self.assertAllEqual(result[1], inp[:, 4:4])
def _test01(self, dtype): with test_util.device(True): x = constant_op.constant( [5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15], dtype=dtype, shape=[1, 1, 6, 2]) scale = constant_op.constant([4, 5], dtype=float) offset = constant_op.constant([2, 3], dtype=float) batch_norm, batch_mean, batch_var = nn_impl.fused_batch_norm( x, scale, offset, is_training=True) relu = nn_ops.relu(batch_norm) y1 = array_ops.identity(relu) y2 = array_ops.identity(batch_mean) y3 = array_ops.identity(batch_var) return (y1, y2, y3)
def _RunAndVerifyVariable(self, dtype, large_num_splits=False): # Random dims of rank 5 shape = np.random.randint(1, 5, size=5) split_dim = np.random.randint(-5, 5) if large_num_splits: num_split = np.random.randint(16, 25) else: num_split = np.random.randint(2, 8) size_splits = np.random.randint(2, 8, num_split, dtype=np.int32) shape[split_dim] = np.sum(size_splits) inp = self._makeData(shape, dtype) with test_util.device(use_gpu=True): result = self.evaluate(array_ops.split(inp, size_splits, split_dim)) slices = [slice(0, x) for x in shape] offset = 0 for i in range(num_split): slices[split_dim] = slice(offset, offset + size_splits[i]) offset += size_splits[i] self.assertAllEqual(result[i], inp[slices])
def test_unified_lstm_feature_parity_with_canonical_lstm(self): input_shape = 10 rnn_state_size = 8 timestep = 4 batch = 20 (x_train, y_train), _ = testing_utils.get_test_data( train_samples=batch, test_samples=0, input_shape=(timestep, input_shape), num_classes=rnn_state_size, random_seed=random_seed.DEFAULT_GRAPH_SEED) y_train = keras.utils.to_categorical(y_train, rnn_state_size) # For the last batch item of the test data, we filter out the last # timestep to simulate the variable length sequence and masking test. x_train[-2:, -1, :] = 0.0 y_train[-2:] = 0 inputs = keras.layers.Input( shape=[timestep, input_shape], dtype=dtypes.float32) masked_input = keras.layers.Masking()(inputs) lstm_layer = rnn_v1.LSTM(rnn_state_size, recurrent_activation='sigmoid') output = lstm_layer(masked_input) lstm_model = keras.models.Model(inputs, output) weights = lstm_model.get_weights() y_1 = lstm_model.predict(x_train) lstm_model.compile('rmsprop', 'mse') lstm_model.fit(x_train, y_train) y_2 = lstm_model.predict(x_train) with test_util.device(use_gpu=True): cudnn_layer = rnn.LSTM(rnn_state_size) cudnn_model = keras.models.Model(inputs, cudnn_layer(masked_input)) cudnn_model.set_weights(weights) y_3 = cudnn_model.predict(x_train) cudnn_model.compile('rmsprop', 'mse') cudnn_model.fit(x_train, y_train) y_4 = cudnn_model.predict(x_train) self.assertAllClose(y_1, y_3, rtol=1e-5, atol=2e-5) self.assertAllClose(y_2, y_4, rtol=1e-5, atol=2e-5)
def test_unified_gru_feature_parity_with_canonical_gru(self): with context.eager_mode(): # Run this test under eager only due to b/120160788 for model.set_weights. input_shape = 10 rnn_state_size = 8 timestep = 4 batch = 20 (x_train, y_train), _ = testing_utils.get_test_data( train_samples=batch, test_samples=0, input_shape=(timestep, input_shape), num_classes=rnn_state_size) y_train = keras.utils.to_categorical(y_train, rnn_state_size) inputs = keras.layers.Input(shape=[timestep, input_shape], dtype=dtypes.float32) gru_layer = keras.layers.GRU(rnn_state_size, recurrent_activation='sigmoid', reset_after=True) output = gru_layer(inputs) gru_model = keras.models.Model(inputs, output) weights = gru_model.get_weights() y_1 = gru_model.predict(x_train) gru_model.compile('rmsprop', 'mse') gru_model.fit(x_train, y_train) y_2 = gru_model.predict(x_train) with test_util.device(use_gpu=True): cudnn_layer = keras.layers.UnifiedGRU( rnn_state_size, recurrent_activation='sigmoid', reset_after=True) cudnn_model = keras.models.Model(inputs, cudnn_layer(inputs)) cudnn_model.set_weights(weights) y_3 = cudnn_model.predict(x_train) cudnn_model.compile('rmsprop', 'mse') cudnn_model.fit(x_train, y_train) y_4 = cudnn_model.predict(x_train) self.assertAllClose(y_1, y_3) self.assertAllClose(y_2, y_4)