def testInsideFunction(self):
  """Executes a CriticalSection from inside a tf.data map function.

  The critical section reads a resource variable placed on GPU when one
  is available, otherwise CPU, and the test checks the first dataset
  element equals the variable's value in both graph and eager modes.
  """
  if test_util.is_gpu_available():
    self.skipTest(
        "b/123899495: Colocation errors for critical sections in map on GPU")
  cs = critical_section_ops.CriticalSection()
  with ops.device("/gpu:0" if test_util.is_gpu_available() else "/cpu:0"):
    v = resource_variable_ops.ResourceVariable(1)

  def fn():
    # Executed under the critical section's serialization guarantee.
    return v.read_value()

  # map() creates a TensorFlow function.
  ds = dataset_ops.Dataset.range(1)
  if test_util.is_gpu_available():
    # On GPU the element must be copied to the device before the GPU map.
    ds = (ds.apply(prefetching_ops.copy_to_device("/gpu:0"))
          .apply(prefetching_ops.map_on_gpu(lambda _: cs.execute(fn))))
  else:
    ds = ds.map(lambda _: cs.execute(fn))

  def get_first():
    # Eager mode: no explicit initialization needed.
    if context.executing_eagerly():
      return self.evaluate(ds.make_one_shot_iterator().get_next())
    itr = ds.make_initializable_iterator()
    self.evaluate([v.initializer, itr.initializer])
    return self.evaluate(itr.get_next())

  self.assertEqual(1, get_first())
def testCondAndTensorArrayInDefun(self):
  """Runs cond + TensorArray inside a defun-ed while_loop.

  Squares positive elements of range(-5, 5) and leaves the rest unchanged.

  Fix: the original flipped the module-global
  ``tensor_array_ops.ENABLE_TENSOR_ARRAY_V2`` flag and only restored it at
  the end of the method, so a failing assertion (or any exception) leaked
  the flipped flag into subsequent tests. The restore now lives in a
  ``finally`` block.
  """
  if test_util.is_gpu_available():
    old_enable_tensor_array_v2 = tensor_array_ops.ENABLE_TENSOR_ARRAY_V2
    # TODO(b/119689663): Enable this.
    tensor_array_ops.ENABLE_TENSOR_ARRAY_V2 = False
  try:
    @function.defun
    def f():
      x = math_ops.range(-5, 5)
      output = tensor_array_ops.TensorArray(dtype=dtypes.int32,
                                            size=x.shape[0])

      def loop_body(i, output):
        def if_true():
          return output.write(i, x[i]**2)

        def if_false():
          return output.write(i, x[i])

        output = control_flow_ops.cond(x[i] > 0, if_true, if_false)
        return i + 1, output

      _, output = control_flow_ops.while_loop(
          lambda i, arr: i < x.shape[0],
          loop_body,
          loop_vars=(constant_op.constant(0), output))
      return output.stack()

    output_t = f()
    self.assertAllEqual(
        self.evaluate(output_t), [-5, -4, -3, -2, -1, 0, 1, 4, 9, 16])
  finally:
    # Always restore the global flag, even on assertion failure.
    if test_util.is_gpu_available():
      tensor_array_ops.ENABLE_TENSOR_ARRAY_V2 = old_enable_tensor_array_v2
def testContWithPlaceholders(self):
  """Steps through a placeholder-fed graph: cont() to x, then to y."""
  if test_util.is_gpu_available():
    self.skipTest("b/123446705 this causes a segfault on GPU")
  with NodeStepper(
      self.sess,
      self.y,
      feed_dict={
          self.ph0: [[1.0, 2.0], [-3.0, 5.0]],
          self.ph1: [[-1.0], [0.5]]
      }) as stepper:
    self.assertEqual(4, len(stepper.sorted_nodes()))
    self.assertSetEqual({"ph0:0", "ph1:0", "x:0", "y:0"},
                        set(stepper.closure_elements()))

    result = stepper.cont(self.x)
    self.assertAllClose([[0.0], [5.5]], result)
    # Both placeholder inputs were fed by the client for this cont().
    self.assertEqual({
        "ph0:0": NodeStepper.FEED_TYPE_CLIENT,
        "ph1:0": NodeStepper.FEED_TYPE_CLIENT,
    }, stepper.last_feed_types())

    # cont() cached a handle to x's output.
    self.assertEqual(["x:0"], stepper.handle_names())
    self.assertSetEqual({"x"}, stepper.handle_node_names())

    result = stepper.cont(self.y)
    self.assertAllClose([[-1.0], [6.0]], result)
    # x is now fed from the handle produced by the previous cont().
    self.assertEqual({
        "x:0": NodeStepper.FEED_TYPE_HANDLE,
        "ph1:0": NodeStepper.FEED_TYPE_CLIENT,
    }, stepper.last_feed_types())
def testUsingNamesNotUsingIntermediateTensors(self):
  """cont() by tensor name records intermediates and reuses the c:0 handle."""
  if test_util.is_gpu_available():
    self.skipTest("b/123446705 this causes a segfault on GPU")
  with NodeStepper(self.sess, "e:0") as stepper:
    # The first cont() call should have used no feeds.
    result = stepper.cont("c:0")
    self.assertAllClose(6.0, result)
    self.assertItemsEqual(["a/read:0", "b/read:0"],
                          stepper.intermediate_tensor_names())
    self.assertAllClose(2.0, stepper.get_tensor_value("a/read:0"))
    self.assertAllClose(3.0, stepper.get_tensor_value("b/read:0"))
    self.assertEqual({}, stepper.last_feed_types())

    # The second cont() call should have used the tensor handle from the
    # previous cont() call.
    result = stepper.cont("e:0")
    self.assertAllClose(24.0, result)
    self.assertItemsEqual(["a/read:0", "b/read:0", "d:0"],
                          stepper.intermediate_tensor_names())
    self.assertAllClose(2.0, stepper.get_tensor_value("a/read:0"))
    self.assertAllClose(3.0, stepper.get_tensor_value("b/read:0"))
    self.assertAllClose(4.0, stepper.get_tensor_value("d:0"))
    self.assertEqual({
        "c:0": NodeStepper.FEED_TYPE_HANDLE,
        "a/read:0": NodeStepper.FEED_TYPE_DUMPED_INTERMEDIATE,
    }, stepper.last_feed_types())
def testCopyToGPU(self):
  """Copies Optional variant tensors from CPU to GPU and reads them back."""
  if not test_util.is_gpu_available():
    self.skipTest("No GPU available")
  with ops.device("/cpu:0"):
    optional_with_value = optional_ops.Optional.from_value(
        (constant_op.constant(37.0), constant_op.constant("Foo"),
         constant_op.constant(42)))
    optional_none = optional_ops.Optional.none_from_structure(
        structure.TensorStructure(dtypes.float32, []))
  with ops.device("/gpu:0"):
    # identity() on the variant tensor forces a device copy; wrap the
    # copied variant back into an optional with the same structure.
    gpu_optional_with_value = optional_ops._OptionalImpl(
        array_ops.identity(optional_with_value._variant_tensor),
        optional_with_value.value_structure)
    gpu_optional_none = optional_ops._OptionalImpl(
        array_ops.identity(optional_none._variant_tensor),
        optional_none.value_structure)
    gpu_optional_with_value_has_value = gpu_optional_with_value.has_value()
    gpu_optional_with_value_values = gpu_optional_with_value.get_value()
    gpu_optional_none_has_value = gpu_optional_none.has_value()
  self.assertTrue(self.evaluate(gpu_optional_with_value_has_value))
  # The string element comes back as bytes.
  self.assertEqual((37.0, b"Foo", 42),
                   self.evaluate(gpu_optional_with_value_values))
  self.assertFalse(self.evaluate(gpu_optional_none_has_value))
def testCopyToDeviceGpuWithMap(self):
  """copy_to_device followed by map_on_gpu squares the numeric elements."""
  if not test_util.is_gpu_available():
    self.skipTest("No GPU available")

  def generator():
    for i in range(10):
      yield i, float(i), str(i)

  host_dataset = dataset_ops.Dataset.from_generator(
      generator, output_types=(dtypes.int32, dtypes.float32, dtypes.string))
  device_dataset = host_dataset.apply(
      prefetching_ops.copy_to_device("/gpu:0"))

  def gpu_map_func(x, y, z):
    # Square the numeric components; pass the string through unchanged.
    return math_ops.square(x), math_ops.square(y), z

  device_dataset = device_dataset.apply(
      prefetching_ops.map_on_gpu(gpu_map_func))
  options = dataset_ops.Options()
  options.experimental_autotune = False
  device_dataset = device_dataset.with_options(options)

  with ops.device("/gpu:0"):
    iterator = device_dataset.make_initializable_iterator()
    next_element = iterator.get_next()

  with self.cached_session() as sess:
    sess.run(iterator.initializer)
    for i in range(10):
      x, y, z = sess.run(next_element)
      self.assertEqual(i**2, x)
      self.assertEqual(float(i**2), y)
      # Strings are returned as bytes.
      self.assertEqual(util_compat.as_bytes(str(i)), z)
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)
def test_function_with_captured_dataset(self):
  """SavedModel export of a tf.function that iterates a captured dataset."""
  if test_util.is_gpu_available():
    self.skipTest("Currently broken when a GPU is available.")

  class HasDataset(module.Module):

    def __init__(self):
      super(HasDataset, self).__init__()
      self.dataset = (
          dataset_ops.Dataset.range(5)
          .map(lambda x: x ** 2))

    @def_function.function
    def __call__(self, x):
      total = array_ops.zeros([], dtype=dtypes.int64)
      for element in self.dataset:
        total += x * element
      return total

  model = HasDataset()
  export_dir = os.path.join(self.get_temp_dir(), "saved_model")
  signature = model.__call__.get_concrete_function(
      tensor_spec.TensorSpec(None, dtypes.int64))
  save.save(model, export_dir, signatures=signature)
  # Squares of 0..4 sum to 0+1+4+9+16; each is scaled by x == 3.
  self.assertAllClose({"output_0": 3 * (1 + 4 + 9 + 16)},
                      _import_and_infer(export_dir, {"x": 3}))
def testFunctionWithResourcesOnDifferentDevices(self):
  """partitioned_call of a Defun touching variables on two CPUs and a GPU."""
  if not test_util.is_gpu_available():
    self.skipTest("No GPUs available.")

  # One variable per device; all hold the same values.
  with ops.device("/cpu:0"):
    v_cpu_zero = resource_variable_ops.ResourceVariable(
        [0.0, 1.0, 2.0], name="v_cpu_zero")

  with ops.device("/cpu:1"):
    v_cpu_one = resource_variable_ops.ResourceVariable(
        [0.0, 1.0, 2.0], name="v_cpu_one")

  with ops.device("/gpu:0"):
    v_gpu = resource_variable_ops.ResourceVariable(
        [0.0, 1.0, 2.0], name="v_gpu")

  def sum_gather():
    cpu_result = math_ops.reduce_sum(array_ops.gather(v_cpu_zero, [1, 2]))
    also_cpu_result = math_ops.reduce_sum(array_ops.gather(v_cpu_one, [1, 2]))
    gpu_result = math_ops.reduce_sum(array_ops.gather(v_gpu, [1, 2]))
    return cpu_result, also_cpu_result, gpu_result

  defined = function.Defun()(sum_gather)
  # allow_soft_placement=False makes any mis-placement a hard error.
  with self.test_session(
      config=config_pb2.ConfigProto(
          allow_soft_placement=False,
          log_device_placement=True,
          device_count={"CPU": 2})) as sess:
    self.evaluate(variables.global_variables_initializer())
    expected = self.evaluate(sum_gather())
    result = sess.run(
        functional_ops.partitioned_call(
            args=defined.captured_inputs, f=defined))
  self.assertAllEqual(expected, result)
def testIteratorGetNextAsOptionalOnGPU(self):
  """get_next_as_optional semantics on a GPU-placed iterator."""
  if not test_util.is_gpu_available():
    self.skipTest("No GPU available")
  host_dataset = dataset_ops.Dataset.range(3)
  device_dataset = host_dataset.apply(
      prefetching_ops.copy_to_device("/gpu:0"))
  with ops.device("/gpu:0"):
    iterator = device_dataset.make_initializable_iterator()
    next_elem = iterator_ops.get_next_as_optional(iterator)
    elem_has_value_t = next_elem.has_value()
    elem_value_t = next_elem.get_value()

  with self.cached_session() as sess:
    # Before initializing the iterator, evaluating the optional fails with
    # a FailedPreconditionError.
    with self.assertRaises(errors.FailedPreconditionError):
      sess.run(elem_has_value_t)
    with self.assertRaises(errors.FailedPreconditionError):
      sess.run(elem_value_t)

    # For each element of the dataset, assert that the optional evaluates to
    # the expected value.
    sess.run(iterator.initializer)
    for i in range(3):
      elem_has_value, elem_value = sess.run([elem_has_value_t, elem_value_t])
      self.assertTrue(elem_has_value)
      self.assertEqual(i, elem_value)

    # After exhausting the iterator, `next_elem.has_value()` will evaluate to
    # false, and attempting to get the value will fail.
    for _ in range(2):
      self.assertFalse(sess.run(elem_has_value_t))
      with self.assertRaises(errors.InvalidArgumentError):
        sess.run(elem_value_t)
def testDifferentDeviceCPUGPU(self):
  """Runs the one-shot prefetch helper from a CPU device to a GPU device."""
  if not test_util.is_gpu_available():
    self.skipTest("No GPU available")
  source_device = "/job:localhost/replica:0/task:0/cpu:0"
  target_device = "/job:localhost/replica:0/task:0/gpu:0"
  self._prefetch_fn_helper_one_shot("cpu_gpu", source_device, target_device)
def testGetNextAsOptionalGpu(self):
  """MultiDeviceIterator.get_next_as_optional across a CPU and a GPU.

  range(9) is dealt alternately to the two devices, so device 1 gets
  0,2,4,6,8 and device 2 gets 1,3,5,7 (one element fewer).
  """
  if not test_util.is_gpu_available() or context.executing_eagerly():
    self.skipTest("No GPU available")

  dataset = dataset_ops.Dataset.range(9)
  multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
      dataset, ["/cpu:1", "/gpu:0"])
  elem_on_1, elem_on_2 = multi_device_iterator.get_next_as_optional()
  elem_on_1_has_value_t = elem_on_1.has_value()
  elem_on_1_t = elem_on_1.get_value()
  elem_on_2_has_value_t = elem_on_2.has_value()
  elem_on_2_t = elem_on_2.get_value()

  config = config_pb2.ConfigProto(device_count={"CPU": 2, "GPU": 1})
  with self.test_session(config=config) as sess:
    self.evaluate(multi_device_iterator.initializer)
    # Paired steps: even element on device 1, odd element on device 2.
    for i in range(0, 8, 2):
      elem_on_1_has_value, elem_on_1_value = sess.run(
          [elem_on_1_has_value_t, elem_on_1_t])
      self.assertTrue(elem_on_1_has_value)
      self.assertEqual(i, elem_on_1_value)
      elem_on_2_has_value, elem_on_2_value = sess.run(
          [elem_on_2_has_value_t, elem_on_2_t])
      self.assertTrue(elem_on_2_has_value)
      self.assertEqual(i + 1, elem_on_2_value)
    # The leftover element (8) lands on device 1.
    elem_on_1_has_value, elem_on_1_value = sess.run(
        [elem_on_1_has_value_t, elem_on_1_t])
    self.assertTrue(elem_on_1_has_value)
    self.assertEqual(8, elem_on_1_value)
    # Both devices are now exhausted: optionals are empty and get_value
    # raises.
    self.assertFalse(self.evaluate(elem_on_1_has_value_t))
    self.assertFalse(self.evaluate(elem_on_2_has_value_t))
    with self.assertRaises(errors.InvalidArgumentError):
      self.evaluate(elem_on_1_t)
    with self.assertRaises(errors.InvalidArgumentError):
      self.evaluate(elem_on_2_t)
def _compareScalar(self, func, x, y, dtype):
  """Applies `func` to scalars `x`, `y` cast to `dtype`; returns the scalar result."""
  with self.test_session(force_gpu=test_util.is_gpu_available()):
    lhs = ops.convert_to_tensor(np.array([x]).astype(dtype))
    rhs = ops.convert_to_tensor(np.array([y]).astype(dtype))
    return self.evaluate(func(lhs, rhs))[0]
def testBadConstructorArgs(self):
  """EagerTensor constructor rejects missing or ill-typed arguments."""
  context.ensure_initialized()
  ctx = context.context()
  handle = ctx._handle
  device = ctx.device_name
  # Missing context.
  with self.assertRaisesRegexp(
      TypeError, r".*argument 'context' \(pos 2\).*"):
    ops.EagerTensor(1, device=device)
  # Missing device.
  with self.assertRaisesRegexp(
      TypeError, r".*argument 'device' \(pos 3\).*"):
    ops.EagerTensor(1, context=handle)
  # Bad dtype type.
  with self.assertRaisesRegexp(TypeError,
                               "Expecting a DataType value for dtype. Got"):
    ops.EagerTensor(1, context=handle, device=device, dtype="1")
  # Following errors happen when trying to copy to GPU.
  if not test_util.is_gpu_available():
    self.skipTest("No GPUs found")
  with ops.device("/device:GPU:0"):
    device = ctx.device_name
    # Bad context.
    with self.assertRaisesRegexp(
        TypeError, "Expecting a PyCapsule encoded context handle. Got"):
      ops.EagerTensor(1.0, context=1, device=device)
    # Bad device.
    with self.assertRaisesRegexp(
        TypeError, "Error parsing device argument to CopyToDevice"):
      ops.EagerTensor(1.0, context=handle, device=1)
def _generate_synthetic_snli_data_batch(sequence_length, batch_size,
                                        vocab_size):
  """Generate a fake batch of SNLI data for testing.

  Returns a 5-tuple ``(labels, prem, prem_trans, hypo, hypo_trans)`` of
  int64 tensors, built on CPU and moved to GPU when one is available.
  Premise/hypothesis tensors are shaped ``(sequence_length, batch_size)``;
  the transition tensors repeat one fixed 27-step shift(3)/reduce(2)
  sequence per batch entry (transposed to time-major).
  """
  with tf.device("cpu:0"):
    labels = tf.random_uniform([batch_size], minval=1, maxval=4,
                               dtype=tf.int64)
    prem = tf.random_uniform(
        (sequence_length, batch_size), maxval=vocab_size, dtype=tf.int64)
    prem_trans = tf.constant(np.array(
        [[3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3,
          2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2,
          3, 2, 2]] * batch_size, dtype=np.int64).T)
    hypo = tf.random_uniform(
        (sequence_length, batch_size), maxval=vocab_size, dtype=tf.int64)
    hypo_trans = tf.constant(np.array(
        [[3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3,
          2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2,
          3, 2, 2]] * batch_size, dtype=np.int64).T)
  if test_util.is_gpu_available():
    # Move all tensors onto the GPU when one is present.
    labels = labels.gpu()
    prem = prem.gpu()
    prem_trans = prem_trans.gpu()
    hypo = hypo.gpu()
    hypo_trans = hypo_trans.gpu()
  return labels, prem, prem_trans, hypo, hypo_trans
def testAddN(self):
  """add_n over optional variants: values add elementwise; all-none stays none."""
  devices = ["/cpu:0"]
  if test_util.is_gpu_available():
    devices.append("/gpu:0")
  for device in devices:
    with ops.device(device):
      # With value
      lhs = optional_ops.Optional.from_value((1.0, 2.0))
      rhs = optional_ops.Optional.from_value((3.0, 4.0))
      summed = optional_ops._OptionalImpl(
          math_ops.add_n([lhs._variant_tensor, rhs._variant_tensor]),
          lhs.value_structure)
      self.assertAllEqual(self.evaluate(summed.get_value()), (4.0, 6.0))

      # Without value
      none_lhs = optional_ops.Optional.none_from_structure(
          lhs.value_structure)
      none_rhs = optional_ops.Optional.none_from_structure(
          rhs.value_structure)
      summed_none = optional_ops._OptionalImpl(
          math_ops.add_n(
              [none_lhs._variant_tensor, none_rhs._variant_tensor]),
          none_lhs.value_structure)
      self.assertFalse(self.evaluate(summed_none.has_value()))
def testDeviceBeforeCond(self):
  """An outer ops.device() scope determines placement of a cond_v2 branch."""
  with ops.Graph().as_default() as g:
    with self.session(graph=g):

      def fn():
        # Inside the branch function the device is not yet resolved.
        self.assertEqual("", constant_op.constant(3.0).op.device)
        return test_ops.device_placement_op()

      with ops.device("/device:CPU:0"):
        self.assertIn(
            compat.as_bytes("CPU:0"),
            self.evaluate(
                cond_v2.cond_v2(constant_op.constant(True), fn, fn)))

      def fn2():
        self.assertEqual("", constant_op.constant(3.0).op.device)
        return test_ops.device_placement_op()

      # GPU half runs after the CPU half; skip (rather than pass) if no
      # GPU so the missing coverage is visible.
      if test_util.is_gpu_available():
        with ops.device("/device:GPU:0"):
          self.assertIn(
              compat.as_bytes("GPU:0"),
              self.evaluate(
                  cond_v2.cond_v2(constant_op.constant(True), fn2, fn2)))
      else:
        self.skipTest("Test requires a GPU to check GPU device placement.")
def testInstantError(self):
  """Out-of-range gather indices raise InvalidArgumentError immediately (CPU)."""
  if test_util.is_gpu_available():
    # TODO(nareshmodi): make this test better
    self.skipTest("Gather doesn't do index checking on GPUs")
  expected_message = r'indices = 7 is not in \[0, 3\)'
  with self.assertRaisesRegexp(errors.InvalidArgumentError, expected_message):
    array_ops.gather([0, 1, 2], 7)
def _compare(self, c, x, y, use_gpu):
  """Checks array_ops.where against np.where for condition `c` and branches `x`, `y`."""
  expected = np.where(c, x, y)
  force = use_gpu and test_util.is_gpu_available()
  with self.test_session(use_gpu=use_gpu, force_gpu=force):
    where_t = array_ops.where(c, x, y)
    actual = self.evaluate(where_t)
    self.assertAllEqual(expected, actual)
    self.assertShapeEqual(expected, where_t)
def testNoShape(self):
  """placeholder_with_default with shape=None accepts feeds of any shape."""
  with self.test_session(force_gpu=test_util.is_gpu_available()):
    placeholder = array_ops.placeholder_with_default([17], shape=None)
    identity = array_ops.identity(placeholder)
    # Default flows through unfed; feeds of any rank are accepted.
    self.assertAllEqual([17], identity.eval())
    self.assertAllEqual([3, 37],
                        identity.eval(feed_dict={placeholder: [3, 37]}))
    self.assertAllEqual(
        [[3, 3], [3, 3]],
        identity.eval(feed_dict={placeholder: [[3, 3], [3, 3]]}))
def _compareGpu(self, x, y, np_func, tf_func):
  """Checks binary `tf_func` against `np_func` with GPU forced when available."""
  expected = np_func(x, y)
  with self.test_session(force_gpu=test_util.is_gpu_available()):
    result_t = tf_func(ops.convert_to_tensor(x), ops.convert_to_tensor(y))
    actual = result_t.eval()
    self.assertAllClose(expected, actual)
    self.assertShapeEqual(expected, result_t)
def _compareConj(self, cplx, use_gpu):
  """Checks math_ops.conj against np.conj on complex input `cplx`."""
  expected = np.conj(cplx)
  force = use_gpu and test_util.is_gpu_available()
  with self.test_session(use_gpu=use_gpu, force_gpu=force):
    conj_t = math_ops.conj(ops.convert_to_tensor(cplx))
    actual = self.evaluate(conj_t)
    self.assertAllEqual(expected, actual)
    self.assertShapeEqual(expected, conj_t)
def _compareGpu(self, x, np_func, tf_func):
  """Checks unary `tf_func` against `np_func` with GPU forced when available."""
  expected = np_func(x)
  with self.test_session(force_gpu=test_util.is_gpu_available()):
    actual = tf_func(ops.convert_to_tensor(x)).eval()
  # float16 has limited precision, so relax the tolerances for it.
  if x.dtype == np.float16:
    self.assertAllClose(expected, actual, rtol=1e-3, atol=1e-3)
  else:
    self.assertAllClose(expected, actual)
def testPartialShape(self):
  """placeholder_with_default with shape=[None] enforces rank but not length."""
  with self.test_session(force_gpu=test_util.is_gpu_available()):
    placeholder = array_ops.placeholder_with_default([1, 2, 3], shape=[None])
    identity = array_ops.identity(placeholder)
    # Default value flows through when nothing is fed.
    self.assertAllEqual([1, 2, 3], identity.eval())
    # Any rank-1 feed is accepted.
    self.assertAllEqual([3, 37],
                        identity.eval(feed_dict={placeholder: [3, 37]}))
    # A rank-2 feed violates the declared shape.
    with self.assertRaises(ValueError):
      identity.eval(feed_dict={placeholder: [[2, 2], [2, 2]]})
def _not(self, x, use_gpu=False):
  """Checks math_ops.logical_not against np.logical_not."""
  expected = np.logical_not(x)
  force = use_gpu and test_util.is_gpu_available()
  with self.test_session(use_gpu=use_gpu, force_gpu=force):
    not_t = math_ops.logical_not(ops.convert_to_tensor(x))
    actual = self.evaluate(not_t)
    self.assertEqual(not_t.dtype, dtypes_lib.bool)
    self.assertAllEqual(expected, actual)
    self.assertShapeEqual(expected, not_t)
def _compareAngle(self, cplx, use_gpu):
  """Checks math_ops.angle against np.angle on complex input `cplx`."""
  expected = np.angle(cplx)
  force = use_gpu and test_util.is_gpu_available()
  with self.test_session(use_gpu=use_gpu, force_gpu=force) as sess:
    angle_t = math_ops.angle(ops.convert_to_tensor(cplx))
    actual = sess.run(angle_t)
    self.assertAllEqual(expected, actual)
    self.assertShapeEqual(expected, angle_t)
def _compare(self, c, x, y, use_gpu):
  """Checks batched array_ops.where against a numpy reference.

  The reference selects x_i or y_i per batch entry, then transposes the
  dstack result back to batch-major order.
  """
  expected = np.dstack(
      [x_i if c_i else y_i for c_i, x_i, y_i in zip(c, x, y)]).transpose(
          [2, 0, 1])
  force = use_gpu and test_util.is_gpu_available()
  with self.test_session(use_gpu=use_gpu, force_gpu=force):
    where_t = array_ops.where(c, x, y)
    actual = self.evaluate(where_t)
    self.assertAllEqual(expected, actual)
    self.assertShapeEqual(expected, where_t)
def testFullShape(self):
  """placeholder_with_default with a full shape rejects mismatched feeds."""
  with self.session(force_gpu=test_util.is_gpu_available()):
    placeholder = array_ops.placeholder_with_default([[2, 2], [2, 2]],
                                                     shape=[2, 2])
    identity = array_ops.identity(placeholder)
    self.assertAllEqual([[2, 2], [2, 2]], self.evaluate(identity))
    self.assertAllEqual(
        [[3, 3], [3, 3]],
        identity.eval(feed_dict={placeholder: [[3, 3], [3, 3]]}))
    # A feed whose shape differs from [2, 2] is rejected.
    with self.assertRaises(ValueError):
      identity.eval(feed_dict={placeholder: [[6, 6, 6], [6, 6, 6]]})
def _compareMake(self, real, imag, use_gpu):
  """Checks math_ops.complex against the numpy expression real + 1j * imag."""
  expected = real + (1j) * imag
  force = use_gpu and test_util.is_gpu_available()
  with self.test_session(use_gpu=use_gpu, force_gpu=force):
    real_t = ops.convert_to_tensor(real)
    imag_t = ops.convert_to_tensor(imag)
    complex_t = math_ops.complex(real_t, imag_t)
    self.assertAllEqual(expected, self.evaluate(complex_t))
    self.assertShapeEqual(expected, complex_t)
def testInt64GPU(self):
  """strided_slice accepts int64 begin/end/strides tensors on GPU."""
  if not test_util.is_gpu_available():
    self.skipTest("No GPU available")
  with self.test_session(use_gpu=True, force_gpu=True):
    values = constant_op.constant([1., 2., 3.])
    lo = constant_op.constant([2], dtype=dtypes.int64)
    hi = constant_op.constant([3], dtype=dtypes.int64)
    step = constant_op.constant([1], dtype=dtypes.int64)
    sliced = array_ops.strided_slice(values, lo, hi, step)
    self.assertAllEqual([3.], self.evaluate(sliced))
def testRemoteIteratorUsingRemoteCallOpDirectSessionGPUCPU(self):
  """Round-trips an iterator string handle through a GPU-placed remote_call.

  The CPU-side iterator's string handle is decoded to uint8 so it can live
  on the GPU, then re-encoded to a string inside the remote function
  (targeted back at the CPU) to drive the iterator.
  """
  if not test_util.is_gpu_available():
    self.skipTest("No GPU available")

  with ops.device("/job:localhost/replica:0/task:0/cpu:0"):
    dataset_3 = dataset_ops.Dataset.from_tensor_slices([1, 2, 3])
    iterator_3 = dataset_ops.make_one_shot_iterator(dataset_3)
    iterator_3_handle = iterator_3.string_handle()

  def _encode_raw(byte_array):
    # Re-pack a uint8 array back into a bytes object (the string handle).
    return bytes(bytearray(byte_array))

  @function.Defun(dtypes.uint8)
  def _remote_fn(h):
    handle = script_ops.py_func(_encode_raw, [h], dtypes.string)
    remote_iterator = iterator_ops.Iterator.from_string_handle(
        handle, dataset_ops.get_legacy_output_types(dataset_3),
        dataset_ops.get_legacy_output_shapes(dataset_3))
    return remote_iterator.get_next()

  with ops.device("/job:localhost/replica:0/task:0/device:GPU:0"):
    target_placeholder = array_ops.placeholder(dtypes.string, shape=[])
    # NOTE(review): the handle is presumably shipped as uint8 because a
    # string tensor cannot be placed on the GPU — confirm.
    iterator_3_handle_uint8 = parsing_ops.decode_raw(
        bytes=iterator_3_handle, out_type=dtypes.uint8)
    remote_op = functional_ops.remote_call(
        args=[iterator_3_handle_uint8],
        Tout=[dtypes.int32],
        f=_remote_fn,
        target=target_placeholder)

  with self.cached_session() as sess:
    # Each run targeted at the CPU advances the iterator by one element.
    elem = sess.run(
        remote_op,
        feed_dict={
            target_placeholder: "/job:localhost/replica:0/task:0/cpu:0"
        })
    self.assertEqual(elem, [1])
    elem = sess.run(
        remote_op,
        feed_dict={
            target_placeholder: "/job:localhost/replica:0/task:0/cpu:0"
        })
    self.assertEqual(elem, [2])
    elem = sess.run(
        remote_op,
        feed_dict={
            target_placeholder: "/job:localhost/replica:0/task:0/cpu:0"
        })
    self.assertEqual(elem, [3])
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(
          remote_op,
          feed_dict={
              target_placeholder: "/job:localhost/replica:0/task:0/cpu:0"
          })
def test_embedding_lookup_unique(self):
  """de.embedding_lookup_unique matches direct numpy fancy indexing."""
  embedding_dim = 5
  vocab_size = 10
  table = de.get_variable("t_unique_001", dtypes.int64, dtypes.float32,
                          dim=embedding_dim)
  lookup_shape = (2, 3, 4)
  reference = np.random.randn(vocab_size, embedding_dim)
  lookup_ids = np.random.randint(0, vocab_size, lookup_shape)
  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    self.evaluate(table.upsert(range(vocab_size), reference))
    expected = reference[lookup_ids]
    actual = de.embedding_lookup_unique(table, lookup_ids).eval()
    self.assertEqual(expected.shape, actual.shape)
    np.testing.assert_almost_equal(expected, actual)
def test_signature_mismatch(self):
  """upsert/lookup reject mismatched dtypes; ref-typed inputs are accepted."""
  config = config_pb2.ConfigProto()
  config.allow_soft_placement = True
  config.gpu_options.allow_growth = True
  with self.session(config=config, use_gpu=test_util.is_gpu_available()):
    default_val = -1
    keys = constant_op.constant([0, 1, 2], dtypes.int64)
    values = constant_op.constant([[0], [1], [2]], dtypes.int32)
    table = de.get_variable("t210",
                            dtypes.int64,
                            dtypes.int32,
                            initializer=default_val)

    # upsert with keys of the wrong type
    with self.assertRaises(ValueError):
      self.evaluate(
          table.upsert(
              constant_op.constant([4.0, 5.0, 6.0], dtypes.float32), values))
    # upsert with values of the wrong type
    with self.assertRaises(ValueError):
      self.evaluate(table.upsert(keys, constant_op.constant(["a", "b", "c"])))
    # Neither failed upsert changed the table.
    self.assertAllEqual(0, self.evaluate(table.size()))

    self.evaluate(table.upsert(keys, values))
    self.assertAllEqual(3, self.evaluate(table.size()))

    remove_keys_ref = variables.Variable(0, dtype=dtypes.int64)
    input_int64_ref = variables.Variable([-1], dtype=dtypes.int32)
    self.evaluate(variables.global_variables_initializer())

    # Ref types do not produce an upsert signature mismatch.
    self.evaluate(table.upsert(remove_keys_ref, input_int64_ref))
    self.assertAllEqual(3, self.evaluate(table.size()))

    # Ref types do not produce a lookup signature mismatch.
    self.assertEqual([-1], self.evaluate(table.lookup(remove_keys_ref)))

    # lookup with keys of the wrong type
    remove_keys = constant_op.constant([1, 2, 3], dtypes.int32)
    with self.assertRaises(ValueError):
      self.evaluate(table.lookup(remove_keys))
def test_dynamic_embedding_variable_with_random_init(self):
  """Lookups of absent keys fall back to the random initializer."""
  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    insert_keys = constant_op.constant([0, 1, 2], dtypes.int64)
    insert_values = constant_op.constant([[0.0], [1.0], [2.0]],
                                         dtypes.float32)
    random_init = init_ops.random_uniform_initializer()
    table = de.get_variable("t230",
                            dtypes.int64,
                            dtypes.float32,
                            initializer=random_init)
    self.evaluate(table.upsert(insert_keys, insert_values))
    self.assertAllEqual(3, self.evaluate(table.size()))
    query_keys = constant_op.constant([0, 1, 3], dtypes.int64)
    looked_up = self.evaluate(table.lookup(query_keys))
    # Key 3 was never inserted, so its row comes from the random
    # initializer and should (almost surely) differ from -1.0.
    self.assertNotEqual([-1.0], looked_up[2])
def testContToValidNodeShouldUpdateStatus(self):
  """cont()-ing to nodes updates status labels and moves the node pointer."""
  if test_util.is_gpu_available():
    self.skipTest("b/123446705 this causes a segfault on GPU")
  with stepper.NodeStepper(self.sess, self.e) as node_stepper:
    cli = stepper_cli.NodeStepperCLI(node_stepper)

    output = cli.list_sorted_nodes([])
    node_names, stat_labels, node_pointer = _parse_sorted_nodes_list(
        output.lines)
    index_c = node_names.index("c")
    # Before any cont(), "c" has a blank status and the pointer is at 0.
    self.assertEqual(" ", stat_labels[index_c])
    self.assertEqual(0, node_pointer)

    output = cli.cont("c")
    self.assertIsNone(_parse_updated(output.lines))
    node_names, stat_labels, node_pointer = _parse_sorted_nodes_list(
        output.lines)
    self.assertGreaterEqual(len(node_names), 3)
    self.assertIn("c", node_names)
    index_c = node_names.index("c")
    # The pointer moves to "c" and it is flagged as continued-to.
    self.assertEqual(index_c, node_pointer)
    self.assertIn(stepper_cli.NodeStepperCLI.STATE_CONT,
                  stat_labels[index_c])

    output = cli.cont("d")
    self.assertIsNone(_parse_updated(output.lines))
    node_names, stat_labels, node_pointer = _parse_sorted_nodes_list(
        output.lines)
    used_feed_types = _parsed_used_feeds(output.lines)
    # The second cont() reuses c's cached handle and a dumped intermediate.
    self.assertEqual({
        "c:0": stepper.NodeStepper.FEED_TYPE_HANDLE,
        "a/read:0": stepper.NodeStepper.FEED_TYPE_DUMPED_INTERMEDIATE,
    }, used_feed_types)
    self.assertGreaterEqual(len(node_names), 3)
    self.assertIn("d", node_names)
    index_d = node_names.index("d")
    self.assertEqual(index_d, node_pointer)
    self.assertIn(stepper_cli.NodeStepperCLI.STATE_CONT,
                  stat_labels[index_d])
def testPrefetchToTwoDevicesInAList(self):
  """Prefetches range(10) to CPU and GPU and fully drains both devices.

  Each step yields one element per target device, so five steps consume
  all ten elements and the next step raises OutOfRangeError.

  Fix: replaces the deprecated ``assertEquals`` alias with ``assertEqual``.
  """
  if not test_util.is_gpu_available():
    self.skipTest("No GPU available")

  host_dataset = dataset_ops.Dataset.range(10)
  device_dataset = host_dataset.apply(
      prefetching_ops_v2.prefetch_to_devices(["/cpu:0", "/gpu:0"]))
  iterator = device_dataset.make_one_shot_iterator()
  next_element = iterator.get_next()

  output = []
  with self.test_session() as sess:
    for _ in range(5):
      result = sess.run(next_element)
      # One element per device per step.
      self.assertEqual(2, len(result))
      output.extend(result)
    self.assertEqual(set(range(10)), set(output))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)
def testPrefetchToTwoDevicesInAList(self):
  """Prefetches range(10) to CPU and GPU; consumes four paired steps.

  Fix: replaces the deprecated ``assertEquals`` alias with ``assertEqual``.
  """
  if not test_util.is_gpu_available():
    self.skipTest("No GPU available")

  host_dataset = dataset_ops.Dataset.range(10)
  device_dataset = host_dataset.apply(
      prefetching_ops_v2.prefetch_to_devices(["/cpu:0", "/gpu:0"]))
  iterator = device_dataset.make_one_shot_iterator()
  next_element = iterator.get_next()

  output = []
  # TODO(rohanj): Modify test to go till the end of the dataset when we
  # switch to MultiDeviceIterator.
  with self.cached_session() as sess:
    for _ in range(4):
      result = sess.run(next_element)
      # One element per device per step.
      self.assertEqual(2, len(result))
      output.extend(result)
    self.assertEqual(set(range(8)), set(output))
def testPrefetchToTwoDevicesWithReinit(self):
  """Reinitializing the iterator restarts prefetching to both devices."""
  if not test_util.is_gpu_available():
    self.skipTest("No GPU available")

  prefetched = dataset_ops.Dataset.range(10).apply(
      prefetching_ops_v2.prefetch_to_devices(["/cpu:0", "/gpu:0"]))
  iterator = prefetched.make_initializable_iterator()
  next_element = iterator.get_next()

  # TODO(rohanj): Modify test to go till the end of the dataset when we
  # switch to MultiDeviceIterator.
  with self.cached_session() as sess:
    # Two passes: initialize, then pull four paired steps each time.
    for _ in range(2):
      sess.run(iterator.initializer)
      for _ in range(4):
        sess.run(next_element)
def test_max_norm_nontrivial(self):
  """embedding_lookup with max_norm=2.0 rescales each row to that norm."""
  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    table = de.get_variable("t320",
                            dtypes.int64,
                            dtypes.float32,
                            initializer=2.0,
                            dim=2)
    row_values = constant_op.constant([[2.0, 4.0], [3.0, 1.0]])
    row_ids = constant_op.constant([0, 1], dtype=dtypes.int64)
    self.evaluate(table.upsert(row_ids, row_values))

    unclipped = de.embedding_lookup(table, row_ids)
    clipped = de.embedding_lookup(table, row_ids, max_norm=2.0)

    # Normalize each row to unit norm; the capped lookup should equal the
    # unit rows scaled by the max_norm of 2.
    row_norms = math_ops.sqrt(
        math_ops.reduce_sum(unclipped * unclipped, axis=1))
    unit_rows = unclipped / array_ops.stack([row_norms, row_norms], axis=1)
    self.assertAllEqual(clipped.eval(), 2 * self.evaluate(unit_rows))
def testNcclBroadcastDoubleSend(self):
  """Two broadcast_send ops in one group/instance must fail.

  Every device in the group issues a send, so the collective has more
  than one source and the runtime reports an InternalError.
  """
  tensor_value = [0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1]
  group_key = 1
  instance_key = 1
  devices = ['/GPU:{}'.format(i) for i in range(self._group_size)]

  with self.session(config=self._configure()) as sess:
    if not test_util.is_gpu_available(cuda_only=True):
      self.skipTest('No GPU available')
    collectives = []
    for device in devices:
      with ops.device(device):
        t = constant_op.constant(tensor_value)
        collectives.append(
            collective_ops.broadcast_send(t, t.shape, t.dtype,
                                          self._group_size, group_key,
                                          instance_key))
    with self.assertRaisesRegexp(errors.InternalError,
                                 'already has source'):
      sess.run(collectives)
def testPrefetchToTwoDevicesWithReinit(self):
  """The prefetching iterator can be drained, reinitialized, and drained again."""
  if not test_util.is_gpu_available():
    self.skipTest("No GPU available")

  prefetched = dataset_ops.Dataset.range(10).apply(
      prefetching_ops_v2.prefetch_to_devices(["/cpu:0", "/gpu:0"]))
  iterator = prefetched.make_initializable_iterator()
  next_element = iterator.get_next()

  with self.test_session() as sess:
    # First pass: consume all five paired steps and hit end-of-sequence.
    sess.run(iterator.initializer)
    for _ in range(5):
      sess.run(next_element)
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)
    # Second pass: reinitialize and consume the full dataset again.
    sess.run(iterator.initializer)
    for _ in range(5):
      sess.run(next_element)