def testTypePromotion(self):
  x_np = np.ones([1, 2], dtype=np.int16) + np.ones([2, 1], dtype=np.uint8)
  x_onp = onp.ones([1, 2], dtype=onp.int16) + onp.ones([2, 1], dtype=onp.uint8)
  self.assertEqual(x_onp.dtype, x_np.dtype)
  self.assertAllClose(x_onp, x_np)
def test_tf_conv_general_dilated(self, lhs_shape, rhs_shape, strides, padding,
                                 lhs_dilation, rhs_dilation,
                                 feature_group_count, batch_group_count,
                                 dimension_numbers, perms):
  tf.print("dimension_numbers: {}".format(dimension_numbers),
           output_stream=sys.stdout)
  lhs_perm, rhs_perm = perms  # permute to compatible shapes
  lhs_tf = tfnp.transpose(tfnp.ones(lhs_shape), lhs_perm)
  rhs_tf = tfnp.transpose(tfnp.ones(rhs_shape), rhs_perm)
  lhs_jax = jnp.transpose(jnp.ones(lhs_shape), lhs_perm)
  rhs_jax = jnp.transpose(jnp.ones(rhs_shape), rhs_perm)
  jax_conv = jax.lax.conv_general_dilated(
      lhs_jax, rhs_jax, strides, padding, lhs_dilation, rhs_dilation,
      dimension_numbers, feature_group_count, batch_group_count)
  tf_conv = lax.conv_general_dilated(
      lhs_tf, rhs_tf, strides, padding, jax_conv.shape, lhs_dilation,
      rhs_dilation, dimension_numbers, feature_group_count, batch_group_count)
  self.assertAllEqual(tf_conv, tfnp.asarray(jax_conv))
def testDevice(self):
  if tf.test.is_gpu_available():
    with tf.device('GPU:0'):
      x = np.ones([1, 2])
    self.assertIn('GPU', tf.convert_to_tensor(x).device)
  with tf.device('CPU:0'):
    x = np.ones([1, 2])
  self.assertIn('CPU', tf.convert_to_tensor(x).device)
def testSize(self):
  def run_test(arr):
    onp_arr = arr.numpy() if isinstance(arr, tf.Tensor) else arr
    print(onp_arr)
    self.assertEqual(np_size(arr), onp.size(onp_arr))

  run_test(np.array([1]))
  run_test(np.array([1, 2, 3, 4, 5]))
  run_test(np.ones((2, 3, 2)))
  run_test(np.ones((3, 2)))
  run_test(np.zeros((5, 6, 7)))
  run_test(1)
  run_test(onp.ones((3, 2, 1)))
  run_test(tf.constant(5))
  run_test(tf.constant([1, 1, 1]))
def testFunction(self):
  @tf.function
  def f(x, y):
    return np.sum(x + y)

  x_np = f(np.ones([1, 2]), tf.ones([2, 1]))
  x_onp = onp.sum(onp.ones([1, 2]) + onp.ones([2, 1]))
  self.assertAllClose(x_onp, x_np)
def ntk_fn(x1: np.ndarray,
           x2: Optional[np.ndarray],
           params: PyTree,
           keys: Union[PRNGKey, Tuple[PRNGKey, PRNGKey], np.ndarray] = None,
           **apply_fn_kwargs) -> np.ndarray:
  """Computes a single sample of the empirical NTK (implicit differentiation).

  Args:
    x1: first batch of inputs.
    x2: second batch of inputs. `x2=None` means `x2=x1`. `f(x2)` must have a
      matching shape with `f(x1)` on `trace_axes` and `diagonal_axes`.
    params: A `PyTree` of parameters about which we would like to compute the
      neural tangent kernel.
    keys: `None` or a PRNG key or a tuple of PRNG keys or a (2, 2) array of
      dtype `uint32`. If `keys=None`, then the function `f` is deterministic
      and requires no PRNG key; else if `keys` is a single PRNG key, then `x1`
      and `x2` must be the same and share the same PRNG key; else `x1` and
      `x2` use two different PRNG keys.
    **apply_fn_kwargs: keyword arguments passed to `apply_fn`.

  Returns:
    A single sample of the empirical NTK. The shape of the kernel is "almost"
    `zip(f(x1).shape, f(x2).shape)` except for:
    1) `trace_axes` are absent as they are contracted over.
    2) `diagonal_axes` are present only once.
    All other axes are present twice.
  """
  key1, key2 = _read_keys(keys)
  # TODO(xlc): find a good way to check utils.x1_is_x2(x1, x2) == (key1 == key2)

  f1 = _get_f_params(f, x1, key1, **apply_fn_kwargs)
  f2 = f1 if x2 is None else _get_f_params(f, x2, key2, **apply_fn_kwargs)

  def delta_vjp_jvp(delta):
    def delta_vjp(delta):
      return vjp(f2, params)[1](delta)
    return _jvp(f1, _tf_to_np((params,)), delta_vjp(delta))[1]

  # Since we are taking the Jacobian of a linear function (which does not
  # depend on its coefficients), it is more efficient to substitute fx_dummy
  # for the outputs of the network. fx_dummy has the same shape as the output
  # of the network on a single piece of input data.
  fx2_struct = eval_on_shapes(f2)(params)
  fx_dummy = np.ones(fx2_struct.shape, dtype=tf.float32)

  # ntk = jacobian(delta_vjp_jvp)(fx_dummy)
  with tf.GradientTape() as tape:
    tape.watch(fx_dummy.data)
    y = delta_vjp_jvp(fx_dummy.data)
  ntk = np.array(tape.jacobian(y, fx_dummy.data))
  return _index_and_contract(ntk, trace_axes, diagonal_axes)
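# A minimal, self-contained sketch (plain TensorFlow; the matrix M and the
# helper g below are hypothetical, not part of the library) of why `fx_dummy`
# can stand in for the real network outputs above: for a linear map
# g(delta) = M @ delta, the Jacobian with respect to delta is M no matter
# where it is evaluated, so a ones-filled dummy of the right shape suffices.
import tensorflow as tf

M = tf.constant([[1., 2., 3.],
                 [4., 5., 6.]])        # stands in for J(f1) @ J(f2)^T

def g(delta):
  return tf.linalg.matvec(M, delta)    # linear in `delta`

dummy = tf.ones([3])                   # plays the role of `fx_dummy`
with tf.GradientTape() as tape:
  tape.watch(dummy)
  y = g(dummy)
jac = tape.jacobian(y, dummy)          # recovers M exactly
tf.debugging.assert_near(jac, M)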
def testPForInterop(self):
  def outer_product(a):
    return np.tensordot(a, a, 0)

  batch_size = 100
  a = np.ones((batch_size, 32, 32))
  c = tf.vectorized_map(outer_product, a)
  self.assertIsInstance(c, np.ndarray)
  self.assertEqual(c.shape, (batch_size, 32, 32, 32, 32))
def testPForInterop(self):
  def outer_product(a):
    return np.tensordot(a, a, 0)

  batch_size = 100
  a = np.ones((batch_size, 32, 32))
  c = tf.vectorized_map(outer_product, a)
  # TODO(nareshmodi): vectorized_map doesn't rewrap tensors in ndarray.
  # self.assertIsInstance(c, np.ndarray)
  self.assertEqual(c.shape, (batch_size, 32, 32, 32, 32))
def ntk_fn(x1: np.ndarray,
           x2: Optional[np.ndarray],
           params: PyTree,
           **apply_fn_kwargs) -> np.ndarray:
  """Computes a single sample of the empirical NTK (implicit differentiation).

  Args:
    x1: first batch of inputs.
    x2: second batch of inputs. `x2=None` means `x2=x1`. `f(x2)` must have a
      matching shape with `f(x1)` on `trace_axes` and `diagonal_axes`.
    params: A `PyTree` of parameters about which we would like to compute the
      neural tangent kernel.
    **apply_fn_kwargs: keyword arguments passed to `apply_fn`.
      `apply_fn_kwargs` will be split into `apply_fn_kwargs1` and
      `apply_fn_kwargs2` by the `_split_kwargs` function, which will be passed
      to `apply_fn`. In particular, the rng key in `apply_fn_kwargs` will be
      split into two different (if `x1 != x2`) or same (if `x1 == x2`) rng
      keys. See the `_read_keys` function for more details.

  Returns:
    A single sample of the empirical NTK. The shape of the kernel is "almost"
    `zip(f(x1).shape, f(x2).shape)` except for:
    1) `trace_axes` are absent as they are contracted over.
    2) `diagonal_axes` are present only once.
    All other axes are present twice.
  """
  apply_fn_kwargs1, apply_fn_kwargs2 = _split_kwargs(apply_fn_kwargs, x1, x2)

  f1 = _get_f_params(f, x1, **apply_fn_kwargs1)
  f2 = f1 if x2 is None else _get_f_params(f, x2, **apply_fn_kwargs2)

  def delta_vjp_jvp(delta):
    def delta_vjp(delta):
      return vjp(f2, params)[1](delta)
    return _jvp(f1, _tf_to_np((params,)), delta_vjp(delta))[1]

  # Since we are taking the Jacobian of a linear function (which does not
  # depend on its coefficients), it is more efficient to substitute fx_dummy
  # for the outputs of the network. fx_dummy has the same shape as the output
  # of the network on a single piece of input data.
  fx2_struct = eval_on_shapes(f2)(params)
  fx_dummy = np.ones(fx2_struct.shape, fx2_struct.dtype)

  with tf.GradientTape() as tape:
    tape.watch(fx_dummy.data)
    y = delta_vjp_jvp(fx_dummy.data)
  ntk = np.asarray(tape.jacobian(y, fx_dummy.data))
  return _trace_and_diagonal(ntk, trace_axes, diagonal_axes)
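# A rough sketch of the rng handling the docstring above describes; this is an
# assumption, not the library's private `_split_kwargs`: a (hypothetical) 'rng'
# entry is reused as-is when x2 is None (i.e. x1 == x2) and split into two
# independent stateless seeds otherwise. Assumes TF >= 2.4 for stateless_split.
import tensorflow as tf

def split_rng_kwargs(kwargs, x2):
  kwargs1, kwargs2 = dict(kwargs), dict(kwargs)
  if 'rng' in kwargs and x2 is not None:
    key1, key2 = tf.unstack(
        tf.random.experimental.stateless_split(kwargs['rng'], num=2))
    kwargs1['rng'], kwargs2['rng'] = key1, key2
  return kwargs1, kwargs2

# Example: a shape-[2] integer seed, as used by tf.random.stateless_* ops.
kw1, kw2 = split_rng_kwargs({'rng': tf.constant([0, 42])}, x2=None)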
def rescale(outputs, inputs, spec):
  if spec is None:
    non_spatial_axes = 0, inputs.ndim - 1
  else:
    non_spatial_axes = spec.index('N'), spec.index('C')

  spatial_shape = tuple(inputs.shape[i]
                        for i in range(inputs.ndim)
                        if i not in non_spatial_axes)
  one = tfnp.ones(spatial_shape, dtype=inputs.dtype)
  window_sizes = lax.reduce_window(one, 0., tfnp.add, dims, strides, padding)
  for i in sorted(non_spatial_axes):
    window_sizes = tfnp.expand_dims(window_sizes, i)

  return outputs / window_sizes
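# A small numpy sketch (hypothetical 1-D example, not library code) of what
# `rescale` computes: with SAME padding, border positions cover fewer input
# elements, so dividing a sum-pooled output by the per-position window sizes
# (a reduce_window over an array of ones) turns it into a correct average pool.
import numpy as onp

def sum_pool_same(v, width=3):
  padded = onp.pad(v, width // 2)   # zero-pad both ends
  return onp.array([padded[i:i + width].sum() for i in range(len(v))])

x = onp.array([1., 2., 3., 4.])
sums = sum_pool_same(x)                           # [3., 6., 9., 7.]
window_sizes = sum_pool_same(onp.ones_like(x))    # [2., 3., 3., 2.]
avg = sums / window_sizes                         # [1.5, 2., 3., 3.5]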
def run_test(*args):
  num_samples = 1000
  tol = 0.1  # High tolerance to keep the # of samples low else the test
             # takes a long time to run.
  np_random.seed(10)
  outputs = [np_random.randn(*args) for _ in range(num_samples)]

  # Test output shape.
  for output in outputs:
    self.assertEqual(output.shape, tuple(args))
    default_dtype = (
        np.float64 if np_dtypes.is_allow_float64() else np.float32)
    self.assertEqual(output.dtype.as_numpy_dtype, default_dtype)

  if np.prod(args):  # Don't bother with empty arrays.
    outputs = [output.tolist() for output in outputs]

    # Test that the properties of normal distribution are satisfied.
    mean = np.mean(outputs, axis=0)
    stddev = np.std(outputs, axis=0)
    self.assertAllClose(mean, np.zeros(args), atol=tol)
    self.assertAllClose(stddev, np.ones(args), atol=tol)

    # Test that outputs are different with different seeds.
    np_random.seed(20)
    diff_seed_outputs = [
        np_random.randn(*args).tolist() for _ in range(num_samples)
    ]
    self.assertNotAllClose(outputs, diff_seed_outputs)

    # Test that outputs are the same with the same seed.
    np_random.seed(10)
    same_seed_outputs = [
        np_random.randn(*args).tolist() for _ in range(num_samples)
    ]
    self.assertAllClose(outputs, same_seed_outputs)
def testOnpInterop(self):
  x_np = onp.sum(np.ones([1, 2]) + onp.ones([2, 1]))
  x_onp = onp.sum(onp.ones([1, 2]) + onp.ones([2, 1]))
  self.assertAllClose(x_onp, x_np)
def testBroadcastAdd(self):
  x_np = np.ones([2, 1]) + np.ones([1, 2])
  x_onp = onp.ones([2, 1]) + onp.ones([1, 2])
  self.assertAllClose(x_onp, x_np)
def test_broadcast(self, low_shape, high_shape, size):
  low = np.zeros(low_shape).astype(np.float64)
  high = np.ones(high_shape).astype(np.float64)
  self._test(low=low, high=high, size=size)
def benchmark_tf_np_tf_function_mlp_inference_batch_1_cpu(self):
  with tf.device('/CPU:0'):
    model = tf_numpy_mlp.MLP()
    x = tfnp.ones(shape=(1, 10)).astype(np.float32)
    self._benchmark_and_report(
        self._get_name(),
        tf.function(lambda: model.inference(x)))