def test_crf_log_likelihood(dtype):
    inputs = np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype)
    transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype)
    sequence_lengths = np.array(3, dtype=np.int32)

    num_words = inputs.shape[0]
    num_tags = inputs.shape[1]

    all_sequence_log_likelihoods = []

    # Make sure all probabilities sum to 1.
    for tag_indices in itertools.product(range(num_tags), repeat=sequence_lengths):
        tag_indices = list(tag_indices)
        tag_indices.extend([0] * (num_words - sequence_lengths))
        sequence_log_likelihood, _ = text.crf_log_likelihood(
            inputs=tf.expand_dims(inputs, 0),
            tag_indices=tf.expand_dims(tag_indices, 0),
            sequence_lengths=tf.expand_dims(sequence_lengths, 0),
            transition_params=tf.constant(transition_params),
        )
        all_sequence_log_likelihoods.append(sequence_log_likelihood)
    total_log_likelihood = tf.reduce_logsumexp(all_sequence_log_likelihoods)
    test_utils.assert_allclose_according_to_type(
        total_log_likelihood, 0.0, rtol=1e-6, atol=1e-6, half_rtol=2e-3, half_atol=2e-3
    )

    # Check that omitting `transition_params` (i.e. `transition_params=None`)
    # does not raise an error.
    text.crf_log_likelihood(
        inputs=tf.expand_dims(inputs, 0),
        tag_indices=tf.expand_dims(tag_indices, 0),
        sequence_lengths=tf.expand_dims(sequence_lengths, 0),
    )

def test_op_forward_pass(dtype):
    np.random.seed(0)
    data_width = 7
    data_height = 9
    data_channels = 5
    warp_width = 4
    warp_height = 8
    batch_size = 10

    warp = _make_warp(batch_size, warp_height, warp_width, dtype)
    data_shape = (batch_size, data_height, data_width, data_channels)
    data = np.random.rand(*data_shape).astype(dtype)
    data_ph = tf.constant(data)
    warp_ph = tf.constant(warp)
    outputs = resampler_ops.resampler(data=data_ph, warp=warp_ph)
    assert outputs.shape == (10, warp_height, warp_width, data_channels)

    # Generate reference output via bilinear interpolation in numpy.
    reference_output = np.zeros_like(outputs)
    for batch in range(batch_size):
        for c in range(data_channels):
            reference_output[batch, :, :, c] = _bilinearly_interpolate(
                data[batch, :, :, c], warp[batch, :, :, 0], warp[batch, :, :, 1]
            )

    test_utils.assert_allclose_according_to_type(
        outputs, reference_output, half_rtol=5e-3, half_atol=5e-3
    )

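# The resampler tests rely on helpers defined elsewhere in this test module.
# As a reference, here is a minimal sketch of what `_bilinearly_interpolate`
# could look like for a single 2-D channel; the `_sketch` name and the edge
# handling (index clipping) are assumptions, not necessarily the module's
# exact helper.
def _bilinearly_interpolate_sketch(data, x, y):
    """Samples `data` (height x width) at float coordinates (x, y)."""
    x0 = np.floor(x).astype(np.int64)
    x1 = x0 + 1
    y0 = np.floor(y).astype(np.int64)
    y1 = y0 + 1

    # Clip the gather indices so they stay inside the image.
    x0c = np.clip(x0, 0, data.shape[1] - 1)
    x1c = np.clip(x1, 0, data.shape[1] - 1)
    y0c = np.clip(y0, 0, data.shape[0] - 1)
    y1c = np.clip(y1, 0, data.shape[0] - 1)

    # Corner values and bilinear weights (weights use the unclipped coords).
    value_a = data[y0c, x0c]
    value_b = data[y1c, x0c]
    value_c = data[y0c, x1c]
    value_d = data[y1c, x1c]
    weight_a = (x1 - x) * (y1 - y)
    weight_b = (x1 - x) * (y - y0)
    weight_c = (x - x0) * (y1 - y)
    weight_d = (x - x0) * (y - y0)
    return (
        weight_a * value_a
        + weight_b * value_b
        + weight_c * value_c
        + weight_d * value_d
    )
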
def test_minimize_sparse_resource_variable_frobenius(dtype, device):
    if "gpu" in device and dtype == tf.float16:
        pytest.xfail("See https://github.com/tensorflow/addons/issues/347")
    var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)

    def loss():
        x = tf.constant([[4.0], [5.0]], dtype=dtype)
        pred = tf.matmul(tf.nn.embedding_lookup([var0], [0]), x)
        return pred * pred

    # Gradient of the loss above, evaluated at the initial var0.
    grads0_0 = 32 * 1.0 + 40 * 2.0
    grads0_1 = 40 * 1.0 + 50 * 2.0
    grads0 = tf.constant([[grads0_0, grads0_1]], dtype=dtype)
    norm0 = tf.math.reduce_sum(grads0 ** 2) ** 0.5

    learning_rate = 0.1
    lambda_ = 0.1
    ord = "fro"
    opt = cg_lib.ConditionalGradient(
        learning_rate=learning_rate, lambda_=lambda_, ord=ord
    )
    _ = opt.minimize(loss, var_list=[var0])

    test_utils.assert_allclose_according_to_type(
        [
            [
                1.0 * learning_rate
                - (1 - learning_rate) * lambda_ * grads0_0 / norm0,
                2.0 * learning_rate
                - (1 - learning_rate) * lambda_ * grads0_1 / norm0,
            ]
        ],
        var0.numpy(),
    )

def test_minimize_with_2D_indices_for_embedding_lookup_nuclear():
    # This test invokes the ResourceSparseApplyConditionalGradient
    # operation.
    var0 = tf.Variable(tf.ones([2, 2]))

    def loss():
        return tf.math.reduce_sum(tf.nn.embedding_lookup(var0, [[1]]))

    # Gradient of this loss function:
    grads0 = tf.constant([[0, 0], [1, 1]], dtype=tf.float32)
    top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(grads0)

    learning_rate = 0.1
    lambda_ = 0.1
    ord = "nuclear"
    opt = cg_lib.ConditionalGradient(
        learning_rate=learning_rate, lambda_=lambda_, ord=ord
    )
    # Run 1 step of the conditional gradient optimizer.
    _ = opt.minimize(loss, var_list=[var0])

    test_utils.assert_allclose_according_to_type(
        [
            learning_rate * 1
            - (1 - learning_rate) * lambda_ * top_singular_vector0[1][0],
            learning_rate * 1
            - (1 - learning_rate) * lambda_ * top_singular_vector0[1][1],
        ],
        var0[1],
    )

def test_crf_sequence_score(dtype):
    transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype)
    # Test both the length-1 and regular cases.
    sequence_lengths_list = [
        np.array(3, dtype=np.int32),
        np.array(1, dtype=np.int32),
    ]
    inputs_list = [
        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype),
        np.array([[4, 5, -3]], dtype=dtype),
    ]
    tag_indices_list = [
        np.array([1, 2, 1, 0], dtype=np.int32),
        np.array([1], dtype=np.int32),
    ]
    for sequence_lengths, inputs, tag_indices in zip(
        sequence_lengths_list, inputs_list, tag_indices_list
    ):
        sequence_score = text.crf_sequence_score(
            inputs=tf.expand_dims(inputs, 0),
            tag_indices=tf.expand_dims(tag_indices, 0),
            sequence_lengths=tf.expand_dims(sequence_lengths, 0),
            transition_params=tf.constant(transition_params),
        )
        sequence_score = tf.squeeze(sequence_score, [0])
        expected_sequence_score = calculate_sequence_score(
            inputs, transition_params, tag_indices, sequence_lengths
        )
        test_utils.assert_allclose_according_to_type(
            sequence_score, expected_sequence_score
        )

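# `calculate_sequence_score` is defined elsewhere in this module. A minimal
# sketch of the brute-force score it computes (unary emission scores plus
# pairwise transition scores along the tagged path); the `_sketch` suffix
# marks this as an illustrative assumption rather than the module's helper:
def calculate_sequence_score_sketch(
    inputs, transition_params, tag_indices, sequence_lengths
):
    # Sum of the emission score for each chosen tag.
    unary_score = sum(inputs[i][tag_indices[i]] for i in range(sequence_lengths))
    # Sum of the transition score for each consecutive tag pair.
    binary_score = sum(
        transition_params[tag_indices[i], tag_indices[i + 1]]
        for i in range(sequence_lengths - 1)
    )
    return unary_score + binary_score
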
def test_shear_y(dtype):
    image = np.random.randint(low=0, high=255, size=(4, 4, 3)).astype(
        dtype.as_numpy_dtype
    )
    color = tf.constant([255, 0, 255], tf.int32)
    level = tf.random.uniform(shape=(), minval=0, maxval=1)

    tf_image = tf.constant(image)
    sheared_img = transform_ops.shear_y(image=tf_image, level=level, replace=color)
    transform_matrix = transform.AffineTransform(
        np.array([[1, 0, 0], [level.numpy(), 1, 0], [0, 0, 1]])
    )

    if dtype == tf.uint8:
        # uint8 can't represent cval=-1, so use int32 for the reference image.
        image = image.astype(np.int32)
    expected_img = transform.warp(
        image, transform_matrix, order=0, cval=-1, preserve_range=True
    )

    mask = np.where(expected_img == -1)
    expected_img[mask[0], mask[1], :] = color

    test_utils.assert_allclose_according_to_type(sheared_img.numpy(), expected_img)

def test_with_integer():
    boxes1 = tf.constant([[4, 3, 7, 5], [5, 6, 10, 7]], dtype=tf.int32)
    boxes2 = tf.constant([[3, 4, 6, 8], [14, 14, 15, 15]], dtype=tf.int32)
    expected_result = tf.constant(
        [1.07500000298023224, 1.9333333373069763], dtype=tf.float32
    )
    loss = giou_loss(boxes1, boxes2)
    test_utils.assert_allclose_according_to_type(loss, expected_result)

def test_forward(input_shape, input_dim, dtype, indices_dtype, combiner):
    indices = np.random.randint(low=0, high=input_dim, size=input_shape).astype(
        indices_dtype
    )
    params = np.random.random(size=(input_dim, 16)).astype(dtype)
    if combiner == "sum":
        weights = np.random.random(size=indices.shape).astype(dtype)
    else:
        weights = None
    expected = manual_embedding_bag(indices, params, weights, combiner=combiner)
    embedding_bag = EmbeddingBag(input_dim, 16, combiner=combiner, dtype=dtype)
    embedding_bag.build(indices.shape)
    embedding_bag.set_weights([params])

    indices = tf.convert_to_tensor(indices)
    if weights is not None:
        weights = tf.convert_to_tensor(weights)
    output = embedding_bag(indices, weights)

    test_utils.assert_allclose_according_to_type(
        expected, output, half_rtol=1e-2, half_atol=1e-2
    )

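# `manual_embedding_bag` is the numpy reference used above. A minimal sketch,
# assuming the same "sum"/"mean" combiner semantics as the EmbeddingBag layer
# under test (the `_sketch` name marks this as illustrative):
def manual_embedding_bag_sketch(indices, params, weights=None, combiner="sum"):
    gathered = params[indices]  # shape: (*indices.shape, embedding_dim)
    if weights is not None:
        gathered = gathered * weights[..., None]
    if combiner == "sum":
        return gathered.sum(axis=-2)
    if combiner == "mean":
        return gathered.mean(axis=-2)
    raise ValueError(f"Unknown combiner: {combiner}")
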
def test_op_backward_pass(dtype):
    np.random.seed(13)
    data_width = 5
    data_height = 4
    data_channels = 3
    warp_width = 2
    warp_height = 6
    batch_size = 3

    warp = _make_warp(batch_size, warp_height, warp_width, dtype)
    data_shape = (batch_size, data_height, data_width, data_channels)
    data = np.random.rand(*data_shape).astype(dtype)
    data_tensor = tf.constant(data)
    warp_tensor = tf.constant(warp)
    theoretical, _ = tf.test.compute_gradient(
        resampler_ops.resampler, [data_tensor, warp_tensor]
    )
    # Compute the numerical gradient in float64 for better stability, then
    # compare it against the theoretical gradient computed in `dtype`.
    data_tensor_64 = tf.constant(data, dtype=tf.float64)
    warp_tensor_64 = tf.constant(warp, dtype=tf.float64)
    _, numerical_64 = tf.test.compute_gradient(
        resampler_ops.resampler, [data_tensor_64, warp_tensor_64]
    )

    for t, n in zip(theoretical, numerical_64):
        test_utils.assert_allclose_according_to_type(
            t, n, float_rtol=5e-5, float_atol=5e-5
        )

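# `_make_warp` builds the batch of sampling coordinates used by both
# resampler tests. A plausible sketch: a regular (x, y) grid plus random
# jitter; the exact perturbation scheme in the real helper may differ, and
# the `_sketch` suffix marks this as an assumption.
def _make_warp_sketch(batch_size, warp_height, warp_width, dtype):
    x, y = np.meshgrid(
        np.linspace(0, warp_width - 1, warp_width),
        np.linspace(0, warp_height - 1, warp_height),
    )
    warp = np.stack([x, y], axis=-1)  # (warp_height, warp_width, 2)
    warp = np.tile(warp[np.newaxis], [batch_size, 1, 1, 1])
    warp += np.random.randn(*warp.shape)  # jitter every coordinate
    return warp.astype(dtype)
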
def test_viterbi_decode(dtype):
    inputs = np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype)
    transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype)
    sequence_lengths = np.array(3, dtype=np.int32)

    num_words = inputs.shape[0]
    num_tags = inputs.shape[1]

    all_sequence_scores = []
    all_sequences = []

    # Compare the dynamic program with brute force computation.
    for tag_indices in itertools.product(range(num_tags), repeat=sequence_lengths):
        tag_indices = list(tag_indices)
        tag_indices.extend([0] * (num_words - sequence_lengths))
        all_sequences.append(tag_indices)
        sequence_score = text.crf_sequence_score(
            inputs=tf.expand_dims(inputs, 0),
            tag_indices=tf.expand_dims(tag_indices, 0),
            sequence_lengths=tf.expand_dims(sequence_lengths, 0),
            transition_params=tf.constant(transition_params),
        )
        sequence_score = tf.squeeze(sequence_score, [0])
        all_sequence_scores.append(sequence_score)

    expected_max_sequence_index = np.argmax(all_sequence_scores)
    expected_max_sequence = all_sequences[expected_max_sequence_index]
    expected_max_score = all_sequence_scores[expected_max_sequence_index]

    actual_max_sequence, actual_max_score = text.viterbi_decode(
        inputs[:sequence_lengths], transition_params
    )

    test_utils.assert_allclose_according_to_type(actual_max_score, expected_max_score)
    assert actual_max_sequence == expected_max_sequence[:sequence_lengths]

def test_gaussian_filter2d_different_sigma():
    image = np.arange(40 * 40).reshape(40, 40).astype(np.float32)
    sigma = [1.0, 2.0]
    test_utils.assert_allclose_according_to_type(
        gaussian_filter2d(image, [9, 17], sigma).numpy(),
        gaussian_filter(image, sigma, mode="mirror"),
    )

def test_iou(dtype):
    boxes1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]], dtype=dtype)
    boxes2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]], dtype=dtype)
    expected_result = tf.constant([0.875, 1.0], dtype=dtype)
    loss = giou_loss(boxes1, boxes2, mode="iou")
    test_utils.assert_allclose_according_to_type(loss, expected_result)

def test_sparsemax_of_zero(dtype):
    """Check sparsemax proposition 1, part 1: sparsemax of all zeros is uniform."""
    z = np.zeros((1, 10))

    tf_sparsemax_out = sparsemax(z.astype(dtype))
    np_sparsemax = np.ones_like(z, dtype=dtype) / z.size

    test_utils.assert_allclose_according_to_type(np_sparsemax, tf_sparsemax_out)

def test_sharing_frobenius(dtype):
    var0 = tf.Variable([1.0, 2.0], dtype=dtype)
    var1 = tf.Variable([3.0, 4.0], dtype=dtype)
    grads0 = tf.constant([0.1, 0.1], dtype=dtype)
    grads1 = tf.constant([0.01, 0.01], dtype=dtype)
    norm0 = tf.math.reduce_sum(grads0 ** 2) ** 0.5
    norm1 = tf.math.reduce_sum(grads1 ** 2) ** 0.5
    learning_rate = 0.1
    lambda_ = 0.1
    ord = "fro"
    cg_opt = cg_lib.ConditionalGradient(
        learning_rate=learning_rate, lambda_=lambda_, ord=ord
    )
    _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

    # Check we have slots.
    assert ["conditional_gradient"] == cg_opt.get_slot_names()
    slot0 = cg_opt.get_slot(var0, "conditional_gradient")
    assert slot0.get_shape() == var0.get_shape()
    slot1 = cg_opt.get_slot(var1, "conditional_gradient")
    assert slot1.get_shape() == var1.get_shape()

    # In eager mode, each call to `apply_gradients` executes the update
    # immediately, so the intermediate parameter values cannot be inspected
    # here. We therefore only check the final values after the second step.
    # Step 2: the second conditional gradient update contains the previous one.
    cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

    # Check that the parameters have been updated.
    test_utils.assert_allclose_according_to_type(
        np.array(
            [
                (1.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.1 / norm0)
                * learning_rate
                - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                (2.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.1 / norm0)
                * learning_rate
                - (1 - learning_rate) * lambda_ * 0.1 / norm0,
            ]
        ),
        var0.numpy(),
    )
    test_utils.assert_allclose_according_to_type(
        np.array(
            [
                (3.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.01 / norm1)
                * learning_rate
                - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                (4.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.01 / norm1)
                * learning_rate
                - (1 - learning_rate) * lambda_ * 0.01 / norm1,
            ]
        ),
        var1.numpy(),
    )

def test_crf_decode(dtype):
    transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype)
    # Test both the length-1 and regular cases.
    sequence_lengths_list = [
        np.array(3, dtype=np.int32),
        np.array(1, dtype=np.int64),
    ]
    inputs_list = [
        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype),
        np.array([[-1, 2, 1]], dtype=dtype),
    ]
    tag_indices_list = [
        np.array([1, 2, 1, 0], dtype=np.int32),
        np.array([2], dtype=np.int32),
    ]
    for sequence_lengths, inputs, tag_indices in zip(
        sequence_lengths_list, inputs_list, tag_indices_list
    ):
        num_words = inputs.shape[0]
        num_tags = inputs.shape[1]

        all_sequence_scores = []
        all_sequences = []

        # Compare the dynamic program with brute force computation.
        for tag_indices in itertools.product(
            range(num_tags), repeat=sequence_lengths
        ):
            tag_indices = list(tag_indices)
            tag_indices.extend([0] * (num_words - sequence_lengths))
            all_sequences.append(tag_indices)
            sequence_score = text.crf_sequence_score(
                inputs=tf.expand_dims(inputs, 0),
                tag_indices=tf.expand_dims(tag_indices, 0),
                sequence_lengths=tf.expand_dims(sequence_lengths, 0),
                transition_params=tf.constant(transition_params),
            )
            sequence_score = tf.squeeze(sequence_score, [0])
            all_sequence_scores.append(sequence_score)

        expected_max_sequence_index = np.argmax(all_sequence_scores)
        expected_max_sequence = all_sequences[expected_max_sequence_index]
        expected_max_score = all_sequence_scores[expected_max_sequence_index]

        actual_max_sequence, actual_max_score = text.crf_decode(
            tf.expand_dims(inputs, 0),
            tf.constant(transition_params),
            tf.expand_dims(sequence_lengths, 0),
        )
        actual_max_sequence = tf.squeeze(actual_max_sequence, [0])
        actual_max_score = tf.squeeze(actual_max_score, [0])

        test_utils.assert_allclose_according_to_type(
            actual_max_score, expected_max_score, 1e-6, 1e-6
        )
        assert (
            list(actual_max_sequence[:sequence_lengths])
            == expected_max_sequence[:sequence_lengths]
        )

def test_theoretical_gradients(dtype, approximate):
    # Only test theoretical gradients for float32 and float64
    # because of the instability of float16 while computing the Jacobian.
    x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)

    theoretical, numerical = tf.test.compute_gradient(
        lambda x: gelu(x, approximate=approximate), [x]
    )
    test_utils.assert_allclose_according_to_type(theoretical, numerical, atol=1e-4)

def test_different_shapes(dtype):
    boxes1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]], dtype=dtype)
    boxes2 = tf.constant([[3.0, 4.0, 6.0, 8.0]], dtype=dtype)
    expand_boxes1 = tf.expand_dims(boxes1, -2)
    expand_boxes2 = tf.expand_dims(boxes2, 0)
    expected_result = tf.constant([1.07500000298023224, 1.366071], dtype=dtype)
    loss = giou_loss(expand_boxes1, expand_boxes2)
    test_utils.assert_allclose_according_to_type(loss, expected_result)

def test_hardshrink(dtype):
    x = tf.constant([-2.0, -0.5, 0.0, 0.5, 2.0], dtype=dtype)
    expected_result = tf.constant([-2.0, 0.0, 0.0, 0.0, 2.0], dtype=dtype)
    test_utils.assert_allclose_according_to_type(
        _hardshrink_custom_op(x), expected_result
    )

    expected_result = tf.constant([-2.0, 0.0, 0.0, 0.0, 2.0], dtype=dtype)
    test_utils.assert_allclose_according_to_type(
        _hardshrink_custom_op(x, lower=-1.0, upper=1.0), expected_result
    )

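# `_hardshrink_custom_op` wraps the compiled kernel exercised above. For
# reference, hardshrink keeps values outside (lower, upper) and zeroes the
# rest; a pure-TensorFlow sketch (not the custom op itself):
def _hardshrink_reference(x, lower=-0.5, upper=0.5):
    mask = tf.logical_or(x < lower, x > upper)
    return tf.where(mask, x, tf.zeros_like(x))
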
def test_softshrink(dtype):
    x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)
    expected_result = tf.constant([-1.5, -0.5, 0.0, 0.5, 1.5], dtype=dtype)
    test_utils.assert_allclose_according_to_type(softshrink(x), expected_result)

    expected_result = tf.constant([-1.0, 0.0, 0.0, 0.0, 1.0], dtype=dtype)
    test_utils.assert_allclose_according_to_type(
        softshrink(x, lower=-1.0, upper=1.0), expected_result
    )

def test_crf_log_norm_zero_seq_length(dtype):
    """Test `crf_log_norm` when `sequence_lengths` contains one or more zeros."""
    inputs = tf.constant(np.ones([2, 10, 5], dtype=dtype))
    transition_params = tf.constant(np.ones([5, 5], dtype=dtype))
    sequence_lengths = tf.constant(np.zeros([2], dtype=np.int32))
    expected_log_norm = np.zeros([2], dtype=dtype)

    log_norm = text.crf_log_norm(inputs, sequence_lengths, transition_params)
    test_utils.assert_allclose_according_to_type(log_norm, expected_log_norm)

def test_giou_loss(dtype):
    boxes1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]], dtype=dtype)
    boxes2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]], dtype=dtype)
    expected_result = tf.constant(
        [1.07500000298023224, 1.9333333373069763], dtype=dtype
    )
    loss = giou_loss(boxes1, boxes2)
    test_utils.assert_allclose_according_to_type(loss, expected_result)

def test_theoretical_gradients(dtype):
    # Only test theoretical gradients for float32 and float64
    # because of the instability of float16 while computing the Jacobian.
    #
    # Hardshrink is not continuous at `lower` and `upper`, so avoid these
    # two points to keep the gradients smooth.
    x = tf.constant([-2.0, -1.5, 0.0, 1.5, 2.0], dtype=dtype)

    theoretical, numerical = tf.test.compute_gradient(_hardshrink_custom_op, [x])
    test_utils.assert_allclose_according_to_type(theoretical, numerical, atol=1e-4)

def test_gelu(dtype):
    x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)

    expected_result = tf.constant(
        [-0.04540229, -0.158808, 0.0, 0.841192, 1.9545977], dtype=dtype
    )
    test_utils.assert_allclose_according_to_type(gelu(x), expected_result)

    expected_result = tf.constant(
        [-0.04550028, -0.15865526, 0.0, 0.8413447, 1.9544997], dtype=dtype
    )
    test_utils.assert_allclose_according_to_type(gelu(x, False), expected_result)

def test_sparsemax_against_numpy(dtype):
    """check sparsemax kernel against numpy."""
    random = np.random.RandomState(1)

    z = random.uniform(low=-3, high=3, size=(test_obs, 10))

    tf_sparsemax_out = sparsemax(z.astype(dtype))
    np_sparsemax = _np_sparsemax(z).astype(dtype)

    test_utils.assert_allclose_according_to_type(np_sparsemax, tf_sparsemax_out)

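# `_np_sparsemax` is the numpy reference implementation used by the
# sparsemax tests. A sketch of the standard projection from Martins &
# Astudillo (2016), assuming a 2-D input of shape (observations, classes);
# the `_sketch` suffix marks this as illustrative:
def _np_sparsemax_sketch(z):
    z = z - np.mean(z, axis=1)[:, np.newaxis]
    # Sort each row in decreasing order and find the support size k(z).
    z_sorted = np.sort(z, axis=1)[:, ::-1]
    k = np.arange(1, z.shape[1] + 1)
    z_check = 1 + k * z_sorted > np.cumsum(z_sorted, axis=1)
    k_z = z.shape[1] - np.argmax(z_check[:, ::-1], axis=1)
    # Threshold tau(z), then project onto the simplex.
    tau_sum = np.cumsum(z_sorted, axis=1)[np.arange(z.shape[0]), k_z - 1]
    tau_z = ((tau_sum - 1) / k_z)[:, np.newaxis]
    return np.maximum(0, z - tau_z)
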
def test_crf_constrained_decode(dtype):
    transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype)
    # Test both the length-1 and regular cases.
    sequence_lengths_list = [
        np.array(3, dtype=np.int32),
        np.array(1, dtype=np.int32),
    ]
    inputs_list = [
        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype),
        np.array([[4, 5, -3]], dtype=dtype),
    ]
    # Use the builtin `bool`; the `np.bool` alias was removed in NumPy 1.24.
    tag_bitmap_list = [
        np.array(
            [
                [True, False, False],
                [False, True, True],
                [False, True, True],
                [False, True, True],
            ],
            dtype=bool,
        ),
        np.array([[False, True, True]], dtype=bool),
    ]
    for sequence_lengths, inputs, tag_bitmap in zip(
        sequence_lengths_list, inputs_list, tag_bitmap_list
    ):
        filtered_inputs = text.crf_filtered_inputs(
            inputs=tf.expand_dims(inputs, 0),
            tag_bitmap=tf.expand_dims(tag_bitmap, 0),
        )
        expected_max_sequence, expected_max_score = text.crf_decode(
            filtered_inputs,
            tf.constant(transition_params),
            tf.expand_dims(sequence_lengths, 0),
        )
        expected_max_sequence = tf.squeeze(expected_max_sequence, [0])
        expected_max_score = tf.squeeze(expected_max_score, [0])

        actual_max_sequence, actual_max_score = text.crf_constrained_decode(
            tf.expand_dims(inputs, 0),
            tf.expand_dims(tag_bitmap, 0),
            tf.constant(transition_params),
            tf.expand_dims(sequence_lengths, 0),
        )
        actual_max_sequence = tf.squeeze(actual_max_sequence, [0])
        actual_max_score = tf.squeeze(actual_max_score, [0])

        test_utils.assert_allclose_according_to_type(
            actual_max_score, expected_max_score, 1e-6, 1e-6
        )
        assert list(actual_max_sequence[:sequence_lengths]) == list(
            expected_max_sequence[:sequence_lengths]
        )

def test_basic_with_learning_rate_decay():
    for i, dtype in enumerate(_dtypes_to_test(use_gpu=test_utils.is_gpu_available())):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = 0.001
        beta_1 = 0.9
        beta_2 = 0.999
        epsilon = 1e-7
        decay = 0.5
        lamb_wd = 0.01

        opt = lamb.LAMB(
            learning_rate=learning_rate,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon,
            weight_decay=lamb_wd,
            decay=decay,
        )

        # Run 3 steps of LAMB.
        for t in range(3):
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            lr_np = learning_rate / (1 + decay * t)
            var0_np, m0, v0 = lamb_update_numpy(
                var0_np, grads0_np, t, m0, v0, lr=lr_np, lamb_wd=lamb_wd
            )
            var1_np, m1, v1 = lamb_update_numpy(
                var1_np, grads1_np, t, m1, v1, lr=lr_np, lamb_wd=lamb_wd
            )

            # Validate updated params.
            test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
            test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())

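# `lamb_update_numpy` is the reference the LAMB tests compare against. A
# minimal sketch of one LAMB step (bias-corrected Adam moments, decoupled
# weight decay, then a layer-wise trust ratio); the signature is inferred
# from the calls above and the default hyperparameters are assumptions:
def lamb_update_numpy_sketch(
    param, g_t, t, m, v, lr=0.001, lamb_wd=0.0, beta1=0.9, beta2=0.999, epsilon=1e-6
):
    m_t = beta1 * m + (1 - beta1) * g_t
    v_t = beta2 * v + (1 - beta2) * g_t * g_t
    # Bias correction for step t (0-indexed).
    m_t_hat = m_t / (1 - beta1 ** (t + 1))
    v_t_hat = v_t / (1 - beta2 ** (t + 1))
    update = m_t_hat / (np.sqrt(v_t_hat) + epsilon) + lamb_wd * param
    # Layer-wise trust ratio; fall back to 1 when either norm is zero.
    w_norm = np.linalg.norm(param)
    u_norm = np.linalg.norm(update)
    ratio = w_norm / u_norm if w_norm > 0 and u_norm > 0 else 1.0
    return param - ratio * lr * update, m_t, v_t
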
def verify_funcs_are_equivalent(dtype):
    x_np = np.random.uniform(-10, 10, size=(4, 4)).astype(dtype)
    x = tf.convert_to_tensor(x_np)

    with tf.GradientTape(persistent=True) as t:
        t.watch(x)
        y_native = tanhshrink(x)
        y_py = _tanhshrink_py(x)

    test_utils.assert_allclose_according_to_type(y_native, y_py)

    grad_native = t.gradient(y_native, x)
    grad_py = t.gradient(y_py, x)
    test_utils.assert_allclose_according_to_type(grad_native, grad_py)

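# `_tanhshrink_py` is the pure-Python reference compared against above;
# tanhshrink is simply x - tanh(x), so a sketch is one line:
def _tanhshrink_py_sketch(x):
    return x - tf.math.tanh(x)
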
def test_sparsemax_against_numpy_low_rank(dtype):
    """check sparsemax kernel against numpy."""
    random = np.random.RandomState(1)

    z = random.uniform(low=-3, high=3, size=(10,))

    tf_sparsemax_out = sparsemax(z.astype(dtype)).numpy()
    np_sparsemax = np.reshape(_np_sparsemax(np.reshape(z, [1, 10])), [10]).astype(
        dtype
    )

    test_utils.assert_allclose_according_to_type(
        np_sparsemax, tf_sparsemax_out, half_atol=5e-3
    )
    assert np_sparsemax.shape == tf_sparsemax_out.shape

def test_sparsemax_loss_positive(dtype):
    """check sparsemax-loss proposition 4."""
    random = np.random.RandomState(5)

    z = random.uniform(low=-3, high=3, size=(test_obs, 10))
    q = np.zeros((test_obs, 10))
    q[np.arange(0, test_obs), random.randint(0, 10, size=test_obs)] = 1

    tf_loss_op, tf_loss_out = _tf_sparsemax_loss(z, q, dtype)

    # The loss equals its absolute value, i.e. it is non-negative.
    test_utils.assert_allclose_according_to_type(np.abs(tf_loss_out), tf_loss_out)
    assert np.zeros(test_obs).shape == tf_loss_op.shape

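# `_tf_sparsemax_loss` is a small wrapper used by the loss tests. A sketch,
# assuming a `sparsemax_loss(logits, sparsemax, labels)` function imported
# from tensorflow_addons.losses (an assumption about this module's imports):
def _tf_sparsemax_loss_sketch(z, q, dtype):
    z = z.astype(dtype)
    q = q.astype(dtype)
    tf_loss_op = sparsemax_loss(z, sparsemax(z), q)
    return tf_loss_op, tf_loss_op.numpy()
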
def test_sparsemax_loss_against_numpy(dtype):
    """check sparsemax-loss kernel against numpy."""
    random = np.random.RandomState(1)

    z = random.uniform(low=-3, high=3, size=(test_obs, 10))
    q = np.zeros((test_obs, 10))
    q[np.arange(0, test_obs), random.randint(0, 10, size=test_obs)] = 1

    tf_loss_op, tf_loss_out = _tf_sparsemax_loss(z, q, dtype)
    np_loss = _np_sparsemax_loss(z, q).astype(dtype)

    test_utils.assert_allclose_according_to_type(np_loss, tf_loss_out)
    assert np_loss.shape == tf_loss_op.shape

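# `_np_sparsemax_loss` is the numpy reference for the sparsemax loss. A
# sketch based on the closed form
#     L(z, q) = -q^T z + 1/2 * sum_{j in S(z)} (z_j^2 - tau(z)^2) + 1/2 * ||q||^2,
# using the identity z_j^2 - tau(z)^2 = p_j * (2 * z_j - p_j) on the support;
# it reuses the module's `_np_sparsemax` helper referenced above:
def _np_sparsemax_loss_sketch(z, q):
    z = z - np.mean(z, axis=1)[:, np.newaxis]
    z_k = np.sum(q * z, axis=1)  # q^T z
    p = _np_sparsemax(z)
    support = p > 0
    support_sum = np.sum(support * p * (2 * z - p), axis=1)
    q_norm = np.sum(q, axis=1)  # q is one-hot, so ||q||^2 == sum(q)
    return -z_k + 0.5 * support_sum + 0.5 * q_norm
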