Example #1
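All of the snippets on this page are test functions excerpted from the TensorFlow Addons test suite; each exercises `test_utils.assert_allclose_according_to_type`, which picks comparison tolerances based on the tensor dtype. The excerpts omit their module-level imports and test-local helpers (names such as `_make_warp`, `_np_sparsemax`, or `test_obs` are defined elsewhere in each test module). A plausible minimal set of imports, inferred from usage rather than shown in the excerpts, is:

import itertools

import numpy as np
import pytest
import tensorflow as tf
from scipy.ndimage import gaussian_filter
from skimage import transform

from tensorflow_addons import text
from tensorflow_addons.activations import gelu, softshrink, sparsemax, tanhshrink
from tensorflow_addons.image import gaussian_filter2d, resampler_ops, transform_ops
from tensorflow_addons.layers import EmbeddingBag
from tensorflow_addons.losses import giou_loss
from tensorflow_addons.optimizers import conditional_gradient as cg_lib
from tensorflow_addons.optimizers import lamb
from tensorflow_addons.utils import test_utils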
def test_crf_log_likelihood(dtype):
    inputs = np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype)
    transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype)
    sequence_lengths = np.array(3, dtype=np.int32)

    num_words = inputs.shape[0]
    num_tags = inputs.shape[1]
    all_sequence_log_likelihoods = []

    # Make sure all probabilities sum to 1.
    for tag_indices in itertools.product(range(num_tags), repeat=sequence_lengths):
        tag_indices = list(tag_indices)
        tag_indices.extend([0] * (num_words - sequence_lengths))
        sequence_log_likelihood, _ = text.crf_log_likelihood(
            inputs=tf.expand_dims(inputs, 0),
            tag_indices=tf.expand_dims(tag_indices, 0),
            sequence_lengths=tf.expand_dims(sequence_lengths, 0),
            transition_params=tf.constant(transition_params),
        )
        all_sequence_log_likelihoods.append(sequence_log_likelihood)
    total_log_likelihood = tf.reduce_logsumexp(all_sequence_log_likelihoods)
    test_utils.assert_allclose_according_to_type(
        total_log_likelihood, 0.0, rtol=1e-6, atol=1e-6, half_rtol=2e-3, half_atol=2e-3
    )

    # check that `transition_params=None` also works (no error should be raised)
    text.crf_log_likelihood(
        inputs=tf.expand_dims(inputs, 0),
        tag_indices=tf.expand_dims(tag_indices, 0),
        sequence_lengths=tf.expand_dims(sequence_lengths, 0),
    )
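The identity behind the loop: the probabilities of all possible tag sequences must sum to 1, so combining the per-sequence log-likelihoods with `reduce_logsumexp` must give log 1 = 0 (up to dtype-dependent tolerance):

# logsumexp_y log p(y | x) = log sum_y p(y | x) = log 1 = 0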
Example #2
def test_op_forward_pass(dtype):
    np.random.seed(0)
    data_width = 7
    data_height = 9
    data_channels = 5
    warp_width = 4
    warp_height = 8
    batch_size = 10

    warp = _make_warp(batch_size, warp_height, warp_width, dtype)
    data_shape = (batch_size, data_height, data_width, data_channels)
    data = np.random.rand(*data_shape).astype(dtype)
    data_ph = tf.constant(data)
    warp_ph = tf.constant(warp)
    outputs = resampler_ops.resampler(data=data_ph, warp=warp_ph)
    assert outputs.shape == (batch_size, warp_height, warp_width, data_channels)

    # Generate reference output via bilinear interpolation in numpy
    reference_output = np.zeros_like(outputs)
    for batch in range(batch_size):
        for c in range(data_channels):
            reference_output[batch, :, :, c] = _bilinearly_interpolate(
                data[batch, :, :, c], warp[batch, :, :, 0], warp[batch, :, :, 1]
            )

    test_utils.assert_allclose_according_to_type(
        outputs, reference_output, half_rtol=5e-3, half_atol=5e-3
    )
Example #3
def test_minimize_sparse_resource_variable_frobenius(dtype, device):
    if "gpu" in device and dtype == tf.float16:
        pytest.xfail("See https://github.com/tensorflow/addons/issues/347")
    var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)

    def loss():
        x = tf.constant([[4.0], [5.0]], dtype=dtype)
        pred = tf.matmul(tf.nn.embedding_lookup([var0], [0]), x)
        return pred * pred

    # Gradient of the loss above: d(pred**2)/d var0 = 2 * pred * x^T,
    # written out coefficient by coefficient.
    grads0_0 = 32 * 1.0 + 40 * 2.0
    grads0_1 = 40 * 1.0 + 50 * 2.0
    grads0 = tf.constant([[grads0_0, grads0_1]], dtype=dtype)
    norm0 = tf.math.reduce_sum(grads0**2)**0.5

    learning_rate = 0.1
    lambda_ = 0.1
    ord = "fro"
    opt = cg_lib.ConditionalGradient(learning_rate=learning_rate,
                                     lambda_=lambda_,
                                     ord=ord)
    _ = opt.minimize(loss, var_list=[var0])
    test_utils.assert_allclose_according_to_type(
        [[
            1.0 * learning_rate -
            (1 - learning_rate) * lambda_ * grads0_0 / norm0,
            2.0 * learning_rate -
            (1 - learning_rate) * lambda_ * grads0_1 / norm0,
        ]],
        var0.numpy(),
    )
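The expected values correspond to one conditional-gradient step under the Frobenius norm. A sketch of that update, reverse-engineered from the assertion above rather than taken from the optimizer's source:

def cg_frobenius_step(var, grad, learning_rate, lambda_):
    # var <- lr * var - (1 - lr) * lambda_ * grad / ||grad||_F
    norm = tf.math.reduce_sum(grad ** 2) ** 0.5
    return learning_rate * var - (1 - learning_rate) * lambda_ * grad / norm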
Example #4
def test_minimize_with_2D_indicies_for_embedding_lookup_nuclear():
    # This test invokes the ResourceSparseApplyConditionalGradient
    # operation.
    var0 = tf.Variable(tf.ones([2, 2]))

    def loss():
        return tf.math.reduce_sum(tf.nn.embedding_lookup(var0, [[1]]))

    # the gradient for this loss function:
    grads0 = tf.constant([[0, 0], [1, 1]], dtype=tf.float32)
    top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(
        grads0)

    learning_rate = 0.1
    lambda_ = 0.1
    ord = "nuclear"
    opt = cg_lib.ConditionalGradient(learning_rate=learning_rate,
                                     lambda_=lambda_,
                                     ord=ord)
    _ = opt.minimize(loss, var_list=[var0])

    # Run 1 step of cg_op
    test_utils.assert_allclose_according_to_type(
        [
            learning_rate * 1 -
            (1 - learning_rate) * lambda_ * top_singular_vector0[1][0],
            learning_rate * 1 -
            (1 - learning_rate) * lambda_ * top_singular_vector0[1][1],
        ],
        var0[1],
    )
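The nuclear-norm variant has the same structure, but the normalized gradient is replaced by the gradient's top singular vector (here obtained from the private helper `_top_singular_vector`):

# var <- lr * var - (1 - lr) * lambda_ * top_singular_vector(grad)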
Example #5
def test_crf_sequence_score(dtype):
    transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype)
    # Test both the length-1 and regular cases.
    sequence_lengths_list = [
        np.array(3, dtype=np.int32),
        np.array(1, dtype=np.int32),
    ]
    inputs_list = [
        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype),
        np.array([[4, 5, -3]], dtype=dtype),
    ]
    tag_indices_list = [
        np.array([1, 2, 1, 0], dtype=np.int32),
        np.array([1], dtype=np.int32),
    ]
    for sequence_lengths, inputs, tag_indices in zip(
        sequence_lengths_list, inputs_list, tag_indices_list
    ):
        sequence_score = text.crf_sequence_score(
            inputs=tf.expand_dims(inputs, 0),
            tag_indices=tf.expand_dims(tag_indices, 0),
            sequence_lengths=tf.expand_dims(sequence_lengths, 0),
            transition_params=tf.constant(transition_params),
        )
        sequence_score = tf.squeeze(sequence_score, [0])

        expected_sequence_score = calculate_sequence_score(
            inputs, transition_params, tag_indices, sequence_lengths
        )
        test_utils.assert_allclose_according_to_type(
            sequence_score, expected_sequence_score
        )
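`calculate_sequence_score` is a test helper that is not shown in the excerpt. A reference implementation consistent with the standard linear-chain CRF score (unary emission scores plus pairwise transition scores along the tagged path) would look roughly like:

def calculate_sequence_score(inputs, transition_params, tag_indices, sequence_lengths):
    # Sum the emission score of each chosen tag and the transition
    # score between consecutive tags, up to the sequence length.
    score = 0.0
    for i in range(sequence_lengths):
        score += inputs[i][tag_indices[i]]
        if i > 0:
            score += transition_params[tag_indices[i - 1], tag_indices[i]]
    return score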
Example #6
def test_shear_y(dtype):
    image = np.random.randint(low=0, high=255,
                              size=(4, 4, 3)).astype(dtype.as_numpy_dtype)
    color = tf.constant([255, 0, 255], tf.int32)
    level = tf.random.uniform(shape=(), minval=0, maxval=1)

    tf_image = tf.constant(image)
    sheared_img = transform_ops.shear_y(image=tf_image,
                                        level=level,
                                        replace=color)
    transform_matrix = transform.AffineTransform(
        np.array([[1, 0, 0], [level.numpy(), 1, 0], [0, 0, 1]]))
    if dtype == tf.uint8:
        # uint8 can't represent cval=-1, so we use int32 instead
        image = image.astype(np.int32)
    expected_img = transform.warp(image,
                                  transform_matrix,
                                  order=0,
                                  cval=-1,
                                  preserve_range=True)

    mask = np.where(expected_img == -1)
    expected_img[mask[0], mask[1], :] = color

    test_utils.assert_allclose_according_to_type(sheared_img.numpy(),
                                                 expected_img)
Example #7
def test_with_integer():
    boxes1 = tf.constant([[4, 3, 7, 5], [5, 6, 10, 7]], dtype=tf.int32)
    boxes2 = tf.constant([[3, 4, 6, 8], [14, 14, 15, 15]], dtype=tf.int32)
    expected_result = tf.constant([1.07500000298023224, 1.9333333373069763],
                                  dtype=tf.float32)
    loss = giou_loss(boxes1, boxes2)
    test_utils.assert_allclose_according_to_type(loss, expected_result)
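The expected values can be verified by hand. Assuming the `[y_min, x_min, y_max, x_max]` box convention used by TensorFlow Addons, the first pair works out as:

# box1 = [4, 3, 7, 5] -> area = (7 - 4) * (5 - 3) = 6
# box2 = [3, 4, 6, 8] -> area = (6 - 3) * (8 - 4) = 12
# intersection = (min(7, 6) - max(4, 3)) * (min(5, 8) - max(3, 4)) = 2 * 1 = 2
# union = 6 + 12 - 2 = 16,  IoU = 2 / 16 = 0.125
# enclosing box = [3, 3, 7, 8] -> area = 4 * 5 = 20
# GIoU = IoU - (20 - 16) / 20 = 0.125 - 0.2 = -0.075
# loss = 1 - GIoU = 1.075

The second pair does not overlap at all (IoU = 0), giving loss = 1 + 84/90 ≈ 1.9333.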
Example #8
def test_forward(input_shape, input_dim, dtype, indices_dtype, combiner):
    indices = np.random.randint(low=0, high=input_dim,
                                size=input_shape).astype(indices_dtype)
    params = np.random.random(size=(input_dim, 16)).astype(dtype)
    if combiner == "sum":
        weights = np.random.random(size=indices.shape).astype(dtype)
    else:
        weights = None
    expected = manual_embedding_bag(indices,
                                    params,
                                    weights,
                                    combiner=combiner)
    embedding_bag = EmbeddingBag(input_dim, 16, combiner=combiner, dtype=dtype)
    embedding_bag.build(indices.shape)
    embedding_bag.set_weights([params])
    indices = tf.convert_to_tensor(indices)
    if weights is not None:
        weights = tf.convert_to_tensor(weights)
    output = embedding_bag(
        indices,
        weights,
    )
    test_utils.assert_allclose_according_to_type(expected,
                                                 output,
                                                 half_rtol=1e-2,
                                                 half_atol=1e-2)
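`manual_embedding_bag` is a test helper not shown here. A NumPy reference consistent with how it is called above (weighted sum for the "sum" combiner, plain mean otherwise) is sketched below:

def manual_embedding_bag(indices, params, weights=None, combiner="sum"):
    gathered = params[indices]  # shape (..., bag_size, embedding_dim)
    if combiner == "sum":
        if weights is not None:
            gathered = gathered * weights[..., np.newaxis]
        return gathered.sum(axis=-2)
    return gathered.mean(axis=-2)  # the "mean" combiner is unweighted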
Example #9
def test_op_backward_pass(dtype):
    np.random.seed(13)
    data_width = 5
    data_height = 4
    data_channels = 3
    warp_width = 2
    warp_height = 6
    batch_size = 3

    warp = _make_warp(batch_size, warp_height, warp_width, dtype)
    data_shape = (batch_size, data_height, data_width, data_channels)
    data = np.random.rand(*data_shape).astype(dtype)
    data_tensor = tf.constant(data)
    warp_tensor = tf.constant(warp)
    theoretical, _ = tf.test.compute_gradient(
        resampler_ops.resampler, [data_tensor, warp_tensor]
    )
    data_tensor_64 = tf.constant(data, dtype=tf.float64)
    warp_tensor_64 = tf.constant(warp, dtype=tf.float64)
    _, numerical_64 = tf.test.compute_gradient(
        resampler_ops.resampler, [data_tensor_64, warp_tensor_64]
    )

    for t, n in zip(theoretical, numerical_64):
        test_utils.assert_allclose_according_to_type(
            t, n, float_rtol=5e-5, float_atol=5e-5
        )
Example #10
def test_viterbi_decode(dtype):
    inputs = np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype)
    transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype)
    sequence_lengths = np.array(3, dtype=np.int32)
    num_words = inputs.shape[0]
    num_tags = inputs.shape[1]

    all_sequence_scores = []
    all_sequences = []

    # Compare the dynamic program with brute force computation.
    for tag_indices in itertools.product(range(num_tags), repeat=sequence_lengths):
        tag_indices = list(tag_indices)
        tag_indices.extend([0] * (num_words - sequence_lengths))
        all_sequences.append(tag_indices)
        sequence_score = text.crf_sequence_score(
            inputs=tf.expand_dims(inputs, 0),
            tag_indices=tf.expand_dims(tag_indices, 0),
            sequence_lengths=tf.expand_dims(sequence_lengths, 0),
            transition_params=tf.constant(transition_params),
        )
        sequence_score = tf.squeeze(sequence_score, [0])
        all_sequence_scores.append(sequence_score)

    expected_max_sequence_index = np.argmax(all_sequence_scores)
    expected_max_sequence = all_sequences[expected_max_sequence_index]
    expected_max_score = all_sequence_scores[expected_max_sequence_index]

    actual_max_sequence, actual_max_score = text.viterbi_decode(
        inputs[:sequence_lengths], transition_params
    )

    test_utils.assert_allclose_according_to_type(actual_max_score, expected_max_score)
    assert actual_max_sequence == expected_max_sequence[:sequence_lengths]
Example #11
def test_gaussian_filter2d_different_sigma():
    image = np.arange(40 * 40).reshape(40, 40).astype(np.float32)
    sigma = [1.0, 2.0]

    test_utils.assert_allclose_according_to_type(
        gaussian_filter2d(image, [9, 17], sigma).numpy(),
        gaussian_filter(image, sigma, mode="mirror"),
    )
Example #12
def test_iou(dtype):
    boxes1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
                         dtype=dtype)
    boxes2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]],
                         dtype=dtype)
    expected_result = tf.constant([0.875, 1.0], dtype=dtype)
    loss = giou_loss(boxes1, boxes2, mode="iou")
    test_utils.assert_allclose_according_to_type(loss, expected_result)
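With `mode="iou"` the loss is simply 1 - IoU, so the hand computation from Example #7 carries over directly:

# pair 1: 1 - 0.125 = 0.875
# pair 2: 1 - 0.0   = 1.0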
Example #13
def test_sparsemax_of_zero(dtype):
    """check sparsemax proposition 1, part 1."""
    z = np.zeros((1, 10))

    tf_sparsemax_out = sparsemax(z.astype(dtype))
    np_sparsemax = np.ones_like(z, dtype=dtype) / z.size

    test_utils.assert_allclose_according_to_type(np_sparsemax, tf_sparsemax_out)
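This is exactly what the proposition predicts: sparsemax is the Euclidean projection onto the probability simplex, and projecting the zero vector lands on the simplex centroid:

# sparsemax(0) = (1/K, ..., 1/K) for K classes; here K = 10, so every entry is 0.1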
Example #14
def test_sharing_frobenius(dtype):
    var0 = tf.Variable([1.0, 2.0], dtype=dtype)
    var1 = tf.Variable([3.0, 4.0], dtype=dtype)
    grads0 = tf.constant([0.1, 0.1], dtype=dtype)
    grads1 = tf.constant([0.01, 0.01], dtype=dtype)
    norm0 = tf.math.reduce_sum(grads0 ** 2) ** 0.5
    norm1 = tf.math.reduce_sum(grads1 ** 2) ** 0.5
    learning_rate = 0.1
    lambda_ = 0.1
    ord = "fro"
    cg_opt = cg_lib.ConditionalGradient(
        learning_rate=learning_rate, lambda_=lambda_, ord=ord
    )
    _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

    # Check we have slots
    assert ["conditional_gradient"] == cg_opt.get_slot_names()
    slot0 = cg_opt.get_slot(var0, "conditional_gradient")
    assert slot0.get_shape() == var0.get_shape()
    slot1 = cg_opt.get_slot(var1, "conditional_gradient")
    assert slot1.get_shape() == var1.get_shape()

    # In eager mode the two update steps above run as soon as they
    # are created, so we cannot inspect the parameter values between
    # the steps; we can only check the final values after the second
    # update.

    # Step 2: the second conditional_gradient step incorporates
    # the previous update.
    # Check that the parameters have been updated.
    cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
    test_utils.assert_allclose_according_to_type(
        np.array(
            [
                (1.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.1 / norm0)
                * learning_rate
                - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                (2.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.1 / norm0)
                * learning_rate
                - (1 - learning_rate) * lambda_ * 0.1 / norm0,
            ]
        ),
        var0.numpy(),
    )
    test_utils.assert_allclose_according_to_type(
        np.array(
            [
                (3.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.01 / norm1)
                * learning_rate
                - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                (4.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.01 / norm1)
                * learning_rate
                - (1 - learning_rate) * lambda_ * 0.01 / norm1,
            ]
        ),
        var1.numpy(),
    )
Example #15
File: crf_test.py  Project: wit543/addons
def test_crf_decode(dtype):
    transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]],
                                 dtype=dtype)
    # Test both the length-1 and regular cases.
    sequence_lengths_list = [
        np.array(3, dtype=np.int32),
        np.array(1, dtype=np.int64),
    ]
    inputs_list = [
        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype),
        np.array([[-1, 2, 1]], dtype=dtype),
    ]
    tag_indices_list = [
        np.array([1, 2, 1, 0], dtype=np.int32),
        np.array([2], dtype=np.int32),
    ]

    for sequence_lengths, inputs, tag_indices in zip(sequence_lengths_list,
                                                     inputs_list,
                                                     tag_indices_list):
        num_words = inputs.shape[0]
        num_tags = inputs.shape[1]

        all_sequence_scores = []
        all_sequences = []

        # Compare the dynamic program with brute force computation.
        for tag_indices in itertools.product(range(num_tags),
                                             repeat=sequence_lengths):
            tag_indices = list(tag_indices)
            tag_indices.extend([0] * (num_words - sequence_lengths))
            all_sequences.append(tag_indices)
            sequence_score = text.crf_sequence_score(
                inputs=tf.expand_dims(inputs, 0),
                tag_indices=tf.expand_dims(tag_indices, 0),
                sequence_lengths=tf.expand_dims(sequence_lengths, 0),
                transition_params=tf.constant(transition_params),
            )
            sequence_score = tf.squeeze(sequence_score, [0])
            all_sequence_scores.append(sequence_score)

        expected_max_sequence_index = np.argmax(all_sequence_scores)
        expected_max_sequence = all_sequences[expected_max_sequence_index]
        expected_max_score = all_sequence_scores[expected_max_sequence_index]

        actual_max_sequence, actual_max_score = text.crf_decode(
            tf.expand_dims(inputs, 0),
            tf.constant(transition_params),
            tf.expand_dims(sequence_lengths, 0),
        )
        actual_max_sequence = tf.squeeze(actual_max_sequence, [0])
        actual_max_score = tf.squeeze(actual_max_score, [0])

        test_utils.assert_allclose_according_to_type(actual_max_score,
                                                     expected_max_score, 1e-6,
                                                     1e-6)
        assert (list(actual_max_sequence[:sequence_lengths]) ==
                expected_max_sequence[:sequence_lengths])
Example #16
def test_theoretical_gradients(dtype, approximate):
    # Only test theoretical gradients for float32 and float64,
    # since float16 is too unstable for computing the Jacobian.
    x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)

    theoretical, numerical = tf.test.compute_gradient(
        lambda x: gelu(x, approximate=approximate), [x]
    )
    test_utils.assert_allclose_according_to_type(theoretical, numerical, atol=1e-4)
Example #17
def test_different_shapes(dtype):
    boxes1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
                         dtype=dtype)
    boxes2 = tf.constant([[3.0, 4.0, 6.0, 8.0]], dtype=dtype)
    expand_boxes1 = tf.expand_dims(boxes1, -2)
    expand_boxes2 = tf.expand_dims(boxes2, 0)
    expected_result = tf.constant([1.07500000298023224, 1.366071], dtype=dtype)
    loss = giou_loss(expand_boxes1, expand_boxes2)
    test_utils.assert_allclose_according_to_type(loss, expected_result)
Example #18
def test_hardshrink(dtype):
    x = tf.constant([-2.0, -0.5, 0.0, 0.5, 2.0], dtype=dtype)
    expected_result = tf.constant([-2.0, 0.0, 0.0, 0.0, 2.0], dtype=dtype)
    test_utils.assert_allclose_according_to_type(_hardshrink_custom_op(x),
                                                 expected_result)

    expected_result = tf.constant([-2.0, 0.0, 0.0, 0.0, 2.0], dtype=dtype)
    test_utils.assert_allclose_according_to_type(
        _hardshrink_custom_op(x, lower=-1.0, upper=1.0), expected_result)
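For reference, hardshrink passes values outside `[lower, upper]` through unchanged and zeroes everything in between. A NumPy equivalent of the behavior asserted above (defaults of lower=-0.5 and upper=0.5 are assumed):

def hardshrink_reference(x, lower=-0.5, upper=0.5):
    return np.where((x < lower) | (x > upper), x, 0.0)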
Example #19
def test_softshrink(dtype):
    x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)
    expected_result = tf.constant([-1.5, -0.5, 0.0, 0.5, 1.5], dtype=dtype)
    test_utils.assert_allclose_according_to_type(softshrink(x),
                                                 expected_result)

    expected_result = tf.constant([-1.0, 0.0, 0.0, 0.0, 1.0], dtype=dtype)
    test_utils.assert_allclose_according_to_type(
        softshrink(x, lower=-1.0, upper=1.0), expected_result)
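Softshrink additionally shrinks the surviving values toward zero by the threshold. A NumPy equivalent of the assertions above (same assumed defaults):

def softshrink_reference(x, lower=-0.5, upper=0.5):
    return np.where(x < lower, x - lower, np.where(x > upper, x - upper, 0.0))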
Example #20
def test_crf_log_norm_zero_seq_length(dtype):
    """Test `crf_log_norm` when `sequence_lengths` contains one or more
    zeros."""
    inputs = tf.constant(np.ones([2, 10, 5], dtype=dtype))
    transition_params = tf.constant(np.ones([5, 5], dtype=dtype))
    sequence_lengths = tf.constant(np.zeros([2], dtype=np.int32))
    expected_log_norm = np.zeros([2], dtype=dtype)
    log_norm = text.crf_log_norm(inputs, sequence_lengths, transition_params)
    test_utils.assert_allclose_according_to_type(log_norm, expected_log_norm)
Example #21
def test_giou_loss(dtype):
    boxes1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
                         dtype=dtype)
    boxes2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]],
                         dtype=dtype)
    expected_result = tf.constant([1.07500000298023224, 1.9333333373069763],
                                  dtype=dtype)
    loss = giou_loss(boxes1, boxes2)
    test_utils.assert_allclose_according_to_type(loss, expected_result)
Example #22
def test_theoretical_gradients(dtype):
    # Only test theoretical gradients for float32 and float64,
    # since float16 is too unstable for computing the Jacobian.

    # Hardshrink is not continuous at `lower` and `upper`.
    # Avoid these two points to make gradients smooth.
    x = tf.constant([-2.0, -1.5, 0.0, 1.5, 2.0], dtype=dtype)

    theoretical, numerical = tf.test.compute_gradient(_hardshrink_custom_op, [x])
    test_utils.assert_allclose_according_to_type(theoretical, numerical, atol=1e-4)
Example #23
def test_gelu(dtype):
    x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)
    expected_result = tf.constant(
        [-0.04540229, -0.158808, 0.0, 0.841192, 1.9545977], dtype=dtype)
    test_utils.assert_allclose_according_to_type(gelu(x), expected_result)

    expected_result = tf.constant(
        [-0.04550028, -0.15865526, 0.0, 0.8413447, 1.9544997], dtype=dtype)
    test_utils.assert_allclose_according_to_type(gelu(x, False),
                                                 expected_result)
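The two branches compare the tanh approximation (the default here) with the exact erf form of GELU:

# approximate: 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x**3)))
# exact:       0.5 * x * (1 + erf(x / sqrt(2)))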
Example #24
def test_sparsemax_against_numpy(dtype):
    """check sparsemax kernel against numpy."""
    random = np.random.RandomState(1)

    z = random.uniform(low=-3, high=3, size=(test_obs, 10))

    tf_sparsemax_out = sparsemax(z.astype(dtype))
    np_sparsemax = _np_sparsemax(z).astype(dtype)

    test_utils.assert_allclose_according_to_type(np_sparsemax, tf_sparsemax_out)
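`_np_sparsemax` is the test module's NumPy reference. A standard implementation of the sparsemax projection (Martins & Astudillo, 2016), shown as a sketch of what such a helper computes:

def _np_sparsemax(z):
    # z has shape (obs, dims); project each row onto the probability simplex.
    z_sorted = np.sort(z, axis=1)[:, ::-1]
    z_cumsum = np.cumsum(z_sorted, axis=1)
    k = np.arange(1, z.shape[1] + 1)
    support = 1 + k * z_sorted > z_cumsum
    k_z = np.sum(support, axis=1)  # size of the support of each row
    tau = (z_cumsum[np.arange(z.shape[0]), k_z - 1] - 1) / k_z
    return np.maximum(0, z - tau[:, np.newaxis])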
Example #25
def test_crf_constrained_decode(dtype):
    transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]],
                                 dtype=dtype)
    # Test both the length-1 and regular cases.
    sequence_lengths_list = [
        np.array(3, dtype=np.int32),
        np.array(1, dtype=np.int32)
    ]
    inputs_list = [
        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype),
        np.array([[4, 5, -3]], dtype=dtype),
    ]
    tag_bitmap_list = [
        np.array(
            [
                [True, False, False],
                [False, True, True],
                [False, True, True],
                [False, True, True],
            ],
            dtype=bool,
        ),
        np.array([[False, True, True]], dtype=bool),
    ]
    for sequence_lengths, inputs, tag_bitmap in zip(sequence_lengths_list,
                                                    inputs_list,
                                                    tag_bitmap_list):
        filtered_inputs = text.crf_filtered_inputs(
            inputs=tf.expand_dims(inputs, 0),
            tag_bitmap=tf.expand_dims(tag_bitmap, 0))

        expected_max_sequence, expected_max_score = text.crf_decode(
            filtered_inputs,
            tf.constant(transition_params),
            tf.expand_dims(sequence_lengths, 0),
        )

        expected_max_sequence = tf.squeeze(expected_max_sequence, [0])
        expected_max_score = tf.squeeze(expected_max_score, [0])

        actual_max_sequence, actual_max_score = text.crf_constrained_decode(
            tf.expand_dims(inputs, 0),
            tf.expand_dims(tag_bitmap, 0),
            tf.constant(transition_params),
            tf.expand_dims(sequence_lengths, 0),
        )

        actual_max_sequence = tf.squeeze(actual_max_sequence, [0])
        actual_max_score = tf.squeeze(actual_max_score, [0])

        test_utils.assert_allclose_according_to_type(actual_max_score,
                                                     expected_max_score, 1e-6,
                                                     1e-6)
        assert list(actual_max_sequence[:sequence_lengths]) == list(
            expected_max_sequence[:sequence_lengths])
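`crf_filtered_inputs` enforces the constraints by masking: positions where `tag_bitmap` is False have their unary scores pushed down to a very large negative value, so constrained decoding on the raw inputs must agree with unconstrained decoding on the filtered inputs. (That is the intent this test checks; the exact masking value is an implementation detail of the library.)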
Example #26
def test_basic_with_learning_rate_decay():
    for i, dtype in enumerate(
            _dtypes_to_test(use_gpu=test_utils.is_gpu_available())):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = 0.001
        beta_1 = 0.9
        beta_2 = 0.999
        epsilon = 1e-7
        decay = 0.5
        lamb_wd = 0.01

        opt = lamb.LAMB(
            learning_rate=learning_rate,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon,
            weight_decay=lamb_wd,
            decay=decay,
        )

        # Run 3 steps of LAMB
        for t in range(3):
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

            lr_np = learning_rate / (1 + decay * t)

            var0_np, m0, v0 = lamb_update_numpy(var0_np,
                                                grads0_np,
                                                t,
                                                m0,
                                                v0,
                                                lr=lr_np,
                                                lamb_wd=lamb_wd)
            var1_np, m1, v1 = lamb_update_numpy(var1_np,
                                                grads1_np,
                                                t,
                                                m1,
                                                v1,
                                                lr=lr_np,
                                                lamb_wd=lamb_wd)

            # Validate updated params
            test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
            test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
Example #27
def verify_funcs_are_equivalent(dtype):
    x_np = np.random.uniform(-10, 10, size=(4, 4)).astype(dtype)
    x = tf.convert_to_tensor(x_np)
    with tf.GradientTape(persistent=True) as t:
        t.watch(x)
        y_native = tanhshrink(x)
        y_py = _tanhshrink_py(x)
    test_utils.assert_allclose_according_to_type(y_native, y_py)
    grad_native = t.gradient(y_native, x)
    grad_py = t.gradient(y_py, x)
    test_utils.assert_allclose_according_to_type(grad_native, grad_py)
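`_tanhshrink_py` is presumably the obvious composition of stock TensorFlow ops, which is what makes the equivalence (and gradient-equivalence) check meaningful:

def _tanhshrink_py(x):
    return x - tf.math.tanh(x)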
Example #28
def test_sparsemax_against_numpy_low_rank(dtype):
    """check sparsemax kernel against numpy."""
    random = np.random.RandomState(1)

    z = random.uniform(low=-3, high=3, size=(10))

    tf_sparsemax_out = sparsemax(z.astype(dtype)).numpy()
    np_sparsemax = np.reshape(_np_sparsemax(np.reshape(z, [1, 10])), [10]).astype(dtype)

    test_utils.assert_allclose_according_to_type(
        np_sparsemax, tf_sparsemax_out, half_atol=5e-3
    )
    assert np_sparsemax.shape == tf_sparsemax_out.shape
Example #29
def test_sparsemax_loss_positive(dtype):
    """check sparsemax-loss proposition 4."""
    random = np.random.RandomState(5)

    z = random.uniform(low=-3, high=3, size=(test_obs, 10))
    q = np.zeros((test_obs, 10))
    q[np.arange(0, test_obs), random.randint(0, 10, size=test_obs)] = 1

    tf_loss_op, tf_loss_out = _tf_sparsemax_loss(z, q, dtype)

    test_utils.assert_allclose_according_to_type(np.abs(tf_loss_out),
                                                 tf_loss_out)
    assert np.zeros(test_obs).shape == tf_loss_op.shape
Example #30
def test_sparsemax_loss_against_numpy(dtype):
    """check sparsemax-loss kernel against numpy."""
    random = np.random.RandomState(1)

    z = random.uniform(low=-3, high=3, size=(test_obs, 10))
    q = np.zeros((test_obs, 10))
    q[np.arange(0, test_obs), random.randint(0, 10, size=test_obs)] = 1

    tf_loss_op, tf_loss_out = _tf_sparsemax_loss(z, q, dtype)
    np_loss = _np_sparsemax_loss(z, q).astype(dtype)

    test_utils.assert_allclose_according_to_type(np_loss, tf_loss_out)
    assert np_loss.shape == tf_loss_op.shape
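For reference, the sparsemax loss that these two tests probe is, from Martins & Astudillo (2016), with S(z) the support of sparsemax(z) and tau(z) its threshold:

# L(z, q) = -q . z + 0.5 * sum_{j in S(z)} (z_j**2 - tau(z)**2) + 0.5 * ||q||**2
# Proposition 4 above states that this quantity is always non-negative.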