def test_logistic_loss():
    """Check the logistic log-likelihood and its gradient with respect to ``w``.

    Builds ``L = y * log(h) + (1 - y) * log(1 - h)`` with
    ``h = 1 / (1 + exp(-sum(w * x)))`` as an ``ad`` graph, evaluates ``L`` and
    ``dL/dw`` for a random ``w``, and compares both against the same
    quantities computed directly with NumPy.
    """
    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    h = 1 / (1 + ad.exp(-ad.reduce_sum(w * x)))
    L = y * ad.log(h) + (1 - y) * ad.log(1 - h)
    w_grad, = ad.gradients(L, [w])
    executor = ad.Executor([L, w_grad])

    y_val = 0
    x_val = np.array([2, 3, 4])
    w_val = np.random.random(3)

    L_val, w_grad_val = executor.run(feed_dict={x: x_val, y: y_val, w: w_val})

    # Reference values computed with plain NumPy.
    logistic = 1 / (1 + np.exp(-np.sum(w_val * x_val)))
    expected_L_val = y_val * np.log(logistic) + (1 - y_val) * np.log(1 - logistic)
    # Closed form of the gradient: dL/dw = (y - h) * x.
    expected_w_grad = (y_val - logistic) * x_val

    print(L_val)
    print(expected_L_val)
    print(expected_w_grad)
    print(w_grad_val)

    # Compare the loss with a tolerance, like the gradient below: bit-exact
    # float equality depends on the graph evaluating operations in exactly
    # the same order as the NumPy reference, and a bare ``==`` is ambiguous
    # if the executor ever returns a multi-element array.
    assert np.allclose(L_val, expected_L_val, rtol=1e-9, atol=1e-12)
    assert np.sum(np.abs(expected_w_grad - w_grad_val)) < 1E-9
def test_exp_op():
    """Verify ``ad.exp`` forward values and its gradient against ``np.exp``."""
    inp = ad.Variable(name="x1")
    out = ad.exp(inp)

    (d_inp,) = ad.gradients(out, [inp])

    runner = ad.Executor([out, d_inp])
    inp_val = np.full(3, 2.0)
    results = runner.run(feed_dict={inp: inp_val})
    out_val, d_inp_val = results

    assert isinstance(out, ad.Node)
    # exp is its own derivative, so value and gradient share one reference.
    reference = np.exp(inp_val)
    assert np.array_equal(out_val, reference)
    assert np.array_equal(d_inp_val, reference)
def test_exp_mix_op():
    """Verify the value and both gradients of ``exp(log(x1 * x2) + 1)``."""
    lhs = ad.Variable(name="x1")
    rhs = ad.Variable(name="x2")
    out = ad.exp(ad.log(lhs * rhs) + 1)

    grads = ad.gradients(out, [lhs, rhs])
    d_lhs, d_rhs = grads

    runner = ad.Executor([out, d_lhs, d_rhs])
    lhs_val = np.full(3, 2.0)
    rhs_val = np.full(3, 4.0)
    feeds = {lhs: lhs_val, rhs: rhs_val}
    out_val, d_lhs_val, d_rhs_val = runner.run(feed_dict=feeds)

    # The product x1 * x2 appears in every reference expression.
    product = lhs_val * rhs_val

    assert isinstance(out, ad.Node)
    assert np.array_equal(out_val, np.exp(np.log(product) + 1))
    assert np.array_equal(d_lhs_val, out_val * rhs_val / product)
    assert np.array_equal(d_rhs_val, out_val * lhs_val / product)
def test_mix_all():
    """Verify ``1 / (1 + exp(-reduce_sum(x1)))`` and its gradient w.r.t. ``x1``."""
    inp = ad.Variable(name="x1")
    total = ad.reduce_sum(inp)
    out = 1 / (1 + ad.exp(-total))

    (d_inp,) = ad.gradients(out, [inp])

    runner = ad.Executor([out, d_inp])
    inp_val = np.full(3, 2.0)
    out_val, d_inp_val = runner.run(feed_dict={inp: inp_val})

    # NumPy reference: s = 1 / (1 + exp(-sum(x))), ds/dx_i = s * (1 - s).
    want_out = 1 / (1 + np.exp(-np.sum(inp_val)))
    want_grad = want_out * (1 - want_out) * np.ones_like(inp_val)

    print(want_grad)
    print(d_inp_val)

    assert isinstance(out, ad.Node)
    assert np.array_equal(out_val, want_out)
    total_abs_err = np.sum(np.abs(d_inp_val - want_grad))
    assert total_abs_err < 1E-10
def test_logistic():
    """Verify ``1 / (1 + exp(-reduce_sum(w * x1)))`` and its gradient w.r.t. ``w``."""
    feat = ad.Variable(name="x1")
    weight = ad.Variable(name='w')
    score = ad.reduce_sum(weight * feat)
    out = 1 / (1 + ad.exp(-score))

    (d_weight,) = ad.gradients(out, [weight])

    runner = ad.Executor([out, d_weight])
    feat_val = np.full(3, 3.0)
    # All-zero weights, so the reference output is exactly 0.5.
    weight_val = np.zeros(3)
    feeds = {feat: feat_val, weight: weight_val}
    out_val, d_weight_val = runner.run(feed_dict=feeds)

    # NumPy reference: s = 1 / (1 + exp(-sum(w * x))), ds/dw = s * (1 - s) * x.
    want_out = 1 / (1 + np.exp(-np.sum(weight_val * feat_val)))
    want_grad = want_out * (1 - want_out) * feat_val

    print(want_grad)
    print(d_weight_val)

    assert isinstance(out, ad.Node)
    assert np.array_equal(out_val, want_out)
    total_abs_err = np.sum(np.abs(d_weight_val - want_grad))
    assert total_abs_err < 1E-7
def test_reduce_sum_mix():
    """Verify ``reduce_sum`` composed with ``exp`` and with ``log``.

    Both graphs share the same input variable; each is evaluated with its own
    executor and compared against NumPy for value and gradient.
    """
    inp = ad.Variable(name="x1")
    inp_val = np.full(3, 2.0)
    unit = np.ones_like(inp_val)

    # Case 1: exp(sum(x)); each input's gradient equals the output value.
    exp_out = ad.exp(ad.reduce_sum(inp))
    (exp_grad,) = ad.gradients(exp_out, [inp])
    exp_runner = ad.Executor([exp_out, exp_grad])
    exp_out_val, exp_grad_val = exp_runner.run(feed_dict={inp: inp_val})
    want_exp = np.exp(np.sum(inp_val))
    assert isinstance(exp_out, ad.Node)
    assert np.array_equal(exp_out_val, want_exp)
    assert np.array_equal(exp_grad_val, want_exp * unit)

    # Case 2: log(sum(x)); each input's gradient equals 1 / sum(x).
    log_out = ad.log(ad.reduce_sum(inp))
    (log_grad,) = ad.gradients(log_out, [inp])
    log_runner = ad.Executor([log_out, log_grad])
    log_out_val, log_grad_val = log_runner.run(feed_dict={inp: inp_val})
    want_log = np.log(np.sum(inp_val))
    assert isinstance(log_out, ad.Node)
    assert np.array_equal(log_out_val, want_log)
    assert np.array_equal(log_grad_val, (1 / np.sum(inp_val)) * unit)
# Example no. 7 (followed by a bare "0"): separator residue from the extracted listing, not program code.
def auto_diff_lr(num_samples=100, alpha=0.01, max_iters=300):
    """Fit logistic-regression weights by per-sample gradient ascent.

    Builds the log-likelihood ``L = y * log(h) + (1 - y) * log(1 - h)`` with
    ``h = 1 / (1 + exp(-sum(w * x)))`` as an ``ad`` graph, then repeatedly
    steps ``w`` along ``dL/dw`` one sample at a time. The accumulated
    likelihood and current weights are printed after every pass.

    Data generation, plotting and accuracy reporting are delegated to
    ``gen_2d_data``, ``plot`` and ``test_accuracy``, which are defined outside
    this block.

    Args:
        num_samples: Sample count passed to ``gen_2d_data`` and ``plot``, and
            the number of samples visited in each pass.
        alpha: Step size applied to the gradient in each update.
        max_iters: Number of full passes over the samples.
    """
    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    # NOTE: the formulation below can suffer large numerical error in some
    # cases, which is why real systems usually provide higher-level operators
    # to reduce that error.

    h = 1 / (1 + ad.exp(-ad.reduce_sum(w * x)))
    L = y * ad.log(h) + (1 - y) * ad.log(1 - h)
    w_grad, = ad.gradients(L, [w])
    # One executor is enough; it is reused for every sample below.
    executor = ad.Executor([L, w_grad])

    X_val, Y_val = gen_2d_data(num_samples)
    # Three weights; presumably two features plus a bias term (TODO confirm
    # against gen_2d_data).
    w_val = np.ones(3)

    # Plot and report accuracy before training.
    plot(num_samples, X_val, Y_val, w_val)
    test_accuracy(w_val, X_val, Y_val)

    for iteration in range(max_iters):
        acc_L_val = 0
        for i in range(num_samples):
            x_val = X_val[i]
            y_val = np.array(Y_val[i])
            L_val, w_grad_val = executor.run(feed_dict={
                w: w_val,
                x: x_val,
                y: y_val
            })
            # Ascent step: L is a log-likelihood, so move along +gradient.
            w_val += alpha * w_grad_val
            acc_L_val += L_val
        print("iter = %d, likelihood = %s, w = %s" %
              (iteration, acc_L_val, w_val))

    # Report accuracy and plot again after training.
    test_accuracy(w_val, X_val, Y_val)
    plot(num_samples, X_val, Y_val, w_val, True)