Example #1
# Imports as used by nnvm's optimizer tests (module paths assumed from the
# nnvm package layout):
import numpy as np
import nnvm
import nnvm.compiler.optimizer as optimizer
import nnvm.compiler.lr_scheduler as lr_scheduler
from nnvm.testing.config import ctx_list

def test_adam():
    for target, ctx in ctx_list():
        data = nnvm.sym.Variable("data")
        weight = nnvm.sym.Variable("weight")
        out = nnvm.sym.elemwise_mul(data, weight**2)

        dshape = (1, 2, 3)
        wshape = dshape

        base_lr = 0.1
        beta1 = 0.9
        beta2 = 0.999
        epsilon = 1e-8
        lr_factor = 0.5
        rescale_grad = 0.2
        wd = 0.1
        clip_gradient = 0.25

        scheduler = lr_scheduler.FactorScheduler(base_lr=base_lr,
                                                 step=1,
                                                 factor=lr_factor)
        opt = optimizer.Adam(learning_rate=base_lr,
                             beta1=beta1,
                             beta2=beta2,
                             epsilon=epsilon,
                             lr_scheduler=scheduler,
                             rescale_grad=rescale_grad,
                             clip_gradient=clip_gradient,
                             wd=wd)
        opt_sym = opt.minimize(out, var=weight)

        inputs = [("data", dshape, data)]
        params = [("weight", wshape, weight)]

        def update_func(data, weight):
            # First step (t = 1): the moments start at zero, so the
            # bias-corrected rate is sqrt(1 - beta2) / (1 - beta1); the
            # scheduler has decayed the learning rate once.
            rate_0 = np.sqrt(1 - beta2) / (1 - beta1)
            lr_0 = base_lr * lr_factor * rate_0
            gradient_0 = data * 2 * weight * rescale_grad
            gradient_0 = np.clip(gradient_0, -clip_gradient, clip_gradient)
            m_0 = (1 - beta1) * gradient_0
            v_0 = (1 - beta2) * (gradient_0**2)
            weight_0 = weight - lr_0 * (m_0 /
                                        (np.sqrt(v_0) + epsilon) + wd * weight)
            # Second step (t = 2): running moments m_0, v_0 carried over.
            rate_1 = np.sqrt(1 - beta2**2) / (1 - beta1**2)
            lr_1 = base_lr * (lr_factor**2) * rate_1
            gradient_1 = data * 2 * weight_0 * rescale_grad
            gradient_1 = np.clip(gradient_1, -clip_gradient, clip_gradient)
            m_1 = beta1 * m_0 + (1 - beta1) * gradient_1
            v_1 = beta2 * v_0 + (1 - beta2) * (gradient_1**2)
            weight_1 = weight_0 - lr_1 * (m_1 / (np.sqrt(v_1) + epsilon) +
                                          wd * weight_0)
            return weight_1

        # `helper` (defined elsewhere in the same test file) runs the compiled
        # opt_sym for two steps and compares the result with update_func.
        helper(opt_sym, inputs, params, update_func, 2, target, ctx)
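
For reference, the update that update_func hand-computes is the Adam step with gradient rescaling, clipping, weight decay, and the FactorScheduler's per-step decay folded in (written out from the test code above):

\[
\begin{aligned}
g_t &= \operatorname{clip}\left(\text{rescale\_grad}\cdot\nabla_w f,\ \pm\,\text{clip\_gradient}\right),\\
m_t &= \beta_1\, m_{t-1} + (1-\beta_1)\, g_t, \qquad
v_t = \beta_2\, v_{t-1} + (1-\beta_2)\, g_t^2,\\
\eta_t &= \text{base\_lr}\cdot\text{lr\_factor}^{\,t}\cdot\frac{\sqrt{1-\beta_2^{\,t}}}{1-\beta_1^{\,t}},\\
w_t &= w_{t-1} - \eta_t\left(\frac{m_t}{\sqrt{v_t}+\epsilon} + \text{wd}\cdot w_{t-1}\right),
\end{aligned}
\]

with the moments initialized to zero and f = data * weight**2, so the raw gradient is 2 * data * weight.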
Example #2
# Same imports and test fixtures (ctx_list, helper) as in Example #1.
def test_sgd():
    for target, ctx in ctx_list():
        data = nnvm.sym.Variable("data")
        weight = nnvm.sym.Variable("weight")
        out = nnvm.sym.elemwise_mul(data, weight**2)

        dshape = (1, 2, 3)
        wshape = dshape

        base_lr = 0.1
        lr_factor = 0.5
        rescale_grad = 0.2
        wd = 0.1
        clip_gradient = 0.25

        scheduler = lr_scheduler.FactorScheduler(base_lr=base_lr,
                                                 step=1,
                                                 factor=lr_factor)
        opt = optimizer.SGD(learning_rate=base_lr,
                            lr_scheduler=scheduler,
                            rescale_grad=rescale_grad,
                            clip_gradient=clip_gradient,
                            wd=wd)
        opt_sym = opt.minimize(out, var=weight)

        inputs = [("data", dshape, data)]
        params = [("weight", wshape, weight)]

        def update_func(data, weight):
            # Gradient of data * weight**2 w.r.t. weight is 2 * data * weight,
            # rescaled and clipped before the update.
            gradient_0 = data * 2 * weight * rescale_grad
            gradient_0 = np.clip(gradient_0, -clip_gradient, clip_gradient)
            weight_0 = weight - base_lr * lr_factor * (gradient_0 +
                                                       wd * weight)
            gradient_1 = data * 2 * weight_0 * rescale_grad
            gradient_1 = np.clip(gradient_1, -clip_gradient, clip_gradient)
            weight_1 = weight_0 - base_lr * (lr_factor**2) * (gradient_1 +
                                                              wd * weight_0)
            return weight_1

        # As in Example #1, `helper` runs opt_sym for two steps and compares
        # the result with the reference update_func.
        helper(opt_sym, inputs, params, update_func, 2, target, ctx)
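
The reference update here is plain SGD with the same rescale/clip preprocessing, weight decay, and the FactorScheduler decaying the learning rate by lr_factor on every step (read directly off update_func above):

\[
\begin{aligned}
g_t &= \operatorname{clip}\left(\text{rescale\_grad}\cdot\nabla_w f,\ \pm\,\text{clip\_gradient}\right),\\
w_t &= w_{t-1} - \text{base\_lr}\cdot\text{lr\_factor}^{\,t}\left(g_t + \text{wd}\cdot w_{t-1}\right).
\end{aligned}
\]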
Example #3
    }
    module.set_input(**new_params)
    # run
    module.run()
    # get output
    out = module.get_output(0, tvm.nd.empty(out_shape))

    # Print the first 10 elements of the output
    print("----------Output----------")
    print(out.asnumpy().flatten()[:10])

    base_lr = 0.1
    lr_factor = 0.5
    rescale_grad = 0.2
    wd = 0.1
    clip_gradient = 0.25

    scheduler = lr_scheduler.FactorScheduler(base_lr=base_lr,
                                             step=1,
                                             factor=lr_factor)
    opt = optimizer.SGD(learning_rate=base_lr,
                        lr_scheduler=scheduler,
                        rescale_grad=rescale_grad,
                        clip_gradient=clip_gradient,
                        wd=wd)
    # Squared error between the ground-truth labels and the network output,
    # minimized with respect to the dense layer's weight parameter.
    loss = tvm.ndarray.array((real_label - out.asnumpy().flatten())**2, ctx=ctx)
    opt_sym = opt.minimize(loss, var=params['dense0_weight'])
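
Unlike Examples #1 and #2, this snippet passes a concrete NDArray loss to minimize, whereas the tests above hand minimize an nnvm symbol it can differentiate. A minimal sketch of the symbolic form of this squared-error objective, assuming the same nnvm symbolic API as the tests (the names label and net_out are hypothetical placeholders, not from the original snippet):

import nnvm

# Hypothetical symbolic placeholders for the ground truth and network output.
label = nnvm.sym.Variable("label")
net_out = nnvm.sym.Variable("net_out")

# Elementwise squared error, kept symbolic so the optimizer can
# differentiate through it (same elemwise ops as in the tests above).
loss = nnvm.sym.elemwise_sub(label, net_out) ** 2

# Minimizing w.r.t. the dense layer's weight *symbol* (not the NDArray value):
# opt_sym = opt.minimize(loss, var=dense0_weight_sym)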