Example #1
            d_alphas[i] = np.dot(d_x, V.val)
            X.sub(alpha * V.val)  # Reverse position update
            g = L_grad(X.val, meta, i)  # Evaluate gradient
            V.add((1.0 - beta) * g).div(beta)  # Reverse momentum update
            d_v += d_x * alpha                   # Position update was x += alpha*v
            d_betas[i] = np.dot(d_v, V.val + g)  # dv_new/dbeta = v + g
            d_x -= (1.0 - beta) * L_hvp_x(X.val, meta, d_v, i)        # g's dependence on x
            d_meta -= (1.0 - beta) * L_hvp_meta(X.val, meta, d_v, i)  # g's dependence on meta
            d_v *= beta                          # dv_new/dv_old = beta
        assert np.all(ExactRep(x0).val == X.val)
        return d_x, d_alphas, d_betas, d_meta

    return x_final, [None, hypergrad]


sgd4 = Differentiable(sgd4, partial(sgd4, forward_pass_only=False))
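
# The reverse pass above leans on ExactRep: X and V are stored in a
# fixed-point representation whose add/sub/mul/div can be undone bit for bit,
# which is why the assert can demand exact recovery of x0 after unwinding
# every update. A toy single-value sketch of the trick (hypothetical; the
# real ExactRep in the hypergrad codebase works on arrays and rational
# multipliers), showing how a remainder stack makes multiplication exactly
# invertible:
class ExactRepSketch(object):
    def __init__(self, val, scale=2**32):
        self.scale = scale
        self.intrep = int(round(val * scale))  # fixed-point integer store
        self.aux = []                          # remainders lost to flooring
    def mul(self, n, d):
        # Multiply by the rational n/d, keeping the integer remainder so the
        # flooring division can be undone later.
        self.intrep, r = divmod(self.intrep * n, d)
        self.aux.append(r)
        return self
    def div(self, n, d):
        # Exact inverse of mul(n, d): intrep*d + r reconstructs the product.
        self.intrep = (self.intrep * d + self.aux.pop()) // n
        return self
    @property
    def val(self):
        return self.intrep / float(self.scale)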

def sgd4_mad(L_grad, hypers, callback=None, forward_pass_only=True):

    x0, alphas, gammas, meta = hypers
    N_safe_sampling = len(alphas)
    x_init = np.copy(x0)
    x_current = np.copy(x0)
    global v_current
    v_current = np.zeros(x0.size)
    X, V = ExactRep(x0), ExactRep(np.zeros(x0.size))
    iters = zip(range(len(alphas)), alphas, gammas)
    for i, alpha, gamma in iters:
        g = L_grad(X.val, meta, i)
        if callback: callback(X.val, V.val, g, i)
        V.mul(gamma).sub((1.0 - gamma) * g)
Example #2
        if any([x_size > y_size for x_size, y_size in zip(shapes[_X_]['conv'], shapes[_Y_]['conv'])]):
            new_mode = 'full'
        else:
            new_mode = 'valid'
    
        result = convolve(g, Y[flipped_idxs(Y.ndim, axes[_Y_]['conv'])],
                          axes     = [axes['out']['conv'],   axes[_Y_]['conv']],
                          dot_axes = [axes['out'][ignore_Y], axes[_Y_]['ignore']],
                          mode     = new_mode)
        new_order = np.argsort(axes[_X_]['ignore'] + axes[_X_]['dot'] + axes[_X_]['conv'])
        return np.transpose(result, new_order)
    
    
    return einsum_tensordot(A_view, B_view, all_axes), [None, grad_convolve]
    
convolve = Differentiable(convolve, partial(convolve, forward_pass_only=False))
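
# Differentiable pairs a plain forward function with a variant that also
# returns per-argument gradient functions; note the returns above have the
# shape (answer, [None, grad_fn]), i.e. no gradient for argument 0 and a
# gradient function for argument 1. A hypothetical minimal wrapper with that
# calling convention (the real Differentiable integrates with the tracing
# machinery in the hypergrad/funkyyak codebase):
class DifferentiableSketch(object):
    def __init__(self, fwd, fwd_with_grads):
        self.fwd = fwd                        # value only
        self.fwd_with_grads = fwd_with_grads  # (value, per-argument grad fns)
    def __call__(self, *args, **kwargs):
        return self.fwd(*args, **kwargs)
    def value_and_gradfuns(self, *args, **kwargs):
        return self.fwd_with_grads(*args, **kwargs)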

def einsum_tensordot(A, B, axes, reverse=False):
    # Does tensor dot product using einsum, which shouldn't require a copy.
    A_axnums = list(range(A.ndim))
    B_axnums = list(range(A.ndim, A.ndim + B.ndim))
    sum_axnum = A.ndim + B.ndim
    for i_sum, (i_A, i_B) in enumerate(zip(*axes)):
        A_axnums[i_A] = sum_axnum + i_sum
        B_axnums[i_B] = sum_axnum + i_sum
    return np.einsum(A, A_axnums, B, B_axnums)
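
# Usage sketch for einsum_tensordot (the *_example names are illustrative,
# not from the original code): it should agree with np.tensordot given the
# same axes specification.
A_example = np.random.randn(2, 3, 4)
B_example = np.random.randn(4, 5)
assert np.allclose(einsum_tensordot(A_example, B_example, axes=[(2,), (0,)]),
                   np.tensordot(A_example, B_example, axes=[(2,), (0,)]))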

def pad_to_full(A, B, axes):
    # Zero-pad each convolution axis of A by B's extent minus one on both
    # sides, so a 'valid' convolution of the result with B covers the same
    # outputs as a 'full' convolution of the originals.
    A_pad = [(0, 0)] * A.ndim
    for ax_A, ax_B in zip(*axes):
        A_pad[ax_A] = (B.shape[ax_B] - 1, B.shape[ax_B] - 1)
    return np.pad(A, A_pad, mode='constant')
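
# A quick 1-D check of pad_to_full's intended behaviour (the *_example names
# are illustrative only): padding A up front turns a 'full' convolution into
# a 'valid' one.
a_example = np.array([1.0, 2.0, 3.0])
b_example = np.array([1.0, -1.0])
a_padded = pad_to_full(a_example, b_example, axes=[(0,), (0,)])
assert np.allclose(np.convolve(a_padded, b_example, mode='valid'),
                   np.convolve(a_example, b_example, mode='full'))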
Example #3
            for j, (_, (ixs, _)) in enumerate(parser.idxs_and_shapes.items()):
                d_betas[i, j] = np.dot(d_v[ixs], V.val[ixs])

            d_x -= L_hvp_x(X.val, meta, d_v, i)
            d_meta -= L_hvp_meta(X.val, meta, d_v, i)
            d_v *= cur_beta_vect

        assert np.all(ExactRep(x0).val == X.val)
        return d_x, d_alphas, d_betas, d_meta

    return x_final, [None, hypergrad]


sgd_parsed = Differentiable(sgd_parsed,
                            partial(sgd_parsed, forward_pass_only=False))
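
# sgd_parsed above indexes per-group slices of the flat parameter vector via
# parser.idxs_and_shapes, a mapping from group name to (index slice, shape);
# the j-th column of d_betas follows the parser's iteration order. A minimal
# hypothetical sketch of such a parser (the real WeightsParser lives in the
# hypergrad codebase):
class WeightsParserSketch(object):
    def __init__(self):
        self.idxs_and_shapes = {}  # name -> (slice into flat vector, shape)
        self.N = 0                 # total number of weights added so far
    def add_weights(self, name, shape):
        size = int(np.prod(shape))
        self.idxs_and_shapes[name] = (slice(self.N, self.N + size), shape)
        self.N += size
    def get(self, vect, name):
        idxs, shape = self.idxs_and_shapes[name]
        return np.reshape(vect[idxs], shape)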


def adam(grad,
         x,
         callback=None,
         num_iters=100,
         step_size=0.1,
         b1=0.1,
         b2=0.01,
         eps=10**-4,
         lam=10**-4):
    m = np.zeros(len(x))
    v = np.zeros(len(x))
    for i in range(num_iters):
        b1t = 1 - (1 - b1) * (lam**i)
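        # The b1t schedule appears to match the decayed first-moment
        # coefficient from early versions of the Adam paper
        # (beta1_t = beta1 * lam**t), with b1 here playing the role of
        # 1 - beta1: the weight on the previous average,
        # 1 - b1t = (1 - b1) * lam**i, shrinks geometrically in i. With the
        # defaults above, i = 0 gives b1t = b1 = 0.1 and i = 1 gives
        # b1t = 1 - 0.9 * 10**-4 = 0.99991.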
Example #4


def sgd_meta_only(L_grad,
                  meta,
                  x0,
                  alpha,
                  beta,
                  N_iters,
                  callback=None,
                  forward_pass_only=True):
    X, V = ExactRep(x0), ExactRep(np.zeros(x0.size))
    for i in range(N_iters):
        g = L_grad(X.val, meta, i, record_results=True)
        if callback: callback(X.val, V.val, g, i)
        V.mul(beta).sub((1.0 - beta) * g)