Example #1
def _gradient_test(w, instance, model, chosen_dims=None):
    r'''
    The gradient test, used to check that the gradient is correctly
    calculated. Details of this method are described in "Stochastic
    Gradient Descent Tricks" by Bottou, L.

    1. Pick an example z
    2. Compute the loss Q(z,w)
    3. Compute the gradient g = D_w Q(z,w)
    4. Apply a slight perturbation w' = w + \Delta
    5. Compute the new loss Q(z,w') and verify Q(z,w') ≈ Q(z,w) + g\Delta
    '''

    lossQ = _likelihood(w, instance, model)
    DlossQ = _dlikelihood(w, instance, model)

    build_instance(model.attrs, model.tags, instance, True)

    L = len(instance)
    T = len(model.tags)

    if not chosen_dims:
        U = instance.unigram_features_table
        B = instance.bigram_features_table
        # Collect candidate feature indices in a set so no index is
        # picked twice: duplicate indices would make the fancy indexing
        # below update w only once while grad_diff counts them twice.
        features = set()
        for i in range(L):
            for j in range(T):
                features.update(U[i, j].tolist())
                if i > 0:
                    for k in range(T):
                        features.update(B[k, j].tolist())
        chosen_dims = random.sample(sorted(features), 5)

    epsilon = 1e-4
    # First-order prediction of the loss change along the perturbation.
    grad_diff = epsilon * DlossQ[chosen_dims].sum()
    w[chosen_dims] += epsilon

    lossQ2 = _likelihood(w, instance, model)

    if abs(lossQ2 - (lossQ + grad_diff)) > 1e-7:
        LOG(WARN, "Failed gradient test.")
        LOG(WARN, "Perturbation on %s dims." % str(chosen_dims))
        LOG(WARN, "Loss before perturbation: %f" % lossQ)
        LOG(WARN, "Loss after perturbation: %f" % lossQ2)
        LOG(WARN, "Gradient difference: %f" % grad_diff)
    else:
        LOG(INFO, "Passed gradient test.")

    # Restore w to its original state.
    w[chosen_dims] -= epsilon
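
A minimal usage sketch (hypothetical setup: a trained `model` with weight vector `model.w` and one training `instance` from this module; everything outside the function above is an assumption):

# Check the analytic gradient on five random dimensions, then on a
# fixed set of dimensions for reproducibility.
_gradient_test(model.w, instance, model)
_gradient_test(model.w, instance, model, chosen_dims=[0, 1, 2])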
Example #2
def _dlikelihood(w, instance, model):
    '''
    Calculate the gradient of the log-likelihood for one instance

    - param[in] w           The weight vector
    - param[in] instance    The instance
    - param[in] model       The model
    '''
    grad = zeros(w.shape[0], dtype=float)

    L = len(instance)
    T = model.nr_tags
    A = model.nr_attrs

    build_instance(model.attrs, model.tags, instance, True)
    g0, g = build_score_cache(w, L, T, A, instance)

    # The gold-standard features contribute positively to the gradient.
    F = instance.correct_features
    for k, v in F.iteritems():
        grad[k] += v

    a = forward(g0, g, L, T)    # forward
    b = backward(g, L, T)       # backward

    logZ = logsumexp(a[L - 1, :])

    U = instance.unigram_features_table
    B = instance.bigram_features_table

    # Marginal probability of each tag at the first position.
    c = exp(g0 + b[0, :] - logZ).clip(0., 1.)
    for j in xrange(T):
        grad[U[0, j]] -= c[j]

    for i in xrange(1, L):
        # Marginal probability of each tag bigram (j, k) at position i.
        c = exp(add.outer(a[i - 1, :], b[i, :]) + g[i, :, :] - logZ).clip(0., 1.)
        # The following vectorized code is equivalent to:
        # for j in range(T):
        #     for k in range(T):
        #         grad[U[i, k]] -= c[j, k]
        #         grad[B[j, k]] -= c[j, k]
        for k in xrange(T):
            grad[U[i, k]] -= c[:, k].sum()
        # Bigram features occupy the contiguous block [A*T, (A+T)*T).
        grad[range(A * T, (A + T) * T)] -= c.flatten()

    return grad
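
The vectorized marginal above relies on the identity `add.outer(a, b)[j, k] == a[j] + b[k]`; a standalone numpy check with made-up values:

import numpy as np

a_prev = np.array([0.5, -1.0])   # forward scores at position i - 1
b_cur = np.array([0.25, 2.0])    # backward scores at position i
outer = np.add.outer(a_prev, b_cur)
assert outer.shape == (2, 2)
assert outer[1, 0] == a_prev[1] + b_cur[0]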
Example #3
def l2sgd(model,
          instances,
          nr_epoch,
          init_learning_rate,
          adjust_learning_rate=False):

    # _sigma, _t, _eta and samples are currently unused placeholders
    # (e.g. for the L2 penalty and learning-rate adjustment).
    _sigma = 1.
    _gamma = init_learning_rate
    _t = 1.

    _eta = 0.
    samples = random.sample(instances, min(int(len(instances) * 0.1), 1000))

    for epoch in xrange(nr_epoch):
        LOG(INFO, "Training epoch [%d]" % epoch)

        # The decay depends only on the epoch, so compute it once here.
        _gamma = init_learning_rate / (1 + sqrt(float(epoch)))

        # randomly shuffle the training instances
        random.shuffle(instances)

        # loop over the training instances
        for index, instance in enumerate(instances):
            # build the feature cache for this instance
            build_instance(model.attrs, model.tags, instance, True)

            # gradient step: subtract the model expectation and add the
            # gold-standard feature counts, both scaled by _gamma.
            for k, v in expectation(model, instance).iteritems():
                model.w[k] -= v * _gamma
            for k, v in instance.correct_features.iteritems():
                model.w[k] += v * _gamma

            _t += 1.

            if (index + 1) % 1000 == 0:
                LOG(INFO, "%d instances trained" % (index + 1))

            destroy_instance(instance)

        LOG(INFO, "%d instances trained" % (index + 1))
        LOG(INFO, "Parameters norm %f" % norm(model.w))
Example #4
def viterbi(model, instance):
    '''
    Decode the highest-scoring tag sequence for one instance with the
    Viterbi algorithm.
    '''
    L = len(instance)
    T = model.nr_tags
    A = model.nr_attrs

    build_instance(model.attrs, model.tags, instance, False)
    g0, g = build_score_cache(model.w, L, T, A, instance)
    destroy_instance(instance)

    # s[i, t]: best score of a path ending at position i with tag t;
    # p[i][t]: the backpointer, i.e. the best previous tag.
    s, p = argmax(g0, g, L, T)

    v, i = s[L - 1].argmax(), L - 1

    # Follow the backpointers from the last position to recover the path.
    ret = []
    while i >= 0:
        ret.append(v)
        v = p[i][v]
        i -= 1

    ret.reverse()
    return ret
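
A standalone sketch of the backtrace loop on a toy three-position lattice (scores and backpointers are made up; the `p[0]` entries are never read):

import numpy as np

s_last = np.array([0.1, 0.7, 0.2])   # final-position scores per tag
p = [[-1, -1, -1],                   # backpointers; p[0] is unused
     [2, 0, 1],
     [1, 1, 0]]
v, i = s_last.argmax(), 2
ret = []
while i >= 0:
    ret.append(v)
    v = p[i][v]
    i -= 1
ret.reverse()
print(ret)   # [0, 1, 1]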
Example #5
def _likelihood(w, instance, model):
    '''
    Calculate the log-likelihood of one instance

    - param[in] w           The weight vector
    - param[in] instance    The instance
    - param[in] model       The model
    '''
    L = len(instance)
    T = model.nr_tags
    A = model.nr_attrs

    # Fill the correct_features and features_table
    build_instance(model.attrs, model.tags, instance, True)
    g0, g = build_score_cache(w, L, T, A, instance)

    # calculate the score of the gold-standard path
    F = instance.correct_features
    ret = array([w[k] * v for k, v in F.iteritems()]).sum()

    # calculate the log partition function with the forward algorithm
    a = forward(g0, g, L, T)

    return ret - logsumexp(a[L - 1, :])
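
`logsumexp` computes log(sum(exp(x))) without overflow; a minimal standalone equivalent (scipy ships the same operation as `scipy.special.logsumexp`):

import numpy as np

def logsumexp_sketch(x):
    # Shift by the maximum so the largest exponent is exp(0) = 1.
    m = x.max()
    return m + np.log(np.exp(x - m).sum())

print(logsumexp_sketch(np.array([1000.0, 1000.0])))   # 1000.6931..., no overflow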