Example #1
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models """

    # Implement the cost and gradients for one predicted word vector
    # and one target word vector as a building block for word2vec
    # models, using the negative sampling technique. K is the sample
    # size. You might want to use dataset.sampleTokenIdx() to sample
    # a random word index.
    #
    # Note: See test_word2vec below for dataset's initialization.
    #
    # Input/Output Specifications: same as softmaxCostAndGradient

    sampleIndices = [sampleTokenIdxNoTarget(target, dataset) for _ in range(K)]

    grad = np.zeros(outputVectors.shape)
    gradPred = np.zeros(predicted.shape)
    cost = 0.0

    sig_positive = sigmoid(np.dot(outputVectors[target], predicted))
    cost += -np.log(sig_positive)
    gradPred += (sig_positive - 1) * outputVectors[target]
    grad[target] += (sig_positive - 1) * predicted

    for idx in sampleIndices:
        sig_neg = sigmoid(-np.dot(outputVectors[idx], predicted))
        cost -= np.log(sig_neg)
        gradPred += (1 - sig_neg) * outputVectors[idx]
        grad[idx] += (1 - sig_neg) * predicted

    return cost, gradPred, grad
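All of the implementations on this page compute the same quantity. For a center ("predicted") vector v_c, a target output vector u_o, and K sampled output vectors u_1, ..., u_K, the negative-sampling cost and gradients are

J = -\log \sigma(u_o^\top v_c) - \sum_{k=1}^{K} \log \sigma(-u_k^\top v_c)

\frac{\partial J}{\partial v_c} = \left(\sigma(u_o^\top v_c) - 1\right) u_o + \sum_{k=1}^{K} \left(1 - \sigma(-u_k^\top v_c)\right) u_k

\frac{\partial J}{\partial u_o} = \left(\sigma(u_o^\top v_c) - 1\right) v_c, \qquad \frac{\partial J}{\partial u_k} = \left(1 - \sigma(-u_k^\top v_c)\right) v_c \quad (k = 1, \dots, K)

Example #1 also calls a sampleTokenIdxNoTarget helper that is not shown in the snippet. A minimal sketch of what it presumably does (re-sampling until the index differs from the target, as later examples do inline) follows; the body is an assumption, not the original helper:

def sampleTokenIdxNoTarget(target, dataset):
    # Hypothetical helper (not part of the snippet above): draw a random token
    # index, re-sampling until it differs from the target index.
    idx = dataset.sampleTokenIdx()
    while idx == target:
        idx = dataset.sampleTokenIdx()
    return idx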
Example #2
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models """

    # Implement the cost and gradients for one predicted word vector
    # and one target word vector as a building block for word2vec
    # models, using the negative sampling technique. K is the sample
    # size. You might want to use dataset.sampleTokenIdx() to sample
    # a random word index.
    #
    # Note: See test_word2vec below for dataset's initialization.
    #
    # Input/Output Specifications: same as softmaxCostAndGradient

    ### YOUR CODE HERE

    y = sigmoid(outputVectors[target].dot(predicted))
    cost = -np.log(y)
    gradPred = (y - 1) * outputVectors[target]
    grad = np.zeros(outputVectors.shape)
    grad[target] = (y - 1) * predicted
    for i in range(K):
        index = dataset.sampleTokenIdx()
        while index == target:
            index = dataset.sampleTokenIdx()
        y_k = sigmoid(np.dot(-outputVectors[index], predicted))
        cost -= np.log(y_k)
        gradPred += (1 - y_k) * outputVectors[index]
        grad[index] += (1 - y_k) * predicted
    # raise NotImplementedError
    ### END YOUR CODE
    return cost, gradPred, grad
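This version only needs NumPy, a sigmoid, and an object exposing sampleTokenIdx(), so it can be exercised with a tiny stand-in dataset. The harness below is a sketch: ToyDataset, the vocabulary size, and the vector dimensions are made up for illustration and are not part of the original code.

import random
import numpy as np

def sigmoid(x):
    # Logistic function, as assumed by the snippets on this page.
    return 1.0 / (1.0 + np.exp(-x))

class ToyDataset:
    # Hypothetical stand-in for the assignment's dataset object; it only needs
    # to expose sampleTokenIdx(), which returns a random word index.
    def __init__(self, vocab_size):
        self.vocab_size = vocab_size

    def sampleTokenIdx(self):
        return random.randint(0, self.vocab_size - 1)

vocab_size, dim = 5, 3
dataset = ToyDataset(vocab_size)
outputVectors = np.random.randn(vocab_size, dim)  # one output vector per word (rows)
predicted = np.random.randn(dim)                  # the center word vector v_c
target = 2                                        # index of the true context word

cost, gradPred, grad = negSamplingCostAndGradient(predicted, target,
                                                  outputVectors, dataset, K=10)
print(cost, gradPred.shape, grad.shape)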
Example #3
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models """

    # Implement the cost and gradients for one predicted word vector
    # and one target word vector as a building block for word2vec
    # models, using the negative sampling technique. K is the sample
    # size. You might want to use dataset.sampleTokenIdx() to sample
    # a random word index.
    #
    # Note: See test_word2vec below for dataset's initialization.
    #
    # Input/Output Specifications: same as softmaxCostAndGradient

    ### YOUR CODE HERE
    # generate the sampled indices
    #    index=[dataset.sampleTokenIdx() for k in range(K)]
    #    out_vec=outputVectors[target,:]
    #    in_vec=predicted
    #    sigma=sigmoid(np.dot(out_vec,in_vec))  # replace the softmax with the sigmoid function
    #    cost=-np.log(sigma)
    #    gradPred=out_vec*(sigma-1)
    #    grad=np.zeros(outputVectors.shape)
    #
    #    for i in range(K):
    #        k_vec=outputVectors[index[i],:]
    #        sigma2=sigmoid(-np.dot(k_vec,in_vec))
    #        cost=cost-np.log(sigma2)
    #        gradPred=gradPred+k_vec*(1-sigma2)
    #        grad[index[i]]+=in_vec*(1-sigma2)
    #
    #    grad[target,:]=grad[target,:]+in_vec*(sigma-1)

    index = [dataset.sampleTokenIdx() for k in range(K)]
    u_o = outputVectors[target, :]
    v_c = predicted
    u_k = outputVectors[index, :]

    sigma1 = sigmoid(np.dot(u_o, v_c))
    sigma2 = sigmoid(-np.dot(u_k, v_c))
    grad = np.zeros(outputVectors.shape)
    cost = -np.log(sigma1) - np.sum(np.log(sigma2))
    gradPred = u_o * (sigma1 - 1) + np.dot((1 - sigma2).T, u_k)
    temp = np.outer(1 - sigma2, v_c)
    for i in range(K):
        grad[index[i], :] += temp[i]
    grad[target, :] += v_c * (sigma1 - 1)
    ### END YOUR CODE

    return cost, gradPred, grad
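Because the cost depends on which indices happen to be sampled, a finite-difference check of gradPred only makes sense when sampleTokenIdx() returns the same K indices on every call. The sketch below assumes exactly that: CyclicDataset, the sizes, and the fixed index list are illustrative stand-ins, and negSamplingCostAndGradient refers to the vectorized implementation above.

from itertools import cycle
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

class CyclicDataset:
    # Deterministic stand-in: cycles through a fixed list of indices so that
    # every call to negSamplingCostAndGradient draws the same K samples.
    def __init__(self, indices):
        self._it = cycle(indices)

    def sampleTokenIdx(self):
        return next(self._it)

np.random.seed(0)
K = 5
dataset = CyclicDataset([1, 3, 4, 1, 0])  # length K, so the samples repeat each call
outputVectors = np.random.randn(6, 4)
predicted = np.random.randn(4)
target = 2

cost, gradPred, _ = negSamplingCostAndGradient(predicted, target,
                                               outputVectors, dataset, K=K)

# Central-difference approximation of d(cost)/d(predicted).
eps = 1e-6
numGrad = np.zeros_like(predicted)
for i in range(len(predicted)):
    step = np.zeros_like(predicted)
    step[i] = eps
    costPlus, _, _ = negSamplingCostAndGradient(predicted + step, target,
                                                outputVectors, dataset, K=K)
    costMinus, _, _ = negSamplingCostAndGradient(predicted - step, target,
                                                 outputVectors, dataset, K=K)
    numGrad[i] = (costPlus - costMinus) / (2 * eps)

print(np.max(np.abs(numGrad - gradPred)))  # should be very small if the analytic gradient is correct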
Example #4
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models """

    # Implement the cost and gradients for one predicted word vector
    # and one target word vector as a building block for word2vec
    # models, using the negative sampling technique. K is the sample
    # size. You might want to use dataset.sampleTokenIdx() to sample
    # a random word index.
    #
    # Note: See test_word2vec below for dataset's initialization.
    #
    # Input/Output Specifications: same as softmaxCostAndGradient

    ### YOUR CODE HERE

    # initialize gradients for the output (negative-sample) vectors and the predicted vector
    grad = np.zeros(outputVectors.shape)
    gradPred = np.zeros(predicted.shape)

    # precompute the positive-sample sigmoid and its complement
    s = sigmoid(predicted.dot(outputVectors[target, :]))
    t = 1 - s

    # initialize the cost and add the positive-sample contribution to the gradients
    cost = -np.log(s)  # positive term: -log(sigmoid(u_o . v_c))
    gradPred -= t * outputVectors[target, :]
    grad[target, :] -= t * predicted

    # sample K times for neg samples
    for k in range(K):
        neg = dataset.sampleTokenIdx()

        # here 1 - Sigmoid(x) = Sigmoid(-x)
        s = sigmoid(-predicted.dot(outputVectors[neg, :]))
        t = 1 - s
        cost += -np.log(s)  # negative term: -log(sigmoid(-u_k . v_c)), since the label y_k = 0

        gradPred += t * outputVectors[neg, :]
        # only the rows of grad for the sampled (negative) words are updated
        grad[neg, :] += t * predicted
    ### END YOUR CODE

    return cost, gradPred, grad
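The identity used in the comment above ("1 - Sigmoid(x) = Sigmoid(-x)") follows directly from the definition of the logistic function:

\sigma(-x) = \frac{1}{1 + e^{x}} = \frac{e^{-x}}{e^{-x} + 1} = 1 - \frac{1}{1 + e^{-x}} = 1 - \sigma(x)

This is why the negative-sample term can be written either as -log(1 - sigmoid(u_k . v_c)) or, as in the code, as -log(sigmoid(-u_k . v_c)).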
Example #5
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models """

    # Implement the cost and gradients for one predicted word vector
    # and one target word vector as a building block for word2vec
    # models, using the negative sampling technique. K is the sample
    # size. You might want to use dataset.sampleTokenIdx() to sample
    # a random word index.
    #
    # Note: See test_word2vec below for dataset's initialization.
    #
    # Input/Output Specifications: same as softmaxCostAndGradient

    ### YOUR CODE HERE
    grad = np.zeros(outputVectors.shape)
    gradPred = np.zeros(predicted.shape)
    sample_indices = []
    for k in range(K):
        index = dataset.sampleTokenIdx()
        while index == target:
            index = dataset.sampleTokenIdx()
        sample_indices.append(index)
    y_predict = sigmoid(outputVectors[target, :].dot(predicted))
    outputVectors_k = outputVectors[sample_indices, :]
    y_predict_negative = sigmoid(-outputVectors_k.dot(predicted))
    cost = -np.log(y_predict) - np.sum(np.log(y_predict_negative))
    sum_k = (y_predict_negative - 1).dot(outputVectors_k)
    gradPred = (y_predict - 1) * outputVectors[target, :] - sum_k

    grad_neg_out = -np.outer(y_predict_negative - 1, predicted)

    for k in range(K):
        grad[sample_indices[k]] += grad_neg_out[k]
    grad[target, :] += (y_predict - 1) * predicted
    #raise NotImplementedError
    ### END YOUR CODE

    return cost, gradPred, grad
Example #6
def negSamplingCostAndGradient(predicted, target, outputVectors, dataset, 
    K=10):
    """ Negative sampling cost function for word2vec models """

    # Implement the cost and gradients for one predicted word vector  
    # and one target word vector as a building block for word2vec     
    # models, using the negative sampling technique. K is the sample  
    # size. You might want to use dataset.sampleTokenIdx() to sample  
    # a random word index. 
    # 
    # Note: See test_word2vec below for dataset's initialization.
    #                                       
    # Input/Output Specifications: same as softmaxCostAndGradient
    
    ### YOUR CODE HERE
    #raise NotImplementedError
    expected = outputVectors[target,:] #u_o
    grad = np.zeros(outputVectors.shape)
    tmp = sigmoid(np.dot(expected, predicted))
    cost = -np.log(tmp)
    gradPred = -(1 - tmp) * expected
    grad[target,:] = -(1 - tmp) * predicted
    #negative sampling
    cnt = 0
    while True:
        idx = dataset.sampleTokenIdx()
        if idx == target:
            continue
        cnt += 1
        tmp = sigmoid(-np.dot(outputVectors[idx,:], predicted))
        cost -= np.log(tmp)
        gradPred += (1 - tmp) * outputVectors[idx,:]
        grad[idx,:] += (1 - tmp) * predicted
        if cnt == K:
            break
    ### END YOUR CODE
    
    return cost, gradPred, grad
Example #7
def negSamplingCostAndGradient(predicted, target, outputVectors, dataset, 
    K=10):
    """ Negative sampling cost function for word2vec models """

    # Implement the cost and gradients for one predicted word vector  
    # and one target word vector as a building block for word2vec     
    # models, using the negative sampling technique. K is the sample  
    # size. You might want to use dataset.sampleTokenIdx() to sample  
    # a random word index. 
    # 
    # Note: See test_word2vec below for dataset's initialization.
    #                                       
    # Input/Output Specifications: same as softmaxCostAndGradient
    
    ### YOUR CODE HERE

    grad = np.zeros(outputVectors.shape)
    gradPred = np.zeros(predicted.shape)

    indices = [target]
    for k in range(K):
        newindex = dataset.sampleTokenIdx()
        while newindex == target:
            newindex = dataset.sampleTokenIdx()
        indices += [newindex]

    labels = np.array([1] + [-1 for k in range(K)])
    vecs = outputVectors[indices, :]

    t = sigmoid(vecs.dot(predicted) * labels)
    cost = -np.sum(np.log(t))

    delta = labels * (t - 1)
    gradPred = delta.reshape((1, K + 1)).dot(vecs).flatten()
    gradtemp = delta.reshape((K + 1, 1)).dot(predicted.reshape(
        (1, predicted.shape[0])))
    for k in range(K + 1):
        grad[indices[k]] += gradtemp[k, :]

    ##raise NotImplementedError
    ### END YOUR CODE
    
    return cost, gradPred, grad
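The labels trick in Example #7 works because attaching a sign label to each row (+1 for the target, -1 for every negative sample) lets the positive and negative terms share a single formula:

J = -\sum_{j=0}^{K} \log \sigma\!\left(\ell_j\, u_j^{\top} v_c\right), \qquad \frac{\partial J}{\partial v_c} = \sum_{j=0}^{K} \ell_j \left(\sigma(\ell_j u_j^{\top} v_c) - 1\right) u_j, \qquad \frac{\partial J}{\partial u_j} = \ell_j \left(\sigma(\ell_j u_j^{\top} v_c) - 1\right) v_c

which is exactly what the delta = labels * (t - 1) line implements.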
Example #8
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models """

    # Implement the cost and gradients for one predicted word vector
    # and one target word vector as a building block for word2vec
    # models, using the negative sampling technique. K is the sample
    # size. You might want to use dataset.sampleTokenIdx() to sample
    # a random word index.
    #
    # Note: See test_word2vec below for dataset's initialization.
    #
    # Input/Output Specifications: same as softmaxCostAndGradient

    # YOUR CODE HERE
    gradPred = np.zeros(predicted.shape)
    grad = np.zeros(outputVectors.shape)

    indices = [target]
    for k in range(K):
        new_index = dataset.sampleTokenIdx()
        while new_index == target:
            new_index = dataset.sampleTokenIdx()
        indices += [new_index]
    # print(indices)
    sampling_labels = np.array([1] + [-1 for k in range(K)])
    # print(sampling_labels)
    vec_out = outputVectors[indices, :]
    # print(vec_out)

    likelihood = sigmoid(vec_out.dot(predicted) * sampling_labels)
    difference = sampling_labels * (likelihood - 1)
    gradPred = np.dot(difference.reshape((1, K + 1)), vec_out)
    gradPred = gradPred.flatten()
    tokens_len = predicted.shape[0]
    gradtemp = np.dot(difference.reshape((K + 1, 1)),
                      predicted.reshape((1, tokens_len)))
    cost = -np.sum(np.log(likelihood))

    for k in range(K + 1):
        grad[indices[k]] += gradtemp[k, :]

    # raise NotImplementedError

    # END YOUR CODE

    return cost, gradPred, grad
Example #9
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models """

    # Implement the cost and gradients for one predicted word vector
    # and one target word vector as a building block for word2vec
    # models, using the negative sampling technique. K is the sample
    # size. You might want to use dataset.sampleTokenIdx() to sample
    # a random word index.
    #
    # Note: See test_word2vec below for dataset's initialization.
    #
    # Input/Output Specifications: same as softmaxCostAndGradient

    ### YOUR CODE HERE
    # raise NotImplementedError

    indices = [dataset.sampleTokenIdx() for k in range(K)]  # generate sampled indices

    u_o = outputVectors[target, :]  # "target" is the index of the output word o; note the vectors are stored as rows
    v_c = predicted

    ### Method 1: a slow and inefficient approach using an explicit "for" loop
    ### average time: 0.000195302985752s
    # sigma1 = sigmoid(np.dot(u_o, v_c))
    # cost = -np.log(sigma1)                   # neg-sample cost
    # gradPred = u_o * (sigma1 - 1)            # the gradient with respect to v_c
    # grad = np.zeros(outputVectors.shape)     # initialize grad
    #
    # for i in range(K):
    #     u_k = outputVectors[indices[i], :]
    #     sigma2 = sigmoid(-np.dot(u_k, v_c))
    #     cost = cost - np.log(sigma2)
    #     gradPred = gradPred + u_k * (1 - sigma2)            # the gradient with respect to v_c
    #     grad[indices[i]] += v_c * (1 - sigma2)              # the gradient with respect to u_k (and k!=o)
    #
    # grad[target, :] = grad[target, :] + v_c * (sigma1 - 1)          # pay attention to the grad of target word o

    u_k = outputVectors[indices, :]

    sigma1 = sigmoid(np.dot(u_o, v_c))
    sigma2 = sigmoid(-np.dot(u_k, v_c))
    grad = np.zeros(outputVectors.shape)

    cost = -np.log(sigma1) - np.sum(np.log(sigma2))  # neg-sample cost
    gradPred = u_o * (sigma1 - 1) + np.dot((1 - sigma2).T, u_k)  # the gradient with respect to v_c

    ### Method 2: using matrix operations to replace part of the "for" loop
    ### average time: 0.000179892113867s
    ### Keep going! It only saved about 10% of the time!
    # for i in range(K):
    #     grad[indices[i], :] += v_c * (1 - sigma2)[i]

    ### Method 3: using np.outer!
    ### average time: 0.000135194817627s
    ### Amazing! It saves about 30% of the time!
    temp = np.outer(1 - sigma2, v_c)
    for i in range(K):
        grad[indices[i], :] += temp[i]

    ### Method 4: I tried to avoid the "for" loop entirely, but failed. :(
    ### Because indices can contain repeated values, a fancy-indexed += updates each duplicated row only once.
    # grad[indices, :] += np.tile(v_c, [len(sigma2), 1]) * (1 - sigma2)[:, None]

    grad[target, :] += v_c * (sigma1 - 1)  # pay attention to the grad of the target word o

    ### END YOUR CODE

    return cost, gradPred, grad
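Regarding Method 4 above: plain fancy-indexed assignment (grad[indices, :] += ...) applies each repeated index only once, which is why it fails when the sampled indices contain duplicates. NumPy's np.add.at performs unbuffered in-place addition and does accumulate repeated indices, so the remaining "for" loop could be replaced by a single call. This is a sketch using the same temp, indices, and grad names as in the function above:

    # np.add.at accumulates row contributions even when `indices` has duplicates,
    # unlike grad[indices, :] += temp, which applies each duplicated row only once.
    np.add.at(grad, indices, temp)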