def negSamplingCostAndGradient(predicted, target, outputVectors, dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models

    Implement the cost and gradients for one predicted word vector
    and one target word vector as a building block for word2vec
    models, using the negative sampling technique. K is the sample
    size.

    Note: See test_word2vec below for dataset's initialization.

    Arguments/Return Specifications: same as softmaxCostAndGradient
    """

    # Sampling of indices is done for you. Do not modify this if you
    # wish to match the autograder and receive points!
    indices = [target]
    indices.extend(getNegativeSamples(target, dataset, K))

    ### YOUR CODE HERE
    """
    Arguments:
    predicted -- v_c
    target -- o in the notations
    outputVectors -- all the output vectors u_w, stored as rows (i.e. U transposed relative to the notes)
    dataset -- used above for the negative sampling; not needed inside this block.

    Return:
    cost -- neg-sampling cost
    gradPred -- dJ/dv_c
    grad -- dJ/dU
    """
    target_pred_dot_sig = sigmoid(np.dot(outputVectors[indices[0]], predicted)) # s(u_o^T * v_c)
    sample_pred_dot_sig = sigmoid(-np.dot(outputVectors[indices[1:]], predicted)) # s(u_k^T * v_c) as whole matrix
    log_part = np.log(target_pred_dot_sig)
    sum_part = np.sum(np.log(sample_pred_dot_sig))
    cost = - log_part - sum_part
    
    ## dJ/dU: only the target row and the K sampled rows are non-zero
    probs = outputVectors.dot(predicted)
    grad = np.zeros_like(outputVectors)
    grad[target] = (sigmoid(probs[target]) - 1) * predicted  # (s(u_o^T v_c) - 1) * v_c

    for k in indices[1:]:
        grad[k] += (1.0 - sigmoid(-probs[k])) * predicted  # s(u_k^T v_c) * v_c, accumulated in case a sample repeats
    
    ## -(1-s(u_o^T * v_c)) * u_o^T + sum_K[(1-s(-u_k^T * v_c)) * u_k^T]
    gradPred = -1 * (1 - target_pred_dot_sig) * outputVectors[indices[0]] \
               + np.sum((1 - sample_pred_dot_sig).reshape(-1,1) * outputVectors[indices[1:]], axis=0)
    ### END YOUR CODE

    return cost, gradPred, grad
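
For reference, every implementation collected here computes the same negative-sampling objective and gradients. In the notation used in the comments (v_c is predicted, u_o is the target row of outputVectors, u_k are the K sampled rows, and sigma is the sigmoid):

    J = -\log \sigma(u_o^\top v_c) - \sum_{k=1}^{K} \log \sigma(-u_k^\top v_c)

    \frac{\partial J}{\partial v_c} = (\sigma(u_o^\top v_c) - 1)\, u_o + \sum_{k=1}^{K} \sigma(u_k^\top v_c)\, u_k

    \frac{\partial J}{\partial u_o} = (\sigma(u_o^\top v_c) - 1)\, v_c, \qquad
    \frac{\partial J}{\partial u_k} = \sigma(u_k^\top v_c)\, v_c
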
Example #2
def your_sanity_checks():
    """
    Use this space to add any additional sanity checks by running:
        python q2_gradcheck.py
    This function will not be called by the autograder, nor will
    your additional tests be graded.
    """
    print "Running your sanity checks..."
    from q1d_sigmoid import sigmoid, sigmoid_grad
    sig_f = lambda x: (sigmoid(x), sigmoid_grad(sigmoid(x)))
    gradcheck_naive(sig_f, np.random.randn(1))
    gradcheck_naive(sig_f, np.random.randn(3, ))  # 1-D test
    gradcheck_naive(sig_f, np.random.randn(4, 5))  # 2-D test
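
The same checker can be pointed at negSamplingCostAndGradient itself by freezing everything except the predicted vector. The sketch below is only illustrative: DummyDataset and the seeds are made up, it assumes this lives in the same module as negSamplingCostAndGradient, gradcheck_naive and numpy (np), and it relies on gradcheck_naive restoring the random state before each evaluation, as the q2_gradcheck starter does.

def check_neg_sampling_wrt_predicted():
    import random
    random.seed(31415)
    np.random.seed(9265)

    class DummyDataset(object):
        # getNegativeSamples only needs sampleTokenIdx
        def sampleTokenIdx(self):
            return random.randint(0, 4)

    outputVectors = np.random.randn(5, 3)
    target = 1
    # Check only dJ/dv_c (gradPred); outputVectors stays fixed. With a
    # 5-word vocabulary the K=10 samples necessarily repeat, which also
    # exercises the += accumulation in grad.
    f = lambda vec: negSamplingCostAndGradient(vec, target, outputVectors,
                                               DummyDataset())[:2]
    gradcheck_naive(f, np.random.randn(3))
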
Example #3
def negSamplingCostAndGradient(predicted, target, outputVectors, dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models

    Implement the cost and gradients for one predicted word vector
    and one target word vector as a building block for word2vec
    models, using the negative sampling technique. K is the sample
    size.

    Note: See test_word2vec below for dataset's initialization.

    Arguments/Return Specifications: same as softmaxCostAndGradient
    """

    # Sampling of indices is done for you. Do not modify this if you
    # wish to match the autograder and receive points!
    indices = [target]
    indices.extend(getNegativeSamples(target, dataset, K))

    grad = np.zeros(outputVectors.shape)
    gradPred = np.zeros(predicted.shape)

    # outputVectors >> U
    # predicted     >> v_c
    # target        >> o

    v_c = predicted
    U = outputVectors

    dot_prod = np.dot(U,v_c)
    sigmoid_out = sigmoid(dot_prod)
    sigmoid_out_neg = sigmoid(-dot_prod)

    grad[target] += v_c * (sigmoid_out[target]-1)
    gradPred += U[target] * (sigmoid_out[target]-1)

    cost = -np.log(sigmoid_out[target])

    for i in indices[1:]:
        cost -= np.log(sigmoid_out_neg[i])
        grad[i] += v_c * (1-sigmoid_out_neg[i])
        gradPred += U[i] * (1-sigmoid_out_neg[i])

    return cost, gradPred, grad
Example #4
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models

    Implement the cost and gradients for one predicted word vector
    and one target word vector as a building block for word2vec
    models, using the negative sampling technique. K is the sample
    size.

    Note: See test_word2vec below for dataset's initialization.

    Arguments/Return Specifications: same as softmaxCostAndGradient
    """

    # Sampling of indices is done for you. Do not modify this if you
    # wish to match the autograder and receive points!
    indices = [target]
    indices.extend(getNegativeSamples(target, dataset, K))

    ### YOUR CODE HERE
    grad = np.zeros(outputVectors.shape)  # V * d
    gradPred = np.zeros(predicted.shape)  # d size
    cost = 0
    # sigmoid(Uo dot Vc)
    sig_outer = sigmoid(np.dot(outputVectors[target], predicted))

    cost -= np.log(sig_outer)
    grad[target] = predicted * (sig_outer - 1.0)  # gradient w.r.t. u_o
    gradPred = outputVectors[target] * (sig_outer - 1.0)  # gradient w.r.t. v_c

    for sample in indices[1:]:
        # sigmoid(-Uk dot Vc)
        sig_val = sigmoid(-1.0 * np.dot(outputVectors[sample], predicted))
        cost -= np.log(sig_val)
        grad[sample] += (1.0 - sig_val) * predicted  # accumulate in case a sample index repeats
        gradPred += (1.0 - sig_val) * outputVectors[sample]
    ### END YOUR CODE

    return cost, gradPred, grad
Example #5
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models

    Implement the cost and gradients for one predicted word vector
    and one target word vector as a building block for word2vec
    models, using the negative sampling technique. K is the sample
    size.

    Note: See test_word2vec below for dataset's initialization.

    Arguments/Return Specifications: same as softmaxCostAndGradient
    """

    # Sampling of indices is done for you. Do not modify this if you
    # wish to match the autograder and receive points!
    indices = [target]
    indices.extend(getNegativeSamples(target, dataset, K))

    ### YOUR CODE HERE
    grad = np.zeros(outputVectors.shape)

    activation = sigmoid(np.dot(outputVectors[target], predicted))
    cost = -np.log(activation)
    gradPred = (activation - 1.) * outputVectors[target]
    grad[target] = (activation - 1.) * predicted

    for idx in range(1, K + 1):
        sample_idx = indices[idx]
        sample = outputVectors[sample_idx]
        activation = sigmoid(-np.dot(sample, predicted))
        cost -= np.log(activation)
        gradPred -= (activation - 1.) * sample
        grad[sample_idx] -= (activation - 1.) * predicted

    ### END YOUR CODE

    return cost, gradPred, grad
Example #6
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models

    Implement the cost and gradients for one predicted word vector
    and one target word vector as a building block for word2vec
    models, using the negative sampling technique. K is the sample
    size.

    Note: See test_word2vec below for dataset's initialization.

    Arguments/Return Specifications: same as softmaxCostAndGradient
    """

    # Sampling of indices is done for you. Do not modify this if you
    # wish to match the autograder and receive points!
    indices = [target]
    indices.extend(getNegativeSamples(target, dataset, K))

    ### YOUR CODE HERE
    grad = np.zeros(outputVectors.shape)
    gradPred = np.zeros(predicted.shape)
    cost = 0
    z = sigmoid(np.dot(outputVectors[target], predicted))

    cost -= np.log(z)
    grad[target] += predicted * (z - 1.0)
    gradPred += outputVectors[target] * (z - 1.0)

    for k in range(K):
        samp = indices[k + 1]
        z = sigmoid(np.dot(outputVectors[samp], predicted))
        cost -= np.log(1.0 - z)
        grad[samp] += predicted * z
        gradPred += outputVectors[samp] * z
    ### END YOUR CODE

    return cost, gradPred, grad
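
Example #6 above writes the negative-sample term as np.log(1.0 - z) with z = sigmoid(u_k^T v_c), whereas the other implementations use np.log(sigmoid(-u_k^T v_c)); the two are identical because sigmoid(-x) = 1 - sigmoid(x). A quick numerical check of that identity, assuming the standard logistic sigmoid from q1d_sigmoid:

x_check = np.linspace(-10.0, 10.0, 101)
assert np.allclose(sigmoid(-x_check), 1.0 - sigmoid(x_check))
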
Example #7
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models

    Implement the cost and gradients for one predicted word vector
    and one target word vector as a building block for word2vec
    models, using the negative sampling technique. K is the sample
    size.

    Note: See test_word2vec below for dataset's initialization.

    Arguments/Return Specifications: same as softmaxCostAndGradient
    """

    # Sampling of indices is done for you. Do not modify this if you
    # wish to match the autograder and receive points!
    indices = [target]
    indices.extend(getNegativeSamples(target, dataset, K))

    ### YOUR CODE HERE
    output_products_vector = np.dot(
        outputVectors, predicted)  # vector of uoT * Vc for o = 1,2, ... ,W
    output_sigmoid_vector = sigmoid(
        output_products_vector)  # vector of sig(uoT * Vc) for o = 1,2, ... ,W
    output_minus_sigmoid_vector = 1 - output_sigmoid_vector

    cost = -np.log(output_sigmoid_vector[target]) - np.sum(
        np.log(output_minus_sigmoid_vector[indices[1:]]))
    # cost = -log(sig(uoT * Vc)) - sum_k[log(sig(-ukT * Vc))]

    grad_pred_max_part = -output_minus_sigmoid_vector[target] * outputVectors[
        target]  # -(1 - sig(uoT * Vc)) * uo
    grad_pred_neg_samp_part = outputVectors[
        indices[1:]] * output_sigmoid_vector[indices[
            1:]][:, np.newaxis]  # matrix with k rows of sig(ukT * Vc) * uk
    grad_pred_sum_neg_samp = np.sum(grad_pred_neg_samp_part,
                                    axis=0)  # sum the k vectors
    gradPred = grad_pred_max_part + grad_pred_sum_neg_samp

    grad = np.zeros(
        outputVectors.shape)  # only the uo and uk rows get non-zero gradients
    grad[target] = (output_sigmoid_vector[target] -
                    1) * predicted  # grad(uo) = (sig(uoT * Vc) - 1) * Vc
    for k in indices[1:]:  # K is small (~10 at most), so this loop stays cheap
        grad[k] += output_sigmoid_vector[
            k] * predicted  # grad(uk) = sig(ukT * Vc) * Vc

    ### END YOUR CODE

    return cost, gradPred, grad
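
The remaining loop over the sampled rows of grad can also be folded into a single scatter-add. The helper below is a hypothetical variant, not part of the assignment skeleton; it assumes numpy (np), the q1d sigmoid, and that indices is the [target] + negative-samples list built above. np.add.at is used so that an index drawn more than once still accumulates its full contribution.

def negSamplingVectorized(predicted, indices, outputVectors):
    U_sub = outputVectors[indices]                 # rows u_o, u_k1, ..., u_kK: (K+1) x d
    signs = np.ones(len(indices))
    signs[1:] = -1.0                               # negate the scores of the negative samples
    probs = sigmoid(signs * U_sub.dot(predicted))  # sig(uoT*Vc), sig(-ukT*Vc), ...
    cost = -np.sum(np.log(probs))
    coeffs = (probs - 1.0) * signs                 # d(-log sig(s*x))/dx for each row
    gradPred = U_sub.T.dot(coeffs)                 # dJ/dVc
    grad = np.zeros_like(outputVectors)
    np.add.at(grad, indices, np.outer(coeffs, predicted))  # dJ/dU, safe under repeated indices
    return cost, gradPred, grad
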
Example #8
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models

    Implement the cost and gradients for one predicted word vector
    and one target word vector as a building block for word2vec
    models, using the negative sampling technique. K is the sample
    size.

    Note: See test_word2vec below for dataset's initialization.

    Arguments/Return Specifications: same as softmaxCostAndGradient
    """

    # Sampling of indices is done for you. Do not modify this if you
    # wish to match the autograder and receive points!

    indices = [target]
    indices.extend(getNegativeSamples(target, dataset, K))

    W = outputVectors.shape[0]  # vocabulary size W
    D = outputVectors.shape[1]  # embedding dimension D

    # reshape predicted into a [1 x D] row vector if it is 1-D
    if predicted.shape[0] != 1:
        predicted = np.expand_dims(predicted, axis=1)
        predicted = np.transpose(predicted)

    #calc inner product for predicted with all vectors of outputVectors
    outputVectorsSampled = outputVectors[indices, :]  # dim [ (K+1) x D ]
    inner_prod = np.matmul(
        predicted, np.transpose(outputVectorsSampled))  # dim [1 X (K+1)]
    samples_sigmoid = sigmoid(inner_prod[0, :])  # dim [K+1]

    # calculating the cost
    cost = -np.log(samples_sigmoid[0]) - np.sum(
        np.log(1 - samples_sigmoid[1:K + 1]))

    # calculating gradPred, following the derivation in part 2c
    gradPred = - (1 - samples_sigmoid[0]) * outputVectorsSampled[0,:] \
               + np.sum(outputVectorsSampled[1:K+1,:] * np.tile(np.expand_dims((samples_sigmoid[1:K+1]),axis=1),(1,D)) , axis=0) # dim [ 1 x D ]

    grad = np.zeros([W, D], dtype=np.float32)  # dim [ W x D ]

    grad[indices[0], :] = -predicted * (1 - samples_sigmoid[0])
    for idx in range(1, K + 1):  # the K negative samples
        grad[indices[idx]:indices[idx] +
             1, :] += predicted * (samples_sigmoid[idx])

    return cost, gradPred, grad
Example #9
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models

    Implement the cost and gradients for one predicted word vector
    and one target word vector as a building block for word2vec
    models, using the negative sampling technique. K is the sample
    size.

    Note: See test_word2vec below for dataset's initialization.

    Arguments/Return Specifications: same as softmaxCostAndGradient
    """

    # Sampling of indices is done for you. Do not modify this if you
    # wish to match the autograder and receive points!
    indices = [target]
    indices.extend(getNegativeSamples(target, dataset, K))

    U = outputVectors
    uo = U[target]
    vc = predicted
    sigmoid_uo_dot_vc = sigmoid(uo.dot(vc))

    cost = -np.log(sigmoid_uo_dot_vc)
    gradPred = (sigmoid_uo_dot_vc - 1.0) * uo
    grad = np.zeros(outputVectors.shape)
    grad[target] = (sigmoid_uo_dot_vc - 1.0) * vc

    for k in indices[1:]:
        sigmoid_minus_uk_dot_vc = sigmoid(-U[k].dot(vc))
        cost -= np.log(sigmoid_minus_uk_dot_vc)
        gradPred += (1.0 - sigmoid_minus_uk_dot_vc) * U[k]
        grad[k] += (1.0 - sigmoid_minus_uk_dot_vc) * vc

    return cost, gradPred, grad
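
As a quick shape check on any of these implementations (made-up sizes; this assumes the dummy dataset object built in test_word2vec, which only needs sampleTokenIdx, is in scope as dataset):

cost, gradPred, grad = negSamplingCostAndGradient(
    np.random.randn(3), 2, np.random.randn(5, 3), dataset, K=10)
assert gradPred.shape == (3,)   # same shape as predicted (v_c)
assert grad.shape == (5, 3)     # same shape as outputVectors (U)
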
Example #10
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models

    Implement the cost and gradients for one predicted word vector
    and one target word vector as a building block for word2vec
    models, using the negative sampling technique. K is the sample
    size.

    Note: See test_word2vec below for dataset's initialization.

    Arguments/Return Specifications: same as softmaxCostAndGradient
    """

    # Sampling of indices is done for you. Do not modify this if you
    # wish to match the autograder and receive points!
    indices = [target]
    indices.extend(getNegativeSamples(target, dataset, K))

    y_hat = sigmoid(np.matmul(outputVectors, predicted))

    # We use here that sigmoid(-x) = 1-sigmoid(x) as proved in the PDF
    cost = -np.log(y_hat[target]) - np.sum(np.log(1 - y_hat[indices[1:]]))

    grad_pred = -(1 - y_hat[target]) * outputVectors[target]
    grad_pred += np.sum((y_hat[indices[1:]] *
                         outputVectors[indices[1:]].transpose()).transpose(),
                        0)

    # rows other than the target and the negative samples keep a zero gradient
    grad = np.zeros(shape=outputVectors.shape)
    # for target
    grad[target, :] = (-1) * (1 - y_hat[target]) * predicted
    # for negative samples
    for negative_idx in range(1, K + 1):
        grad[indices[negative_idx]] += y_hat[indices[negative_idx]] * predicted

    return cost, grad_pred, grad
Example #11
def negSamplingCostAndGradient(predicted, target, outputVectors, dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models

    Implement the cost and gradients for one predicted word vector
    and one target word vector as a building block for word2vec
    models, using the negative sampling technique. K is the sample
    size.

    Note: See test_word2vec below for dataset's initialization.

    Arguments/Return Specifications: same as softmaxCostAndGradient
    """

    # Sampling of indices is done for you. Do not modify this if you
    # wish to match the autograder and receive points!
    indices = [target]
    indices.extend(getNegativeSamples(target, dataset, K))

    grad = np.zeros(outputVectors.shape)

    # YOUR CODE HERE
    sigmoid_result = sigmoid(outputVectors.dot(predicted))

    # Jneg(o) = -log(sigma(u_o*v_c))-sum(log(sigma(-u_k*v_c)))
    cost = -np.log(sigmoid_result[target]) - np.sum(np.log(1-sigmoid_result[indices[1:]]))

    # dJ/dv_c = sum_w(sigmoid(u_w*v_c) * u_w) - u_o, summed over w = o and the sampled k's
    gradPred = -outputVectors[target] + np.sum(
        sigmoid_result[indices][:, np.newaxis] * outputVectors[indices], axis=0)

    # dJ/du_w = sigmoid(u_w*v_c) * v_c for every sampled row (target row corrected below)
    for i in indices:
        grad[i] += sigmoid_result[i] * predicted
    # dJ/du_o = (sigmoid(u_o*v_c) - 1) * v_c, so subtract v_c from the target row
    grad[target] -= predicted
    # END YOUR CODE

    return cost, gradPred, grad