Example #1
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    in shape (word vector length, )
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors is
                    in shape (num words in vocab, word vector length)
                    for all words in vocab (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     in shape (word vector length, )
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    in shape (num words in vocab, word vector length)
                    (dJ / dU)
    """

    ### YOUR CODE HERE (~6-8 Lines)
    dot_prods = outsideVectors.dot(centerWordVec)
    y_hat = softmax(dot_prods)
    loss = -np.log(y_hat)[outsideWordIdx]
    diff = y_hat
    diff[outsideWordIdx] -= 1  # y_hat-y
    gradCenterVec = diff.dot(outsideVectors)
    diff = diff.reshape(-1, 1)  # Making y_hat-y a column vector
    vc = centerWordVec.reshape(1, -1)  # Making centerWordVec a row vector
    gradOutsideVecs = diff.dot(vc)
    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.

    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
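A minimal usage sketch (not one of the collected examples): assuming numpy and the course-provided softmax are in scope as in the file above, the analytic gradient for the center word returned by naiveSoftmaxLossAndGradient can be checked against a central finite difference on a toy problem.

import numpy as np

np.random.seed(0)
V, d = 5, 3                       # toy vocab size and embedding dimension
U = np.random.randn(V, d)         # outsideVectors
v_c = np.random.randn(d)          # centerWordVec
o = 2                             # outsideWordIdx

loss, dv_c, dU = naiveSoftmaxLossAndGradient(v_c, o, U, dataset=None)

# Central finite-difference check of dJ/dv_c.
eps = 1e-6
num_grad = np.zeros_like(v_c)
for i in range(d):
    e = np.zeros(d)
    e[i] = eps
    lp, _, _ = naiveSoftmaxLossAndGradient(v_c + e, o, U, dataset=None)
    lm, _, _ = naiveSoftmaxLossAndGradient(v_c - e, o, U, dataset=None)
    num_grad[i] = (lp - lm) / (2 * eps)
assert np.allclose(num_grad, dv_c, atol=1e-5)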
Example #2
def naiveSoftmaxLossAndGradient(
    centerWordVec,
    outsideWordIdx,
    outsideVectors,
    dataset
):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.  
    y_hat = softmax(np.dot(outsideVectors, centerWordVec)) 
    loss = -np.log(y_hat[outsideWordIdx])

    diff = y_hat.copy()
    diff[outsideWordIdx] -= 1 # y_hat - y

    gradCenterVec = np.dot(diff, outsideVectors) 
    gradOutsideVecs = np.dot(diff[:, np.newaxis], centerWordVec[:, np.newaxis].T) 
    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #3
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE
    N = np.shape(centerWordVec)[0]
    V = np.shape(outsideVectors)[0]
    y_pred = softmax(np.transpose(np.matmul(outsideVectors, centerWordVec)))
    y_label = np.zeros(shape=(V, ))
    y_label[outsideWordIdx] = 1
    loss = -np.log(y_pred[outsideWordIdx, ])
    gradCenterVec = np.matmul(np.transpose(outsideVectors),
                              np.reshape(y_pred - y_label, (V, 1)))
    gradCenterVec = np.reshape(gradCenterVec, (N, ))
    gradOutsideVecs = np.matmul(
        np.repeat(np.reshape(centerWordVec, (N, 1)), V, axis=1),
        np.diag(y_pred - y_label))
    gradOutsideVecs = np.transpose(gradOutsideVecs)
    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.
    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #4
def naiveSoftmaxLossAndGradient(
    centerWordVec,
    outsideWordIdx,
    outsideVectors,
    dataset
):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow. 
    # the embedding is n-dimensional
    centerWordVec = centerWordVec.reshape(-1, 1) # n * 1
    scores = np.dot(outsideVectors, centerWordVec).reshape(-1, )
    probs = softmax(scores).reshape(-1, 1)
    loss = -np.log(probs[outsideWordIdx, 0])
    gradCenterVec = -outsideVectors[outsideWordIdx, :] + np.dot(probs.T, outsideVectors)
    gradOutsideVecs = np.dot(probs, centerWordVec.T)
    gradOutsideVecs[outsideWordIdx, :] -= (centerWordVec).reshape(-1,)
    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #5
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.

    probs = softmax(outsideVectors.dot(centerWordVec))
    tj = (np.argmax(probs) == outsideWordIdx)
    yj = probs[outsideWordIdx]
    loss = -np.log(yj)
    Tj = np.zeros((probs.shape))
    Tj[outsideWordIdx] = 1
    gradOutsideVecs = (probs - Tj).reshape(-1, 1) * (centerWordVec).reshape(
        1, -1)
    gradCenterVec = np.sum((probs - Tj).reshape(-1, 1) * outsideVectors,
                           axis=0)

    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #6
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U (|V| x n) in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    # YOUR CODE HERE

    # Please use the provided softmax function (imported earlier in this file)
    # This numerically stable implementation helps you avoid issues pertaining
    # to integer overflow.

    scores = outsideVectors @ centerWordVec
    prob = softmax(scores)[:, np.newaxis]

    loss = float(-np.log(prob[outsideWordIdx]))

    trueOutsideVec = outsideVectors[outsideWordIdx]
    gradCenterVec = -trueOutsideVec + np.sum(outsideVectors * prob, axis=0)

    gradOutsideVecs = np.dot(prob, centerWordVec[:, np.newaxis].T)
    gradOutsideVecs[outsideWordIdx] -= centerWordVec

    # END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #7
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.

    temp = centerWordVec * outsideVectors
    temp = np.sum(temp, axis=1)
    soft = softmax(temp)
    loss = -1 * np.log(soft[outsideWordIdx])
    gradCenterVec = np.sum(outsideVectors * soft.reshape(soft.shape[0], 1),
                           axis=0) - outsideVectors[outsideWordIdx]
    gradOutsideVecs = soft.reshape(soft.shape[0], 1) * centerWordVec.reshape(1, -1)
    gradOutsideVecs[outsideWordIdx] -= centerWordVec

    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #8
def naiveSoftmaxLossAndGradient(
    centerWordVec,
    outsideWordIdx,
    outsideVectors,
    dataset
):
    """ Naive Softmax loss & gradient function for word2vec models
    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.
    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.
    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.
    y_hat = softmax(centerWordVec @ outsideVectors.T)
    loss = -np.log(y_hat)[outsideWordIdx]

    ## compute derivative to center word
    ## referring to: https://courses.cs.ut.ee/MTAT.03.277/2015_fall/uploads/Main/word2vec.pdf
    Diff = y_hat.copy()
    Diff[outsideWordIdx] -= 1

    gradCenterVec = outsideVectors.T @ Diff
    gradOutsideVecs = np.expand_dims(Diff, axis=1) @ np.expand_dims(centerWordVec, axis=1).T

    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #9
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE
    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.

    logits = np.matmul(outsideVectors, centerWordVec)
    y_bar = softmax(logits)
    yo_bar = y_bar[outsideWordIdx]
    loss = -np.log(yo_bar)

    V, D = outsideVectors.shape  # V = num words in vocab, D = word vector length
    y = np.zeros(V)
    y[outsideWordIdx] = 1

    gradCenterVec = np.matmul(np.transpose(outsideVectors), y_bar - y)
    gradOutsideVecs = np.outer(y_bar - y, centerWordVec)

    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #10
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models
    Implement the naive softmax loss and gradients between a center word's
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.
    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.
    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    # Summary: takes four arguments, computes the naive softmax loss, and returns that loss plus the two gradients used to update the embedding vectors.
    ### YOUR CODE HERE
    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.

    # Apply softmax and use it to define the loss.
    _softmax = softmax(np.dot(outsideVectors, centerWordVec))  # this is y_hat
    loss = -np.log(_softmax[outsideWordIdx])  # defined in the handout as -log(y_hat_o)

    # Gradient descent
    _softmax[outsideWordIdx] -= 1  # (y_hat - y): y is one-hot with a 1 at outsideWordIdx, so just subtract 1 there
    gradCenterVec = np.dot(outsideVectors.T, _softmax)  # originally U(y_hat - y); _softmax already holds y_hat - y, transposed to match dimensions
    gradOutsideVecs = np.dot(np.expand_dims(_softmax, axis=1),
                             np.expand_dims(centerWordVec, axis=0))
    # expand_dims turns the 1-D arrays into V x 1 and 1 x N matrices; their product has shape V x N, matching outsideVectors.

    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #11
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    in shape (word vector length, )
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors is
                    in shape (num words in vocab, word vector length) 
                    for all words in vocab (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     in shape (word vector length, )
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    in shape (num words in vocab, word vector length) 
                    (dJ / dU)
    """

    ### YOUR CODE HERE (~6-8 Lines)

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.
    logits = softmax(outsideVectors @ centerWordVec)
    loss = -np.log(logits[outsideWordIdx])

    label = np.zeros_like(logits)
    label[outsideWordIdx] = 1
    gradCenterVec = (logits - label) @ outsideVectors

    gradOutsideVecs = np.outer((logits - label), centerWordVec)
    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #12
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    in shape (word vector length, )
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors is
                    in shape (num words in vocab, word vector length) 
                    for all words in vocab (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     in shape (word vector length, )
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    in shape (num words in vocab, word vector length) 
                    (dJ / dU)
    """

    # YOUR CODE HERE (~6-8 Lines)
    y_hat = softmax(outsideVectors @ centerWordVec.T)  # (N,1)
    # print(y_hat.shape)
    loss = -np.log(y_hat[outsideWordIdx])
    gradCenterVec = y_hat.T @ outsideVectors - outsideVectors[outsideWordIdx]
    y_hat_minus = y_hat.copy()
    y_hat_minus[outsideWordIdx] -= 1
    gradOutsideVecs = y_hat_minus.reshape(-1, 1) @ centerWordVec.reshape(1, -1)

    # Please use the provided softmax function (imported earlier in this file)
    # This numerically stable implementation helps you avoid issues pertaining
    # to integer overflow.

    # END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #13
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models
    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.
    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.
    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.

    # centerWordVec:  (embedding_dim,1)
    # outsideVectors: (vocab_size,embedding_dim)

    scores = np.matmul(outsideVectors, centerWordVec)  # (vocab_size,1)
    probs = softmax(scores)  # (vocab_size,1)  y_hat

    loss = -np.log(probs[outsideWordIdx])

    dscores = probs.copy()  # (vocab_size,1)
    dscores[outsideWordIdx] = dscores[outsideWordIdx] - 1  #  y_hat minus y
    gradCenterVec = np.matmul(outsideVectors.T, dscores)  # (embedding_dim,1)
    gradOutsideVecs = np.outer(dscores,
                               centerWordVec)  # (vocab_size,embedding_dim)

    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #14
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.
    value = np.dot(outsideVectors, centerWordVec)  # N x 1
    y_hat = softmax(value)
    loss = -np.log(y_hat[outsideWordIdx])  # Written assignment part a

    d_value = y_hat
    d_value[outsideWordIdx] -= 1  # y_hat - y, matrix shape (N, 1)
    gradCenterVec = outsideVectors.T.dot(
        d_value)  # shape d x 1  Written Assignment part b
    gradOutsideVecs = d_value[:, np.newaxis].dot(np.array([
        centerWordVec
    ]))  # (N, 1) dot (1, d) -> (N, d) written assignment part c

    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #15
def naiveSoftmaxLossAndGradient(
    centerWordVec,
    outsideWordIdx,
    outsideVectors,
    dataset
):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    in shape (word vector length, )
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors is
                    in shape (num words in vocab, word vector length) 
                    for all words in vocab (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     in shape (word vector length, )
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    in shape (num words in vocab, word vector length) 
                    (dJ / dU)
    """

    ### YOUR CODE HERE (~6-8 Lines)

    softmax_result = softmax(np.dot(outsideVectors, centerWordVec))
    naive_softmax_loss = - np.log(softmax_result)[outsideWordIdx]
    softmax_result[outsideWordIdx] -= 1

    gradCenterVec = np.dot(outsideVectors.T, softmax_result)
    gradOutsideVecs = np.dot(softmax_result.reshape(softmax_result.shape[0], 1),
                             centerWordVec.reshape(1, centerWordVec.shape[0]))

    ### END YOUR CODE

    return naive_softmax_loss, gradCenterVec, gradOutsideVecs
Example #16
def naiveSoftmaxLossAndGradient(
        centerWordVec,
        outsideWordIdx,
        outsideVectors,
        dataset
):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE
    y = np.zeros(outsideVectors.shape[0])  # y: (N, )
    y[outsideWordIdx] = 1.
    y_hat = softmax(centerWordVec @ outsideVectors.T)  # y_hat: (N, )
    loss = -np.log(y_hat[outsideWordIdx])  # -log(y_o_hat)
    gradCenterVec = (y_hat - y) @ outsideVectors  # (D, )
    gradOutsideVecs = np.expand_dims(y_hat.T, axis=1) @ np.expand_dims(centerWordVec, axis=0)  # (N, D)
    gradOutsideVecs[outsideWordIdx] -= centerWordVec
    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow. 

    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #17
def make_prediction_knn_weighted(self,
                                 x,
                                 y,
                                 k,
                                 source_som,
                                 target_som,
                                 source,
                                 mode='none'):
    source_activation, pos_source_activation = source_som.get_activations(x)
    source_activation = np.array(source_activation).reshape((-1, 1))
    target_activation = self.propagate_activation(source_activation,
                                                  source_som=source)
    # vote weighting alternatives
    if mode == 'softmax':
        # normalize using softmax; this brings some 0-valued votes to higher values
        vote_weights = softmax(target_activation)
    elif mode == 'none':
        # since hebbian weights and activations are normalized, the propagated
        # activation's values are already between 0 and 1
        vote_weights = target_activation
    elif mode == 'minmax':
        # minimum activation is mapped to 0 and maximum to 1
        min_ = min(target_activation)
        max_ = max(target_activation)
        vote_weights = (target_activation - min_) / float(max_ - min_)
    hebbian_bmu_index = np.argmax(target_activation)
    pos_activations = list(
        target_som._neuron_locations(target_som._m, target_som._n))
    closest_activations, closest_indexes = self.get_bmu_k_closest(
        target_som, target_activation, pos_activations, k)
    # perform a weighted majority vote
    class_count = [
        0 for i in set(
            [c[0] for c in target_som.bmu_class_dict.values() if c != []])
    ]
    for i in range(len(closest_indexes)):
        print(closest_indexes[i])
        bmu_class_list = target_som.bmu_class_dict[closest_indexes[i]]
        if bmu_class_list != []:
            class_count[bmu_class_list[0]] += 1 * vote_weights[closest_indexes[i]]
    print(class_count)
    return np.argmax(class_count)
Example #18
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.

    ## Note: vectors and matrices in the code are the transposes of those in the written part;
    ## e.g., a word vector is a row vector in the code but a column vector in the written part.
    y_hat = softmax(np.dot(centerWordVec.reshape(1, -1),
                           outsideVectors.T))  # 1 row, n columns
    loss = -np.log(y_hat[0][outsideWordIdx])
    delta = y_hat.copy()  # delta is y_hat - y, a 1-by-n row vector
    delta[0][outsideWordIdx] = delta[0][outsideWordIdx] - 1
    gradCenterVec = np.dot(delta, outsideVectors)  # 1 row, d columns
    gradOutsideVecs = np.dot(delta.T, centerWordVec.reshape(1, -1))

    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #19
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.
    inner_product = centerWordVec.dot(
        outsideVectors.T
    )  # centerWordVec has shape (1, d) and outsideVectors has shape (V, d)
    y_hat = softmax(inner_product)  # should have shape (1, V),
    y = np.zeros(y_hat.shape)
    y[0, outsideWordIdx] = 1
    loss = -np.log(y_hat[0, outsideWordIdx])
    gradCenterVec = np.sum((y_hat - y).T * outsideVectors,
                           axis=0,
                           keepdims=True)  # (1, d)
    gradOutsideVecs = (y_hat - y).T * centerWordVec  # (V, d)
    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #20
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """
    loss = 0
    gradCenterVec = 0
    gradOutsideVecs = 0
    x = centerWordVec @ outsideVectors.T
    out_probs = softmax(x)
    grad1 = out_probs
    loss = -np.log(out_probs[outsideWordIdx])
    grad1[outsideWordIdx] -= 1
    gradCenterVec = grad1 @ outsideVectors
    gradOutsideVecs = np.outer(grad1, centerWordVec)

    ### YOUR CODE HERE

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.

    ### END YOUR CODE
    return loss, gradCenterVec, gradOutsideVecs
Example #21
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    # YOUR CODE HERE
    v_c, u_o, U = centerWordVec, outsideWordIdx, outsideVectors
    # print('v_c shape {}, U shape {}'.format(v_c.shape, U.shape))
    v_c = v_c.reshape(-1, 1)  # Transform v_c to a column vector (N, 1)
    prob = softmax(np.dot(U, v_c).reshape(-1)).reshape(-1, 1)
    # print("Shpae is ", prob.shape)
    loss = -np.log(prob[outsideWordIdx])
    delta = prob.copy()
    delta[outsideWordIdx] -= 1  # the true y is a one-hot vector
    gradCenterVec = np.dot(U.T, delta).flatten()
    gradOutsideVecs = np.dot(delta, v_c.T)
    # Please use the provided softmax function (imported earlier in this file)
    # This numerically stable implementation helps you avoid issues pertaining
    # to integer overflow.

    # END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #22
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
                    
    Note:
     we usually use column vector convention (i.e., vectors are in column form) for vectors in matrix U and V (in the handout)
     but for ease of implementation/programming we usually use row vectors (representing vectors in row form).
    """

    ### YOUR CODE HERE
    y_hat = softmax(np.dot(outsideVectors,
                           centerWordVec))  # U*vc: (w x e) * (e x 1) = (w x 1)
    loss = -np.log(y_hat[outsideWordIdx])

    y = np.zeros(len(outsideVectors))
    y[outsideWordIdx] = 1  # one hot vector
    gradCenterVec = np.dot(outsideVectors.T, y_hat -
                           y)  # UT*(y_hat-y): (e x w) * (w x 1) = (e x 1)
    gradOutsideVecs = np.dot(np.expand_dims(centerWordVec, 1), (np.expand_dims(
        y_hat - y, 0))).T  # vc(y_hat-y)T: (e x 1) * (1 x w) = (e x w)
    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
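A small illustration, under toy shapes, of the row- vs column-vector note in the docstring above: with word vectors stored as rows, the handout's column-form gradient (y_hat - y) v_c^T reduces to a plain outer product, which is what the expand_dims/transpose construction in the code above computes.

import numpy as np

rng = np.random.default_rng(0)
V, d = 4, 3
v_c = rng.normal(size=d)      # center vector in row convention, shape (d,)
delta = rng.normal(size=V)    # stands in for y_hat - y, shape (V,)

col_style = np.dot(np.expand_dims(v_c, 1), np.expand_dims(delta, 0)).T  # as written above
row_style = np.outer(delta, v_c)                                        # outer-product form
assert np.allclose(col_style, row_style)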
Example #23
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE
    scalar_all = np.dot(outsideVectors, centerWordVec)
    probs = softmax(scalar_all)
    loss = -np.log(probs[outsideWordIdx])
    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.
    gradCenterVec = -outsideVectors[outsideWordIdx, :] + np.sum(
        outsideVectors * probs.reshape((-1, 1)), axis=0)
    gradOutsideVecs = np.dot(probs.reshape((-1, 1)),
                             centerWordVec.reshape((1, -1)))
    gradOutsideVecs[outsideWordIdx] -= centerWordVec
    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #24
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models. For those unfamiliar with numpy notation, note 
    that a numpy ndarray with a shape of (x, ) is a one-dimensional array, which
    you can effectively treat as a vector with length x.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    in shape (word vector length, )
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors is
                    in shape (num words in vocab, word vector length) 
                    for all words in vocab (transpose of U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     in shape (word vector length, )
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    in shape (num words in vocab, word vector length) 
                    (dJ / dU)
    """

    ### YOUR CODE HERE (~6-8 Lines)
    y_hat = softmax(np.dot(outsideVectors, centerWordVec))
    loss = -np.log(y_hat[outsideWordIdx])
    delta = y_hat.copy()
    delta[outsideWordIdx] -= 1  # y_hat - y
    gradCenterVec = np.dot(outsideVectors.T, delta)
    gradOutsideVecs = np.outer(delta, centerWordVec)

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.

    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #25
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.

    theta = outsideVectors.dot(centerWordVec)
    y_pred = softmax(theta)
    y_true = np.zeros_like(y_pred)
    y_true[outsideWordIdx] = 1
    loss = -1 * np.log(y_pred[outsideWordIdx])
    gradCenterVec = (y_pred - y_true).dot(outsideVectors)
    gradOutsideVecs = (y_pred - y_true).reshape(y_pred.size, 1).dot(
        centerWordVec.reshape(1, centerWordVec.size))

    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #26
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    # softmax applied to v_c * u_o
    y_hat = softmax(np.dot(centerWordVec, outsideVectors.T))

    # loss calculated by -log(y_hat_o)
    loss = -np.log(y_hat)[outsideWordIdx]

    # update y_hat_o to get y_hat - y (this holds because y is just a one-hot encoded vector with a 1 for the true outside word)
    y_hat[outsideWordIdx] -= 1

    # U(y_hat - y)
    # this equation is calculated by finding the partial derivative of J_naive-softmax(v_c, o, U) with respect to v_c
    gradCenterVec = np.dot(y_hat, outsideVectors)

    # v_c(y_hat - y)^T
    # this equation is calculated by finding the partial derivative of J_naive-softmax(v_c, o, U) with respect to U
    gradOutsideVecs = np.outer(y_hat, centerWordVec)

    return loss, gradCenterVec, gradOutsideVecs
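A short recap of the formulas the comments above refer to (a sketch in handout-style notation; u_w is the outside vector of word w and y is the one-hot vector for the true outside word o):

\[ J_{\text{naive-softmax}}(v_c, o, U) = -\log \hat{y}_o, \qquad \hat{y}_w = \frac{\exp(u_w^\top v_c)}{\sum_{x \in \mathrm{Vocab}} \exp(u_x^\top v_c)} \]
\[ \frac{\partial J}{\partial v_c} = \sum_w (\hat{y}_w - y_w)\, u_w, \qquad \frac{\partial J}{\partial u_w} = (\hat{y}_w - y_w)\, v_c \]

With the rows of outsideVectors holding the u_w, stacking the per-word gradients row-wise gives exactly np.outer(y_hat, centerWordVec) after the in-place subtraction of 1 at index o, as in the code above.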
Example #27
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.
    y_hat = np.dot(outsideVectors, centerWordVec)
    y_softmax = softmax(y_hat)
    y_ = np.zeros(outsideVectors.shape[0])
    y_[outsideWordIdx] = 1
    loss = -1 * np.log(y_softmax[outsideWordIdx])
    u_o = outsideVectors[outsideWordIdx]
    #gradCenterVec= np.sum(np.multiply(np.transpose(outsideVectors),y_softmax), axis=1) - u_o
    gradCenterVec = np.dot(np.transpose(outsideVectors), y_softmax - y_)
    gradOutsideVecs = np.dot((y_softmax - y_).reshape(y_softmax.shape[0], 1),
                             centerWordVec.reshape(1, centerWordVec.shape[0]))
    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #28
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
    """

    ### YOUR CODE HERE

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.
    y_hat = softmax(np.dot(centerWordVec.reshape([1, -1]),
                           outsideVectors.T))  # 1xD product DxC => 1xC
    loss = -np.log(y_hat[0][outsideWordIdx])  # scalar

    gt = np.zeros(y_hat.shape)
    gt[0][outsideWordIdx] = 1  # 1xC
    delta = (y_hat - gt)
    gradCenterVec = np.dot(delta, outsideVectors)  # 1xC product CxD => 1xD

    gradOutsideVecs = np.dot(delta.T, centerWordVec.reshape(
        1, -1))  # Cx1 product 1xD => CxD
    ### END YOUR CODE
    return loss, gradCenterVec, gradOutsideVecs
Example #29
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     dJ / dv_c = U(y_hat - y)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                     dJ / dU = v_c(y_hat - y)^T
    """

    # YOUR CODE HERE

    # Please use the provided softmax function (imported earlier in this file)
    # This numerically stable implementation helps you avoid issues pertaining
    # to integer overflow.

    y_hat = outsideVectors @ centerWordVec  # N
    y_hat = softmax(y_hat)
    loss = -np.log(y_hat[outsideWordIdx])
    d_val = y_hat  # N
    d_val[outsideWordIdx] -= 1
    gradCenterVec = outsideVectors.T @ d_val  # U: N * D, d_val: N -> D
    # d_val: N, vc: D -> N * D
    gradOutsideVecs = d_val[:, np.newaxis] * centerWordVec
    # END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs
Example #30
def naiveSoftmaxLossAndGradient(centerWordVec, outsideWordIdx, outsideVectors,
                                dataset):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)

    Note:
     we usually use column vector convention (i.e., vectors are in column form) for vectors in matrix U and V (in the handout)
     but for ease of implementation/programming we usually use row vectors (representing vectors in row form).
    """

    # Please use the provided softmax function (imported earlier in this file)
    # This numerically stable implementation helps you avoid issues pertaining
    # to integer overflow.

    sm = softmax(outsideVectors.dot(centerWordVec))
    loss = -np.log(sm[outsideWordIdx])
    gradCenterVec = -outsideVectors[outsideWordIdx] + sm.dot(outsideVectors)
    gradOutsideVecs = np.zeros_like(outsideVectors)
    neg_mask = np.arange(gradOutsideVecs.shape[0]) != outsideWordIdx
    gradOutsideVecs[neg_mask, :] = sm[neg_mask].reshape(-1, 1) * np.tile(
        centerWordVec, (len(sm[neg_mask]), 1))
    gradOutsideVecs[outsideWordIdx] = (sm[outsideWordIdx] - 1) * centerWordVec
    return loss, gradCenterVec, gradOutsideVecs
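A small sketch (toy values, hypothetical names) checking that the mask-based construction in the last example matches the single outer product (y_hat - y) v_c^T used by the other implementations.

import numpy as np

rng = np.random.default_rng(1)
V, d, o = 4, 3, 1
sm = rng.random(V)
sm /= sm.sum()                 # stands in for the softmax output y_hat
v_c = rng.normal(size=d)       # stands in for centerWordVec

masked = np.zeros((V, d))
neg_mask = np.arange(V) != o
masked[neg_mask, :] = sm[neg_mask].reshape(-1, 1) * np.tile(v_c, (int(neg_mask.sum()), 1))
masked[o] = (sm[o] - 1) * v_c

y = np.zeros(V)
y[o] = 1
assert np.allclose(masked, np.outer(sm - y, v_c))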