Пример #1
0
def compute_gradients(dL_da2, da2_dz2, dz2_dW2, dz2_db2, dz2_da1, da1_dz1,
                      dz1_dW1, dz1_db1):
    '''
        Combine the local gradients via the chain rule to obtain the gradients
        of the loss function L w.r.t. the parameters of both layers.
        Input:
            dL_da2:  local gradient of L w.r.t. the 2nd-layer activations a2
            da2_dz2: local gradient of a2 w.r.t. the 2nd-layer logits z2
            dz2_dW2: local gradient of z2 w.r.t. the weights W2
            dz2_db2: local gradient of z2 w.r.t. the biases b2
            dz2_da1: local gradient of z2 w.r.t. the 1st-layer activations a1
            da1_dz1: local gradient of a1 w.r.t. the 1st-layer logits z1
            dz1_dW1: local gradient of z1 w.r.t. the weights W1
            dz1_db1: local gradient of z1 w.r.t. the biases b1
        Output (in this order):
            dL_dW2: gradient of L w.r.t. the weight matrix W2
            dL_db2: gradient of L w.r.t. the biases b2
            dL_dW1: gradient of L w.r.t. the weight matrix W1
            dL_db1: gradient of L w.r.t. the biases b1
    '''
    # Layer 2: go from the activations back to the logits, then to W2 / b2.
    grad_z2 = sr.compute_dL_dz(dL_da2, da2_dz2)
    grad_W2 = sr.compute_dL_dW(grad_z2, dz2_dW2)
    grad_b2 = sr.compute_dL_db(grad_z2, dz2_db2)

    # Layer 1: push the layer-2 logit gradient back to a1, then to z1 with an
    # elementwise product against the activation's local gradient.
    grad_a1 = compute_dL_da1(grad_z2, dz2_da1)
    grad_z1 = np.multiply(grad_a1, da1_dz1)

    return (
        grad_W2,
        grad_b2,
        sr.compute_dL_dW(grad_z1, dz1_dW1),
        sr.compute_dL_db(grad_z1, dz1_db1),
    )
Пример #2
0
def compute_dL_dz1(dL_da1, da1_dz1):
    '''
        Compute the gradient of the loss function L w.r.t. the logits z1 using the chain rule.
        Input:
            dL_da1: the gradient of the loss function L w.r.t. the activations a1
            da1_dz1: the gradient of the activations a1 w.r.t. the logits z1
        Output:
            dL_dz1: the partial gradient of the loss function w.r.t. the logits z1, a numpy float vector of shape h by 1.
                   Each element represents the partial gradient of the loss function L w.r.t. the i-th logit z1[i]:  d_L / d_z1[i]
    '''
    # Local import keeps this function self-contained.
    import numpy as np

    # Chain rule for an elementwise activation:
    #   d_L / d_z1[i] = d_L / d_a1[i] * d_a1[i] / d_z1[i]
    # Done as an explicit elementwise product rather than through the
    # weight-gradient helper sr.compute_dL_dW, so the result does not depend
    # on how that unrelated helper is implemented.
    dL_dz1 = np.multiply(dL_da1, da1_dz1)
    return dL_dz1