Example #1
import numpy as np
# AF is assumed to be the author's activation-function helper module (defined elsewhere).

def backward_propagation_L2(X, Y, cache, lambd):
    '''
    Backward propagation with L2 regularization: only the dW computations change
    (each gains a lambd * W / m term); everything else stays the same.
    '''
    (A1, Z1, W1, b1, A2, Z2, W2, b2, A3, Z3, W3, b3) = cache
    m = Y.shape[1]  # number of samples

    dA3 = -(np.divide(Y, A3) - np.divide(1 - Y, 1 - A3))
    dZ3 = AF.sigimoid_backward(dA3, Z3)
    dW3 = 1. / m * np.dot(dZ3, A2.T) + (lambd * W3 / m)
    db3 = 1. / m * np.sum(dZ3, axis=1, keepdims=True)

    dA2 = np.dot(W3.T, dZ3)
    dZ2 = AF.relu_backward(dA2, Z2)
    dW2 = 1. / m * np.dot(dZ2, A1.T) + (lambd * W2 / m)
    db2 = 1. / m * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dZ1 = AF.relu_backward(dA1, Z1)
    dW1 = 1. / m * np.dot(dZ1, X.T) + (lambd * W1 / m)
    db1 = 1. / m * np.sum(dZ1, axis=1, keepdims=True)

    grads = {
        'dW1': dW1,
        'dW2': dW2,
        'dW3': dW3,
        'db1': db1,
        'db2': db2,
        'db3': db3
    }

    return grads
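
The lambd * W / m terms in dW1, dW2 and dW3 are the derivatives of an L2 penalty of the form (lambd / (2m)) * (||W1||^2 + ||W2||^2 + ||W3||^2) added to the cross-entropy cost. The cost function itself is not part of this example; the sketch below shows what it typically looks like (the name compute_cost_with_L2 and the parameters dictionary layout are assumptions, not original code):

import numpy as np

def compute_cost_with_L2(A3, Y, parameters, lambd):
    # Cross-entropy cost plus the L2 penalty whose per-layer gradient is lambd * W / m.
    m = Y.shape[1]
    W1, W2, W3 = parameters['W1'], parameters['W2'], parameters['W3']
    cross_entropy = -1. / m * np.sum(Y * np.log(A3) + (1 - Y) * np.log(1 - A3))
    l2_penalty = lambd / (2. * m) * (np.sum(np.square(W1))
                                     + np.sum(np.square(W2))
                                     + np.sum(np.square(W3)))
    return cross_entropy + l2_penalty
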
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.
    
    Arguments:
    dA -- post-activation gradient for current layer l 
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    
    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """

    linear_cache, activation_cache = cache

    if activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    elif activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
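
linear_activation_backward relies on three helpers defined elsewhere: sigmoid_backward, relu_backward and linear_backward. For reference, here is a minimal sketch of linear_backward, assuming linear_cache stores (A_prev, W, b) for the current layer (an assumption about the surrounding code, not part of this example):

import numpy as np

def linear_backward(dZ, linear_cache):
    # Gradients of the linear step Z = W @ A_prev + b, given dZ for the current layer.
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]
    dW = 1. / m * np.dot(dZ, A_prev.T)
    db = 1. / m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db
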
Example #3
import numpy as np
# AF is assumed to be the author's activation-function helper module (defined elsewhere).

def backward_propagation_dropout(X, Y, cache, keep_prob):
    '''
    Backward propagation with dropout added.
    '''
    (A1, D1, Z1, W1, b1, A2, D2, Z2, W2, b2, A3, Z3, W3, b3) = cache
    m = Y.shape[1]  # number of samples

    dA3 = -(np.divide(Y, A3) - np.divide(1 - Y, 1 - A3))
    dZ3 = AF.sigimoid_backward(dA3, Z3)
    dW3 = 1. / m * np.dot(dZ3, A2.T)
    db3 = 1. / m * np.sum(dZ3, axis=1, keepdims=True)

    dA2 = np.dot(W3.T, dZ3)
    dA2 = dA2 * D2  # step 1: apply the mask D2 from forward propagation (keep only nodes that were not dropped)
    dA2 = dA2 / keep_prob  # step 2: rescale the values of the kept nodes

    dZ2 = AF.relu_backward(dA2, Z2)
    dW2 = 1. / m * np.dot(dZ2, A1.T)
    db2 = 1. / m * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dA1 = dA1 * D1  # step 1: apply the mask D1 from forward propagation (keep only nodes that were not dropped)
    dA1 = dA1 / keep_prob  # step 2: rescale the values of the kept nodes

    dZ1 = AF.relu_backward(dA1, Z1)
    dW1 = 1. / m * np.dot(dZ1, X.T)
    db1 = 1. / m * np.sum(dZ1, axis=1, keepdims=True)

    grads = {
        'dW1': dW1,
        'dW2': dW2,
        'dW3': dW3,
        'db1': db1,
        'db2': db2,
        'db3': db3
    }

    return grads
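
The masks D1 and D2 reused in the backward pass above are generated during forward propagation with inverted dropout and stored in the cache. A minimal sketch of such a forward pass is shown below; the function name, the parameters dictionary layout and the AF.relu / AF.sigmoid helpers are assumptions, not part of the original example:

import numpy as np

def forward_propagation_dropout(X, parameters, keep_prob):
    # Forward pass with inverted dropout on the two hidden layers; the masks
    # D1 and D2 are kept in the cache so the backward pass can reuse them.
    W1, b1 = parameters['W1'], parameters['b1']
    W2, b2 = parameters['W2'], parameters['b2']
    W3, b3 = parameters['W3'], parameters['b3']

    Z1 = np.dot(W1, X) + b1
    A1 = AF.relu(Z1)
    D1 = np.random.rand(*A1.shape) < keep_prob  # step 1: random keep/drop mask
    A1 = A1 * D1 / keep_prob                    # step 2: drop nodes and rescale the kept ones

    Z2 = np.dot(W2, A1) + b2
    A2 = AF.relu(Z2)
    D2 = np.random.rand(*A2.shape) < keep_prob
    A2 = A2 * D2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = AF.sigmoid(Z3)

    cache = (A1, D1, Z1, W1, b1, A2, D2, Z2, W2, b2, A3, Z3, W3, b3)
    return A3, cache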