import numpy as np
# AF (the activation-function helper module providing relu_backward and
# sigimoid_backward) is assumed to be imported elsewhere in this file.


def backward_propagation_L2(X, Y, cache, lambd):
    '''
    Backward propagation with L2 regularization: only the dW terms change
    (each gains the lambd * W / m penalty gradient); everything else is the
    same as plain backpropagation.
    '''
    (A1, Z1, W1, b1, A2, Z2, W2, b2, A3, Z3, W3, b3) = cache
    m = Y.shape[1]  # number of examples

    dA3 = -(np.divide(Y, A3) - np.divide(1 - Y, 1 - A3))
    dZ3 = AF.sigimoid_backward(dA3, Z3)
    dW3 = 1. / m * np.dot(dZ3, A2.T) + (lambd * W3 / m)
    db3 = 1. / m * np.sum(dZ3, axis=1, keepdims=True)

    dA2 = np.dot(W3.T, dZ3)
    dZ2 = AF.relu_backward(dA2, Z2)
    dW2 = 1. / m * np.dot(dZ2, A1.T) + (lambd * W2 / m)
    db2 = 1. / m * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dZ1 = AF.relu_backward(dA1, Z1)
    dW1 = 1. / m * np.dot(dZ1, X.T) + (lambd * W1 / m)
    db1 = 1. / m * np.sum(dZ1, axis=1, keepdims=True)

    grads = {
        'dW1': dW1, 'dW2': dW2, 'dW3': dW3,
        'db1': db1, 'db2': db2, 'db3': db3
    }

    return grads
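# --- Hedged sketch (not part of the original code): the L2-regularized cost
# --- whose gradient produces the lambd * W / m terms above. The function name
# --- compute_cost_with_L2 and the inline cross-entropy are assumptions for
# --- illustration, not the original helper.
def compute_cost_with_L2(A3, Y, W1, W2, W3, lambd):
    m = Y.shape[1]
    # Plain cross-entropy part of the cost.
    cross_entropy = -1. / m * np.sum(Y * np.log(A3) + (1 - Y) * np.log(1 - A3))
    # L2 penalty: (lambd / 2m) * sum of squared weights. Its derivative with
    # respect to each W is lambd * W / m, which is exactly the extra term
    # added to dW1, dW2 and dW3 in backward_propagation_L2.
    l2_penalty = lambd / (2. * m) * (np.sum(np.square(W1))
                                     + np.sum(np.square(W2))
                                     + np.sum(np.square(W3)))
    return cross_entropy + l2_penalty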
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
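# --- Hedged sketch (not in the original snippet): one possible linear_backward
# --- that linear_activation_backward relies on, assuming linear_cache is the
# --- tuple (A_prev, W, b) saved during the forward pass.
def linear_backward(dZ, linear_cache):
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]
    # Gradients of the linear step Z = W . A_prev + b.
    dW = 1. / m * np.dot(dZ, A_prev.T)
    db = 1. / m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db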
def backward_propagation_dropout(X, Y, cache, keep_prob):
    '''
    Backward propagation with dropout: reapply the masks D1, D2 drawn during
    the forward pass, then rescale by keep_prob (inverted dropout).
    '''
    (A1, D1, Z1, W1, b1, A2, D2, Z2, W2, b2, A3, Z3, W3, b3) = cache
    m = Y.shape[1]  # number of examples

    dA3 = -(np.divide(Y, A3) - np.divide(1 - Y, 1 - A3))
    dZ3 = AF.sigimoid_backward(dA3, Z3)
    dW3 = 1. / m * np.dot(dZ3, A2.T)
    db3 = 1. / m * np.sum(dZ3, axis=1, keepdims=True)

    dA2 = np.dot(W3.T, dZ3)
    dA2 = dA2 * D2         # step 1: keep only the nodes that stayed active in the forward pass
    dA2 = dA2 / keep_prob  # step 2: rescale the surviving nodes' values
    dZ2 = AF.relu_backward(dA2, Z2)
    dW2 = 1. / m * np.dot(dZ2, A1.T)
    db2 = 1. / m * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dA1 = dA1 * D1         # step 1: keep only the nodes that stayed active in the forward pass
    dA1 = dA1 / keep_prob  # step 2: rescale the surviving nodes' values
    dZ1 = AF.relu_backward(dA1, Z1)
    dW1 = 1. / m * np.dot(dZ1, X.T)
    db1 = 1. / m * np.sum(dZ1, axis=1, keepdims=True)

    grads = {
        'dW1': dW1, 'dW2': dW2, 'dW3': dW3,
        'db1': db1, 'db2': db2, 'db3': db3
    }

    return grads
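# --- Hedged sketch (not in the original snippet): a forward pass that would
# --- produce the (A1, D1, Z1, ..., b3) cache consumed above. The function
# --- name, the parameters-dictionary layout, and the use of plain NumPy for
# --- relu/sigmoid are assumptions for illustration. Inverted dropout divides
# --- by keep_prob at training time so expected activations are unchanged,
# --- which is why the backward pass rescales by keep_prob as well.
def forward_propagation_dropout_sketch(X, parameters, keep_prob):
    W1, b1 = parameters['W1'], parameters['b1']
    W2, b2 = parameters['W2'], parameters['b2']
    W3, b3 = parameters['W3'], parameters['b3']

    Z1 = np.dot(W1, X) + b1
    A1 = np.maximum(0, Z1)                                     # relu
    D1 = (np.random.rand(*A1.shape) < keep_prob).astype(int)   # dropout mask for layer 1
    A1 = A1 * D1 / keep_prob                                   # drop and rescale (inverted dropout)

    Z2 = np.dot(W2, A1) + b2
    A2 = np.maximum(0, Z2)                                     # relu
    D2 = (np.random.rand(*A2.shape) < keep_prob).astype(int)   # dropout mask for layer 2
    A2 = A2 * D2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = 1. / (1. + np.exp(-Z3))                               # sigmoid output layer (no dropout)

    cache = (A1, D1, Z1, W1, b1, A2, D2, Z2, W2, b2, A3, Z3, W3, b3)
    return A3, cache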