def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the
    cost output by forward_propagation_n.

    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1",
                  "W2", "b2", "W3", "b3"
    gradients -- output of backward_propagation_n; gradients of the cost
                 with respect to the parameters
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift used in the centered-difference formula (1)

    Returns:
    difference -- relative difference (2) between the approximated gradient
                  and the backward propagation gradient
    """
    # Set-up variables: flatten parameters and gradients to column vectors.
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox, one parameter at a time.
    for i in range(num_parameters):
        # J_plus[i]: cost with the i-th parameter shifted by +epsilon.
        thetaplus = np.copy(parameters_values)                                 # Step 1
        # BUGFIX: index the scalar element explicitly. The original wrote
        # `thetaplus[i] + epsilon` (a 1-element array) into a scalar slot,
        # relying on implicit array-to-scalar conversion, which is deprecated
        # in modern NumPy.
        thetaplus[i][0] = thetaplus[i][0] + epsilon                            # Step 2
        J_plus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaplus))  # Step 3

        # J_minus[i]: cost with the i-th parameter shifted by -epsilon.
        thetaminus = np.copy(parameters_values)                                # Step 1
        thetaminus[i][0] = thetaminus[i][0] - epsilon                          # Step 2 (same fix)
        J_minus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus))  # Step 3

        # Centered-difference approximation of the i-th partial derivative.
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compare gradapprox to the backprop gradient via the relative difference.
    numerator = np.linalg.norm(grad - gradapprox)                              # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)            # Step 2'
    difference = numerator / denominator                                       # Step 3'

    if difference > 2e-7:
        print("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")

    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Verify the gradients produced by backward_propagation_n against a
    centered finite-difference approximation of the cost from
    forward_propagation_n.

    Arguments:
    parameters -- dict of network parameters "W1", "b1", ..., "b3"
    gradients -- dict from backward_propagation_n (cost gradients)
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- perturbation size for the numerical gradient

    Returns:
    difference -- relative difference between numerical and backprop gradients
    """
    theta, _ = dictionary_to_vector(parameters)
    backprop_grad = gradients_to_vector(gradients)
    n = theta.shape[0]
    approx = np.zeros((n, 1))

    def _cost_at(vec):
        # Cost of the network evaluated with the flattened parameters `vec`.
        cost, _ = forward_propagation_n(X, Y, vector_to_dictionary(vec))
        return cost

    for idx in range(n):
        bumped_up = np.copy(theta)
        bumped_up[idx][0] += epsilon
        bumped_down = np.copy(theta)
        bumped_down[idx][0] -= epsilon
        # Centered difference for the idx-th partial derivative.
        approx[idx] = (_cost_at(bumped_up) - _cost_at(bumped_down)) / (2 * epsilon)

    difference = np.linalg.norm(backprop_grad - approx) / (
        np.linalg.norm(backprop_grad) + np.linalg.norm(approx))

    if difference > 2e-7:
        print("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")

    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Compare the backprop gradients of forward_propagation_n's cost against a
    centered finite-difference approximation (gc_utils flavor).

    Returns the relative difference between the two gradient vectors and
    prints a verdict.
    """
    theta, keys = gc_utils.dictionary_to_vector(parameters)
    backprop = gc_utils.gradients_to_vector(gradients)
    n = theta.shape[0]
    approx = np.zeros((n, 1))

    for k in range(n):
        # Cost with the k-th parameter nudged up by epsilon.
        up = np.copy(theta)
        up[k][0] += epsilon
        cost_up, _ = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(up))

        # Cost with the k-th parameter nudged down by epsilon.
        down = np.copy(theta)
        down[k][0] -= epsilon
        cost_down, _ = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(down))

        approx[k] = (cost_up - cost_down) / (2 * epsilon)

    difference = np.linalg.norm(backprop - approx) / (
        np.linalg.norm(backprop) + np.linalg.norm(approx))

    if difference < 1e-7:
        print("梯度检查:梯度正常")
    else:
        print("梯度检测:梯度超出阈值")

    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks that the gradients from backward_propagation_n match a centered
    finite-difference approximation of the cost's gradient.

    Arguments:
    parameters -- dict of parameters "W1", "b1", ..., "b3"
    gradients -- dict from backward_propagation_n
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- perturbation size for the numerical gradient

    Returns:
    difference -- relative difference between numerical and backprop gradients
    """
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        # Cost at theta_i + epsilon.
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaplus))

        # Cost at theta_i - epsilon.
        thetaminus = np.copy(parameters_values)
        # BUGFIX: the original read from `thetaplus[i][0]` (already shifted by
        # +epsilon), so thetaminus ended up equal to the unperturbed theta and
        # the finite difference became a biased one-sided estimate at half scale.
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus))

        # Centered-difference approximation of the i-th partial derivative.
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Relative difference between the two gradient vectors.
    difference = np.linalg.norm(grad - gradapprox) / (
        np.linalg.norm(grad) + np.linalg.norm(gradapprox))

    if difference > 1e-7:
        print("There is a mistake in the backward propagation! difference = " + str(difference))
    else:
        print("Your backward propagation works perfectly fine! difference = " + str(difference))
    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Gradient check: compare the analytic (backprop) gradient with a numerical
    centered-difference approximation of d(cost)/d(theta).

    Returns the relative difference and prints a colored verdict.
    """
    vec, _ = dictionary_to_vector(parameters)
    analytic = gradients_to_vector(gradients)
    count = vec.shape[0]
    numeric = np.zeros((count, 1))

    idx = 0
    while idx < count:
        # Cost with the idx-th entry shifted up by epsilon.
        shifted = np.copy(vec)
        shifted[idx][0] += epsilon
        cost_hi, _ = forward_propagation_n(X, Y, vector_to_dictionary(shifted))

        # Cost with the idx-th entry shifted down by epsilon.
        shifted = np.copy(vec)
        shifted[idx][0] -= epsilon
        cost_lo, _ = forward_propagation_n(X, Y, vector_to_dictionary(shifted))

        numeric[idx] = (cost_hi - cost_lo) / (2. * epsilon)
        idx += 1

    numerator = np.linalg.norm(numeric - analytic)
    denominator = np.linalg.norm(analytic) + np.linalg.norm(numeric)
    difference = numerator / denominator

    if difference > 1e-7:
        print("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")
    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Numerically verify backprop gradients via centered differences and print
    a colored verdict based on the relative difference.

    Returns:
    difference -- relative difference between numerical and backprop gradients
    """
    theta_vec, _ = dictionary_to_vector(parameters)
    grad_vec = gradients_to_vector(gradients)
    size = theta_vec.shape[0]

    def _cost_with(vector):
        # Forward cost for the given flattened parameter vector.
        cost, _ = forward_propagation_n(X, Y, vector_to_dictionary(vector))
        return cost

    approx = np.zeros((size, 1))
    for j in range(size):
        plus = np.copy(theta_vec)
        plus[j][0] += epsilon
        minus = np.copy(theta_vec)
        minus[j][0] -= epsilon
        approx[j] = (_cost_with(plus) - _cost_with(minus)) / (2 * epsilon)

    difference = np.linalg.norm(grad_vec - approx) / (
        np.linalg.norm(grad_vec) + np.linalg.norm(approx))

    if difference > 2e-7:
        print("\033[93m" + "反向传播有问题! difference = " + str(difference) + "\033[0m")
    else:
        print("\033[92m" + "反向传播很完美! difference = " + str(difference) + "\033[0m")
    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks whether backward_propagation_n correctly computes the gradient of
    the cost produced by forward_propagation_n.

    Arguments:
    parameters -- python dictionary with parameters "W1", "b1", ..., "b3"
    gradients -- output of backward_propagation_n (cost gradients)
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift used to compute the approximated gradient

    Returns:
    difference -- relative difference between approximated and backprop gradients
    """
    values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    total = values.shape[0]
    approximation = np.zeros((total, 1))

    for index in range(total):
        # Evaluate the cost at theta_index + epsilon.
        upper = np.copy(values)
        upper[index][0] = upper[index][0] + epsilon
        cost_plus, _ = forward_propagation_n(X, Y, vector_to_dictionary(upper))

        # Evaluate the cost at theta_index - epsilon.
        lower = np.copy(values)
        lower[index][0] = lower[index][0] - epsilon
        cost_minus, _ = forward_propagation_n(X, Y, vector_to_dictionary(lower))

        # Centered difference.
        approximation[index] = (cost_plus - cost_minus) / (2 * epsilon)

    numerator = np.linalg.norm(grad - approximation)
    denominator = np.linalg.norm(grad) + np.linalg.norm(approximation)
    difference = numerator / denominator

    if difference > 2e-7:
        print("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")

    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Check whether backward_propagation_n correctly computes the gradient of
    the cost produced by forward_propagation_n.

    Arguments:
    parameters -- dict with parameters "W1", "b1", "W2", "b2", "W3", "b3"
    gradients -- output of backward_propagation_n (cost gradients)
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift used to compute the approximated gradient

    Returns:
    difference -- relative difference between approximated and backprop gradients
    """
    # Flatten parameters; `keys` is unused.
    flat_params, keys = gc_utils.dictionary_to_vector(parameters)
    # Debug output preserved from the original implementation.
    print("parameters" + str(parameters))
    print("parameters_values" + str(flat_params))
    grad = gc_utils.gradients_to_vector(gradients)
    print("gradients" + str(gradients))
    print("grad" + str(grad))

    count = flat_params.shape[0]
    approx = np.zeros((count, 1))

    for i in range(count):
        # Cost at theta_i + epsilon.
        nudged = np.copy(flat_params)
        nudged[i][0] += epsilon
        cost_hi, _ = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(nudged))

        # Cost at theta_i - epsilon.
        nudged = np.copy(flat_params)
        nudged[i][0] -= epsilon
        cost_lo, _ = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(nudged))

        approx[i] = (cost_hi - cost_lo) / (2 * epsilon)

    # Relative difference between backprop and numerical gradients.
    difference = np.linalg.norm(grad - approx) / (
        np.linalg.norm(grad) + np.linalg.norm(approx))

    if difference < 1e-7:
        print("梯度检查:梯度正常!")
    else:
        print("梯度检查:梯度超出阈值!")

    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the
    cost output by forward_propagation_n.

    Arguments:
    parameters -- python dictionary containing your parameters
    gradients -- output of backward_propagation_n (cost gradients)
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift to the input to compute the approximated gradient

    Returns:
    difference -- difference between approximated and backprop gradients
    """
    theta, _ = dictionary_to_vector(parameters)
    analytic = gradients_to_vector(gradients)
    dims = theta.shape[0]
    estimate = np.zeros((dims, 1))

    for slot in range(dims):
        # Cost with the slot-th parameter raised by epsilon.
        raised = np.copy(theta)
        raised[slot][0] += epsilon
        cost_raised, _ = forward_propagation_n(X, Y, vector_to_dictionary(raised))

        # Cost with the slot-th parameter lowered by epsilon.
        lowered = np.copy(theta)
        lowered[slot][0] -= epsilon
        cost_lowered, _ = forward_propagation_n(X, Y, vector_to_dictionary(lowered))

        estimate[slot] = (cost_raised - cost_lowered) / (2 * epsilon)

    numerator = np.linalg.norm(estimate - analytic)
    denominator = np.linalg.norm(estimate) + np.linalg.norm(analytic)
    difference = numerator / denominator

    if difference > 1.2e-7:
        print("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")

    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Check whether backward_propagation_n correctly computes the gradient of
    the cost produced by forward_propagation_n (gc_utils flavor).

    Arguments:
    parameters -- dict with parameters "W1", "b1", "W2", "b2", "W3", "b3"
    gradients -- output of backward_propagation_n (cost gradients)
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift used to compute the approximated gradient

    Returns:
    difference -- relative difference between approximated and backprop gradients
    """
    # Flatten the parameter dict to a column vector; keys are not needed.
    vec, _unused_keys = gc_utils.dictionary_to_vector(parameters)
    backprop = gc_utils.gradients_to_vector(gradients)
    n = vec.shape[0]
    approx = np.zeros((n, 1))

    def _cost(flat):
        # Forward cost for the given flattened parameter vector (cache unused).
        cost, _ = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(flat))
        return cost

    for i in range(n):
        plus = np.copy(vec)
        plus[i][0] += epsilon
        minus = np.copy(vec)
        minus[i][0] -= epsilon
        # Centered-difference estimate of the i-th partial derivative.
        approx[i] = (_cost(plus) - _cost(minus)) / (2 * epsilon)

    difference = np.linalg.norm(backprop - approx) / (
        np.linalg.norm(backprop) + np.linalg.norm(approx))

    if difference < 1e-7:
        print("梯度检查:梯度正常!")
    else:
        print("梯度检查:梯度超出阈值!")

    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Check backward_propagation_n's gradients against a centered
    finite-difference approximation of the cost gradient.

    Arguments:
    parameters -- dict containing "W1", "b1", "W2", "b2", "W3", "b3"
    gradients -- output of backward_propagation_n
    X -- input datapoint, shape (input size, 1)
    Y -- true label
    epsilon -- tiny shift used for the numerical approximation

    Returns:
    difference -- relative difference between the two gradient vectors
    """
    flat, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    n = flat.shape[0]
    gradapprox = np.zeros((n, 1))

    for pos in range(n):
        hi = np.copy(flat)
        hi[pos][0] = hi[pos][0] + epsilon
        cost_hi, _ = forward_propagation_n(X, Y, vector_to_dictionary(hi))

        lo = np.copy(flat)
        lo[pos][0] = lo[pos][0] - epsilon
        cost_lo, _ = forward_propagation_n(X, Y, vector_to_dictionary(lo))

        gradapprox[pos] = (cost_hi - cost_lo) / (2 * epsilon)

    # Debug output preserved from the original implementation.
    print('grad.shape = ', grad.shape)
    print('gradapprox.shape = ', gradapprox.shape)

    difference = np.linalg.norm(grad - gradapprox) / (
        np.linalg.norm(grad) + np.linalg.norm(gradapprox))

    if difference > 1e-7:
        print('there is a mistake in the backward propagation, difference = ' + str(difference))
    else:
        print('your backward propagation works perfectly fine! difference = ' + str(difference))

    return difference
def gradients_check(X, Y, lambd=0, keep_prob=1, init_method='he', epsilon=1e-7):
    """
    Compare analytic gradients from backward_propagate_with_reg against a
    centered finite-difference approximation of the regularized loss.

    Arguments:
    X -- input data, used both for shape inference and cost evaluation
    Y -- true labels
    lambd -- L2 regularization factor passed through to forward/backward/loss
    keep_prob -- dropout keep probability passed through to forward/backward
    init_method -- one of 'zeros', 'random', 'he'; selects the initializer
    epsilon -- finite-difference step (new, backward-compatible parameter;
               the original hard-coded 1e-7 in three places)

    Returns:
    diff -- relative difference between analytic and approximated gradients

    Raises:
    ValueError -- if init_method is not one of the supported schemes
    """
    layers_dims = [X.shape[0], 5, 3, 1]

    # Initialize parameters with the requested scheme.
    if init_method == 'zeros':
        params = init_zeros(layers_dims)
    elif init_method == 'random':
        params = init_random(layers_dims)
    elif init_method == 'he':
        params = init_he(layers_dims)
    else:
        # BUGFIX: the original only printed an error here and then crashed
        # later with a NameError on `params`; fail fast instead.
        raise ValueError('Error: unexcepted init_method!')

    # Compute the analytic gradients via backprop.
    a3, cache = forward_propagate_with_reg(X, params, keep_prob=keep_prob)
    grads = backward_propagate_with_reg(X, Y, cache, lambd=lambd, keep_prob=keep_prob)
    grads_vector = gc_utils.gradients_to_vector(grads)

    # Flatten parameters to a (n, 1) vector for easy per-entry perturbation.
    theta, keys = gc_utils.dictionary_to_vector(params)
    n = theta.shape[0]  # number of scalar parameters
    grads_approx_vector = np.zeros((n, 1))

    # Numerical gradient: centered difference per parameter.
    for i in range(n):
        theta_p = np.copy(theta)
        theta_p[i, 0] += epsilon
        params_p = gc_utils.vector_to_dictionary(theta_p)

        theta_m = np.copy(theta)
        theta_m[i, 0] -= epsilon
        params_m = gc_utils.vector_to_dictionary(theta_m)

        a3_, cache_ = forward_propagate_with_reg(X, params_p, keep_prob=keep_prob)
        J_p = compute_loss_with_reg(a3_, Y, params_p, lambd=lambd)
        a3_, cache_ = forward_propagate_with_reg(X, params_m, keep_prob=keep_prob)
        J_m = compute_loss_with_reg(a3_, Y, params_m, lambd=lambd)

        grads_approx_vector[i, 0] = (J_p - J_m) / (2 * epsilon)

    # Relative difference between analytic and numerical gradients.
    numerator = np.linalg.norm(grads_vector - grads_approx_vector)
    denominator = np.linalg.norm(grads_vector) + np.linalg.norm(grads_approx_vector)
    diff = numerator / denominator

    return diff
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the
    cost output by forward_propagation_n.

    Arguments:
    parameters -- python dictionary containing "W1", "b1", ..., "b3"
    gradients -- output of backward_propagation_n (cost gradients)
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift to the input to compute the approximated gradient

    Returns:
    difference -- relative difference between the approximated gradient and
                  the backward propagation gradient (no verdict is printed)
    """
    theta, _ = dictionary_to_vector(parameters)
    backprop = gradients_to_vector(gradients)
    size = theta.shape[0]
    numeric = np.zeros((size, 1))

    for i in range(size):
        # Cost at theta_i + epsilon.
        shifted_up = theta.copy()
        shifted_up[i] = shifted_up[i] + epsilon
        cost_up, _ = forward_propagation_n(X, Y, vector_to_dictionary(shifted_up))

        # Cost at theta_i - epsilon.
        shifted_down = theta.copy()
        shifted_down[i] = shifted_down[i] - epsilon
        cost_down, _ = forward_propagation_n(X, Y, vector_to_dictionary(shifted_down))

        # Centered-difference estimate of the i-th partial derivative.
        numeric[i] = (cost_up - cost_down) / (2 * epsilon)

    # Relative difference between the two gradient vectors.
    difference = np.linalg.norm(backprop - numeric) / (
        np.linalg.norm(backprop) + np.linalg.norm(numeric))
    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the
    cost output by forward_propagation_n.

    Arguments:
    parameters -- python dictionary containing "W1", "b1", ..., "b3"
    gradients -- output of backward_propagation_n (cost gradients)
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift to the input to compute the approximated gradient

    Returns:
    diff -- relative difference between approximated and backprop gradients
    """
    base, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    dims = base.shape[0]
    approx = np.zeros((dims, 1))

    for pos in range(dims):
        # One-hot perturbation of size epsilon at position `pos`.
        bump = np.zeros((dims, 1))
        bump[pos][0] = epsilon

        cost_plus, _ = forward_propagation_n(X, Y, vector_to_dictionary(base + bump))
        cost_minus, _ = forward_propagation_n(X, Y, vector_to_dictionary(base - bump))

        approx[pos] = (cost_plus - cost_minus) / (2. * epsilon)

    diff = np.linalg.norm(grad - approx) / (np.linalg.norm(grad) +
                                            np.linalg.norm(approx))

    if diff > 1e-7:
        print('There is a mistake in backword propagation diff = {}'.format(
            diff))
    else:
        print(
            'Your backward propagation works well with diff = {}'.format(diff))

    return diff
def gradient_check_n(parameters, grads, X, Y, epsilon=1e-7):
    '''
    Checks if backward_propagation_n computes correctly the gradient of the
    cost output by forward_propagation_n.

    Arguments:
    parameters -- python dictionary containing "W1", "b1", ..., "b3"
    grads -- output of backward_propagation_n (cost gradients)
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift to the input to compute the approximated gradient

    Returns:
    difference -- relative difference between the approximated gradient and
                  the backward propagation gradient
    '''
    parameters_values, _ = dictionary_to_vector(parameters)
    grads_value = gradients_to_vector(grads)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradsapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        # Cost at theta_i + epsilon.
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaplus))

        # Cost at theta_i - epsilon.
        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus))

        # Centered-difference approximation.
        gradsapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    numerator = np.linalg.norm(grads_value - gradsapprox)
    denominator = np.linalg.norm(grads_value) + np.linalg.norm(gradsapprox)
    difference = numerator / denominator

    if difference > 1e-7:
        print(
            'There is a mistake in the backward propagation! difference = {}'.
            format(difference))
    else:
        # BUGFIX: corrected "worlks" typo in the user-facing message.
        print(
            'Your backward propagation works perfectly fine! difference = {}'.
            format(difference))

    # BUGFIX: the original never returned `difference` even though the
    # docstring promises it; callers received None.
    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    '''
    Check whether backward_propagation_n correctly computes the gradient of
    the cost produced by forward_propagation_n.

    :param parameters: python dict with 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'
    :param gradients: output of backward_propagation_n (cost gradients)
    :param X: input datapoint, of shape (input size, 1)
    :param Y: true label
    :param epsilon: tiny shift used to compute the approximated gradient
    :return: relative difference between approximated and backprop gradients
    '''
    # Flatten parameters; keys are unused. parameters_values is an (n, 1) matrix.
    parameters_values, keys = gc_utils.dictionary_to_vector(parameters)
    grad = gc_utils.gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    # BUGFIX: np.zeros(num_parameters, 1) passes 1 as the dtype argument and
    # raises TypeError; the shape must be a single tuple.
    gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox entry by entry.
    for i in range(num_parameters):
        # J_plus[i]: cost with the i-th parameter shifted by +epsilon.
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], cache = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(thetaplus))

        # J_minus[i]: cost with the i-th parameter shifted by -epsilon.
        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], cache = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(thetaminus))

        # Centered-difference approximation of the i-th partial derivative.
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Relative difference between backprop and numerical gradients.
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator

    if difference < 1e-7:
        print('梯度检查:梯度正常')
    else:
        print('梯度检查:梯度超出阈值')

    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Validate backprop gradients with a centered finite-difference check.

    Arguments:
    parameters -- dict of network parameters
    gradients -- dict of backprop gradients for the same parameters
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- perturbation size for the numerical gradient

    Returns:
    difference -- relative difference between the two gradient vectors
    """
    theta_vector, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    n = theta_vector.shape[0]
    approx = np.zeros((n, 1))

    def _evaluate(vec):
        # Cost of the network for a flattened parameter vector.
        cost, _ = forward_propagation_n(X, Y, vector_to_dictionary(vec))
        return cost

    for k in range(n):
        high = np.copy(theta_vector)
        high[k][0] += epsilon
        low = np.copy(theta_vector)
        low[k][0] -= epsilon
        # Centered-difference estimate of the k-th partial derivative.
        approx[k] = (_evaluate(high) - _evaluate(low)) / (2. * epsilon)

    difference = np.linalg.norm(grad - approx) / (
        np.linalg.norm(grad) + np.linalg.norm(approx))

    if difference > 1e-6:
        print("There is a mistake in the backward propagation! difference = " + str(difference))
    else:
        print("Your backward propagation works perfectly fine! difference = " + str(difference))

    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Gradient check against forward_propagation_n's cost (gc_utils flavor).

    Converts the parameter dict to a flat vector, estimates each partial
    derivative with a centered difference, and compares the result with the
    backprop gradient vector.

    Returns:
    difference -- relative difference between the two gradient vectors
    """
    # Flatten the parameters dict into an array; keys are not needed here.
    flat, keys = gc_utils.dictionary_to_vector(parameters)
    grad = gc_utils.gradients_to_vector(gradients)
    n = flat.shape[0]
    approx = np.zeros((n, 1))

    for j in range(n):  # loop over every scalar parameter
        # Cost with parameter j nudged up (cache is unused).
        up = np.copy(flat)
        up[j][0] += epsilon
        cost_up, cache = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(up))

        # Cost with parameter j nudged down (cache is unused).
        down = np.copy(flat)
        down[j][0] -= epsilon
        cost_down, cache = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(down))

        # Centered-difference estimate.
        approx[j] = (cost_up - cost_down) / (2 * epsilon)

    # Compare gradapprox with the backprop gradient.
    difference = np.linalg.norm(grad - approx) / (
        np.linalg.norm(grad) + np.linalg.norm(approx))

    if difference < 1e-7:
        print("Gradient Checking: 梯度正常!")
    else:
        print("Gradient Checking:梯度超出阈值!")

    return difference
return gradients

You obtained some results on the fraud detection test set, but you are not 100% sure of your model. Nobody's perfect! Let's implement gradient checking to verify whether your gradients are correct.

How does gradient checking work?

As in 1) and 2), you want to compare "gradapprox" to the gradient computed by backpropagation. The formula is still:

$$\frac{\partial J}{\partial \theta} = \lim_{\varepsilon \to 0} \frac{J(\theta + \varepsilon) - J(\theta - \varepsilon)}{2\varepsilon} \tag{1}$$

However, $\theta$ is not a scalar anymore. It is a dictionary called "parameters". We implemented a function "dictionary_to_vector()" for you. It converts the "parameters" dictionary into a vector called "values", obtained by reshaping all parameters (W1, b1, W2, b2, W3, b3) into vectors and concatenating them. The inverse function is "vector_to_dictionary", which outputs back the "parameters" dictionary.

Figure 2: dictionary_to_vector() and vector_to_dictionary(). You will need these functions in gradient_check_n().

We have also converted the "gradients" dictionary into a vector "grad" using gradients_to_vector(). You don't need to worry about that.

Exercise: Implement gradient_check_n().

Instructions: Here is pseudo-code that will help you implement the gradient check.

For each i in num_parameters:
- To compute J_plus[i]:
    - Set $\theta^{+}$ to np.copy(parameters_values)
    - Set $\theta^{+}_i$ to $\theta^{+}_i + \varepsilon$
    - Calculate $J^{+}_i$ using forward_propagation_n(x, y, vector_to_dictionary($\theta^{+}$)).
- To compute J_minus[i]: do the same thing with $\theta^{-}$
- Compute $\text{gradapprox}[i] = \frac{J^{+}_i - J^{-}_i}{2\varepsilon}$
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the
    cost output by forward_propagation_n.

    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3"
    gradients -- output of backward_propagation_n, gradients of the cost w.r.t. the parameters
    X -- input data
    Y -- true labels
    epsilon -- tiny shift to the input to compute the approximated gradient with formula (1)

    Returns:
    difference -- difference (2) between the approximated gradient and the
                  backward-propagation gradient
    """
    # Flatten the parameter and gradient dictionaries into column vectors.
    theta, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    count = theta.shape[0]

    cost_plus = np.zeros((count, 1))
    cost_minus = np.zeros((count, 1))
    gradapprox = np.zeros((count, 1))

    # Approximate the partial derivative with respect to each parameter.
    for k in range(count):
        # Cost after shifting parameter k upward by epsilon.
        nudged = np.copy(theta)
        nudged[k][0] = nudged[k][0] + epsilon
        cost_plus[k], _ = forward_propagation_n(X, Y, vector_to_dictionary(nudged))

        # Cost after shifting parameter k downward by epsilon.
        nudged = np.copy(theta)
        nudged[k][0] = nudged[k][0] - epsilon
        cost_minus[k], _ = forward_propagation_n(X, Y, vector_to_dictionary(nudged))

        # Centered difference quotient.
        gradapprox[k] = (cost_plus[k] - cost_minus[k]) / (2 * epsilon)

    # Relative distance between analytic and numerical gradients.
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator

    if difference > 2e-7:
        print("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")

    return difference


# What you should remember from this notebook:
# - Gradient checking verifies closeness between the gradients from backpropagation
#   and the numerical approximation of the gradient (computed using forward propagation).
# - Gradient checking is slow, so we don't run it in every iteration of training.
#   You would usually run it only to make sure your code is correct,
#   then turn it off and use backprop for the actual learning process.
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the
    cost output by forward_propagation_n.

    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3"
    gradients -- output of backward_propagation_n, gradients of the cost w.r.t. the parameters
    X -- input data
    Y -- true labels
    epsilon -- tiny shift to the input to compute the approximated gradient with formula (1)

    Returns:
    difference -- difference (2) between the approximated gradient and the
                  backward-propagation gradient
    """
    # Convert the "parameters" dictionary into one flat column vector,
    # and do the same for the analytic gradients.
    param_vec, _ = dictionary_to_vector(parameters)
    grad_vec = gradients_to_vector(gradients)
    size = param_vec.shape[0]

    # Per-parameter cost samples and the resulting numerical gradient.
    j_up = np.zeros((size, 1))
    j_down = np.zeros((size, 1))
    approx = np.zeros((size, 1))

    for i in range(size):
        # J(theta + epsilon * e_i): only entry i is perturbed.
        up = np.copy(param_vec)
        up[i][0] += epsilon
        j_up[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(up))

        # J(theta - epsilon * e_i): same perturbation, opposite sign.
        down = np.copy(param_vec)
        down[i][0] -= epsilon
        j_down[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(down))

        # Two-sided difference quotient, formula (1).
        approx[i] = (j_up[i] - j_down[i]) / (2 * epsilon)

    # Normalized distance between the two gradient vectors, formula (2).
    top = np.linalg.norm(grad_vec - approx)
    bottom = np.linalg.norm(grad_vec) + np.linalg.norm(approx)
    difference = top / bottom

    if difference > 2e-7:
        print("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")

    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Gradient check: verifies whether backward propagation correctly computes
    the gradient of the cost produced by forward propagation.

    Arguments:
    parameters -- python dictionary containing the parameters "W1", "b1", "W2", "b2", "W3", "b3"
    gradients -- output of backward_propagation_n, gradients of the cost w.r.t. the parameters
    X -- input data
    Y -- true labels
    epsilon -- tiny shift to the input, used to compute the approximated gradient

    Returns:
    difference -- difference between the approximated gradient and the
                  backward-propagation gradient
    """
    # Set-up variables: flatten both dictionaries into column vectors.
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox one parameter at a time.
    for i in range(num_parameters):
        # J_plus[i]: cost with parameter i shifted up by epsilon.
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaplus))

        # J_minus[i]: cost with parameter i shifted down by epsilon.
        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus))

        # Centered-difference approximation of the partial derivative.
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compare gradapprox to the backward-propagation gradients by computing
    # the normalized difference.
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator

    # FIX: the first message below was a single string literal broken across
    # two physical lines (a syntax error); it is rejoined here to match the
    # sibling implementations of this function.
    if difference > 2e-7:
        print("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")

    return difference