def test_with_reg():
    A3, Y_assess, parameters = compute_cost_with_regularization_test_case()
    print("cost = " + str(compute_cost_with_regularization(A3, Y_assess, parameters, lambd=0.1)))

    X_assess, Y_assess, cache = backward_propagation_with_regularization_test_case()
    grads = backward_propagation_with_regularization(X_assess, Y_assess, cache, lambd=0.7)
    print("dW1 = " + str(grads["dW1"]))
    print("dW2 = " + str(grads["dW2"]))
    print("dW3 = " + str(grads["dW3"]))

    parameters = model(train_X, train_Y, lambd=0.7)
    print("On the train set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)

    plt.title("Model with L2-regularization")
    axes = plt.gca()
    axes.set_xlim([-0.75, 0.40])
    axes.set_ylim([-0.75, 0.65])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
def dropout_test():
    X_assess, parameters = forward_propagation_with_dropout_test_case()
    A3, cache = forward_propagation_with_dropout(X_assess, parameters, keep_prob=0.7)
    print("A3 = " + str(A3))

    X_assess, Y_assess, cache = backward_propagation_with_dropout_test_case()
    gradients = backward_propagation_with_dropout(X_assess, Y_assess, cache, keep_prob=0.8)
    print("dA1 = " + str(gradients["dA1"]))
    print("dA2 = " + str(gradients["dA2"]))

    parameters = model(train_X, train_Y, keep_prob=0.86, learning_rate=0.3)
    print("On the train set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)

    plt.title("Model with dropout")
    axes = plt.gca()
    axes.set_xlim([-0.75, 0.40])
    axes.set_ylim([-0.75, 0.65])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
def main():
    plt.rcParams['figure.figsize'] = (7.0, 4.0)  # set default size of plots
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['image.cmap'] = 'gray'

    train_X, train_Y, test_X, test_Y = load_2D_dataset()

    # parameters = model(train_X, train_Y)
    # parameters = model(train_X, train_Y, lambd=0.7)
    parameters = model(train_X, train_Y, keep_prob=0.86, learning_rate=0.3)
    print("On the training set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)

    plt.figure()
    plt.title("Model with dropout")
    axes = plt.gca()
    axes.set_xlim([-0.75, 0.40])
    axes.set_ylim([-0.75, 0.65])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
    plt.show()
def print_decision(parameters, X, Y):
    # `parameters` is taken as an argument; the original relied on an
    # undefined global name.
    plt.close()
    plt.title("Model")
    axes = plt.gca()
    axes.set_xlim([-0.75, 0.4])
    axes.set_ylim([-0.75, 0.65])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), X, Y)
def plt_decision_boundary(parameters, train_X, train_Y, title):
    # `parameters`, `train_X`, and `train_Y` are taken as arguments; the
    # original relied on globals.
    plt.title(title)
    axes = plt.gca()
    axes.set_xlim([-0.75, 0.40])
    axes.set_ylim([-0.75, 0.65])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
    plt.show()
def test_without_reg():
    parameters = model(train_X, train_Y)
    print("On the training set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)

    plt.title("Model without regularization")
    axes = plt.gca()
    axes.set_xlim([-0.75, 0.40])
    axes.set_ylim([-0.75, 0.65])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
def main(regularization='no'):
    train_X, train_Y, test_X, test_Y = load_2D_dataset()
    plt.show()

    if regularization == 'no':
        parameters = model(train_X, train_Y)
        plt.title("Model without regularization")
    elif regularization == 'L2':
        parameters = model(train_X, train_Y, lambd=0.7)
        plt.title("Model with L2 regularization")
    else:
        parameters = model(train_X, train_Y, keep_prob=0.86, learning_rate=0.3)
        plt.title("Model with dropout")

    print("On the training set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)

    axes = plt.gca()
    axes.set_xlim([-0.75, 0.40])
    axes.set_ylim([-0.75, 0.65])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
def main():
    # Load the data and plot the red/blue point distribution.
    # In load_2D_dataset(), c=train_Y was changed to c=np.squeeze(train_Y).
    train_X, train_Y, test_X, test_Y = load_2D_dataset()

    # Dropout regularization:
    # train the parameters with the dropout-regularized network.
    parameters = model(train_X, train_Y, keep_prob=0.86, learning_rate=0.3)

    # Predict and print the accuracy.
    print("On the train set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)

    # Plot the decision boundary.
    plt.title("Model with dropout")
    axes = plt.gca()
    axes.set_xlim([-0.75, 0.40])
    axes.set_ylim([-0.75, 0.65])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
params = model(train_X, train_Y, is_plot=True)
print("Train set:")
predictions_train = init_utils.predict(train_X, train_Y, params)
print("Test set:")
predictions_test = init_utils.predict(test_X, test_Y, params)

# Plot the decision boundary of the unregularized model.
plt.title("No regularization")
axes = plt.gca()
axes.set_xlim([-0.75, 0.40])
axes.set_ylim([-0.75, 0.65])
reg_utils.plot_decision_boundary(lambda x: reg_utils.predict_dec(params, x.T), train_X, np.squeeze(train_Y))

# Judging from the decision boundary and the train/test accuracies, the
# current unregularized model is overfitting the training set.


def compute_cost_with_regularization(A3, Y, params, lambd):
    m = Y.shape[1]
    W1 = params["W1"]
    W2 = params["W2"]
    W3 = params["W3"]
    # Cross-entropy part of the cost.
    orig_cost = reg_utils.compute_cost(A3, Y)
    # Add the L2 term: (lambda / 2m) * sum of squared weights over all layers.
    cost = orig_cost + lambd / (2 * m) * (
        np.sum(np.square(W1)) + np.sum(np.square(W2)) + np.sum(np.square(W3)))
    return cost
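# The matching change on the backward pass adds the derivative of the L2 term,
# (lambd / m) * W_l, to each weight gradient. Below is a minimal sketch of just
# that adjustment, assuming the W1..W3 / dW1..dW3 dict naming used above;
# add_l2_gradient is a hypothetical helper, not part of reg_utils.
def add_l2_gradient(grads, params, lambd, m):
    for l in (1, 2, 3):
        # d/dW of (lambd / 2m) * ||W||_F^2 is (lambd / m) * W.
        grads["dW" + str(l)] = grads["dW" + str(l)] + (lambd / m) * params["W" + str(l)]
    return grads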
parameters = model(train_X, train_Y)
print("On the training set:")
predictions_train = predict(train_X, train_Y, parameters)
print("On the test set:")
predictions_test = predict(test_X, test_Y, parameters)

# The train accuracy is 94.8% while the test accuracy is 91.5%. This is the
# **baseline model** (you will observe the impact of regularization on this
# model). Run the following code to plot the decision boundary of your model.

plt.title("Model without regularization")
axes = plt.gca()
axes.set_xlim([-0.75, 0.40])
axes.set_ylim([-0.75, 0.65])
plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)

# The non-regularized model is obviously overfitting the training set. It is
# fitting the noisy points! Let's now look at two techniques to reduce
# overfitting.

# ## 2 - L2 Regularization
#
# The standard way to avoid overfitting is called **L2 regularization**. It
# consists of appropriately modifying your cost function, from:
# $$J = -\frac{1}{m} \sum\limits_{i = 1}^{m} \large{(}\small y^{(i)}\log\left(a^{[L](i)}\right) + (1-y^{(i)})\log\left(1- a^{[L](i)}\right) \large{)} \tag{1}$$
# To:
# $$J_{regularized} = \small \underbrace{-\frac{1}{m} \sum\limits_{i = 1}^{m} \large{(}\small y^{(i)}\log\left(a^{[L](i)}\right) + (1-y^{(i)})\log\left(1- a^{[L](i)}\right) \large{)} }_\text{cross-entropy cost} + \underbrace{\frac{1}{m} \frac{\lambda}{2} \sum\limits_l\sum\limits_k\sum\limits_j W_{k,j}^{[l]2} }_\text{L2 regularization cost} \tag{2}$$
#
# Let's modify your cost and observe the consequences.
#
# **Exercise**: Implement `compute_cost_with_regularization()` which computes
# the cost given by formula (2). To calculate
# $\sum\limits_k\sum\limits_j W_{k,j}^{[l]2}$, use:
# ```python
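# The hint beneath the fence above is truncated in this copy. As a hedged
# sketch of formula (2): the squared sum for one layer can be computed with
# np.sum(np.square(Wl)), and the full regularized cost for the three-layer
# network used throughout (W1..W3 in `parameters`, compute_cost for the
# cross-entropy part) would look like the illustrative helper below, which is
# not the graded implementation.
def l2_cost_sketch(A3, Y, parameters, lambd):
    m = Y.shape[1]
    cross_entropy = compute_cost(A3, Y)  # formula (1), via the course helper
    # (lambda / 2m) * sum over layers of the squared Frobenius norm of W_l.
    l2 = sum(np.sum(np.square(parameters["W" + str(l)])) for l in (1, 2, 3))
    return cross_entropy + (lambd / (2 * m)) * l2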
        # Record and print the cost.
        if i % 1000 == 0:
            # Record the cost.
            costs.append(cost)
            if print_cost and i % 10000 == 0:
                # Print the cost.
                print("Cost after iteration " + str(i) + ": " + str(cost))

    # Optionally plot the cost curve.
    if is_plot:
        plt.plot(costs)
        plt.ylabel('cost')
        plt.xlabel('iterations (x1,000)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

    # Return the learned parameters.
    return parameters


parameters = model(train_X, train_Y, is_plot=True)
print("Train set:")
predictions_train = reg_utils.predict(train_X, train_Y, parameters)
print("Test set:")
predictions_test = reg_utils.predict(test_X, test_Y, parameters)

plt.title("Model without regularization")
axes = plt.gca()
axes.set_xlim([-0.75, 0.40])
axes.set_ylim([-0.75, 0.65])
reg_utils.plot_decision_boundary(lambda x: reg_utils.predict_dec(parameters, x.T), train_X, train_Y)
# L2 regularization:
# parameters = model(train_X, train_Y, lambd=0.7, is_plot=True)
# print("With regularization, train set:")
# predictions_train = reg_utils.predict(train_X, train_Y, parameters)
# print("With regularization, test set:")
# predictions_test = reg_utils.predict(test_X, test_Y, parameters)
#
# # Decision boundary
# plt.title("Model with L2-regularization")
# axes = plt.gca()
# axes.set_xlim([-0.75, 0.40])
# axes.set_ylim([-0.75, 0.65])
# reg_utils.plot_decision_boundary(lambda x: reg_utils.predict_dec(parameters, x.T), train_X, train_Y)

# Dropout:
parameters = model(train_X, train_Y, keep_prob=0.86, learning_rate=0.3, is_plot=True)
print("With dropout, train set:")
predictions_train = reg_utils.predict(train_X, train_Y, parameters)
print("With dropout, test set:")
predictions_test = reg_utils.predict(test_X, test_Y, parameters)

plt.title("Model with dropout")
axes = plt.gca()
axes.set_xlim([-0.75, 0.40])
axes.set_ylim([-0.75, 0.65])
reg_utils.plot_decision_boundary(
    lambda x: reg_utils.predict_dec(parameters, x.T), train_X, train_Y)
    db2 = 1. / m * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dA1 = dA1 * D1         # Step 1: apply the same mask as in forward propagation, shutting off the same nodes.
    dA1 = dA1 / keep_prob  # Step 2: scale the values of the kept (non-zero) nodes.
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = 1. / m * np.dot(dZ1, X.T)
    db1 = 1. / m * np.sum(dZ1, axis=1, keepdims=True)

    gradients = {
        'dZ3': dZ3, 'dW3': dW3, 'db3': db3, 'dA2': dA2,
        'dZ2': dZ2, 'dW2': dW2, 'db2': db2, 'dA1': dA1,
        'dZ1': dZ1, 'dW1': dW1, 'db1': db1
    }
    return gradients


parameters = model(train_X, train_Y, keep_prob=0.86, learning_rate=0.3, is_plot=True)
print("With dropout, train set:")
predictions_train = reg_utils.predict(train_X, train_Y, parameters)
print("With dropout, test set:")
predictions_test = reg_utils.predict(test_X, test_Y, parameters)

plt.title("Model with dropout")
axes = plt.gca()
axes.set_xlim([-0.75, 0.40])
axes.set_ylim([-0.75, 0.65])
reg_utils.plot_decision_boundary(lambda x: reg_utils.predict_dec(parameters, x.T), train_X, train_Y)
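# The two dropout steps above mirror the forward pass, where the mask D1 is
# first drawn and the kept activations are scaled by 1/keep_prob (inverted
# dropout). A minimal sketch of that forward step, assuming the A1/D1 naming
# used above; dropout_forward_sketch is illustrative, not a course helper.
def dropout_forward_sketch(A1, keep_prob):
    D1 = np.random.rand(A1.shape[0], A1.shape[1]) < keep_prob  # Step 1: random keep/drop mask
    A1 = A1 * D1                                               # Step 2: shut off the dropped nodes
    A1 = A1 / keep_prob                                        # Step 3: scale the kept nodes
    return A1, D1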
# axes.set_xlim([-0.75, 0.40])
# axes.set_ylim([-0.75, 0.65])
# plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, np.reshape(train_Y, -1))

sep("Dropout")
X_assess, parameters = forward_propagation_with_dropout_test_case()
A3, cache = forward_propagation_with_dropout(X_assess, parameters, keep_prob=0.7)
print("A3 = " + str(A3))

X_assess, Y_assess, cache = backward_propagation_with_dropout_test_case()
gradients = backward_propagation_with_dropout(X_assess, Y_assess, cache, keep_prob=0.8)
print("dA1 = " + str(gradients["dA1"]))
print("dA2 = " + str(gradients["dA2"]))

parameters = model(train_X, train_Y, keep_prob=0.86, learning_rate=0.3)
print("On the train set:")
predictions_train = predict(train_X, train_Y, parameters)
print("On the test set:")
predictions_test = predict(test_X, test_Y, parameters)

plt.title("Model with dropout")
axes = plt.gca()
axes.set_xlim([-0.75, 0.40])
axes.set_ylim([-0.75, 0.65])
plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, np.reshape(train_Y, -1))
def main2():
    # %matplotlib inline
    plt.rcParams['figure.figsize'] = (7.0, 4.0)  # set default size of plots
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['image.cmap'] = 'gray'

    train_X, train_Y, test_X, test_Y = load_2D_dataset()

    parameters = model(train_X, train_Y)
    print("On the training set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)

    ####################
    A3, Y_assess, parameters = compute_cost_with_regularization_test_case()
    print("cost = " + str(compute_cost_with_regularization(A3, Y_assess, parameters, lambd=0.1)))

    ####################
    X_assess, Y_assess, cache = backward_propagation_with_regularization_test_case()
    grads = backward_propagation_with_regularization(X_assess, Y_assess, cache, lambd=0.7)
    print("dW1 = " + str(grads["dW1"]))
    print("dW2 = " + str(grads["dW2"]))
    print("dW3 = " + str(grads["dW3"]))

    parameters = model(train_X, train_Y, lambd=0.7)
    print("On the train set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)

    plt.title("Model with L2-regularization")
    axes = plt.gca()
    axes.set_xlim([-0.75, 0.40])
    axes.set_ylim([-0.75, 0.65])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)

    ####################
    X_assess, parameters = forward_propagation_with_dropout_test_case()
    A3, cache = forward_propagation_with_dropout(X_assess, parameters, keep_prob=0.7)
    print("A3 = " + str(A3))

    ####################
    X_assess, Y_assess, cache = backward_propagation_with_dropout_test_case()
    gradients = backward_propagation_with_dropout(X_assess, Y_assess, cache, keep_prob=0.8)
    print("dA1 = " + str(gradients["dA1"]))
    print("dA2 = " + str(gradients["dA2"]))

    parameters = model(train_X, train_Y, keep_prob=0.86, learning_rate=0.3)
    print("On the train set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)

    plt.title("Model with dropout")
    axes = plt.gca()
    axes.set_xlim([-0.75, 0.40])
    axes.set_ylim([-0.75, 0.65])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)