def main(initialization='he'):
    """Train the 3-layer net with the requested initialization and visualize it.

    initialization -- 'zero', 'random' or 'he'; any other value falls back to 'he'.
    Relies on module-level helpers: load_dataset, model, predict, predict_dec
    and plot_decision_boundary.
    """
    train_X, train_Y, test_X, test_Y = load_dataset()

    # Unknown schemes fall back to 'he', mirroring the original if/elif chain.
    scheme = initialization if initialization in ('zero', 'random') else 'he'
    parameters = model(train_X, train_Y, initialization=scheme)

    # Report accuracy on both splits (the banner echoes the caller's argument).
    print("-----The result of using " + initialization + "-----")
    print("On the train set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)

    # Draw the learned decision boundary over the training points.
    plt.title("Model with " + initialization + " initialization")
    axes = plt.gca()
    axes.set_xlim([-1.5, 1.5])
    axes.set_ylim([-1.5, 1.5])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
def main():
    """He-initialization demo: print fresh parameters, train, predict, plot."""
    # Load and plot the data: blue/red dots in circles.
    train_X, train_Y, test_X, test_Y = load_dataset()

    # Pick an initialization method and print the freshly initialized values.
    parameters = initialize_parameters_he([2, 4, 1])
    print("W1 = " + str(parameters["W1"]))
    print("b1 = " + str(parameters["b1"]))
    print("W2 = " + str(parameters["W2"]))
    print("b2 = " + str(parameters["b2"]))

    # Train the neural network model to obtain the learned parameters.
    parameters = model(train_X, train_Y, initialization="he")

    # Predict with the trained parameters; report train and test accuracy.
    print("On the train set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)

    # Print the raw prediction vectors.
    print(predictions_train)
    print(predictions_test)

    # Plot how the red and blue dots are separated.
    plt.title("Model with He initialization")
    axes = plt.gca()
    axes.set_xlim([-1.5, 1.5])
    axes.set_ylim([-1.5, 1.5])
    # The last argument must be np.squeeze(train_Y), not train_Y directly.
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, np.squeeze(train_Y))
def main():
    """Contrast a poor (large random) initialization against He initialization."""
    train_X, train_Y, test_X, test_Y = load_dataset()

    # Not-so-good initialization: large random weights.
    parameters = model(train_X, train_Y, initialization="random")
    print("On the train set:")
    predict(train_X, train_Y, parameters)
    print("On the test set:")
    predict(test_X, test_Y, parameters)

    # Good initialization: He-scaled random weights.
    parameters = model(train_X, train_Y, initialization="he")
    print("On the train set:")
    predict(train_X, train_Y, parameters)
    print("On the test set:")
    predict(test_X, test_Y, parameters)

    # Decision boundary for the He-initialized model, on a fixed viewport.
    plt.title("Model with He initialization")
    axes = plt.gca()
    axes.set_xlim([-1.5, 1.5])
    axes.set_ylim([-1.5, 1.5])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
def main():
    """Train with He initialization and display the resulting decision boundary."""
    # Default plotting setup for this demo.
    plt.rcParams['figure.figsize'] = (7.0, 4.0)  # set default size of plots
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['image.cmap'] = 'gray'

    # Load image dataset: blue/red dots in circles.
    train_X, train_Y, test_X, test_Y = load_dataset()

    parameters = model(train_X, train_Y, initialization="he")
    print("On the train set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)

    # Fresh figure for the boundary plot.
    plt.figure()
    plt.title("Model with He initialization")
    axes = plt.gca()
    axes.set_xlim([-1.5, 1.5])
    axes.set_ylim([-1.5, 1.5])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
    plt.show()
print("Iteration " + str(i) + " Cost:" + str(cost)) if is_plot: plt.plot(costs) plt.ylabel("cost") plt.xlabel("#iterations") plt.title("Learning rate = " + str(learning_rate)) plt.show() return params # ### 初始化参数 # 首先读取训练数据并通过plt观察分布 # In[3]: train_X, train_Y, test_X, test_Y = init_utils.load_dataset(is_plot=True) # #### 全零初始化 # In[4]: def init_params_zeros(layer_dims): L = len(layer_dims) params = {} for i in range(1, L): params["W" + str(i)] = np.zeros((layer_dims[i], layer_dims[i - 1])) params["b" + str(i)] = np.zeros((layer_dims[i], 1)) return params
# In[22]:

import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
from init_utils import sigmoid, relu, compute_loss, forward_propagation, backward_propagation
from init_utils import update_parameters, predict, load_dataset, plot_decision_boundary, predict_dec

# Notebook display magic plus default plotting setup.
get_ipython().magic('matplotlib inline')
plt.rcParams['figure.figsize'] = (7.0, 4.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# load image dataset: blue/red dots in circles
train_X, train_Y, test_X, test_Y = load_dataset()

# You would like a classifier to separate the blue dots from the red dots.

# ## 1 - Neural Network model
# You will use a 3-layer neural network (already implemented for you). Here are
# the initialization methods you will experiment with:
# - *Zeros initialization* -- setting `initialization = "zeros"` in the input argument.
# - *Random initialization* -- setting `initialization = "random"` in the input
#   argument. This initializes the weights to large random values.
# - *He initialization* -- setting `initialization = "he"` in the input argument.
#   This initializes the weights to random values scaled according to a paper by
#   He et al., 2015.
#
# **Instructions**: Please quickly read over the code below, and run it. In the
# next part you will implement the three initialization methods that this
# `model()` calls.

# In[23]:
# In[1]:

import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
from init_utils import sigmoid, relu, compute_loss, forward_propagation, backward_propagation
from init_utils import update_parameters, predict, load_dataset, plot_decision_boundary, predict_dec

# Notebook display magic plus default plotting setup.
get_ipython().magic('matplotlib inline')
plt.rcParams['figure.figsize'] = (7.0, 4.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# load image dataset: blue/red dots in circles
train_X, train_Y, test_X, test_Y = load_dataset()

# You would like a classifier to separate the blue dots from the red dots.

# ## 1 - Neural Network model
# You will use a 3-layer neural network (already implemented for you). Here are
# the initialization methods you will experiment with:
# - *Zeros initialization* -- setting `initialization = "zeros"` in the input argument.
# - *Random initialization* -- setting `initialization = "random"` in the input
#   argument. This initializes the weights to large random values.
# - *He initialization* -- setting `initialization = "he"` in the input argument.
#   This initializes the weights to random values scaled according to a paper by
#   He et al., 2015.
#
# **Instructions**: Please quickly read over the code below, and run it. In the
# next part you will implement the three initialization methods that this
# `model()` calls.

# In[2]:
from init_utils import load_dataset
import matplotlib.pyplot as plt
from layers import *
from multi_layer_nn import MultiLayersNN
from plt_utils import plot_decision_boundary

if __name__ == '__main__':
    # Load and sanity-check the circles dataset (examples are column-wise).
    X_train, Y_train, X_test, Y_test = load_dataset(is_plot=False)
    assert X_train.shape == (2, 300)
    assert Y_train.shape == (1, 300)
    assert X_test.shape == (2, 100)
    assert Y_test.shape == (1, 100)

    # 2-10-5-1 fully connected network, trained with plain gradient descent.
    model = MultiLayersNN((X_train.shape[0], 10, 5, 1))
    model.fit(X_train, Y_train, lr=0.1, print_cost_100=True, iter_num=10000)
    print(model.accuracy(X_train, Y_train))
    print(model.accuracy(X_test, Y_test))

    # Decision boundary plus the test points, colored by label.
    plot_decision_boundary(model.predict, (-1, 1, -1, 1), plt)
    labels = Y_test.squeeze()
    positives = X_test[:, labels == 1]
    negatives = X_test[:, labels == 0]
    plt.scatter(positives[0, :], positives[1, :])
    plt.scatter(negatives[0, :], negatives[1, :])
    plt.show()
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import init_utils  # part 1: initialization
import reg_utils   # part 2: regularization
import gc_utils    # part 3: gradient checking

# %matplotlib inline  # uncomment if you are running inside a Jupyter Notebook
plt.rcParams['figure.figsize'] = (7.0, 4.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

train_X, train_Y, test_X, test_Y = init_utils.load_dataset()
plt.show()


def model(X, Y, learning_rate=0.01, num_iterations=15000, print_cost=True, initialization="he", is_polt=True):
    """Three-layer network: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID.

    Arguments:
    X -- input data of shape (2, number of train/test examples)
    Y -- labels (0 or 1) of shape (1, number of examples)
    learning_rate -- gradient-descent step size
    num_iterations -- number of iterations; cost printed every 1000 when print_cost
    print_cost -- whether to print the cost value every 1000 iterations
    initialization -- string, one of "zeros" | "random" | "he"
    is_polt -- whether to plot the gradient-descent cost curve
               (original parameter-name spelling kept for caller compatibility)

    Returns:
    parameters -- the learned parameters

    NOTE(review): the rest of this function's body lies outside this chunk.
    """
    grads = {}
# %%
os.chdir('./2-1/')  # NOTE(review): `os` must be imported outside this chunk

# %%
import gc_utils
import init_utils
import reg_utils

plt.rcParams['figure.figsize'] = (7.0, 4.0)
plt.rcParams['image.interpolation'] = 'nearest'
# # plt.rcParams['image.cmap'] = 'gray'

# %% [markdown]
# # initialize weights

# %%
X_train, Y_train, X_test, Y_test = init_utils.load_dataset(is_plot=True)


# %%
def init_zeros(layers_dims):
    """All-zero initialization: one (W, b) pair of zero arrays per layer.

    layers_dims -- layer sizes including the input layer at index 0.
    """
    params = {}
    for idx in range(1, len(layers_dims)):
        params[f'W{idx}'] = np.zeros((layers_dims[idx], layers_dims[idx - 1]))
        params[f'b{idx}'] = np.zeros((layers_dims[idx], 1))
    return params


def init_random(layers_dims):
    # NOTE(review): this function's body continues outside this chunk; only the
    # first two statements are visible here.
    params = {}
    L = len(layers_dims)
""" import numpy as np import matplotlib.pyplot as plt import sklearn import sklearn.datasets from init_utils import sigmoid, relu, compute_loss, forward_propagation, backward_propagation from init_utils import update_parameters, predict, load_dataset, plot_decision_boundary, predict_dec #%matplotlib inline plt.rcParams['figure.figsize'] = (5.0, 5.0) #set default size of plots plt.rcParams['image.interpolation'] = 'nearest' plt.rcParams['image.cmap'] = 'gray' """ load image dataset: blue/red dots in circles """ train_X, train_Y, test_X, test_Y = load_dataset(plot=False) def model(X, Y, learning_rate=0.01, num_iterations=15000, print_cost=True, plot_loss=True, initialization="he"): """ Implement a 3-layer neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SIGMOID Arguments: X -- input data, of shape (2, number of examples) Y -- true "label" vector (containing 0 for red dots; 1 for blue dots), of shape (1, number of examples)
def main1():
    """Run the zeros / random / He initialization experiments back to back."""
    # %matplotlib inline
    plt.rcParams['figure.figsize'] = (7.0, 4.0)  # set default size of plots
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['image.cmap'] = 'gray'

    # load image dataset: blue/red dots in circles
    train_X, train_Y, test_X, test_Y = load_dataset()

    def show_params(parameters):
        # Print the freshly initialized parameter values.
        for key in ("W1", "b1", "W2", "b2"):
            print(key + " = " + str(parameters[key]))

    def draw_boundary(parameters, title):
        # Decision boundary over the training points, on a fixed viewport.
        plt.title(title)
        axes = plt.gca()
        axes.set_xlim([-1.5, 1.5])
        axes.set_ylim([-1.5, 1.5])
        plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)

    # --- Zeros initialization ---
    show_params(initialize_parameters_zeros([3, 2, 1]))
    parameters = model(train_X, train_Y, initialization="zeros")
    print("On the train set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)
    print("predictions_train = " + str(predictions_train))
    print("predictions_test = " + str(predictions_test))
    draw_boundary(parameters, "Model with Zeros initialization")

    # --- Large random initialization ---
    show_params(initialize_parameters_random([3, 2, 1]))
    parameters = model(train_X, train_Y, initialization="random")
    print("On the train set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)
    print(predictions_train)
    print(predictions_test)
    draw_boundary(parameters, "Model with large random initialization")

    # --- He initialization ---
    show_params(initialize_parameters_he([2, 4, 1]))
    parameters = model(train_X, train_Y, initialization="he")
    print("On the train set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)
    draw_boundary(parameters, "Model with He initialization")
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import init_utils  # part 1: initialization
import reg_utils   # part 2: regularization
import gc_utils    # part 3: gradient checking

# %matplotlib inline  # uncomment when running inside a Jupyter Notebook
plt.rcParams['figure.figsize'] = (7.0, 4.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'  # nearest-neighbor image interpolation
plt.rcParams['image.cmap'] = 'gray'  # grayscale colormap

train_X, train_Y, test_X, test_Y = init_utils.load_dataset(is_plot=False)
# plt.show()


# Try three initialization methods: 1) zeros, 2) random, 3) He initialization.
def model(X, Y, learning_rate=0.01, num_iterations=15000, print_cost=True, initialization='he', is_polt=True):
    """Three-layer network: linear->relu->linear->relu->linear->sigmoid.

    :param X: input data of shape (2, number of train/test examples)
    :param Y: labels (0/1) of shape (1, number of examples)
    :param learning_rate: learning rate
    :param num_iterations: number of iterations
    :param print_cost: whether to print the cost every 1000 iterations
    :param initialization: string, one of 'zero' | 'random' | 'he'
    :param is_polt: whether to plot the gradient-descent curve
                    (original parameter-name spelling kept for compatibility)

    NOTE(review): docstring and function body continue outside this chunk;
    closing quotes added only to keep this fragment well-formed.
    """
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
from init_utils import sigmoid, relu, compute_loss, forward_propagation, backward_propagation
from init_utils import update_parameters, predict, load_dataset, plot_decision_boundary, predict_dec

# %matplotlib inline
plt.rcParams['figure.figsize'] = (7.0, 4.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Load image dataset: blue/red dots in circles.
# NOTE(review): this load_dataset variant returns a fifth value, a plot handle.
train_X, train_Y, test_X, test_Y, dsplot = load_dataset()
dsplot.show()


# classifier to separate the blue dots from the red dots
def model(X, Y, learning_rate=0.01, num_iterations=15000, print_cost=True, initialization="he"):
    """Implements a three-layer neural network:
    LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID.

    Arguments:
    X -- input data, of shape (2, number of examples)
    Y -- true "label" vector (containing 0 for red dots; 1 for blue dots),
         of shape (1, number of examples)
    learning_rate -- learning rate for gradient descent

    NOTE(review): docstring and function body continue outside this chunk;
    closing quotes added only to keep this fragment well-formed.
    """