def main():
    plt.rcParams['figure.figsize'] = (7.0, 4.0)  # set default size of plots
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['image.cmap'] = 'gray'

    train_X, train_Y = load_dataset()
    layers_dims = [train_X.shape[0], 5, 2, 1]

    # parameters = model(train_X, train_Y, layers_dims, optimizer="gd")
    # parameters = model(train_X, train_Y, layers_dims, beta=0.9, optimizer="momentum")
    parameters = model(train_X, train_Y, layers_dims, optimizer="adam")

    # Predict
    predictions = predict(train_X, train_Y, parameters)

    # Plot decision boundary
    # plt.title("Model with Gradient Descent optimization")
    # plt.title("Model with Momentum optimization")
    plt.title("Model with Adam optimization")
    axes = plt.gca()
    axes.set_xlim([-1.5, 2.5])
    axes.set_ylim([-1, 1.5])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
    plt.show()
def main(optimizer='gd'):
    # Load and plot the data points
    train_X, train_Y = load_dataset()
    plt.show()

    layers_dims = [train_X.shape[0], 5, 2, 1]

    # Try different optimization methods
    if optimizer == 'gd':
        # mini-batch gradient descent
        parameters = model(train_X, train_Y, layers_dims, optimizer="gd")
        plt.title("Model with Gradient Descent optimization")
    elif optimizer == 'momentum':
        # mini-batch gradient descent with momentum
        parameters = model(train_X, train_Y, layers_dims, optimizer="momentum")
        plt.title("Model with Momentum optimization")
    elif optimizer == 'adam':
        # mini-batch gradient descent with Adam
        parameters = model(train_X, train_Y, layers_dims, optimizer="adam")
        plt.title("Model with Adam optimization")
    else:
        print("No such optimization method named " + optimizer)
        return

    # Predict
    predictions = predict(train_X, train_Y, parameters)

    # Plot decision boundary
    axes = plt.gca()
    axes.set_xlim([-1.5, 2.5])
    axes.set_ylim([-1, 1.5])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
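# A quick usage sketch for the variant above (hypothetical entry point,
# assuming this module is run directly): pick the optimizer by name.

if __name__ == "__main__":
    main(optimizer="adam")  # also accepts "gd" or "momentum"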
def main():
    train_X, train_Y = load_dataset()
    layers_dims = [train_X.shape[0], 5, 2, 1]

    print("training with mini-batch gd optimizer")
    learned_parameters_gd = model(train_X, train_Y, layers_dims, optimizer="gd")
    predict(train_X, train_Y, learned_parameters_gd)

    # Plot decision boundary
    plt.title("model with Gradient Descent optimization")
    axes = plt.gca()
    axes.set_xlim([-1.5, 2.5])
    axes.set_ylim([-1, 1.5])
    plot_decision_boundary(lambda x: predict_dec(learned_parameters_gd, x.T),
                           train_X, train_Y)

    print("training with momentum optimizer")
    learned_parameters_momentum = model(train_X, train_Y, layers_dims,
                                        beta=0.9, optimizer="momentum")
    predict(train_X, train_Y, learned_parameters_momentum)

    # Plot decision boundary
    plt.title("model with Momentum optimization")
    axes = plt.gca()
    axes.set_xlim([-1.5, 2.5])
    axes.set_ylim([-1, 1.5])
    plot_decision_boundary(lambda x: predict_dec(learned_parameters_momentum, x.T),
                           train_X, train_Y)

    print("training with adam optimizer")
    learned_parameters_adam = model(train_X, train_Y, layers_dims, optimizer="adam")
    predict(train_X, train_Y, learned_parameters_adam)

    # Plot decision boundary
    plt.title("model with Adam optimization")
    axes = plt.gca()
    axes.set_xlim([-1.5, 2.5])
    axes.set_ylim([-1, 1.5])
    plot_decision_boundary(lambda x: predict_dec(learned_parameters_adam, x.T),
                           train_X, train_Y)

    return None
def mini_batch_with_adam_mode():
    train_X, train_Y = load_dataset()

    # Train 3-layer model
    layers_dims = [train_X.shape[0], 5, 2, 1]
    parameters = model(train_X, train_Y, layers_dims, optimizer="adam")

    # Predict
    predictions = predict(train_X, train_Y, parameters)

    # Plot decision boundary
    plt.title("Model with Adam optimization")
    axes = plt.gca()
    axes.set_xlim([-1.5, 2.5])
    axes.set_ylim([-1, 1.5])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
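# For reference, a minimal sketch of the per-parameter update that
# update_parameters_with_adam (tested below) is expected to perform. This is
# the standard bias-corrected Adam rule written as a standalone helper; it is
# an illustration, not necessarily the exact assignment implementation.

import numpy as np

def adam_step(param, grad, v, s, t, learning_rate=0.0007,
              beta1=0.9, beta2=0.999, epsilon=1e-8):
    """One Adam update for a single parameter array (sketch)."""
    v = beta1 * v + (1 - beta1) * grad             # moving average of gradients
    s = beta2 * s + (1 - beta2) * np.square(grad)  # moving average of squared gradients
    v_hat = v / (1 - beta1 ** t)                   # bias-corrected first moment
    s_hat = s / (1 - beta2 ** t)                   # bias-corrected second moment
    param = param - learning_rate * v_hat / (np.sqrt(s_hat) + epsilon)
    return param, v, s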
parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t=2)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))
print("v[\"dW1\"] = " + str(v["dW1"]))
print("v[\"db1\"] = " + str(v["db1"]))
print("v[\"dW2\"] = " + str(v["dW2"]))
print("v[\"db2\"] = " + str(v["db2"]))
print("s[\"dW1\"] = " + str(s["dW1"]))
print("s[\"db1\"] = " + str(s["db1"]))
print("s[\"dW2\"] = " + str(s["dW2"]))
print("s[\"db2\"] = " + str(s["db2"]))

train_X, train_Y = load_dataset()


def model(X, Y, layers_dims, optimizer, learning_rate=0.0007,
          mini_batch_size=64, beta=0.9, beta1=0.9, beta2=0.999,
          epsilon=1e-8, num_epochs=10000, print_cost=True):
    """
# <td> [[ 5.49507194e-05]
#  [ 2.75494327e-03]
#  [ 5.50629536e-04]] </td>
# </tr>
# </table>

# You now have three working optimization algorithms (mini-batch gradient descent, Momentum, Adam). Let's implement a model with each of these optimizers and observe the difference.

# ## 5 - Model with different optimization algorithms
#
# Let's use the following "moons" dataset to test the different optimization methods. (The dataset is named "moons" because the data from each of the two classes looks a bit like a crescent-shaped moon.)

# In[14]:

train_X, train_Y = load_dataset()

# We have already implemented a 3-layer neural network. You will train it with:
# - Mini-batch **Gradient Descent**: it will call your function:
#   - `update_parameters_with_gd()`
# - Mini-batch **Momentum**: it will call your functions:
#   - `initialize_velocity()` and `update_parameters_with_momentum()`
# - Mini-batch **Adam**: it will call your functions:
#   - `initialize_adam()` and `update_parameters_with_adam()`

# In[15]:

def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64,
          beta=0.9, beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000,
          print_cost=True):
    """
# print("W1 = " + str(parameters["W1"])) # print("b1 = " + str(parameters["b1"])) # print("W2 = " + str(parameters["W2"])) # print("b2 = " + str(parameters["b2"])) # print('v["dW1"] = ' + str(v["dW1"])) # print('v["db1"] = ' + str(v["db1"])) # print('v["dW2"] = ' + str(v["dW2"])) # print('v["db2"] = ' + str(v["db2"])) # print('s["dW1"] = ' + str(s["dW1"])) # print('s["db1"] = ' + str(s["db1"])) # print('s["dW2"] = ' + str(s["dW2"])) # print('s["db2"] = ' + str(s["db2"])) ##加载数据集 train_X,train_Y = opt_utils.load_dataset(is_plot = False) ##定义模型 def model(X,Y,layers_dims,optimizer,learning_rate = 0.0007, mini_batch_size = 64,beta = 0.9,beta1 = 0.9,beta2 = 0.999, epsilon = 1e-8,num_epochs = 10000,print_cost = True,is_plot = True ): ''' 可以运行在不同优化器模式下的三层神经网络模型. :param X: 输入数据,维度为(2,输入数据集里面样本数量)) :param Y: 与X对应的标签 :param layers_dims: 包含层数和节点数量的列表 :param optimizer: 字符出类型的参数,用于选择优化类型,['gd'|'momentum'|'adam'] :param learning_rate: 学习率 :param mini_batch_size: 每个小批量数据集的大小 :param beta: 用于动量优化的一个超参数 :param beta1:用于计算梯度后的指数衰减的估计的超参数
# print("W1 = " + str(parameters["W1"])) # print("b1 = " + str(parameters["b1"])) # print("W2 = " + str(parameters["W2"])) # print("b2 = " + str(parameters["b2"])) # print('v["dW1"] = ' + str(v["dW1"])) # print('v["db1"] = ' + str(v["db1"])) # print('v["dW2"] = ' + str(v["dW2"])) # print('v["db2"] = ' + str(v["db2"])) # print('s["dW1"] = ' + str(s["dW1"])) # print('s["db1"] = ' + str(s["db1"])) # print('s["dW2"] = ' + str(s["dW2"])) # print('s["db2"] = ' + str(s["db2"])) ##加载数据集 train_X, train_Y = opt_utils.load_dataset(is_plot=False) ##定义模型 def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9, beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True,
print("v[dW1] = " + str(v["dW1"])) print("v[db1] = " + str(v["db1"])) print("v[dW2] = " + str(v["dW2"])) print("v[db2] = " + str(v["db2"])) print("s[dW1] = " + str(s["dW1"])) print("s[db1] = " + str(s["db1"])) print("s[dW2] = " + str(s["dW2"])) print("s[db2] = " + str(s["db2"])) # ### 4.Test # #### 4.1 Load Dataset # In[18]: train_X, train_Y = opt_utils.load_dataset(is_plot=True) # #### 4.2 Define the model # In[36]: def model(X, Y, layer_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9, beta1=0.9, beta2=0.999,
print("b2 = " + str(parameters["b2"])) print("v[\"dW1\"] = " + str(v["dW1"])) print("v[\"db1\"] = " + str(v["db1"])) print("v[\"dW2\"] = " + str(v["dW2"])) print("v[\"db2\"] = " + str(v["db2"])) print("s[\"dW1\"] = " + str(s["dW1"])) print("s[\"db1\"] = " + str(s["db1"])) print("s[\"dW2\"] = " + str(s["dW2"])) print("s[\"db2\"] = " + str(s["db2"])) #--------------------------------------------------- # 5. Model with different optimization algotithms #--------------------------------------------------- train_x, train_y = load_dataset() plt.show() def model(X, Y, layer_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9, beta1=0.9, beta2=0.999, epsilon=1e-8, num_epoch=10000, print_cost=True): ''' 3-layer neural network model which can be run in different optimizer modes. Arguments: X -- input data, of shape (2, number of examples) Y -- true "label" vector (1 for blue dot / 0 for red dot), of shape (1, number of examples) layers_dims -- python list, containing the size of each layer learning_rate -- the learning rate, scalar. mini_batch_size -- the size of a mini batch beta -- Momentum hyperparameter beta1 -- Exponential decay hyperparameter for the past gradients estimates
# parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t=2)
#
# print("W1 = " + str(parameters["W1"]))
# print("b1 = " + str(parameters["b1"]))
# print("W2 = " + str(parameters["W2"]))
# print("b2 = " + str(parameters["b2"]))
# print("v[\"dW1\"] = " + str(v["dW1"]))
# print("v[\"db1\"] = " + str(v["db1"]))
# print("v[\"dW2\"] = " + str(v["dW2"]))
# print("v[\"db2\"] = " + str(v["db2"]))
# print("s[\"dW1\"] = " + str(s["dW1"]))
# print("s[\"db1\"] = " + str(s["db1"]))
# print("s[\"dW2\"] = " + str(s["dW2"]))
# print("s[\"db2\"] = " + str(s["db2"]))

train_X, train_Y = load_dataset(is_plot=False)

# Train 3-layer model
layers_dims = [train_X.shape[0], 5, 2, 1]
# parameters = model(train_X, train_Y, layers_dims, optimizer="gd")
# parameters = model(train_X, train_Y, layers_dims, beta=0.9, optimizer="momentum")
parameters = model(train_X, train_Y, layers_dims, optimizer="adam")

# Predict
predictions = predict(train_X, train_Y, parameters)

# Plot decision boundary
# plt.title("Model with Gradient Descent optimization")
# plt.title("Model with Momentum optimization")
plt.title("Model with Adam optimization")
axes = plt.gca()
axes.set_xlim([-1.5, 2.5])
axes.set_ylim([-1, 1.5])
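# The model() bodies above are all truncated. As a rough sketch of the elided
# training loop (assuming the assignment's usual helpers: initialize_parameters,
# random_mini_batches, forward_propagation, and backward_propagation from
# opt_utils, alongside the update functions named earlier in this notebook),
# each epoch repartitions the data into mini-batches and dispatches the
# parameter update to the chosen optimizer:

def model_sketch(X, Y, layers_dims, optimizer, learning_rate=0.0007,
                 mini_batch_size=64, beta=0.9, beta1=0.9, beta2=0.999,
                 epsilon=1e-8, num_epochs=10000):
    parameters = initialize_parameters(layers_dims)
    v, s, t = None, None, 0
    if optimizer == "momentum":
        v = initialize_velocity(parameters)
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)

    for epoch in range(num_epochs):
        # Reshuffle and repartition the training set every epoch
        for minibatch_X, minibatch_Y in random_mini_batches(X, Y, mini_batch_size):
            a3, caches = forward_propagation(minibatch_X, parameters)
            grads = backward_propagation(minibatch_X, minibatch_Y, caches)
            if optimizer == "gd":
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == "momentum":
                parameters, v = update_parameters_with_momentum(
                    parameters, grads, v, beta, learning_rate)
            elif optimizer == "adam":
                t += 1  # Adam counter, drives the bias correction
                parameters, v, s = update_parameters_with_adam(
                    parameters, grads, v, s, t, learning_rate, beta1, beta2, epsilon)
    return parameters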