def model(X, Y, layers, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True):
    L = len(layers)
    costs = []
    t = 0
    seed = 10

    parameters = initialize_parameters(layers)

    if optimizer == 'gd':
        pass
    elif optimizer == 'momentum':
        v = init_velocity(parameters)
    elif optimizer == 'adam':
        v, s = init_adam(parameters)

    for i in range(num_epochs):
        seed = seed + 1
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

        for minibatch in minibatches:
            (minibatch_X, minibatch_Y) = minibatch
            # print(minibatch_Y.shape)
            a3, caches = forward_propagation(minibatch_X, parameters)
            cost = compute_cost(a3, minibatch_Y)
            grads = backward_propagation(minibatch_X, minibatch_Y, caches)

            if optimizer == 'gd':
                parameters = update_parameters_with_grad(parameters, grads, learning_rate)
            elif optimizer == 'momentum':
                # the momentum helper also returns the updated velocity; it must be unpacked
                parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
            elif optimizer == 'adam':
                t = t + 1
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                               learning_rate, beta1, beta2, epsilon)

        if print_cost and i % 1000 == 0:
            print('Cost after epoch %i: %f' % (i, cost))
        if print_cost and i % 100 == 0:
            costs.append(cost)

    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('epochs (per 100)')
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()

    return parameters

def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True):
    """
    3-layer neural network model which can be run in different optimizer modes.

    Arguments:
    X -- input data, of shape (2, number of examples)
    Y -- true "label" vector (1 for blue dot / 0 for red dot), of shape (1, number of examples)
    layers_dims -- python list, containing the size of each layer
    learning_rate -- the learning rate, scalar.
    mini_batch_size -- the size of a mini batch
    beta -- Momentum hyperparameter
    beta1 -- Exponential decay hyperparameter for the past gradients estimates
    beta2 -- Exponential decay hyperparameter for the past squared gradients estimates
    epsilon -- hyperparameter preventing division by zero in Adam updates
    num_epochs -- number of epochs
    print_cost -- True to print the cost every 1000 epochs

    Returns:
    parameters -- python dictionary containing your updated parameters
    """
    L = len(layers_dims)
    costs = []
    t = 0
    seed = 10

    parameters = initialize_parameters(layers_dims)

    if optimizer == 'gd':
        pass
    elif optimizer == 'momentum':
        v = initialize_velocity(parameters)
    elif optimizer == 'adam':
        v, s = initialize_adam(parameters)

    for i in range(num_epochs):
        seed = seed + 1
        mini_batches = random_mini_batches(X, Y, mini_batch_size, seed)

        for mini_batch in mini_batches:
            (mini_batch_X, mini_batch_Y) = mini_batch

            a3, caches = forward_propagation(mini_batch_X, parameters)
            cost = compute_cost(a3, mini_batch_Y)
            grads = backward_propagation(mini_batch_X, mini_batch_Y, caches)

            # update parameters
            if optimizer == 'gd':
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == 'momentum':
                parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
            elif optimizer == 'adam':
                t = t + 1
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                               learning_rate, beta1, beta2, epsilon)

        if print_cost and i % 1000 == 0:
            print('Cost after epoch %i: %f' % (i, cost))
        if print_cost and i % 100 == 0:
            costs.append(cost)

    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('epochs (per 100)')
    plt.title('learning rate = ' + str(learning_rate))
    plt.show()

    return parameters

def model_opt(X, Y, layer_dims, optimizer: str, alpha=0.08, mini_batch_size=64,
              beta1=0.9, beta2=0.999, epochs=10000):
    L = len(layer_dims)
    costs = []
    t = 0
    seed = 1

    # initialize weights
    params = opt_utils.initialize_parameters(layer_dims)

    # initialize the optimizer
    if optimizer == 'gd':
        pass
    elif optimizer == 'momentum':
        V = init_momentum(params)
    elif optimizer == 'adam':
        V, S = init_adam(params)
    else:
        print('Unexpected optimizer!')

    # train
    for i in range(epochs):
        ## shuffle and build new mini_batches
        seed += 1
        mini_batches = random_mini_batches(X, Y, mini_batch_size, seed)

        for batch in mini_batches:
            ### get X, Y for this batch
            mini_X, mini_Y = batch
            ### forward propagation
            A3, cache = opt_utils.forward_propagation(mini_X, params)
            ### compute loss
            cost = opt_utils.compute_cost(A3, mini_Y)
            ### backward propagation
            grads = opt_utils.backward_propagation(mini_X, mini_Y, cache)
            ### update params
            if optimizer == 'gd':
                params = update_params_with_gd(params, grads, alpha)
            elif optimizer == 'momentum':
                params, V = update_params_with_momentum(params, grads, V, beta1, alpha)
            elif optimizer == 'adam':
                t += 1
                params, V, S = update_params_with_adam(params, grads, V, S, t, alpha, beta1, beta2)
            else:
                print('Unexpected optimizer!')

        if (i + 1) % 100 == 0:
            costs.append(cost)
            print(f"No.{i + 1} iteration's loss: {cost}")

    plt.plot(costs)
    plt.xlabel('# iterations (per 100)')
    plt.ylabel('loss')
    plt.title(f'{optimizer} loss curve')
    plt.show()

    return params

def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True):
    L = len(layers_dims)
    costs = []
    t = 0
    seed = 10

    parameters = initialize_parameters(layers_dims)

    if optimizer == "gd":
        pass
    elif optimizer == "momentum":
        v = initialize_velocity(parameters)
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)

    for i in range(num_epochs):
        seed = seed + 1
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

        for minibatch in minibatches:
            (minibatch_X, minibatch_Y) = minibatch

            a3, caches = forward_propagation(minibatch_X, parameters)
            cost = compute_cost(a3, minibatch_Y)
            grads = backward_propagation(minibatch_X, minibatch_Y, caches)

            if optimizer == "gd":
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == "momentum":
                # note: `=` (assignment), not `==` (comparison), or the update is silently discarded
                parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
            elif optimizer == "adam":
                t = t + 1  # time step t, used for bias correction in the Adam update
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                               learning_rate, beta1, beta2, epsilon)

        if print_cost and i % 1000 == 0:
            print("Cost after epoch %i: %f" % (i, cost))
        if print_cost and i % 100 == 0:
            costs.append(cost)

    # plot the cost
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('epochs (per 100)')
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()

    return parameters

def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True):
    L = len(layers_dims)    # number of layers in the neural networks
    costs = []              # to keep track of the cost
    t = 0                   # initializing the counter required for Adam update
    seed = 10               # For grading purposes, so that your "random" minibatches are the same as ours

    # Initialize parameters
    parameters = initialize_parameters(layers_dims)

    # Initialize the optimizer
    if optimizer == "gd":
        pass  # no initialization required for gradient descent
    elif optimizer == "momentum":
        v = initialize_velocity(parameters)
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)

    # Optimization loop
    for i in range(num_epochs):
        # Define the random minibatches. We increment the seed to reshuffle differently the dataset after each epoch
        seed = seed + 1
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

        for minibatch in minibatches:
            # Select a minibatch
            (minibatch_X, minibatch_Y) = minibatch

            # Forward propagation
            a3, caches = forward_propagation(minibatch_X, parameters)

            # Compute cost
            cost = compute_cost(a3, minibatch_Y)

            # Backward propagation
            grads = backward_propagation(minibatch_X, minibatch_Y, caches)

            # Update parameters
            if optimizer == "gd":
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == "momentum":
                parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
            elif optimizer == "adam":
                t = t + 1  # Adam counter
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                               learning_rate, beta1, beta2, epsilon)

        # Print the cost every 1000 epoch
        if print_cost and i % 1000 == 0:
            print("Cost after epoch %i: %f" % (i, cost))
        if print_cost and i % 100 == 0:
            costs.append(cost)

    # plot the cost
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('epochs (per 100)')
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()

    return parameters

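# The model() functions in this collection call initialize_velocity() and initialize_adam()
# without showing them. The sketch below is a minimal reconstruction of what such helpers
# typically look like, assuming `parameters` is a dict of NumPy arrays keyed
# "W1", "b1", ..., "WL", "bL"; it is not the original helper code.
import numpy as np

def initialize_velocity(parameters):
    # One zero-filled array per parameter, with the same shape as the parameter itself.
    L = len(parameters) // 2
    v = {}
    for l in range(L):
        v["dW" + str(l + 1)] = np.zeros_like(parameters["W" + str(l + 1)])
        v["db" + str(l + 1)] = np.zeros_like(parameters["b" + str(l + 1)])
    return v

def initialize_adam(parameters):
    # Adam tracks two moving averages per parameter: first moment v and second moment s.
    L = len(parameters) // 2
    v, s = {}, {}
    for l in range(L):
        v["dW" + str(l + 1)] = np.zeros_like(parameters["W" + str(l + 1)])
        v["db" + str(l + 1)] = np.zeros_like(parameters["b" + str(l + 1)])
        s["dW" + str(l + 1)] = np.zeros_like(parameters["W" + str(l + 1)])
        s["db" + str(l + 1)] = np.zeros_like(parameters["b" + str(l + 1)])
    return v, s
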
def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True, is_plot=True):
    """
    3-layer neural network model which can be run in different optimizer modes.

    :param X: input data, of shape (2, number of examples in the dataset)
    :param Y: labels corresponding to X
    :param layers_dims: list with the number of layers and the number of units in each layer
    :param optimizer: string selecting the optimization method, one of ["gd" | "momentum" | "adam"]
    :param learning_rate: learning rate
    :param mini_batch_size: size of each mini-batch
    :param beta: hyperparameter for Momentum
    :param beta1: exponential-decay hyperparameter for the estimate of the past gradients
    :param beta2: exponential-decay hyperparameter for the estimate of the past squared gradients
    :param epsilon: hyperparameter that avoids division by zero in Adam; usually left unchanged
    :param num_epochs: number of passes over the whole training set (called an "epoch" in video 2.9
                       on learning-rate decay, around 1:55); the counterpart of the earlier num_iteration
    :param print_cost: whether to print the cost; it is printed every 1000 epochs, while a cost value
                       is recorded every 100 epochs
    :param is_plot: whether to plot the learning curve
    :return: parameters: the learned parameters
    """
    L = len(layers_dims)
    costs = []
    t = 0       # incremented by 1 after each mini-batch is processed
    seed = 10   # random seed

    parameters = initialize_parameters(layers_dims)  # initialize the parameters

    # Choose the optimizer
    if optimizer == "gd":
        pass  # no optimizer: plain gradient descent
    elif optimizer == "momentum":
        v = initialize_velocity(parameters)  # Momentum
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)  # Adam
    else:
        print("Invalid optimizer argument")
        exit(1)

    # Start training
    for i in range(num_epochs):
        # Define random mini-batches; the seed is incremented after every pass so the dataset is
        # reshuffled differently each epoch
        seed = seed + 1
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

        for minibatch in minibatches:
            # Select a mini-batch
            (minibatch_X, minibatch_Y) = minibatch

            # Forward propagation
            A3, cache = forward_propagation(minibatch_X, parameters)

            # Compute the cost
            cost = compute_cost(A3, minibatch_Y)

            # Backward propagation
            grads = backward_propagation(minibatch_X, minibatch_Y, cache)

            # Update the parameters
            if optimizer == "gd":
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == "momentum":
                parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
            elif optimizer == "adam":
                t = t + 1  # Adam counter
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                               learning_rate, beta1, beta2, epsilon)

        # Record the cost
        if i % 100 == 0:
            costs.append(cost)
            # Optionally print the cost
            if print_cost and i % 1000 == 0:
                print("Cost after epoch " + str(i) + ": " + str(cost))

    # Optionally plot the learning curve
    if is_plot:
        plt.plot(costs)
        plt.ylabel('cost')
        plt.xlabel('epochs (per 100)')
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()

    return parameters

def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True, is_plot=True):
    '''
    3-layer neural network model which can be run in different optimizer modes.

    :param X: input data, of shape (2, number of examples in the dataset)
    :param Y: labels corresponding to X
    :param layers_dims: list with the number of layers and the number of units in each layer
    :param optimizer: string selecting the optimization method, one of ['gd' | 'momentum' | 'adam']
    :param learning_rate: learning rate
    :param mini_batch_size: size of each mini-batch
    :param beta: hyperparameter for Momentum
    :param beta1: exponential-decay hyperparameter for the estimate of the past gradients
    :param beta2: exponential-decay hyperparameter for the estimate of the past squared gradients
    :param epsilon: hyperparameter that avoids division by zero in Adam; usually left unchanged
    :param num_epochs: number of passes over the whole training set
    :param print_cost: whether to print the cost; it is printed every 1000 epochs, while a cost value is recorded every 100 epochs
    :param is_plot: whether to plot the learning curve
    :return: parameters - the learned parameters
    '''
    L = len(layers_dims)
    costs = []
    t = 0       # incremented by 1 after each mini-batch is processed
    seed = 10   # random seed

    # Initialize the parameters
    parameters = opt_utils.initialize_parameters(layers_dims)

    # Choose the optimizer
    if optimizer == 'gd':
        pass  # no optimizer: plain gradient descent
    elif optimizer == 'momentum':
        v = initialize_velocity(parameters)  # Momentum
    elif optimizer == 'adam':
        v, s = initialize_adam(parameters)  # Adam
    else:
        print('Invalid optimizer argument, exiting')
        exit(1)

    # Start training
    for i in range(num_epochs):
        # Define random mini-batches; the seed is incremented after every pass so the data is
        # reshuffled and its order differs each epoch
        seed = seed + 1
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

        for minibatch in minibatches:
            # Select a mini-batch
            (minibatch_X, minibatch_Y) = minibatch

            # Forward propagation
            A3, cache = opt_utils.forward_propagation(minibatch_X, parameters)

            # Compute the cost
            cost = opt_utils.compute_cost(A3, minibatch_Y)

            # Backward propagation
            grads = opt_utils.backward_propagation(minibatch_X, minibatch_Y, cache)

            # Update the parameters
            if optimizer == 'gd':
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == 'momentum':
                parameters, v = update_parameters_with_momentun(parameters, grads, v, beta, learning_rate)
            elif optimizer == 'adam':
                t = t + 1
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                               learning_rate, beta1, beta2, epsilon)

        # Record the cost
        if i % 100 == 0:
            costs.append(cost)
            # Optionally print the cost
            if print_cost and i % 1000 == 0:
                print('Cost after epoch ' + str(i) + ': ' + str(cost))

    # Optionally plot the learning curve
    if is_plot:
        plt.plot(costs)
        plt.xlabel('epochs (per 100)')
        plt.ylabel('cost')
        plt.title('Learning rate = ' + str(learning_rate))
        plt.show()

    return parameters

def model(X, Y, layers_dims, optimizer, learning_rate=0.007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True, is_plot=True):
    L = len(layers_dims)
    costs = []
    t = 0
    seed = 10

    parameters = opt_utils.initialize_parameters(layers_dims)

    if optimizer == 'gd':
        pass
    elif optimizer == 'momentum':
        v = initialize_velocity(parameters)
    elif optimizer == 'adam':
        v, s = initialize_adam(parameters)

    for i in range(num_epochs):
        seed = seed + 1
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

        for minibatch in minibatches:
            minibatch_X, minibatch_Y = minibatch

            A3, cache = opt_utils.forward_propagation(minibatch_X, parameters)

            # Compute the cost
            cost = opt_utils.compute_cost(A3, minibatch_Y)

            # Backward propagation
            grads = opt_utils.backward_propagation(minibatch_X, minibatch_Y, cache)

            if optimizer == "gd":
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == "momentum":
                parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
            elif optimizer == "adam":
                t = t + 1
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                               learning_rate, beta1, beta2, epsilon)

        # Record the cost
        if i % 100 == 0:
            costs.append(cost)
            # Optionally print the cost
            if print_cost and i % 1000 == 0:
                print("Cost after epoch " + str(i) + ": " + str(cost))

    # Optionally plot the learning curve
    if is_plot:
        plt.plot(costs)
        plt.ylabel('cost')
        plt.xlabel('epochs (per 100)')
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()

    return parameters

    L = len(parameters) // 2  # number of layers in the neural networks

    # Update rule for each parameter
    for l in range(L):
        ### START CODE HERE ### (approx. 2 lines)
        parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * grads['dW' + str(l + 1)]
        parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * grads['db' + str(l + 1)]
        ### END CODE HERE ###

    return parameters


# Batch Gradient Descent
X = data_input
Y = labels
parameters = initialize_parameters(layers_dims)
for i in range(0, num_iterations):
    # Forward propagation
    a, caches = forward_propagation(X, parameters)
    # Compute cost.
    cost = compute_cost(a, Y)
    # Backward propagation.
    grads = backward_propagation(a, caches, parameters)
    # Update parameters.
    parameters = update_parameters(parameters, grads)

# Stochastic Gradient Descent
X = data_input
Y = labels
parameters = initialize_parameters(layers_dims)
for i in range(0, num_iterations):
    for j in range(0, m):
        # Forward propagation on a single example
        a, caches = forward_propagation(X[:, j], parameters)
        # Compute cost.
        cost = compute_cost(a, Y[:, j])
        # Backward propagation.
        grads = backward_propagation(a, caches, parameters)
        # Update parameters.
        parameters = update_parameters(parameters, grads)

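# Several of the model() variants above call an update_parameters_with_momentum() helper
# that is not shown. A minimal sketch under the same assumptions (parameters, grads, and v
# are dicts of NumPy arrays keyed "W1"/"dW1"/...); not the original assignment code.
def update_parameters_with_momentum(parameters, grads, v, beta, learning_rate):
    # v keeps an exponentially weighted average of past gradients; the parameters then
    # step in the direction of v instead of the raw gradient.
    L = len(parameters) // 2
    for l in range(L):
        v["dW" + str(l + 1)] = beta * v["dW" + str(l + 1)] + (1 - beta) * grads["dW" + str(l + 1)]
        v["db" + str(l + 1)] = beta * v["db" + str(l + 1)] + (1 - beta) * grads["db" + str(l + 1)]
        parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * v["dW" + str(l + 1)]
        parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * v["db" + str(l + 1)]
    return parameters, v
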
def model(X, Y, learning_rate=0.3, num_iterations=30000, print_cost=True, lambd=0, keep_prob=1):
    """
    Implements a three-layer neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SIGMOID.

    Arguments:
    X -- input data, of shape (input size, number of examples)
    Y -- true "label" vector (1 for blue dot / 0 for red dot), of shape (output size, number of examples)
    learning_rate -- learning rate of the optimization
    num_iterations -- number of iterations of the optimization loop
    print_cost -- If True, print the cost every 10000 iterations
    lambd -- regularization hyperparameter, scalar
    keep_prob -- probability of keeping a neuron active during drop-out, scalar.

    Returns:
    parameters -- parameters learned by the model. They can then be used to predict.
    """
    grads = {}
    costs = []                            # to keep track of the cost
    m = X.shape[1]                        # number of examples
    layers_dims = [X.shape[0], 20, 3, 1]

    # Initialize parameters dictionary.
    parameters = initialize_parameters(layers_dims)

    # Loop (gradient descent)
    for i in range(0, num_iterations):

        # Forward propagation: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID.
        if keep_prob == 1:
            a3, cache = forward_propagation(X, parameters)
        elif keep_prob < 1:
            a3, cache = forward_propagation_with_dropout(X, parameters, keep_prob)

        # Cost function
        if lambd == 0:
            cost = compute_cost(a3, Y)
        else:
            cost = compute_cost_with_regularization(a3, Y, parameters, lambd)

        # Backward propagation.
        assert (lambd == 0 or keep_prob == 1)  # it is possible to use both L2 regularization and dropout,
                                               # but this assignment will only explore one at a time
        if lambd == 0 and keep_prob == 1:
            grads = backward_propagation(X, Y, cache)
        elif lambd != 0:
            grads = backward_propagation_with_regularization(X, Y, cache, lambd)
        elif keep_prob < 1:
            grads = backward_propagation_with_dropout(X, Y, cache, keep_prob)

        # Update parameters.
        parameters = update_parameters(parameters, grads, learning_rate)

        # Print the loss every 10000 iterations
        if print_cost and i % 10000 == 0:
            print("Cost after iteration {}: {}".format(i, cost))
        if print_cost and i % 1000 == 0:
            costs.append(cost)

    # plot the cost
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('iterations (x1,000)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters

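# The regularized model above relies on compute_cost_with_regularization(), which is not
# shown. A minimal sketch, assuming a 3-layer parameter dict (W1..W3) and reusing the
# compute_cost() helper the snippet already calls; not the original assignment code.
import numpy as np

def compute_cost_with_regularization(A3, Y, parameters, lambd):
    # Cross-entropy cost plus an L2 penalty: lambd/(2m) * sum of squared weights.
    m = Y.shape[1]
    W1, W2, W3 = parameters["W1"], parameters["W2"], parameters["W3"]
    cross_entropy_cost = compute_cost(A3, Y)
    L2_regularization_cost = (lambd / (2 * m)) * (np.sum(np.square(W1))
                                                  + np.sum(np.square(W2))
                                                  + np.sum(np.square(W3)))
    return cross_entropy_cost + L2_regularization_cost
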
def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True, is_plot=True):
    L = len(layers_dims)
    costs = []
    t = 0       # incremented by 1 after each mini-batch is processed
    seed = 10   # random seed

    # Initialize the parameters
    parameters = opt_utils.initialize_parameters(layers_dims)

    # Choose the optimizer
    if optimizer == "gd":
        pass  # no optimizer: plain gradient descent
    elif optimizer == "momentum":
        v = initialize_velocity(parameters)  # Momentum
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)  # Adam
    else:
        print("Invalid optimizer argument, exiting.")
        exit(1)

    # Start training
    for i in range(num_epochs):
        # Define random mini-batches; the seed is incremented after every pass so the dataset is
        # reshuffled differently each epoch
        seed = seed + 1
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

        for minibatch in minibatches:
            # Select a mini-batch
            (minibatch_X, minibatch_Y) = minibatch

            # Forward propagation
            A3, cache = opt_utils.forward_propagation(minibatch_X, parameters)

            # Compute the cost
            cost = opt_utils.compute_cost(A3, minibatch_Y)

            # Backward propagation
            grads = opt_utils.backward_propagation(minibatch_X, minibatch_Y, cache)

            # Update the parameters
            if optimizer == "gd":
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == "momentum":
                parameters, v = update_parameters_with_momentun(parameters, grads, v, beta, learning_rate)
            elif optimizer == "adam":
                t = t + 1
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                               learning_rate, beta1, beta2, epsilon)

        # Record the cost
        if i % 100 == 0:
            costs.append(cost)
            # Optionally print the cost
            if print_cost and i % 1000 == 0:
                print("Cost after epoch " + str(i) + ": " + str(cost))

    # Optionally plot the learning curve
    if is_plot:
        plt.plot(costs)
        plt.ylabel('cost')
        plt.xlabel('epochs (per 100)')
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()

    return parameters

    return parameters


parameters, grads, learning_rate = update_parameters_with_gd_test_case()
parameters = update_parameters_with_gd(parameters, grads, learning_rate)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

# Batch vs Stochastic
X = data_input
Y = labels
parameters = initialize_parameters(layers_dims)

# -- (Batch)
for i in range(0, num_iterations):
    # Forward propagation
    a, caches = forward_propagation(X, parameters)
    # Compute cost.
    cost = compute_cost(a, Y)
    # Backward propagation.
    grads = backward_propagation(a, caches, parameters)
    # Update parameters.
    parameters = update_parameters(parameters, grads)

# -- (Stochastic)
for i in range(0, num_iterations):
    for j in range(0, m):
        # Forward propagation on a single example
        a, caches = forward_propagation(X[:, j], parameters)
        # Compute cost.
        cost = compute_cost(a, Y[:, j])
        # Backward propagation.
        grads = backward_propagation(a, caches, parameters)
        # Update parameters.
        parameters = update_parameters(parameters, grads)

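# Every model() variant in this collection depends on random_mini_batches() to shuffle and
# partition the data, but the helper itself never appears. A minimal sketch, assuming X has
# shape (n_x, m) and Y has shape (n_y, m); it may differ in detail from the original utility.
import math
import numpy as np

def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
    # Shuffle the example columns with a fixed seed, then slice them into consecutive
    # mini-batches of mini_batch_size, keeping a smaller final batch if m is not a
    # multiple of the batch size.
    np.random.seed(seed)
    m = X.shape[1]
    permutation = list(np.random.permutation(m))
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation].reshape((Y.shape[0], m))

    mini_batches = []
    num_complete_minibatches = math.floor(m / mini_batch_size)
    for k in range(num_complete_minibatches):
        mini_batch_X = shuffled_X[:, k * mini_batch_size:(k + 1) * mini_batch_size]
        mini_batch_Y = shuffled_Y[:, k * mini_batch_size:(k + 1) * mini_batch_size]
        mini_batches.append((mini_batch_X, mini_batch_Y))
    if m % mini_batch_size != 0:
        mini_batch_X = shuffled_X[:, num_complete_minibatches * mini_batch_size:]
        mini_batch_Y = shuffled_Y[:, num_complete_minibatches * mini_batch_size:]
        mini_batches.append((mini_batch_X, mini_batch_Y))
    return mini_batches
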
def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=20000, print_cost=True, is_plot=True):
    # Let's see which gradient descent variant works best
    L = len(layers_dims)
    costs = []
    t = 0       # incremented by 1 after each mini-batch is processed
    seed = 10

    # Initialize the parameters
    parameters = opt_utils.initialize_parameters(layers_dims)
    # print(str(type(parameters)), "1")

    # Choose the optimizer
    if optimizer == "gd":
        pass  # no optimizer: plain (mini-)batch gradient descent
    elif optimizer == "momentum":
        v = initialize_velocity(parameters)  # Momentum
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)  # Adam
    else:
        print("Invalid optimizer argument, exiting")
        exit(1)

    # Start training
    for i in range(num_epochs):
        seed = seed + 1
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

        for minibatch in minibatches:
            # print(len(minibatches))
            # Select a mini-batch
            (minibatch_X, minibatch_Y) = minibatch

            # Forward propagation
            A3, cache = opt_utils.forward_propagation(minibatch_X, parameters)

            # Compute the cost
            cost = opt_utils.compute_cost(A3, minibatch_Y)

            # Backward propagation
            grads = opt_utils.backward_propagation(minibatch_X, minibatch_Y, cache)

            # Update the parameters
            if optimizer == "gd":
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == "momentum":
                # A return value was missing here earlier, which made parameters come back
                # as a tuple instead of a dict; both returned values must be unpacked.
                parameters, v = update_parameters_with_momentun(parameters, grads, v, beta, learning_rate)
            elif optimizer == "adam":
                t = t + 1
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                               learning_rate, beta1, beta2, epsilon)

        if i % 100 == 0:
            costs.append(cost)
        if print_cost and i % 1000 == 0:
            print("Cost after epoch " + str(i) + ": " + str(cost))

    if is_plot:
        plt.plot(costs)
        plt.ylabel('cost')
        plt.xlabel('epochs (per 100)')
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()

    return parameters

def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True, is_plot=True):
    """
    3-layer neural network model which can be run in different optimizer modes.

    Arguments:
        X - input data, of shape (2, number of examples in the dataset)
        Y - labels corresponding to X
        layers_dims - list with the number of layers and the number of units in each layer
        optimizer - string selecting the optimization method, one of ["gd" | "momentum" | "adam"]
        learning_rate - learning rate
        mini_batch_size - size of each mini-batch
        beta - hyperparameter for Momentum
        beta1 - exponential-decay hyperparameter for the estimate of the past gradients
        beta2 - exponential-decay hyperparameter for the estimate of the past squared gradients
        epsilon - hyperparameter that avoids division by zero in Adam; usually left unchanged
        num_epochs - number of passes over the whole training set (called an "epoch" in video 2.9 on
                     learning-rate decay, around 1:55); the counterpart of the earlier num_iteration
        print_cost - whether to print the cost; it is printed every 1000 epochs, while a cost value is
                     recorded every 100 epochs
        is_plot - whether to plot the learning curve

    Returns:
        parameters - the learned parameters
    """
    L = len(layers_dims)
    costs = []
    t = 0       # incremented by 1 after each mini-batch is processed
    seed = 10

    # Initialize the parameters
    parameters = opt_utils.initialize_parameters(layers_dims)

    if optimizer == "gd":
        pass
    elif optimizer == "momentum":
        v = initialize_velocity(parameters)
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)
    else:
        print("Invalid optimizer argument, exiting.")
        exit(1)

    # Start training
    for i in range(num_epochs):
        # Define random mini-batches; the seed is incremented after every pass so the dataset is
        # reshuffled differently each epoch
        seed = seed + 1
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

        for minibatch in minibatches:
            # Select a mini-batch
            (minibatch_X, minibatch_Y) = minibatch

            # Forward propagation
            A3, cache = opt_utils.forward_propagation(minibatch_X, parameters)

            # Compute the cost
            cost = opt_utils.compute_cost(A3, minibatch_Y)

            # Backward propagation
            grads = opt_utils.backward_propagation(minibatch_X, minibatch_Y, cache)

            # Update the parameters
            if optimizer == "gd":
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == "momentum":
                parameters, v = update_parameters_with_momentun(parameters, grads, v, beta, learning_rate)
            elif optimizer == "adam":
                t = t + 1
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                               learning_rate, beta1, beta2, epsilon)

        if i % 100 == 0:
            costs.append(cost)
        if print_cost and i % 1000 == 0:
            print("Cost after epoch " + str(i) + ": " + str(cost))

    if is_plot:
        plt.plot(costs)
        plt.ylabel("cost")
        plt.xlabel("epochs (per 100)")
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()

    return parameters

def model(X_train, Y_train, X_test, Y_test, learning_rate=0.009,
          num_epochs=100, minibatch_size=64, print_cost=True):
    """
    Implements a three-layer ConvNet in Tensorflow:
    CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FLATTEN -> FULLYCONNECTED

    Arguments:
    X_train -- training set, of shape (None, 64, 64, 3)
    Y_train -- training set labels, of shape (None, n_y = 6)
    X_test -- test set, of shape (None, 64, 64, 3)
    Y_test -- test set labels, of shape (None, n_y = 6)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 100 epochs

    Returns:
    train_accuracy -- real number, accuracy on the train set (X_train)
    test_accuracy -- real number, testing accuracy on the test set (X_test)
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    ops.reset_default_graph()   # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)       # to keep results consistent (tensorflow seed)
    seed = 3                    # to keep results consistent (numpy seed)
    (m, n_H0, n_W0, n_C0) = X_train.shape
    n_y = Y_train.shape[1]
    costs = []                  # To keep track of the cost

    # Create Placeholders of the correct shape
    ### START CODE HERE ### (1 line)
    X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)
    ### END CODE HERE ###

    # Initialize parameters
    ### START CODE HERE ### (1 line)
    parameters = initialize_parameters()
    ### END CODE HERE ###

    # Forward propagation: Build the forward propagation in the tensorflow graph
    ### START CODE HERE ### (1 line)
    Z3 = forward_propagation(X, parameters)
    ### END CODE HERE ###

    # Cost function: Add cost function to tensorflow graph
    ### START CODE HERE ### (1 line)
    cost = compute_cost(Z3, Y)
    ### END CODE HERE ###

    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer that minimizes the cost.
    ### START CODE HERE ### (1 line)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    ### END CODE HERE ###

    # Initialize all the variables globally
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(num_epochs):

            minibatch_cost = 0.
            num_minibatches = int(m / minibatch_size)  # number of minibatches of size minibatch_size in the train set
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)

            for minibatch in minibatches:

                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch
                # IMPORTANT: The line that runs the graph on a minibatch.
                # Run the session to execute the optimizer and the cost; the feed_dict should contain a minibatch for (X, Y).
                ### START CODE HERE ### (1 line)
                _, temp_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
                ### END CODE HERE ###

                minibatch_cost += temp_cost / num_minibatches

            # Print the cost every epoch
            if print_cost == True and epoch % 5 == 0:
                print("Cost after epoch %i: %f" % (epoch, minibatch_cost))
            if print_cost == True and epoch % 1 == 0:
                costs.append(minibatch_cost)

        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per tens)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # Calculate the correct predictions
        predict_op = tf.argmax(Z3, 1)
        correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))

        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print(accuracy)
        train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
        test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
        print("Train Accuracy:", train_accuracy)
        print("Test Accuracy:", test_accuracy)

        return train_accuracy, test_accuracy, parameters

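# A hedged usage sketch for the TensorFlow 1.x ConvNet model above, assuming X_train,
# Y_train, X_test, Y_test have already been loaded, normalized, and one-hot encoded
# with the shapes stated in the docstring; these names are not defined in this snippet.
_, _, parameters = model(X_train, Y_train, X_test, Y_test,
                         learning_rate=0.009, num_epochs=100, minibatch_size=64)
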
def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True, is_plot=True):
    '''
    3-layer neural network model which can be run in different optimizer modes.

    :param X: input data, of shape (2, number of examples in the dataset)
    :param Y: labels corresponding to X
    :param layers_dims: list with the number of layers and the number of units in each layer
    :param optimizer: string selecting the optimization method, one of ['gd' | 'momentum' | 'adam']
    :param learning_rate: learning rate
    :param mini_batch_size: size of each mini-batch
    :param beta: hyperparameter for Momentum
    :param beta1: exponential-decay hyperparameter for the estimate of the past gradients
    :param beta2: exponential-decay hyperparameter for the estimate of the past squared gradients
    :param epsilon: hyperparameter that avoids division by zero in Adam; usually left unchanged
    :param num_epochs: number of passes over the whole training set
    :param print_cost: whether to print the cost; it is printed every 1000 epochs, while a cost value is recorded every 100 epochs
    :param is_plot: whether to plot the learning curve
    :return: parameters - the learned parameters
    '''
    L = len(layers_dims)
    costs = []
    t = 0       # incremented by 1 after each mini-batch is processed
    seed = 10   # random seed

    # Initialize the parameters
    parameters = opt_utils.initialize_parameters(layers_dims)

    # Choose the optimizer
    if optimizer == 'gd':
        pass  # no optimizer: plain gradient descent
    elif optimizer == 'momentum':
        v = initialize_velocity(parameters)  # Momentum
    elif optimizer == 'adam':
        v, s = initialize_adam(parameters)  # Adam
    else:
        print('Invalid optimizer argument, exiting')
        exit(1)

    # Start training
    for i in range(num_epochs):
        # Define random mini-batches; the seed is incremented after every pass so the data is
        # reshuffled and its order differs each epoch
        seed = seed + 1
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

        for minibatch in minibatches:
            # Select a mini-batch
            (minibatch_X, minibatch_Y) = minibatch

            # Forward propagation
            A3, cache = opt_utils.forward_propagation(minibatch_X, parameters)

            # Compute the cost
            cost = opt_utils.compute_cost(A3, minibatch_Y)

            # Backward propagation
            grads = opt_utils.backward_propagation(minibatch_X, minibatch_Y, cache)

            # Update the parameters
            if optimizer == 'gd':
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == 'momentum':
                parameters, v = update_parameters_with_momentun(parameters, grads, v, beta, learning_rate)
            elif optimizer == 'adam':
                t = t + 1
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                               learning_rate, beta1, beta2, epsilon)

        # Record the cost
        if i % 100 == 0:
            costs.append(cost)
            # Optionally print the cost
            if print_cost and i % 1000 == 0:
                print('Cost after epoch ' + str(i) + ': ' + str(cost))

    # Optionally plot the learning curve
    if is_plot:
        plt.plot(costs)
        plt.xlabel('epochs (per 100)')
        plt.ylabel('cost')
        plt.title('Learning rate = ' + str(learning_rate))
        plt.show()

    return parameters

def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True):
    """
    3-layer neural network model which can be run in different optimizer modes.

    Arguments:
    X -- input data, of shape (2, number of examples)
    Y -- true "label" vector (1 for blue dot / 0 for red dot), of shape (1, number of examples)
    layers_dims -- python list, containing the size of each layer
    learning_rate -- the learning rate, scalar.
    mini_batch_size -- the size of a mini batch
    beta -- Momentum hyperparameter
    beta1 -- Exponential decay hyperparameter for the past gradients estimates
    beta2 -- Exponential decay hyperparameter for the past squared gradients estimates
    epsilon -- hyperparameter preventing division by zero in Adam updates
    num_epochs -- number of epochs
    print_cost -- True to print the cost every 1000 epochs

    Returns:
    parameters -- python dictionary containing your updated parameters
    """
    L = len(layers_dims)    # number of layers in the neural networks
    costs = []              # to keep track of the cost
    t = 0                   # initializing the counter required for Adam update
    seed = 10               # For grading purposes, so that your "random" minibatches are the same as ours

    # Initialize parameters
    parameters = initialize_parameters(layers_dims)

    # Initialize the optimizer
    if optimizer == "gd":
        pass  # no initialization required for gradient descent
    elif optimizer == "momentum":
        v = initialize_velocity(parameters)
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)

    # Optimization loop
    for i in range(num_epochs):
        # Define the random minibatches. We increment the seed to reshuffle differently the dataset after each epoch
        seed = seed + 1
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

        for minibatch in minibatches:
            # Select a minibatch
            (minibatch_X, minibatch_Y) = minibatch

            # Forward propagation
            a3, caches = forward_propagation(minibatch_X, parameters)

            # Compute cost
            cost = compute_cost(a3, minibatch_Y)

            # Backward propagation
            grads = backward_propagation(minibatch_X, minibatch_Y, caches)

            # Update parameters
            if optimizer == "gd":
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == "momentum":
                parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
            elif optimizer == "adam":
                t = t + 1  # Adam counter
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                               learning_rate, beta1, beta2, epsilon)

        # Print the cost every 1000 epoch
        if print_cost and i % 1000 == 0:
            print("Cost after epoch %i: %f" % (i, cost))
        if print_cost and i % 100 == 0:
            costs.append(cost)

    # plot the cost
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('epochs (per 100)')
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()

    return parameters

def model(X, Y, layer_dims, optimizer='adam', learning_rate=0.0007, beta1=0.9, beta2=0.999,
          epsilon=1e-8, minibatch_size=64, epochs=10000, print_cost=True, is_plot=True):
    '''
    :param X: input images
    :param Y: input labels
    :param layer_dims: number of units in each layer of the network
    :param optimizer: which optimizer to use, one of 'adam', 'gd', 'momentum'
    :param learning_rate: learning rate
    :param beta1: momentum (first-moment) hyperparameter
    :param beta2: RMSProp (second-moment) hyperparameter
    :param epsilon: smoothing term
    :param minibatch_size: size of each mini-batch
    :param epochs: number of training epochs
    :param print_cost: whether to print the cost
    :param is_plot: whether to plot the result
    :return: the learned parameters
    '''
    costs = []
    t = 0
    seed = 10

    # Initialize the network weights
    parameters = opt_utils.initialize_parameters(layer_dims)

    if optimizer == 'adam':
        v, s = initialize_adam(parameters)
    elif optimizer == 'momentum':
        v = initialize_velocity(parameters)
    elif optimizer == 'gd':
        pass
    else:
        print('Invalid optimizer argument, exiting.')
        exit(1)

    for i in range(epochs):
        seed += 1
        mini_batches = random_mini_batches(X, Y, minibatch_size, seed)

        for j in mini_batches:
            (minibatch_X, minibatch_Y) = j

            # Forward propagation
            a3, cache = opt_utils.forward_propagation(minibatch_X, parameters)

            # Compute the cost
            Loss = opt_utils.compute_cost(a3, minibatch_Y)

            # Backward propagation
            grads = opt_utils.backward_propagation(minibatch_X, minibatch_Y, cache)

            # Update the parameters
            if optimizer == 'adam':
                t += 1
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                               learning_rate, beta1, beta2, epsilon)
            elif optimizer == 'momentum':
                parameters, v = update_parameters_with_momentum(parameters, grads, v, beta1, learning_rate)
            else:
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)

        if i % 100 == 0:
            costs.append(Loss)
        if print_cost and i % 1000 == 0:
            print("Cost after epoch " + str(i) + ": " + str(Loss))

    if is_plot:
        plt.plot(costs)
        plt.ylabel('cost')
        plt.xlabel('epochs (per 100)')
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()

    return parameters

def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True):
    """
    3-layer neural network model which can be run in different optimizer modes.

    Arguments:
    X -- input data, of shape (2, number of examples)
    Y -- true "label" vector (1 for blue dot / 0 for red dot), of shape (1, number of examples)
    layers_dims -- python list, containing the size of each layer
    learning_rate -- the learning rate, scalar.
    mini_batch_size -- the size of a mini batch
    beta -- Momentum hyperparameter
    beta1 -- Exponential decay hyperparameter for the past gradients estimates
    beta2 -- Exponential decay hyperparameter for the past squared gradients estimates
    epsilon -- hyperparameter preventing division by zero in Adam updates
    num_epochs -- number of epochs
    print_cost -- True to print the cost every 1000 epochs

    Returns:
    parameters -- python dictionary containing your updated parameters
    """
    L = len(layers_dims)    # number of layers in the neural networks
    costs = []              # to keep track of the cost
    t = 0                   # initializing the counter required for Adam update
    seed = 10               # For grading purposes, so that your "random" minibatches are the same as ours

    # Initialize parameters
    parameters = initialize_parameters(layers_dims)

    # Initialize the optimizer
    if optimizer == "gd":
        pass  # no initialization required for gradient descent
    elif optimizer == "momentum":
        v = initialize_velocity(parameters)
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)

    # Optimization loop
    for i in range(num_epochs):
        # Define the random minibatches. We increment the seed to reshuffle differently the dataset after each epoch
        seed = seed + 1
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

        for minibatch in minibatches:
            # Select a minibatch
            (minibatch_X, minibatch_Y) = minibatch

            # Forward propagation
            a3, caches = forward_propagation(minibatch_X, parameters)

            # Compute cost
            cost = compute_cost(a3, minibatch_Y)

            # Backward propagation
            grads = backward_propagation(minibatch_X, minibatch_Y, caches)

            # Update parameters
            if optimizer == "gd":
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == "momentum":
                parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
            elif optimizer == "adam":
                t = t + 1  # Adam counter
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                               learning_rate, beta1, beta2, epsilon)

        # Print the cost every 1000 epoch
        if print_cost and i % 1000 == 0:
            print("Cost after epoch %i: %f" % (i, cost))
        if print_cost and i % 100 == 0:
            costs.append(cost)

    # plot the cost
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('epochs (per 100)')
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()

    return parameters

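# A hedged driver sketch for the NumPy model() variants above. train_X / train_Y and the
# layer sizes are illustrative assumptions (a 2-feature dataset and a 3-layer network),
# not values defined anywhere in these snippets.
layers_dims = [train_X.shape[0], 5, 2, 1]

# plain mini-batch gradient descent
parameters = model(train_X, train_Y, layers_dims, optimizer="gd")

# mini-batch gradient descent with Momentum
parameters = model(train_X, train_Y, layers_dims, optimizer="momentum", beta=0.9)

# mini-batch gradient descent with Adam
parameters = model(train_X, train_Y, layers_dims, optimizer="adam")
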
def model(X, Y, layer_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True, is_plot=True):
    L = len(layer_dims)
    costs = []
    t = 0
    seed = 10

    params = opt_utils.initialize_parameters(layer_dims)

    # choose optimization method
    if optimizer == "gd":
        pass
    elif optimizer == "momentum":
        v = init_velocity(params)
    elif optimizer == "adam":
        v, s = init_adam(params)
    else:
        print("Wrong optimizer parameter")
        exit(1)

    # Learning
    for i in range(num_epochs):
        seed = seed + 1
        mini_batches = random_mini_batches(X, Y, mini_batch_size, seed)

        for mini_batch in mini_batches:
            (mini_batch_X, mini_batch_Y) = mini_batch

            # fp
            A3, cache = opt_utils.forward_propagation(mini_batch_X, params)

            # compute cost
            cost = opt_utils.compute_cost(A3, mini_batch_Y)

            # bp
            grads = opt_utils.backward_propagation(mini_batch_X, mini_batch_Y, cache)

            # update params
            if optimizer == "gd":
                params = update_params_with_gd(params, grads, learning_rate)
            elif optimizer == "momentum":
                params, v = update_params_with_momentum(params, grads, v, beta, learning_rate)
            elif optimizer == "adam":
                t = t + 1
                params, v, s = update_params_with_adam(params, grads, v, s, t,
                                                       learning_rate, beta1, beta2, epsilon)

        if i % 100 == 0:
            costs.append(cost)
        if print_cost and i % 1000 == 0:
            print("Epoch " + str(i) + " Cost:" + str(cost))

    if is_plot:
        plt.plot(costs)
        plt.ylabel("cost")
        plt.xlabel("#epochs")
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()

    return params

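# The Adam update helper is called throughout this collection (as update_parameters_with_adam
# or update_params_with_adam) but never shown. A minimal sketch matching the
# (parameters, grads, v, s, t, learning_rate, beta1, beta2, epsilon) signature used above,
# assuming dicts of NumPy arrays keyed "W1"/"dW1"/...; not the original assignment code.
import numpy as np

def update_parameters_with_adam(parameters, grads, v, s, t,
                                learning_rate=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
    # v tracks the first moment of the gradients and s the second moment; both are
    # bias-corrected by 1/(1 - beta**t), and the step divides the corrected first moment
    # by the square root of the corrected second moment (plus epsilon for stability).
    L = len(parameters) // 2
    v_corrected, s_corrected = {}, {}
    for l in range(L):
        for g in ("dW" + str(l + 1), "db" + str(l + 1)):
            v[g] = beta1 * v[g] + (1 - beta1) * grads[g]
            s[g] = beta2 * s[g] + (1 - beta2) * np.square(grads[g])
            v_corrected[g] = v[g] / (1 - beta1 ** t)
            s_corrected[g] = s[g] / (1 - beta2 ** t)
        parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * \
            v_corrected["dW" + str(l + 1)] / (np.sqrt(s_corrected["dW" + str(l + 1)]) + epsilon)
        parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * \
            v_corrected["db" + str(l + 1)] / (np.sqrt(s_corrected["db" + str(l + 1)]) + epsilon)
    return parameters, v, s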