def solve(self, costs_matrix, init_state=None):
    # Start from a greedy tour unless an initial state is supplied.
    state = greedy_solution(costs_matrix) if not init_state else init_state
    current_energy = compute_cost(state, costs_matrix)
    best_state = state
    best_state_energy = current_energy
    T = self.init_T
    size = len(state)
    for _ in range(1, self.n_iter):
        move = get_random_move(size)
        candidate_state = self.state_gen(state, *move)
        candidate_energy = compute_cost(candidate_state, costs_matrix)
        if candidate_energy < current_energy:
            # Always accept an improving move.
            current_energy = candidate_energy
            state = candidate_state
            if current_energy < best_state_energy:
                best_state = state
                best_state_energy = current_energy
        else:
            # Accept a worsening move with a probability that depends on the temperature.
            p = transition_probability(candidate_energy - current_energy, T)
            if make_transition(p):
                current_energy = candidate_energy
                state = candidate_state
        T = T * self.cooling_factor
        if T <= self.end_T:
            break
    self.final_path = best_state
    self.final_cost = best_state_energy
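# Hedged sketch (not part of the original source): the simulated-annealing solver above
# relies on helpers named get_random_move, transition_probability and make_transition
# that are not shown here. One minimal, plausible implementation of these assumed helpers:
import math
import random


def get_random_move(size):
    # Pick two distinct positions in the tour to be recombined by state_gen.
    i, j = random.sample(range(size), 2)
    return i, j


def transition_probability(delta_energy, T):
    # Classic Metropolis criterion for a non-improving move: exp(-dE / T).
    return math.exp(-delta_energy / T)


def make_transition(p):
    # Accept the move with probability p.
    return random.random() < p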
def solve(self, matrix):
    if self.init_state is None:
        self.init_state = greedy_solution(matrix)
    current_state = self.init_state
    current_cost = compute_cost(current_state, matrix)
    best_move = None
    self.final_path = current_state
    self.final_cost = current_cost
    tabus = []
    for _ in range(self.n_iter):
        neighbours = self.get_neighours(current_state)
        neighbours_costs = map(
            lambda state: compute_cost(state.path, matrix), neighbours)
        neighbours_with_cost = zip(neighbours_costs, neighbours)
        # Sort by cost only; the neighbour objects themselves may not be orderable.
        for cost, neighbour in sorted(neighbours_with_cost,
                                      key=lambda pair: pair[0]):
            if cost < current_cost:
                # Accept the move unless it is tabu, or if it satisfies the
                # aspiration criterion anyway.
                if neighbour.move not in tabus or self.aspiration_criteria(cost):
                    current_cost = cost
                    current_state = neighbour.path
                    best_move = neighbour.move
        if current_cost < self.final_cost:
            self.final_cost = current_cost
            self.final_path = current_state
        tabus.append(best_move)
        if len(tabus) > self.tabu_size:
            tabus.pop(0)
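# Hedged sketch (assumption, not from the original source): the tabu-search solver above
# expects self.get_neighours to return objects exposing .path and .move attributes.
# One plausible shape, generating all pairwise-swap neighbours of a tour:
from collections import namedtuple

Neighbour = namedtuple("Neighbour", ["path", "move"])


def get_neighbours_by_swap(state):
    # Each neighbour differs from `state` by swapping the cities at positions i and j.
    neighbours = []
    for i in range(len(state)):
        for j in range(i + 1, len(state)):
            path = list(state)
            path[i], path[j] = path[j], path[i]
            neighbours.append(Neighbour(path=path, move=(i, j)))
    return neighbours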
def L_layer_model(self, X, Y, learning_rate=0.0075, num_iterations=3000,
                  print_cost=False):
    np.random.seed(1)
    # keep track of cost
    costs = []
    steps = []
    # Parameters initialization.
    parameters = self.initialize_parameters()
    # Loop (gradient descent)
    grads_old = None
    for i in range(0, num_iterations):
        # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID.
        AL, forward_cache = self.L_model_forward(X, parameters)
        # Compute cost.
        cost = compute_cost(AL, Y)
        # Backward propagation.
        grads = self.L_model_backward(AL, Y, parameters, forward_cache)
        # Update parameters (the previous gradients are passed along as well).
        parameters = self.update_parameters(parameters, grads, learning_rate,
                                            grads_old)
        grads_old = grads
        # Record and print the cost every 4 iterations.
        if print_cost and i % 4 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            steps.append(i)
            costs.append(cost)
    self.result.append((steps, costs))
    return parameters
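# Hedged sketch (assumption): several training routines in this file call a module-level
# compute_cost(AL, Y) that is not shown. A standard cross-entropy implementation,
# consistent with a sigmoid output layer, would look like this:
import numpy as np


def compute_cost(AL, Y):
    # AL: output-layer activations, shape (1, m); Y: labels, shape (1, m).
    m = Y.shape[1]
    cost = -(1. / m) * np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL))
    return np.squeeze(cost)  # collapse the (1, 1) result to a scalar-like value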
def train(
    X: np.ndarray, y: np.ndarray, epoch: int = 100, learning_rate: float = 0.01
) -> dict:
    """
    Train a logistic regression model

    Parameters
    ----------
    X: [n,m] matrix of training examples
    y: [1,m] matrix of output labels/values
    epoch: number of iterations to perform
    learning_rate: step-size for gradient descent update

    Returns
    --------
    a dictionary of learnt parameters (weights & biases)
    """
    params = init_parameters(X.shape[0])
    print(f"X.shape = {X.shape}, y.shape = {y.shape}")
    print(f"initial params: {params}")
    for i in range(epoch):
        A = forward_prop(X=X, params=params)  # forward prop to get prediction
        cost = compute_cost(Y=y, Y_hat=A)  # compute cost
        grads = compute_grads(X=X, Y=y, A=A, params=params)  # compute gradient
        params = update_parameters(
            params=params, grads=grads, learning_rate=learning_rate
        )  # update parameters using gradient descent
        if i % 100 == 0:
            print(f"epoch={i}\tcost={cost}")
    print(f"learnt params: {params}")
    return params
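# Hedged sketch (assumption): init_parameters, forward_prop, compute_cost, compute_grads
# and update_parameters are not shown above. Minimal implementations consistent with the
# call sites, assuming a single sigmoid unit:
import numpy as np


def init_parameters(n_x: int) -> dict:
    return {"W": np.zeros((1, n_x)), "b": 0.0}


def forward_prop(X: np.ndarray, params: dict) -> np.ndarray:
    Z = params["W"] @ X + params["b"]
    return 1.0 / (1.0 + np.exp(-Z))  # sigmoid activation


def compute_cost(Y: np.ndarray, Y_hat: np.ndarray) -> float:
    m = Y.shape[1]
    return float(-(1.0 / m) * np.sum(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)))


def compute_grads(X: np.ndarray, Y: np.ndarray, A: np.ndarray, params: dict) -> dict:
    m = X.shape[1]
    dZ = A - Y
    return {"dW": (1.0 / m) * dZ @ X.T, "db": float((1.0 / m) * np.sum(dZ))}


def update_parameters(params: dict, grads: dict, learning_rate: float) -> dict:
    return {
        "W": params["W"] - learning_rate * grads["dW"],
        "b": params["b"] - learning_rate * grads["db"],
    }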
def solve(self, costs):
    size = costs.shape[0]
    # Enumerate every permutation of the remaining cities (city 0 is fixed as the start).
    for path in itertools.permutations(range(1, size)):
        cost = compute_cost(path, costs)
        if cost < self.final_cost:
            self.final_cost = cost
            self.final_path = path
    self.final_path = list(self.final_path)
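# Hedged sketch (assumption): compute_cost for the tour-based solvers in this file is not
# shown. A natural definition sums the cost-matrix entries along the closed tour that
# starts and ends at city 0:
def compute_cost(path, costs):
    tour = [0] + list(path) + [0]
    return sum(costs[tour[k]][tour[k + 1]] for k in range(len(tour) - 1))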
def execute(rnn, x, y, sequence_length):
    # Execute the model
    (chx, mhx, rv) = (None, None, None)
    output, (chx, mhx, rv), v = rnn(x, (None, mhx, None),
                                    reset_experience=True,
                                    pass_through_memory=True)
    # Keep only the final part of the sequence and drop the control channels.
    y_out = sigm(output[:, -sequence_length:, :-3])
    y = y[:, :, :-3]
    return compute_cost(y_out, y, batch_size=1).item()
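# Hedged sketch (assumption): sigm is not defined here; it is presumably an elementwise
# sigmoid applied to the output tensor, e.g. with PyTorch:
import torch


def sigm(t):
    return torch.sigmoid(t)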
def train(
    X: np.ndarray,
    y: np.ndarray,
    hidden_layer_dims: list = [3],
    epoch: int = 100,
    learning_rate: float = 0.01,
) -> dict:
    """
    Train a neural network

    Parameters
    ----------
    X: [n,m] matrix of training examples
    y: [1,m] matrix of output labels/values
    hidden_layer_dims: a list of hidden layer dimensions where each item
        denotes the number of neurons in that layer
    epoch: number of iterations to perform
    learning_rate: step-size for gradient descent update

    Returns
    --------
    a dictionary of learnt parameters (weights & biases)
    """
    n_x = X.shape[0]
    n_y = y.shape[0]
    layer_dims = [n_x] + hidden_layer_dims + [n_y]
    params = init_parameters(layer_dims=layer_dims)
    print(f"X.shape = {X.shape}, y.shape = {y.shape}")
    print("layer dims", layer_dims)
    print(f"initial params: {params}")
    for i in range(epoch):
        A, cache = forward_prop(
            X=X, params=params)  # forward prop to get prediction
        cost = compute_cost(Y=y, Y_hat=A)  # compute cost
        grads = compute_grads(X=X, Y=y, cache=cache,
                              params=params)  # compute gradient
        params = update_parameters(
            params=params, grads=grads, learning_rate=learning_rate
        )  # update parameters using gradient descent
        if i % 100 == 0:
            print(f"epoch={i}\tcost={cost}")
    print(f"learnt params: {params}")
    return params
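# Hedged sketch (assumption): one plausible init_parameters for the layer_dims list used
# above, with small random weights and zero biases per layer:
import numpy as np


def init_parameters(layer_dims: list) -> dict:
    params = {}
    for l in range(1, len(layer_dims)):
        params["W" + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * 0.01
        params["b" + str(l)] = np.zeros((layer_dims[l], 1))
    return params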
def compute_cost_with_regularization(A3, Y, parameters, lambd):
    """
    Add L2 regularization to the loss function
    """
    m = Y.shape[1]
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    W3 = parameters["W3"]
    # Cross-entropy loss
    cross_entropy_cost = compute_cost(A3, Y)
    # Start
    L2_regularization_cost = (1. / m) * (lambd / 2) * \
        (np.sum(np.square(W1)) + np.sum(np.square(W2)) + np.sum(np.square(W3)))
    cost = cross_entropy_cost + L2_regularization_cost
    # End
    return cost
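# Hedged sketch (assumption): the backward pass matching the regularized cost above only
# differs from the plain backward pass by an extra (lambd / m) * W term on each weight
# gradient, e.g.:
def add_l2_term_to_gradients(grads, parameters, lambd, m):
    for l in (1, 2, 3):
        grads["dW" + str(l)] = grads["dW" + str(l)] + (lambd / m) * parameters["W" + str(l)]
    return grads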
plot_data(X, Y, xlabel, ylabel, legend)

# ============= Map data to Polynomial features ============== #
# this maps the features to a polynomial of degree 6
X = map_features(X[:, 0], X[:, 1])

# ============= Run logistic regression ================ #
initial_theta = np.zeros(X.shape[1])
# Regularization parameter: this dictates how much the cost function is penalized
initial_lambda = 1

print('Computing cost with initial theta...')
cost = compute_cost(initial_theta, X, Y, regularized=True,
                    lambda_=initial_lambda)
grad = compute_gradient(initial_theta, X, Y, regularized=True,
                        lambda_=initial_lambda)
print(f'Cost with initial theta: {cost}')
print(f'First 5 gradients with initial theta\n{grad[:5].reshape(-1, 1)}')

test_theta = np.ones(X.shape[1])
test_lambda = 10
print('Computing cost with test theta...')
cost = compute_cost(test_theta, X, Y, regularized=True, lambda_=test_lambda)
grad = compute_gradient(test_theta, X, Y, regularized=True, lambda_=test_lambda)
print(f'Cost with test theta: {cost}')
print(f'First 5 gradients with test theta\n{grad[:5].reshape(-1, 1)}')
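# Hedged sketch (assumption): map_features is not shown. A degree-6 polynomial feature
# mapping of two input features, including a bias column of ones, would be:
import numpy as np


def map_features(x1, x2, degree=6):
    x1, x2 = np.asarray(x1).ravel(), np.asarray(x2).ravel()
    columns = [np.ones(x1.shape[0])]
    for i in range(1, degree + 1):
        for j in range(i + 1):
            columns.append((x1 ** (i - j)) * (x2 ** j))
    return np.stack(columns, axis=1)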
def model(X, Y, learning_rate=0.3, num_iterations=30000, lambd=0, keep_prob=1):
    """
    Three-layer network with activations: LINEAR->RELU->LINEAR->RELU->LINEAR->SIGMOID.
    First hidden layer: 20 neurons
    Second hidden layer: 3 neurons
    Output layer: 1 neuron
    """
    grads = {}
    costs = []
    m = X.shape[1]
    layers_dims = [X.shape[0], 20, 3, 1]

    # Initialize the network parameters
    parameters = initialize_parameters(layers_dims)

    # Gradient descent loop
    for i in range(0, num_iterations):

        # Forward propagation:
        # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID.
        # If keep_prob == 1, do a plain forward pass;
        # if keep_prob < 1, apply dropout.
        if keep_prob == 1:
            a3, cache = forward_propagation(X, parameters)
        elif keep_prob < 1:
            a3, cache = forward_propagation_with_dropout(X, parameters,
                                                         keep_prob)

        # Compute the cost; if lambd is non-zero, add L2 regularization
        if lambd == 0:
            cost = compute_cost(a3, Y)
        else:
            cost = compute_cost_with_regularization(a3, Y, parameters, lambd)

        # Only one of L2 regularization or dropout may be used at a time
        assert (lambd == 0 or keep_prob == 1)

        if lambd == 0 and keep_prob == 1:
            grads = backward_propagation(X, Y, cache)
        elif lambd != 0:
            grads = backward_propagation_with_regularization(X, Y, cache, lambd)
        elif keep_prob < 1:
            grads = backward_propagation_with_dropout(X, Y, cache, keep_prob)

        # Update parameters
        parameters = update_parameters(parameters, grads, learning_rate)

        # Print the loss every 10000 iterations
        if i % 10000 == 0:
            print("Cost after iteration {}: {}".format(i, cost))
            costs.append(cost)

    # Plot how the cost evolves
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('iterations')
    plt.title("Cost curve, learning rate = " + str(learning_rate))
    plt.show()

    return parameters
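# Hedged sketch (assumption): forward_propagation_with_dropout is referenced above but
# not shown. For the assumed 3-layer RELU/RELU/SIGMOID network, inverted dropout on the
# hidden activations would look roughly like this (the cache layout is an assumption and
# must match whatever the backward pass expects):
import numpy as np


def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]
    W3, b3 = parameters["W3"], parameters["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = np.maximum(0, Z1)                      # ReLU
    D1 = np.random.rand(*A1.shape) < keep_prob  # dropout mask, layer 1
    A1 = A1 * D1 / keep_prob                    # inverted-dropout scaling

    Z2 = np.dot(W2, A1) + b2
    A2 = np.maximum(0, Z2)                      # ReLU
    D2 = np.random.rand(*A2.shape) < keep_prob  # dropout mask, layer 2
    A2 = A2 * D2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = 1.0 / (1.0 + np.exp(-Z3))              # sigmoid output

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
    return A3, cache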
print('Remember to close the plot. Otherwise, the process does not continue')
xlabel = 'Score: First Exam'
ylabel = 'Score: Second Exam'
legend = ['Admitted', 'Not admitted']
plot_data(X, Y, xlabel, ylabel, legend)

# ============= Part 2: Compute cost and gradient ============== #
print('Calculating cost and gradient...')
m, n = X.shape
X = np.concatenate((np.ones((m, 1)), X), axis=1)
initial_theta = np.zeros((n + 1, 1))

cost = compute_cost(initial_theta, X, Y)
grad = compute_gradient(initial_theta, X, Y)
print(f'Cost with initial parameters (all zeros): {cost}')
print(f'Gradients with initial parameters:\n{grad}')

test_theta = np.array([[-24], [0.2], [0.2]])
cost = compute_cost(test_theta, X, Y)
grad = compute_gradient(test_theta, X, Y)
print(f'Cost with test parameters:\n{test_theta}\nCost:{cost}')
print(f'Gradients with test parameters: \n{grad}')
input('Press enter to continue...')

# ================= Part 3: Optimizing ================== #
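# Hedged sketch (assumption): compute_cost and compute_gradient for the logistic
# regression scripts in this file are not shown. Implementations consistent with the
# call sites (plain and regularized, with the bias term excluded from the penalty):
import numpy as np


def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def compute_cost(theta, X, Y, regularized=False, lambda_=0.0):
    m = X.shape[0]
    h = _sigmoid(X @ theta.reshape(-1, 1))
    y = Y.reshape(-1, 1)
    cost = -(1.0 / m) * np.sum(y * np.log(h) + (1 - y) * np.log(1 - h))
    if regularized:
        cost += (lambda_ / (2 * m)) * np.sum(np.square(theta.ravel()[1:]))
    return cost


def compute_gradient(theta, X, Y, regularized=False, lambda_=0.0):
    m = X.shape[0]
    h = _sigmoid(X @ theta.reshape(-1, 1))
    y = Y.reshape(-1, 1)
    grad = (1.0 / m) * (X.T @ (h - y))
    if regularized:
        reg = (lambda_ / m) * theta.reshape(-1, 1)
        reg[0] = 0  # do not regularize the bias term
        grad += reg
    return grad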
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001,
          num_epochs=1500, minibatch_size=32, print_cost=True):
    """
    Implements a three-layer tensorflow neural network:
    LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.

    Arguments:
    X_train -- training set, of shape (input size = 784, number of training examples = 27455)
    Y_train -- training set, of shape (output size = 24, number of training examples = 27455)
    X_test -- test set, of shape (input size = 784, number of test examples = 7172)
    Y_test -- test set, of shape (output size = 24, number of test examples = 7172)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 100 epochs

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    ops.reset_default_graph()  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)      # to keep consistent results
    seed = 3                   # to keep consistent results
    (n_x, m) = X_train.shape   # (n_x: input size, m: number of examples in the train set)
    n_y = Y_train.shape[0]     # n_y: output size
    costs = []                 # To keep track of the cost

    # Create Placeholders of shape (n_x, n_y)
    X, Y = create_placeholders(n_x, n_y)

    # Initialize parameters
    parameters = initialize_parameters()

    # Forward propagation: Build the forward propagation in the tensorflow graph
    Z3 = forward_propagation(X, parameters)

    # Cost function: Add cost function to tensorflow graph
    cost = compute_cost(Z3, Y)

    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)

    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(num_epochs):

            epoch_cost = 0.  # Defines a cost related to an epoch
            # number of minibatches of size minibatch_size in the train set
            num_minibatches = int(m / minibatch_size)
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train,
                                              minibatch_size, seed)

            for minibatch in minibatches:
                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch

                # IMPORTANT: The line that runs the graph on a minibatch.
                # Run the session to execute the "optimizer" and the "cost";
                # the feed_dict should contain a minibatch for (X, Y).
                _, minibatch_cost = sess.run([optimizer, cost],
                                             feed_dict={X: minibatch_X,
                                                        Y: minibatch_Y})

                epoch_cost += minibatch_cost / num_minibatches

            # Print the cost every 100 epochs
            if print_cost == True and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)

        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per fives)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # Calculate the correct predictions
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))

        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

        return parameters
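# Hedged sketch (assumption): the TF1-style helpers create_placeholders and compute_cost
# used above are not shown. Typical definitions for a softmax classifier with a
# "features x examples" data layout would be:
import tensorflow as tf


def create_placeholders(n_x, n_y):
    # None lets the number of examples in a minibatch vary.
    X = tf.placeholder(tf.float32, shape=[n_x, None], name="X")
    Y = tf.placeholder(tf.float32, shape=[n_y, None], name="Y")
    return X, Y


def compute_cost(Z3, Y):
    # softmax_cross_entropy_with_logits expects examples along the first axis,
    # hence the transposes.
    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))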
def model(X_train, Y_train, X_test, Y_test, learning_rate, num_epochs,
          minibatch_size, print_cost=True):

    tf.reset_default_graph()  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)     # to keep consistent results
    seed = 3                  # to keep consistent results
    (n_x, m) = X_train.shape  # (n_x: input size, m: number of examples in the train set)
    n_y = Y_train.shape[0]    # n_y: output size
    costs = []                # To keep track of the cost

    X, Y = create_placeholders(n_x, n_y)
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    cost = compute_cost(Z3, Y)
    print(cost)

    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    ### START CODE HERE ### (1 line)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    ### END CODE HERE ###

    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(num_epochs):

            epoch_cost = 0.  # Defines a cost related to an epoch
            num_minibatches = int(m / minibatch_size)  # number of minibatches of size minibatch_size in the train set
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)

            for minibatch in minibatches:
                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch

                # IMPORTANT: The line that runs the graph on a minibatch.
                # Run the session to execute the "optimizer" and the "cost";
                # the feed_dict should contain a minibatch for (X, Y).
                ### START CODE HERE ### (1 line)
                _, minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
                ### END CODE HERE ###

                epoch_cost += minibatch_cost / num_minibatches

            # Print the cost every 100 epochs
            if print_cost == True and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)

        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per fives)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # check = sess.run(tf.test.compute_gradient_error(X_train, X_train.shape, Y_train, Y_train.shape))
        # print(check)

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # Calculate the correct predictions
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))

        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("X_test -------", X_test.shape)
        print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

        return parameters
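# Hedged sketch (assumption): random_mini_batches is used by the TF models above but not
# shown. A standard version that shuffles the columns (examples) and slices them into
# mini-batches, with a smaller final batch when m is not divisible by the batch size:
import math
import numpy as np


def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
    np.random.seed(seed)
    m = X.shape[1]
    permutation = list(np.random.permutation(m))
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation]

    mini_batches = []
    num_complete = math.floor(m / mini_batch_size)
    for k in range(num_complete):
        mini_batches.append((shuffled_X[:, k * mini_batch_size:(k + 1) * mini_batch_size],
                             shuffled_Y[:, k * mini_batch_size:(k + 1) * mini_batch_size]))
    if m % mini_batch_size != 0:
        mini_batches.append((shuffled_X[:, num_complete * mini_batch_size:],
                             shuffled_Y[:, num_complete * mini_batch_size:]))
    return mini_batches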
def model(X, Y, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000,
          print_cost=True):
    """
    Model logic: a three-layer network (not counting the input layer)
    First hidden layer: 5 neurons
    Second hidden layer: 2 neurons
    Output layer: 1 neuron
    """
    # Layer dimensions of the network
    layers_dims = [X.shape[0], 5, 2, 1]
    L = len(layers_dims)
    costs = []
    t = 0
    seed = 10

    # Initialize the network parameters
    parameters = initialize_parameters(layers_dims)

    # Initialize the optimizer parameters
    if optimizer == "momentum":
        v = initialize_momentum(parameters)
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)

    # Optimization loop
    for i in range(num_epochs):

        # Shuffle the examples differently on every epoch
        seed = seed + 1
        # Build the mini-batches
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

        # Start
        for minibatch in minibatches:

            # The data of this mini-batch
            (minibatch_X, minibatch_Y) = minibatch

            # Forward propagation on minibatch_X with parameters, returning a3, caches
            a3, caches = forward_propagation(minibatch_X, parameters)

            # Compute the cost from a3 and minibatch_Y
            cost = compute_cost(a3, minibatch_Y)

            # Backward propagation, returning the gradients
            gradients = backward_propagation(minibatch_X, minibatch_Y, caches)

            # Update parameters
            if optimizer == "momentum":
                parameters, v = update_parameters_with_momentum(
                    parameters, gradients, v, beta, learning_rate)
            elif optimizer == "adam":
                t = t + 1
                parameters, v, s = update_parameters_with_adam(
                    parameters, gradients, v, s, t, learning_rate, beta1,
                    beta2, epsilon)
        # End

        # Print the loss every 1000 epochs
        if print_cost and i % 1000 == 0:
            print("Cost after epoch %i: %f" % (i, cost))
        if print_cost and i % 100 == 0:
            costs.append(cost)

    # Plot how the cost evolves
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('epochs (per 100)')
    plt.title("Cost curve")
    plt.show()

    return parameters
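# Hedged sketch (assumption): the momentum branch above needs initialize_momentum and
# update_parameters_with_momentum, which are not shown. Standard versions for a
# parameters dict keyed as W1/b1, W2/b2, ...:
import numpy as np


def initialize_momentum(parameters):
    L = len(parameters) // 2
    v = {}
    for l in range(1, L + 1):
        v["dW" + str(l)] = np.zeros_like(parameters["W" + str(l)])
        v["db" + str(l)] = np.zeros_like(parameters["b" + str(l)])
    return v


def update_parameters_with_momentum(parameters, grads, v, beta, learning_rate):
    L = len(parameters) // 2
    for l in range(1, L + 1):
        # Exponentially weighted average of the gradients
        v["dW" + str(l)] = beta * v["dW" + str(l)] + (1 - beta) * grads["dW" + str(l)]
        v["db" + str(l)] = beta * v["db" + str(l)] + (1 - beta) * grads["db" + str(l)]
        # Gradient step along the smoothed direction
        parameters["W" + str(l)] -= learning_rate * v["dW" + str(l)]
        parameters["b" + str(l)] -= learning_rate * v["db" + str(l)]
    return parameters, v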
def model(self, path_train_dataset, path_test_dataset, X_train_column,
          Y_train_column, X_test_column, Y_test_column, classes_list,
          optimizer_algo='adam', print_cost=True):

    # load the datasets
    X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = load_dataset(
        path_train_dataset, path_test_dataset, X_train_column, Y_train_column,
        X_test_column, Y_test_column, classes_list)

    # pre-processing
    X_train, Y_train, X_test, Y_test = flatten(X_train_orig, Y_train_orig,
                                               X_test_orig, Y_test_orig,
                                               classes)

    # to be able to rerun the model without overwriting tf variables
    ops.reset_default_graph()

    # (n_x: input size, m: number of examples in the train set)
    (n_x, m) = X_train.shape
    n_y = Y_train.shape[0]  # n_y: output size
    costs = []              # To keep track of the cost
    seed = 3                # to keep consistent results

    # Create Placeholders of shape (n_x, n_y)
    X, Y = create_placeholders(n_x, n_y)

    # Initialize parameters
    parameters = initialize_parameters(self.layers_list, seed=1)

    # Forward propagation: Build forward propagation in the tensorflow graph
    Z_final_layer = forward_propagation(X, parameters)

    # Cost function: Add cost function to tensorflow graph
    cost = compute_cost(Z_final_layer, Y)

    # Backpropagation: Define the tensorflow optimizer
    # (gradient descent, momentum, or Adam)
    if optimizer_algo == 'gradient_descent':
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=self.learning_rate).minimize(cost)
    elif optimizer_algo == 'momentum':
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=self.learning_rate,
            momentum=0.9).minimize(cost)  # momentum coefficient assumed
    elif optimizer_algo == 'adam':
        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(cost)

    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(self.n_epochs):

            epoch_cost = 0.  # Defines a cost related to an epoch
            # number of minibatches of size minibatch_size in the train set
            num_minibatches = int(m / self.minibatch_size)
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train,
                                              self.minibatch_size, seed)

            for minibatch in minibatches:
                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch
                _, minibatch_cost = sess.run([optimizer, cost],
                                             feed_dict={X: minibatch_X,
                                                        Y: minibatch_Y})
                epoch_cost += minibatch_cost / num_minibatches

            # Print the cost every 100 epochs
            if print_cost == True and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # stores quantities useful for later
        quantities = {
            "X": X,
            "Y": Y,
            "Z_final_layer": Z_final_layer,
            "X_train": X_train,
            "Y_train": Y_train,
            "X_test": X_test,
            "Y_test": Y_test
        }

        return quantities, costs, parameters
display_data(X[rand_indxs, :])
input('Press enter to continue...')

# ========== Test Logistic Regression ============ #
theta_t = np.array([-2, -1, 1, 2])
ones = np.ones((5, 1))
X_t = np.concatenate(
    [ones, np.arange(1, 16).reshape(5, 3, order='F') / 10], axis=1)
y_t = np.array([1, 0, 1, 0, 1]) >= 0.5
lambda_t = 3
cost = compute_cost(theta_t, X_t, y_t, regularized=True, lambda_=lambda_t)
grad = compute_gradient(theta_t, X_t, y_t, regularized=True, lambda_=lambda_t)
print(f'cost with test parameters: {cost}')
print(f'gradients with test parameters:\n{grad}')
input('Press enter to continue...')

# =============== Train One vs All =============== #
lambda_ = 0.1
m = OneVsAll(X, Y, num_labels, lambda_)
m.fit()

# =============== Predict One vs All =============== #