def calculate_loss(self, gt_verbs, role_label_pred, gt_labels, args):
    batch_size = role_label_pred.size()[0]
    # The role loss is computed identically whether the full model is trained
    # (args.train_all) or the verb comes from a pre-trained predictor, so the two
    # originally duplicated branches are collapsed into a single loop.
    loss = 0
    for i in range(batch_size):
        for index in range(gt_labels.size()[1]):
            frame_loss = 0
            for j in range(0, self.max_role_count):
                frame_loss += utils.cross_entropy_loss(
                    role_label_pred[i][j], gt_labels[i, index, j], self.vocab_size)
            # Normalize by the number of roles the ground-truth verb actually has.
            frame_loss = frame_loss / len(
                self.encoder.verb2_role_dict[self.encoder.verb_list[gt_verbs[i]]])
            loss += frame_loss

    final_loss = loss / batch_size
    return final_loss
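# The snippets above and below assume a project helper utils.cross_entropy_loss(pred, target, pad_idx).
# Its implementation is not shown here; a minimal PyTorch sketch consistent with how it is called
# (a 1-D logit vector, a scalar target index, and an "ignore" index for padded labels) might look
# like this -- the body is an assumption, not the original code.
import torch
import torch.nn.functional as F

def cross_entropy_loss_sketch(logits, target, pad_idx=None):
    # logits: 1-D tensor of unnormalized class scores; target: scalar class-index tensor.
    if pad_idx is not None and int(target) == pad_idx:
        return logits.new_zeros(())  # padded slot contributes no loss
    return F.cross_entropy(logits.unsqueeze(0), target.view(1))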
def calculate_eval_loss(self, verb_pred, gt_verbs, role_label_pred, gt_labels, args):
    batch_size = verb_pred.size()[0]
    sorted_idx = torch.sort(verb_pred, 1, True)[1]
    pred_verbs = sorted_idx[:, 0]

    if args.train_all:
        loss = 0
        for i in range(batch_size):
            for index in range(gt_labels.size()[1]):
                frame_loss = 0
                verb_loss = utils.cross_entropy_loss(verb_pred[i], gt_verbs[i])
                gt_role_list = self.encoder.get_role_ids(gt_verbs[i])
                pred_role_list = self.encoder.get_role_ids(pred_verbs[i])
                for j in range(0, self.max_role_count):
                    if pred_role_list[j] == len(self.encoder.role_list):
                        continue
                    if pred_role_list[j] in gt_role_list:
                        g_idx = (gt_role_list == pred_role_list[j]).nonzero()
                        frame_loss += utils.cross_entropy_loss(
                            role_label_pred[i][j], gt_labels[i, index, g_idx], self.vocab_size)
                frame_loss = verb_loss + frame_loss / len(
                    self.encoder.verb2_role_dict[self.encoder.verb_list[gt_verbs[i]]])
                loss += frame_loss
    else:
        loss = 0
        for i in range(batch_size):
            for index in range(gt_labels.size()[1]):
                frame_loss = 0
                verb_loss = utils.cross_entropy_loss(verb_pred[i], gt_verbs[i])
                gt_role_list = self.encoder.get_role_ids(gt_verbs[i])
                pred_role_list = self.encoder.get_role_ids(pred_verbs[i])
                for j in range(0, self.max_role_count):
                    if pred_role_list[j] == len(self.encoder.role_list):
                        continue
                    if pred_role_list[j] in gt_role_list:
                        g_idx = (gt_role_list == pred_role_list[j]).nonzero()
                        frame_loss += utils.cross_entropy_loss(
                            role_label_pred[i][j], gt_labels[i, index, g_idx], self.vocab_size)
                frame_loss = frame_loss / len(
                    self.encoder.verb2_role_dict[self.encoder.verb_list[gt_verbs[i]]])
                loss += frame_loss

    final_loss = loss / batch_size
    return final_loss
def fit(self, X_train, y_train, X_test, y_test, batch_size, num_epochs, optimizer):
    loss_history = []
    train_accuracy = []
    test_accuracy = []
    self.init()
    data_gen = utils.DataGenerator(X_train, y_train, batch_size)
    itr = 0
    for epoch in range(num_epochs):
        epoch_iter = 0
        epoch_accuracy = []
        for X, Y in data_gen:
            optimizer.zeroGrad()
            probabilities = self.forward(X)
            loss = utils.cross_entropy_loss(probabilities, Y)
            self.backward(Y)
            loss_history += [loss]
            itr += 1
            epoch_iter += 1
            optimizer.step()
            epoch_acc = self.evaluate(X, Y)
            epoch_accuracy.append(epoch_acc)
        train_acc = np.array(epoch_accuracy).sum() / epoch_iter
        train_accuracy.append(train_acc)
        test_acc = self.evaluate(X_test, y_test)
        test_accuracy.append(test_acc)
        print("epoch = {}, train accuracy = {} test accuracy = {}".format(
            epoch, train_acc, test_acc))
    return loss_history, train_accuracy, test_accuracy
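# fit() above relies on utils.cross_entropy_loss(probabilities, Y) and utils.DataGenerator.
# A minimal NumPy sketch of such a loss helper, assuming `probabilities` is a (batch, classes)
# array of softmax outputs and `labels` holds integer class ids -- an illustrative assumption,
# not the project's actual helper.
import numpy as np

def cross_entropy_loss_sketch(probabilities, labels, eps=1e-12):
    picked = probabilities[np.arange(len(labels)), labels]  # probability of the true class
    return float(-np.mean(np.log(picked + eps)))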
def calculate_loss(self, rot_pred, gt_labels):
    batch_size = rot_pred.size()[0]
    loss = 0
    for i in range(batch_size):
        verb_loss = utils.cross_entropy_loss(rot_pred[i], gt_labels[i])
        loss += verb_loss
    final_loss = loss / batch_size
    return final_loss
def calculate_loss(self, verb_pred, gt_verbs, role_label_pred, gt_labels):
    batch_size = verb_pred.size()[0]
    loss = 0
    for i in range(batch_size):
        for index in range(gt_labels.size()[1]):
            frame_loss = 0
            verb_loss = utils.cross_entropy_loss(verb_pred[i], gt_verbs[i])
            for j in range(0, self.max_role_count):
                frame_loss += utils.cross_entropy_loss(
                    role_label_pred[i][j], gt_labels[i, index, j], self.vocab_size)
            loss += (verb_loss + frame_loss / len(
                self.encoder.verb2_role_dict[self.encoder.verb_list[gt_verbs[i]]]))
    final_loss = loss / batch_size
    print('loss :', final_loss)
    return final_loss
def calculate_loss(self, verb_pred, gt_verbs):
    batch_size = verb_pred.size()[0]
    loss = 0
    for i in range(batch_size):
        verb_loss = utils.cross_entropy_loss(verb_pred[i], gt_verbs[i])
        loss += verb_loss
    final_loss = loss / batch_size
    return final_loss
def calculate_loss(self, agent_pred, gt_labels):
    batch_size = agent_pred.size()[0]
    loss = 0
    for i in range(batch_size):
        for index in range(gt_labels.size()[1]):
            loss += utils.cross_entropy_loss(agent_pred[i], gt_labels[i, index])
    final_loss = loss / batch_size
    return final_loss
def calculate_loss(self, verb_pred, gt_verbs, role_label_pred, gt_labels):
    batch_size = verb_pred.size()[0]
    verb_ref = verb_pred.size(1)
    loss = 0
    for i in range(batch_size):
        for index in range(verb_ref):
            frame_loss = 0
            verb_loss = utils.cross_entropy_loss(verb_pred[i][index], gt_verbs[i])
            for j in range(0, self.max_role_count):
                frame_loss += utils.cross_entropy_loss(
                    role_label_pred[i][j], gt_labels[i, index, j], self.all_nouns_count)
            frame_loss = verb_loss + frame_loss / len(
                self.encoder.verb2_role_dict[self.encoder.verb_list[gt_verbs[i]]])
            loss += frame_loss
    final_loss = loss / batch_size
    return final_loss
def train(self):
    data = self.train_data
    labels = self.train_labels
    for epoch in range(self.epochs):
        utils.tic()
        total_loss = 0.0  # every epoch's loss starts from zero
        good = 0.0
        total_size = 0.0
        # Shuffle the training set at the start of every epoch.
        data, labels = utils.shuffle(data, labels)
        for d, l in zip(data, labels):
            total_size += 1
            pred, cache = self.fprop(d)
            # Check the prediction.
            y_hat = np.argmax(pred)
            if y_hat == l:
                good += 1
            err_cost = float(pred[int(l)])
            # loss = -log(probability assigned to the true class)
            cross_entropy = utils.cross_entropy_loss(err_cost)
            if self.L2:
                cross_entropy += utils.L2_cost(self.parameters["W"], self.L2)
            total_loss += cross_entropy
            grads = self.bprop(cache, d, l)
            self.weights_updates(grads)
        print('epoch {}:'.format(epoch + 1))
        acc = good * 100 / total_size
        train_acc.append(acc)
        avg_loss = total_loss / total_size
        train_loss.append(avg_loss)
        print('train accuracy: {:2.2f}%'.format(acc))
        print('train AVG loss: {:2.2f}'.format(avg_loss))
        self.validation_acc()
        print('time:')
        utils.toc()
    # End of training: cache everything needed to reuse the trained model.
    trained_model = {
        "norm": self.norm,
        "parameters": self.parameters,
        "lr": self.lr
    }
    directory = str(len(self.hidden)) + 'Hidden/L2/'
    np.save(directory + 'model_' + self.model_name, trained_model)
    self.printGraph(directory)
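# train() above and validation_acc() below pass utils.cross_entropy_loss a single probability
# (the score assigned to the true class) and optionally add utils.L2_cost(W, L2).
# Minimal sketches consistent with that usage -- assumptions, not the project's actual helpers:
def cross_entropy_loss_sketch(p_true, eps=1e-12):
    return -np.log(p_true + eps)  # negative log-likelihood of the true class

def L2_cost_sketch(W, lam):
    return lam * np.sum(np.square(W))  # simple L2 penalty on the weight matrix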
def calculate_loss(self, verb_pred, gt_verbs, role_label_pred, gt_labels):
    batch_size = verb_pred.size()[0]
    criterion = nn.CrossEntropyLoss(ignore_index=self.vocab_size)
    loss = 0
    for i in range(batch_size):
        for index in range(gt_labels.size()[1]):
            frame_loss = 0
            verb_loss = utils.cross_entropy_loss(verb_pred[i], gt_verbs[i])
            #frame_loss = criterion(role_label_pred[i], gt_labels[i, index])
            for j in range(0, self.max_role_count):
                frame_loss += utils.cross_entropy_loss(
                    role_label_pred[i][j], gt_labels[i, index, j], self.vocab_size)
            frame_loss = verb_loss + frame_loss / len(
                self.encoder.verb2_role_dict[self.encoder.verb_list[gt_verbs[i]]])
            loss += frame_loss
    final_loss = loss / batch_size
    return final_loss
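# The otherwise unused criterion = nn.CrossEntropyLoss(ignore_index=self.vocab_size) above hints
# at a vectorized alternative to the per-example, per-role Python loop. A hedged sketch, assuming
# role logits of shape [batch, max_role_count, num_classes] and integer role targets of shape
# [batch, max_role_count] that use self.vocab_size as the padding label:
def role_loss_vectorized_sketch(criterion, role_logits, role_targets):
    num_classes = role_logits.size(-1)
    # Note: ignore_index averages over the non-padded roles, which differs slightly from the
    # loop above, where each frame is normalized by its verb's own role count.
    return criterion(role_logits.reshape(-1, num_classes), role_targets.reshape(-1))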
def calculate_loss(self, ans_predict, all_answers):
    batch_size = ans_predict.size()[0]
    loss = 0
    for i in range(batch_size):
        frame_loss = 0
        for index in range(all_answers.size()[1]):
            frame_loss += utils.cross_entropy_loss(ans_predict[i], all_answers[i][index])
        loss += frame_loss
    final_loss = loss / batch_size
    return final_loss
def calculate_loss_mul(self, verb_pred, gt_verbs):
    batch_size = verb_pred.size()[0]
    verb_ref = verb_pred.size(1)
    loss = 0
    for i in range(batch_size):
        verb_loss = 0
        for r in range(verb_ref):
            verb_loss += utils.cross_entropy_loss(verb_pred[i][r], gt_verbs[i])
        loss += verb_loss
    final_loss = loss / batch_size
    return final_loss
def calculate_role_loss(self, gt_verbs, role_pred, gt_role, args):
    batch_size = role_pred.size()[0]
    # Initialize the loss up front so the function is well-defined even when
    # args.train_all is False (the original only assigned it inside the branch).
    loss = 0
    if args.train_all:
        for i in range(batch_size):
            frame_loss = 0
            for j in range(0, self.max_role_count):
                frame_loss += utils.cross_entropy_loss(
                    role_pred[i][j], gt_role[i, j], self.n_roles)
            frame_loss = frame_loss / len(
                self.encoder.verb2_role_dict[self.encoder.verb_list[gt_verbs[i]]])
            loss += frame_loss
    final_loss = loss / batch_size
    return final_loss
def calculate_eval_loss(self, verb_pred, gt_verbs, gt_labels):
    batch_size = verb_pred.size()[0]
    loss = 0
    for i in range(batch_size):
        for index in range(gt_labels.size()[1]):
            verb_loss = utils.cross_entropy_loss(verb_pred[i], gt_verbs[i])
            loss += verb_loss
    final_loss = loss / batch_size
    return final_loss
def validation_acc(self):
    total = 0.0
    good = 0.0
    total_loss = 0.0
    for d, l in zip(self.val_data, self.val_labels):
        total += 1
        pred, cache = self.fprop(d)
        y_hat = np.argmax(pred)
        if y_hat == int(l):
            good += 1
        err_cost = float(pred[int(l)])
        cross_entropy = utils.cross_entropy_loss(err_cost)
        if self.L2:
            cross_entropy += utils.L2_cost(self.parameters["W"], self.L2)
        total_loss += cross_entropy
    acc = good * 100 / total
    val_acc.append(acc)
    avg_loss = total_loss / total
    val_loss.append(avg_loss)
    print('val acc {:2.2f}%'.format(acc))
    print('val AVG loss: {:2.2f}'.format(avg_loss))
def add_loss_op(self, pred):
    loss = cross_entropy_loss(self.labels_placeholder, pred)
    return loss
def converged_gradient(self, num_iter, X, V, W, iter_check=50000, threshold=0.005,
                       gradient_v=None, gradient_w=None, error=True,
                       gradient_check=False, epsilon=10.**-5, x_j=None, y_j=None):
    training_error = None
    training_loss = None
    if num_iter > 1000000:
        return (True, training_error, training_loss)
    # There are two ways to determine if the gradient has converged:
    #   (1) the training error (error=True)
    #   (2) the magnitude of the gradient (error=False)
    # In both cases, training_error and training_loss are attached to the response
    # for the purposes of plotting.
    if error:
        if num_iter % iter_check != 0:
            return (False, training_error, training_loss)
        if gradient_check:
            # Numerically check five randomly chosen weights via central differences.
            for _ in range(5):
                random_wi = np.random.randint(W.shape[0])
                random_wj = np.random.randint(W.shape[1])
                random_vi = np.random.randint(V.shape[0])
                random_vj = np.random.randint(V.shape[1])

                W_plus_epsilon = W.copy()
                W_plus_epsilon[random_wi][random_wj] += epsilon
                Z_W_plus = self.perform_forward_pass(x_j, V, W_plus_epsilon)[1]
                W_minus_epsilon = W.copy()
                W_minus_epsilon[random_wi][random_wj] -= epsilon
                Z_W_minus = self.perform_forward_pass(x_j, V, W_minus_epsilon)[1]

                V_plus_epsilon = V.copy()
                V_plus_epsilon[random_vi][random_vj] += epsilon
                Z_V_plus = self.perform_forward_pass(x_j, V_plus_epsilon, W)[1]
                V_minus_epsilon = V.copy()
                V_minus_epsilon[random_vi][random_vj] -= epsilon
                Z_V_minus = self.perform_forward_pass(x_j, V_minus_epsilon, W)[1]

                y = np.zeros(10)
                y[y_j] = 1
                if self.loss_function == "mean-squared-error":
                    W_plus_cost = mean_squared_error(Z_W_plus, y)
                    W_minus_cost = mean_squared_error(Z_W_minus, y)
                    V_plus_cost = mean_squared_error(Z_V_plus, y)
                    V_minus_cost = mean_squared_error(Z_V_minus, y)
                else:
                    W_plus_cost = cross_entropy_loss(Z_W_plus.T, y)
                    W_minus_cost = cross_entropy_loss(Z_W_minus.T, y)
                    V_plus_cost = cross_entropy_loss(Z_V_plus.T, y)
                    V_minus_cost = cross_entropy_loss(Z_V_minus.T, y)

                # Central-difference approximation of the gradient.
                gradient_approx_wij = (W_plus_cost - W_minus_cost) / (2. * epsilon)
                gradient_approx_vij = (V_plus_cost - V_minus_cost) / (2. * epsilon)
                if gradient_approx_wij > gradient_w[random_wi][random_wj] + threshold or \
                        gradient_approx_wij < gradient_w[random_wi][random_wj] - threshold or \
                        gradient_approx_vij > gradient_v[random_vi][random_vj] + threshold or \
                        gradient_approx_vij < gradient_v[random_vi][random_vj] - threshold:
                    raise AssertionError("The gradient was incorrectly computed.")

        classifications_training, training_Z = self.predict(X, V, W, return_Z=True)
        training_error, training_indices_error = benchmark(classifications_training, self.labels)
        if self.validation_data is not None and self.validation_labels is not None:
            classifications_validation = self.predict(self.validation_data, V, W)
            validation_error, validation_indices_error = benchmark(
                classifications_validation, self.validation_labels)
        if self.loss_function == "mean-squared-error":
            training_loss = mean_squared_error(training_Z.T, self.Y)
        else:
            training_loss = cross_entropy_loss(training_Z.T, self.Y)
        print("Completed %d iterations.\nThe training error is %.2f.\nThe training loss is %.2f."
              % (num_iter, training_error, training_loss))
        if self.validation_data is not None and self.validation_labels is not None:
            print("The error on the validation set is %.2f." % validation_error)
        if training_error < threshold:
            return (True, training_error, training_loss)
        return (False, training_error, training_loss)
    else:
        if num_iter % iter_check == 0:
            classifications_training, training_Z = self.predict(X, V, W, return_Z=True)
            training_error, indices_error = benchmark(classifications_training, self.labels)
            if self.validation_data is not None and self.validation_labels is not None:
                classifications_validation = self.predict(self.validation_data, V, W)
                validation_error, validation_indices_error = benchmark(
                    classifications_validation, self.validation_labels)
            if self.loss_function == "mean-squared-error":
                training_loss = mean_squared_error(training_Z.T, self.Y)
            else:
                training_loss = cross_entropy_loss(training_Z.T, self.Y)
            # The original format string expected three values but was given two;
            # training_loss is now passed as well.
            print("Completed %d iterations. The training error is %.2f. Training loss is %.2f."
                  % (num_iter, training_error, training_loss))
            if self.validation_data is not None and self.validation_labels is not None:
                print("The error on the validation set is %.2f." % validation_error)
        if np.linalg.norm(gradient_v) < threshold and np.linalg.norm(gradient_w) < threshold:
            return (True, training_error, training_loss)
        else:
            return (False, training_error, training_loss)
X_train, Y_train = utils.shuffle(X_train, Y_train)
for i in range(int(np.floor(train_size / batch_size))):
    X = X_train[i * batch_size:(i + 1) * batch_size]
    Y = Y_train[i * batch_size:(i + 1) * batch_size]
    w_grad, b_grad = utils.gradient_descent(X, Y, w, b)
    # Adagrad-style step: the learning rate decays with the square root of the step count.
    w -= lr / np.sqrt(step) * w_grad
    b -= lr / np.sqrt(step) * b_grad
    step += 1

y_train_pred = utils.f(X_train, w, b)
Y_train_pred = np.round(y_train_pred)
train_acc.append(utils.accruacy(Y_train_pred, Y_train))
train_loss.append(utils.cross_entropy_loss(y_train_pred, Y_train) / train_size)

y_valid_pred = utils.f(X_valid, w, b)
Y_valid_pred = np.round(y_valid_pred)
valid_acc.append(utils.accruacy(Y_valid_pred, Y_valid))
valid_loss.append(utils.cross_entropy_loss(y_valid_pred, Y_valid) / valid_size)

print('Training loss: {}'.format(train_loss[-1]))
print('Validation loss: {}'.format(valid_loss[-1]))
print('Training accuracy: {}'.format(train_acc[-1]))
print('Validation accuracy: {}'.format(valid_acc[-1]))

# Loss curve
plt.plot(train_loss)
plt.plot(valid_loss)
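# This fragment leans on project helpers (utils.f, utils.gradient_descent, utils.accruacy,
# utils.cross_entropy_loss). Because the result above is divided by train_size, the loss helper
# presumably returns a summed binary cross-entropy; a hedged NumPy sketch of such a helper
# (an assumption, not the original implementation):
def binary_cross_entropy_sum_sketch(y_pred, y_true, eps=1e-12):
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return float(-np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)))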
# ---------------Training Network------------------------
train_cost, val_cost, err_tr, err_val, nn_weight_list = Train_network(
    epochmax, reg_lambda, LearningRate, nnparams, layer_sizes, minibatchsize,
    momentum, activ_func, activ_Grad_func, X_train, Y_train, X_val, Y_val)
print('epochmax:{:3.0f}'.format(epochmax),
      ' L2 Regularization: {:1.3f}'.format(reg_lambda),
      ' Learning rate: {:1.2f}'.format(LearningRate),
      ' Layer Sizes', layer_sizes)

# ---------------Printing Results------------------------
activations = forward_prop(layer_sizes, nn_weight_list, X_train, Y_train, activ_func)
output_p = activations[-1]
J_train = cross_entropy_loss(num_labels, output_p, Y_train, reg_lambda, nn_weight_list)
mean_err = Mean_classification_error(Y_train, output_p)
print('Train ', ' Loss: ', J_train, ' Error: ', mean_err)

activation_val = forward_prop(layer_sizes, nn_weight_list, X_val, Y_val, activ_func)
output_p = activation_val[-1]
J_val = cross_entropy_loss(num_labels, output_p, Y_val, reg_lambda, nn_weight_list)
mean_err2 = Mean_classification_error(Y_val, output_p)
print('Validation ', 'Loss: ', J_val, 'Error: ', mean_err2)

activation_test = forward_prop(layer_sizes, nn_weight_list, X_test, Y_test, activ_func)
output_p = activation_test[-1]
mean_err = Mean_classification_error(Y_test, output_p)
# The snippet begins mid-statement; presumably the input queue is built with
# tf.train.string_input_producer over the TFRecord file(s).
filename_queue = tf.train.string_input_producer([training_data_dir], num_epochs=None)
image_batch, label_batch = load_data_from_tfrecords(filename_queue, batch_size)
label = tf.one_hot(label_batch, num_classes, 1, 0)
label = tf.reshape(tf.cast(label, tf.float32), [batch_size, num_classes])
image = tf.cast(image_batch, tf.float32)
image = tf.map_fn(lambda img: tf.image.per_image_standardization(img), image,
                  dtype=tf.float32)

output = model.lenet_advanced(image, num_classes, True, 0.5)
output = tf.reshape(tf.cast(output, tf.float32), [batch_size, num_classes])
loss = utils.cross_entropy_loss(output, label)
train = tf.train.AdamOptimizer(0.001).minimize(loss)

global_vars_init_op = tf.global_variables_initializer()
local_vars_init_op = tf.local_variables_initializer()
combined_op = tf.group(local_vars_init_op, global_vars_init_op)
model_variables = slim.get_model_variables()
saver = tf.train.Saver(model_variables)

with tf.Session() as sess:
    sess.run(combined_op)
    # saver.restore(sess, '/home/kris/PycharmProjects/traffic_sign_recognition/lenet_parameters.ckpt')
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    for i in range(50000):
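# The TF1 pipeline above calls utils.cross_entropy_loss(output, label) on raw logits and
# one-hot labels. A hedged sketch of such a helper using standard TensorFlow 1.x ops
# (an assumption about the project's utils module, not its actual code):
def cross_entropy_loss_sketch(logits, onehot_labels):
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=onehot_labels, logits=logits))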
def grad_test_b(X_train, y_train):
    softmax_in = 2
    softmax_out = 5
    model = models.MyNeuralNetwork()
    model.add(layers.Softmax(softmax_in, softmax_out))
    model.init()
    for p in model.parameters:
        p.grad = 0.

    eps0 = 1
    eps = np.array([(0.5**i) * eps0 for i in range(10)])
    d = np.random.random((1, 5))
    d = d / np.sum(d)
    grad_diff = []
    x_data = np.array([X_train[0]])
    x_label = np.array([y_train[0]])

    for epss in eps:
        model_grad = copy.deepcopy(model)
        probabilities_grad = model_grad.forward(x_data)
        model2 = copy.deepcopy(model)
        model2.graph[0].bias.data += d * epss
        probabilities_grad2 = model2.forward(x_data)
        grad_diff.append(
            np.abs(utils.cross_entropy_loss(probabilities_grad2, x_label) -
                   utils.cross_entropy_loss(probabilities_grad, x_label)))

    fig, axs = plt.subplots(2, 2, figsize=(12, 8), constrained_layout=True)
    fig.suptitle('Gradient test by b', fontsize=16)
    axs[0, 0].plot(eps, grad_diff)
    axs[0, 0].set_xlabel(r'$\epsilon$')
    axs[0, 0].set_title(r'$|f(x+\epsilon d) - f(x)|$')
    axs[0, 1].plot(
        range(len(grad_diff) - 1),
        [grad_diff[i + 1] / grad_diff[i] for i in range(len(grad_diff) - 1)])
    axs[0, 1].set_xlabel('$i$')
    axs[0, 1].set_title('rate of decrease')
    axs[0, 1].set_ylim([0, 1])

    grad_diff = []
    for epss in eps:
        model_grad = copy.deepcopy(model)
        probabilities_grad = copy.deepcopy(model_grad.forward(x_data))
        model2 = copy.deepcopy(model)
        model2.graph[0].bias.data += d * epss
        probabilities_grad2 = copy.deepcopy(model2.forward(x_data))
        model2.backward(x_label)
        grad_x = model2.graph[0].bias.grad
        grad_diff.append(
            np.abs(utils.cross_entropy_loss(probabilities_grad2, x_label) -
                   utils.cross_entropy_loss(probabilities_grad, x_label) -
                   epss * np.dot(d.flatten().T, grad_x.flatten())))

    axs[1, 0].plot(eps, grad_diff)
    axs[1, 0].set_xlabel(r'$\epsilon$')
    axs[1, 0].set_title(r'$|f(x+\epsilon d) - f(x) - \epsilon d^{T} grad(x)|$')
    axs[1, 1].plot(
        range(len(grad_diff) - 1),
        [grad_diff[i + 1] / grad_diff[i] for i in range(len(grad_diff) - 1)])
    axs[1, 1].set_xlabel('$i$')
    axs[1, 1].set_title('rate of decrease')
    axs[1, 1].set_ylim([0, 1])
    plt.show()