def apply_loss_function(self, global_step):
    """Build the total loss and the momentum-SGD training op.

    Args:
        global_step: ``tf.Variable`` counting applied updates; drives the
            staircase learning-rate decay and is incremented by the train op.

    Returns:
        A training op. When ``self.finetune_all`` is set, every variable in
        ``self.train_layers`` is updated (biases at 2x the base lr) along
        with the last layer; otherwise only the last layer's weight (10x lr)
        and bias (20x lr) are updated.
    """
    # Total loss = balanced, normalized cross-entropy + weighted quantization.
    self.cos_loss = cross_entropy_loss(self.img_last_layer, self.img_label,
                                       self.alpha, normed=True, balanced=True)
    self.q_loss = self.cq_lambda * quantization_loss(self.img_last_layer)
    self.loss = self.cos_loss + self.q_loss

    # Staircase-decayed base learning rate.
    # Last layer has a 10 times learning rate (applied via grad scaling below).
    self.lr = tf.train.exponential_decay(self.learning_rate, global_step,
                                         self.decay_step,
                                         self.learning_rate_decay_factor,
                                         staircase=True)
    opt = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=0.9)
    grads_and_vars = opt.compute_gradients(
        self.loss, self.train_layers + self.train_last_layer)
    # The last two (grad, var) pairs belong to the final layer's weight/bias.
    fcgrad, _ = grads_and_vars[-2]
    fbgrad, _ = grads_and_vars[-1]

    # for debug
    self.grads_and_vars = grads_and_vars
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('ce_loss', self.cos_loss)
    tf.summary.scalar('q_loss', self.q_loss)
    tf.summary.scalar('lr', self.lr)
    self.merged = tf.summary.merge_all()

    # Freshly initialized head trains at 10x (weight) / 20x (bias) base lr.
    last_layer_updates = [(fcgrad * 10, self.train_last_layer[0]),
                          (fbgrad * 20, self.train_last_layer[1])]
    if not self.finetune_all:
        return opt.apply_gradients(last_layer_updates,
                                   global_step=global_step)

    # Generalized form of the original hand-written 14-entry list: backbone
    # variables alternate weight/bias, and biases (odd indices) train at 2x
    # the base lr. Works for any number of backbone layers, not exactly 14.
    backbone_updates = [
        (grad * 2 if i % 2 else grad, self.train_layers[i])
        for i, (grad, _) in enumerate(grads_and_vars[:-2])
    ]
    return opt.apply_gradients(backbone_updates + last_layer_updates,
                               global_step=global_step)
def train(model: models.Model,
          optimizer: optimizers.Optimizer,
          train_instances: List[Dict[str, np.ndarray]],
          validation_instances: List[Dict[str, np.ndarray]],
          num_epochs: int,
          batch_size: int,
          serialization_dir: str = None) -> Dict:
    """
    Trains ``model`` on the given training instances and evaluates it on the
    validation instances after every epoch, checkpointing the weights with
    the best validation accuracy into ``serialization_dir``.

    Args:
        model: model whose ``__call__`` returns a dict with a "logits" entry.
        optimizer: optimizer used to apply the computed gradients.
        train_instances: training examples; each dict must contain "labels".
        validation_instances: held-out examples; each dict must contain "labels".
        num_epochs: number of passes over the training batches.
        batch_size: batch size passed to ``generate_batches``.
        serialization_dir: directory for tensorboard logs and the best
            checkpoint. NOTE(review): a ``None`` default is declared but
            ``os.path.join(None, ...)`` would raise — callers appear to
            always pass a directory; confirm before relying on the default.

    Returns:
        ``{"model": model, "metrics": metrics}`` where metrics holds the last
        epoch's training/validation loss and accuracy plus the best
        validation accuracy/loss seen across epochs.
    """
    print("\nGenerating Training batches:")
    train_batches = generate_batches(train_instances, batch_size)
    print("Generating Validation batches:")
    validation_batches = generate_batches(validation_instances, batch_size)

    # Pop labels out of the inputs so model(**batch_inputs) doesn't receive
    # an unexpected "labels" keyword argument.
    train_batch_labels = [batch_inputs.pop("labels")
                          for batch_inputs in train_batches]
    validation_batch_labels = [batch_inputs.pop("labels")
                               for batch_inputs in validation_batches]

    tensorboard_logs_path = os.path.join(serialization_dir, 'tensorboard_logs')
    tensorboard_writer = tf.summary.create_file_writer(tensorboard_logs_path)

    best_epoch_validation_accuracy = float("-inf")
    best_epoch_validation_loss = float("inf")
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch}")

        # ---- Training pass ----
        total_training_loss = 0
        total_correct_predictions, total_predictions = 0, 0
        generator_tqdm = tqdm(list(zip(train_batches, train_batch_labels)))
        for index, (batch_inputs, batch_labels) in enumerate(generator_tqdm):
            with tf.GradientTape() as tape:
                logits = model(**batch_inputs, training=True)["logits"]
                loss_value = cross_entropy_loss(logits, batch_labels)
            grads = tape.gradient(loss_value, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            total_training_loss += loss_value
            batch_predictions = np.argmax(
                tf.nn.softmax(logits, axis=-1).numpy(), axis=-1)
            total_correct_predictions += (
                batch_predictions == batch_labels).sum()
            total_predictions += batch_labels.shape[0]
            description = (
                "Average training loss: %.2f Accuracy: %.2f "
                % (total_training_loss / (index + 1),
                   total_correct_predictions / total_predictions))
            generator_tqdm.set_description(description, refresh=False)
        average_training_loss = total_training_loss / len(train_batches)
        training_accuracy = total_correct_predictions / total_predictions

        # ---- Validation pass (no gradient updates) ----
        total_validation_loss = 0
        total_correct_predictions, total_predictions = 0, 0
        generator_tqdm = tqdm(
            list(zip(validation_batches, validation_batch_labels)))
        for index, (batch_inputs, batch_labels) in enumerate(generator_tqdm):
            logits = model(**batch_inputs, training=False)["logits"]
            loss_value = cross_entropy_loss(logits, batch_labels)
            total_validation_loss += loss_value
            batch_predictions = np.argmax(
                tf.nn.softmax(logits, axis=-1).numpy(), axis=-1)
            total_correct_predictions += (
                batch_predictions == batch_labels).sum()
            total_predictions += batch_labels.shape[0]
            description = (
                "Average validation loss: %.2f Accuracy: %.2f "
                % (total_validation_loss / (index + 1),
                   total_correct_predictions / total_predictions))
            generator_tqdm.set_description(description, refresh=False)
        average_validation_loss = total_validation_loss / len(
            validation_batches)
        validation_accuracy = total_correct_predictions / total_predictions

        if validation_accuracy > best_epoch_validation_accuracy:
            print(
                "Model with best validation accuracy so far: %.2f. Saving the model."
                % (validation_accuracy))
            # BUG FIX: the original called ``classifier.save_weights`` but no
            # ``classifier`` exists in this scope (NameError on first
            # improvement); the model being trained is ``model``.
            model.save_weights(os.path.join(serialization_dir, 'model.ckpt'))
            best_epoch_validation_loss = average_validation_loss
            best_epoch_validation_accuracy = validation_accuracy

        with tensorboard_writer.as_default():
            tf.summary.scalar("loss/training",
                              average_training_loss, step=epoch)
            tf.summary.scalar("loss/validation",
                              average_validation_loss, step=epoch)
            tf.summary.scalar("accuracy/training",
                              training_accuracy, step=epoch)
            tf.summary.scalar("accuracy/validation",
                              validation_accuracy, step=epoch)
        tensorboard_writer.flush()

    metrics = {
        "training_loss": float(average_training_loss),
        "validation_loss": float(average_validation_loss),
        "training_accuracy": float(training_accuracy),
        "best_epoch_validation_accuracy": float(best_epoch_validation_accuracy),
        "best_epoch_validation_loss": float(best_epoch_validation_loss)
    }
    print("Best epoch validation accuracy: %.4f, validation loss: %.4f"
          % (best_epoch_validation_accuracy, best_epoch_validation_loss))
    return {"model": model, "metrics": metrics}
completed_epochs = 0
while completed_epochs < EPOCHS:
    # Pull the next mini-batch; the batcher signals exhaustion by
    # returning a boolean instead of an array.
    batch_x, batch_y = mb.fetch_minibatch()
    if isinstance(batch_x, bool):
        # One full pass finished: bump the epoch count, reset the
        # MiniBatcher, and grab a fresh batch.
        completed_epochs += 1
        mb.new_epoch()
        batch_x, batch_y = mb.fetch_minibatch()

    # Forward pass -> softmax -> loss, then one Adam step on the gradient.
    raw_scores = nn.forward_pass(input=batch_x)
    probs = nn.softmax(input=raw_scores)
    batch_loss = cross_entropy_loss(y_pred=probs, y_actual=batch_y)
    batch_grad = nn.get_gradient(input=batch_x, y_pred=probs,
                                 y_actual=batch_y)
    adam.update_weights(weights=nn.weights, gradient=batch_grad)
    historical_losses.append(batch_loss)

# Our final prediction over the full training set.
final_probs = nn.softmax(nn.forward_pass(input=train_samples))
predicted_classes = np.argmax(final_probs, axis=1)
print(f'Trained network predictions: {predicted_classes}')
print(f'Ground-truth values: {train_labels}')
if np.array_equal(predicted_classes, train_labels):
    print('Congrats, your network has solved the XOR problem!')
else:
    print('Looks like your network is not quite there... Try more epochs.')

# Converting the historical_loss list into a plot...
plt.plot(historical_losses)
# params lr = 1e-5 total_updates = 10000 log_interval = 1000 F = Model() D = Data("../data/mnist/train-images",batch_size = 32) x_train,y_train = D.get_batch() x_train = tf.reshape(x_train,[-1,28*28]) opt = tf.train.GradientDescentOptimizer(lr) y_logits, _ = F.inference(x_train) loss = cross_entropy_loss(logits = y_logits, labels = y_train) correct_prediction = tf.equal(y_train, tf.cast(tf.argmax(y_logits, 1),dtype=tf.int32)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) back_prop = opt.minimize(loss) with tf.Session() as sess: # tensor flow things init = tf.global_variables_initializer() coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) sess.run(init) for i in range(1,total_updates+1): sess.run(
def train_svhn():
    """Train the multi-head SVHN digit model for FLAGS.MAX_STEPS steps.

    Builds the graph (input placeholders, model heads, summed cross-entropy
    loss, Adagrad with staircase lr decay), then runs a feed_dict training
    loop over contiguous batches, logging loss and mean per-head accuracy
    every 50 steps.
    """
    graph = tf.Graph()
    with graph.as_default():
        # Get images and labels for SVHN.
        train_dataset, test_dataset, train_labels, \
            test_labels, train_lengths, test_lengths = preprocessing.load_svhn()

        # Input data: one batch of images, the digit-count targets, and one
        # label placeholder per digit position (up to 5 digits).
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(FLAGS.BATCH_SIZE, FLAGS.IM_SIZE,
                               FLAGS.IM_SIZE, FLAGS.num_channels))
        tf_train_lengths = tf.placeholder(tf.int32, shape=(FLAGS.BATCH_SIZE))
        tf_train_labels1 = tf.placeholder(tf.int32, shape=(FLAGS.BATCH_SIZE))
        tf_train_labels2 = tf.placeholder(tf.int32, shape=(FLAGS.BATCH_SIZE))
        tf_train_labels3 = tf.placeholder(tf.int32, shape=(FLAGS.BATCH_SIZE))
        tf_train_labels4 = tf.placeholder(tf.int32, shape=(FLAGS.BATCH_SIZE))
        tf_train_labels5 = tf.placeholder(tf.int32, shape=(FLAGS.BATCH_SIZE))

        # Build the inference graph: head 0 predicts the sequence length,
        # heads 1-5 predict the individual digits.
        logits0, logits1, logits2, logits3, logits4, logits5 = svhn_model(
            tf_train_dataset)
        (train_predictions0, train_predictions1, train_predictions2,
         train_predictions3, train_predictions4, train_predictions5) = [
            tf.nn.softmax(head) for head in
            (logits0, logits1, logits2, logits3, logits4, logits5)]

        # Total loss: sum of per-head cross entropies.
        loss = (cross_entropy_loss(logits0, tf_train_lengths) +
                cross_entropy_loss(logits1, tf_train_labels1) +
                cross_entropy_loss(logits2, tf_train_labels2) +
                cross_entropy_loss(logits3, tf_train_labels3) +
                cross_entropy_loss(logits4, tf_train_labels4) +
                cross_entropy_loss(logits5, tf_train_labels5))

        # Adagrad with the learning rate halved every 7500 steps.
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(1e-3, global_step, 7500,
                                                   0.5, staircase=True)
        optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(
            loss, global_step=global_step)

    # BUG FIX: the original opened `with tf.Session(graph=graph):` without
    # binding it, then created a *second* `tf.Session()` on the default
    # graph — every sess.run() would fail to find the ops built in `graph`.
    # Bind the graph-backed session and use it directly.
    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        print('Initialized')
        for step in range(FLAGS.MAX_STEPS):
            start_time = time.time()
            # Cycle through the training set in contiguous batches.
            offset = (step * FLAGS.BATCH_SIZE) % (train_labels.shape[0] -
                                                  FLAGS.BATCH_SIZE)
            batch_data = train_dataset[offset:(offset + FLAGS.BATCH_SIZE), :, :, :]
            batch_lengths = train_lengths[offset:(offset + FLAGS.BATCH_SIZE)]
            batch_labels1 = train_labels[offset:(offset + FLAGS.BATCH_SIZE), 0]
            batch_labels2 = train_labels[offset:(offset + FLAGS.BATCH_SIZE), 1]
            batch_labels3 = train_labels[offset:(offset + FLAGS.BATCH_SIZE), 2]
            batch_labels4 = train_labels[offset:(offset + FLAGS.BATCH_SIZE), 3]
            batch_labels5 = train_labels[offset:(offset + FLAGS.BATCH_SIZE), 4]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_lengths: batch_lengths,
                tf_train_labels1: batch_labels1,
                tf_train_labels2: batch_labels2,
                tf_train_labels3: batch_labels3,
                tf_train_labels4: batch_labels4,
                tf_train_labels5: batch_labels5
            }
            (_, loss_value, predictions0, predictions1, predictions2,
             predictions3, predictions4, predictions5) = sess.run(
                [optimizer, loss, train_predictions0, train_predictions1,
                 train_predictions2, train_predictions3, train_predictions4,
                 train_predictions5],
                feed_dict=feed_dict)
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 50 == 0:
                # Mean accuracy over the six heads (length + 5 digits).
                accuracy_batch = ((accuracy(predictions0, batch_lengths) +
                                   accuracy(predictions1, batch_labels1) +
                                   accuracy(predictions2, batch_labels2) +
                                   accuracy(predictions3, batch_labels3) +
                                   accuracy(predictions4, batch_labels4) +
                                   accuracy(predictions5, batch_labels5)) / 6)
                format_str = (
                    '%s: step %d, loss = %.2f, batch accuracy = %.1f%% (%.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(),
                                    step, loss_value, accuracy_batch,
                                    duration))
def compute_loss(self, true_hm, true_wh, true_reg, reg_mask, ind, true_cls):
    """Compute the four weighted detection loss terms.

    NOTE(review): reads ``self.pred_hm/pred_wh/pred_reg/pred_cls`` built
    elsewhere; weights come from ``cfgs.*_LOSS_WEIGHT``.

    Returns:
        Tuple ``(hm_loss, wh_loss, reg_loss, cls_loss)`` — heatmap focal
        loss, box-size L1 loss, offset L1 loss, and masked cross-entropy
        class loss, each scaled by its configured weight.
    """
    heatmap_term = loss.focal_loss(self.pred_hm, true_hm) * cfgs.HM_LOSS_WEIGHT
    size_term = loss.reg_l1_loss(self.pred_wh, true_wh, ind,
                                 reg_mask) * cfgs.WH_LOSS_WEIGHT
    offset_term = loss.reg_l1_loss(self.pred_reg, true_reg, ind,
                                   reg_mask) * cfgs.REG_LOSS_WEIGHT
    class_term = loss.cross_entropy_loss(self.pred_cls, true_cls,
                                         reg_mask) * cfgs.CLS_LOSS_WEIGHT
    return heatmap_term, size_term, offset_term, class_term