def train_step(self, X_train, y_train, state):
    y_preds = []
    caches = []
    loss = 0.

    # Forward
    for x, y_true in zip(X_train, y_train):
        y, state, cache = self.forward(x, state, train=True)
        loss += loss_fun.cross_entropy(self.model, y, y_true, lam=0)
        y_preds.append(y)
        caches.append(cache)

    loss /= X_train.shape[0]

    # Backward
    dh_next = np.zeros((1, self.H))
    dc_next = np.zeros((1, self.H))
    d_next = (dh_next, dc_next)

    grads = {k: np.zeros_like(v) for k, v in self.model.items()}

    for y_pred, y_true, cache in reversed(list(zip(y_preds, y_train, caches))):
        grad, d_next = self.backward(y_pred, y_true, d_next, cache)
        for k in grads.keys():
            grads[k] += grad[k]

    # Clip gradients to keep them in [-5, 5]
    for k, v in grads.items():
        grads[k] = np.clip(v, -5., 5.)

    return grads, loss, state
def train(self, x_train, y_train):
    y_pred, cache = self.forward(x_train)
    loss = cross_entropy(y_pred, y_train)
    grad = self.backward(y_pred, y_train, cache)
    return grad, loss
def add_loss_layer(self, layer_name, prediction_layer_id, ground_truth_layer_id, loss_type):
    """
    Adds a layer corresponding to the loss function.

    :param layer_name: The name of the layer. Type=string
    :param prediction_layer_id: The identifier for the prediction layer
    :param ground_truth_layer_id: The identifier for the ground truth layer
    :param loss_type: The loss function to use. Available options defined by LossTypes.
    :return: None
    """
    layer_id = self._get_layer_id(layer_name)
    assert self._layer_verifier(layer_id), \
        'Invalid: This layer is already present.'
    assert not self._layer_verifier(prediction_layer_id), \
        'Invalid: Prediction layer id is invalid.'
    assert not self._layer_verifier(ground_truth_layer_id), \
        'Invalid: Ground truth layer id is invalid.'

    output = self.layers[prediction_layer_id]
    ground_truth = self.layers[ground_truth_layer_id]
    if loss_type == LossTypes.mse:
        self.layers[layer_id] = mse(ground_truth, output)
    elif loss_type == LossTypes.cross_entropy:
        self.layers[layer_id] = cross_entropy(ground_truth, output)
    else:
        raise ValueError('The type of loss can only be one of ["mse", "cross_entropy"]')
def train_step(self, X_train, y_train, h):
    ys = []
    caches = []
    loss = 0.

    # Forward
    for x, y in zip(X_train, y_train):
        y_pred, h, cache = self.forward(x, h, train=True)
        loss += loss_fun.cross_entropy(self.model, y_pred, y, lam=0)
        ys.append(y_pred)
        caches.append(cache)

    loss /= X_train.shape[0]

    # Backward
    dh_next = np.zeros((1, self.H))
    grads = {k: np.zeros_like(v) for k, v in self.model.items()}

    for t in reversed(range(len(X_train))):
        grad, dh_next = self.backward(ys[t], y_train[t], dh_next, caches[t])
        for k in grads.keys():
            grads[k] += grad[k]

    # Clip gradients to keep them in [-5, 5]
    for k, v in grads.items():
        grads[k] = np.clip(v, -5., 5.)

    return grads, loss, h
def train(self, training_data, training_label, batch_size, epoch, weights_file):
    total_acc = 0
    for e in range(epoch):
        for batch_index in range(0, training_data.shape[0], batch_size):
            # batch input
            if batch_index + batch_size < training_data.shape[0]:
                data = training_data[batch_index:batch_index + batch_size]
                label = training_label[batch_index:batch_index + batch_size]
            else:
                data = training_data[batch_index:training_data.shape[0]]
                label = training_label[batch_index:training_label.shape[0]]
            loss = 0
            acc = 0
            start_time = time.time()
            for b in range(len(data)):
                x = data[b]
                y = label[b]
                # forward pass
                for l in range(self.lay_num):
                    output = self.layers[l].forward(x)
                    x = output
                loss += cross_entropy(output, y)
                if np.argmax(output) == np.argmax(y):
                    acc += 1
                    total_acc += 1
                # backward pass
                dy = y
                for l in range(self.lay_num - 1, -1, -1):
                    dout = self.layers[l].backward(dy)
                    dy = dout
            # estimate remaining time from the duration of this batch
            end_time = time.time()
            batch_time = end_time - start_time
            remain_time = (training_data.shape[0] * epoch - batch_index -
                           training_data.shape[0] * e) / batch_size * batch_time
            hrs = int(remain_time / 3600)
            mins = int(remain_time / 60 - hrs * 60)
            secs = int(remain_time - mins * 60 - hrs * 3600)
            # result
            loss /= batch_size
            batch_acc = float(acc) / float(batch_size)
            training_acc = float(total_acc) / float(
                (batch_index + batch_size) * (e + 1))
            print('=== Epoch: {0:d}/{1:d} === Iter:{2:d} === Loss: {3:.2f} === '
                  'BAcc: {4:.2f} === TAcc: {5:.2f} === '
                  'Remain: {6:d} Hrs {7:d} Mins {8:d} Secs ==='.format(
                      e, epoch, batch_index + batch_size, loss, batch_acc,
                      training_acc, hrs, mins, secs))
        # dump weights and bias
        obj = []
        for i in range(self.lay_num):
            cache = self.layers[i].extract()
            obj.append(cache)
        with open(weights_file, 'wb') as handle:
            pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)
def loss(self, X, mask=None, flank=0, Z=None):
    if Z is None:
        Z = self.transform(self.noise(X), mask=mask)
    E = self.emit(Z)
    L = cross_entropy(E, X)
    C = confusion(T.argmax(E, axis=-1), X, E.shape[-1])
    if mask is not None:
        L *= T.shape_padright(mask)
        C *= T.shape_padright(T.shape_padright(mask))
    n = X.shape[0]
    return L[flank:n - flank], C[flank:n - flank]
def train(iteration):
    model.train()
    avg_ans_loss = 0
    avg_att_loss = 0
    avg_sem_loss = 0
    for batch_idx, (img, bbox, que, ans, op, att) in enumerate(trainloader):
        img, bbox, que, ans, op, att = (Variable(img), Variable(bbox),
                                        Variable(que), Variable(ans),
                                        Variable(op), Variable(att))
        img, bbox, que, ans, op, att = (img.cuda(), bbox.cuda(), que.cuda(),
                                        ans.cuda(), op.cuda(), att.cuda())
        optimizer.zero_grad()

        # different training objectives
        output, pred_op, pred_att = model(img, bbox, que)
        ans_mask, att_mask = get_mask(op)
        ans_loss = cross_entropy(output, ans)
        att_loss = attention_loss_mask_kld(pred_att, att, att_mask)
        sem_loss = semantic_loss(pred_op, op)
        loss = ans_loss + att_loss * args.alpha * max(
            (1 + np.cos(np.pi * (iteration / 300000))), 0) + args.beta * sem_loss
        # originally 0.5, 300000
        loss.backward()
        if not args.clip == 0:
            clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        # running averages of the three loss terms
        avg_ans_loss = (avg_ans_loss * np.maximum(0, batch_idx) +
                        ans_loss.data.cpu().numpy()) / (batch_idx + 1)
        avg_att_loss = (avg_att_loss * np.maximum(0, batch_idx) +
                        att_loss.data.cpu().numpy()) / (batch_idx + 1)
        avg_sem_loss = (avg_sem_loss * np.maximum(0, batch_idx) +
                        sem_loss.data.cpu().numpy()) / (batch_idx + 1)

        if batch_idx % 25 == 0:
            with tf_summary_writer.as_default():
                tf.summary.scalar('answer loss', avg_ans_loss, step=iteration)
                tf.summary.scalar('step attention loss', avg_att_loss, step=iteration)
                tf.summary.scalar('semantic loss', avg_sem_loss, step=iteration)
        iteration += 1
    return iteration
def forward(self, enc_outputs, enc_lengths, src_ids, tgt_ids, label_smooth=0):
    bz = enc_outputs.shape[0]
    if bz != src_ids.shape[0]:
        raise ValueError("enc_outputs does not match src_ids.")
    encout_max_length = enc_outputs.shape[1]
    dec_max_length = src_ids.shape[1]
    att_masks = (1 - get_seq_mask_by_shape(
        encout_max_length, dec_max_length, enc_lengths).transpose(1, 2)).byte()

    rnn_in = self.emb(src_ids)
    rnn_in = self.dropout(rnn_in)
    rnn = self.rnns[0]
    rnn_output, _ = rnn(rnn_in)
    for l in range(1, self.num_layers):
        att_scores, att = self.attentions[l - 1](enc_outputs, rnn_output,
                                                 enc_outputs, mask=att_masks)
        rnn_in = torch.cat([rnn_output, att], dim=-1)
        rnn_in = self.dropout(rnn_in)
        rnn_output, _ = self.rnns[l](rnn_in)

    rnn_output = self.dropout(rnn_output)
    logits = self.output_affine(rnn_output)
    ce = cross_entropy(logits.view(-1, logits.size(-1)), tgt_ids.view(-1))
    if label_smooth > 0:
        ls = uniform_label_smooth_regulerizer(logits.view(-1, logits.size(-1)),
                                              tgt_ids.view(-1))
        loss = (1 - label_smooth) * ce + label_smooth * ls
    else:
        loss = ce
    return loss
class embnet(module.Module):
    def __init__(self):
        self.embed = nn.Embedding(3, 4)
        self.embed.embed_w.const = em
        self.fc3 = nn.rnn((20, 2))

    def forward(self, x):
        y1 = self.embed(x)
        y2 = reshape(y1, [-1, 4 * 5])
        y = self.fc3(y2)
        return y


x = node.Node("x")
labels = node.Node("label")
c = embnet()
eloss = loss.cross_entropy(c(x), labels)

a = [np.array([0, 2, 1, 2, 1]).reshape(1, 5),
     np.array([0, 2, 1, 2, 2]).reshape(1, 5)]
b = [np.array([1, 0]).reshape(1, 2),
     np.array([0, 1]).reshape(1, 2)]

optimizer = opt.SGD(eloss, c.parameters())
for epoch in range(10):
    for batch in range(2):
        optimizer.step(feed_dict={x: a[batch], labels: b[batch]})
        print(optimizer.parameters[1].const, optimizer.parameters[0].const)
exit()


class mynet(module.Module):
    def __init__(self):
        self.conv1 = nn.Conv2d(filter_shapes=(1, 6, 5, 5), padding=(2, 2), stride=(1, 1))
        self.pool1 = nn.MaxPool(ksize=(2, 2), padding=(0, 0), stride=(2, 2))
        self.conv2 = nn.Conv2d(filter_shapes=(6, 16, 5, 5), padding=(0, 0), stride=(1, 1))
        self.pool2 = nn.MaxPool(ksize=(2, 2), padding=(0, 0), stride=(2, 2))
if args.tf_model_type == 'capsule-A':
    poses, activations = capsule_model_A(X_embedding, args.num_classes)
if args.tf_model_type == 'capsule-B':
    poses, activations = capsule_model_B(X_embedding, args.num_classes)
if args.tf_model_type == 'CNN':
    poses, activations = baseline_model_cnn(X_embedding, args.num_classes)
if args.tf_model_type == 'KIMCNN':
    poses, activations = baseline_model_kimcnn(X_embedding, args.max_sent,
                                               args.num_classes)

if args.tf_loss_type == 'spread_loss':
    loss = spread_loss(y, activations, margin)
if args.tf_loss_type == 'margin_loss':
    loss = margin_loss(y, activations)
if args.tf_loss_type == 'cross_entropy':
    loss = cross_entropy(y, activations)

y_pred = tf.argmax(activations, axis=1, name="y_proba")
correct = tf.equal(tf.argmax(y, axis=1), y_pred, name="correct")
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

# tf.summary.scalar('accuracy', accuracy)
# merged = tf.summary.merge_all()
# writer = tf.summary.FileWriter('/tmp/writer_log')

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss, name="training_op")

gradients, variables = zip(*optimizer.compute_gradients(loss))
grad_check = [
    tf.check_numerics(g, message='Gradient NaN Found!')
    for g in gradients if g is not None
]
def loss(self, Yh, Y):
    return cross_entropy(Yh, Y)
def main(_):
    # Create the input placeholder
    x_image = tf.placeholder(
        tf.float32, [batch_size, input_res, input_res, input_channels])

    # Define loss and optimizer
    y_image_ = tf.placeholder(tf.float32, [batch_size, input_res, input_res, 1])

    y_image, mode_training = model.make_unet(x_image=x_image)

    # Build the objective loss function as well as the accuracy parts of the graph
    total_loss = loss.cross_entropy(y_image, y_image_)
    tf.summary.scalar('total_loss', total_loss)

    global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
    learning_rate = tf.train.piecewise_constant(global_step,
                                                LEARNING_RATE_PARAMS["boundaries"],
                                                LEARNING_RATE_PARAMS["values"])
    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(
            total_loss, global_step=global_step)

    # Summaries
    merged_summaries = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(os.path.join(LOGS_DIR, "train"),
                                         tf.get_default_graph())
    val_writer = tf.summary.FileWriter(os.path.join(LOGS_DIR, "val"),
                                       tf.get_default_graph())

    # Dataset
    train_dataset_filename = os.path.join(TFRECORDS_DIR, "train.tfrecord")
    train_images, train_polygons, train_raster_polygons = dataset.read_and_decode(
        train_dataset_filename, input_res, output_vertex_count, batch_size,
        INPUT_DYNAMIC_RANGE)
    val_dataset_filename = os.path.join(TFRECORDS_DIR, "val.tfrecord")
    val_images, val_polygons, val_raster_polygons = dataset.read_and_decode(
        val_dataset_filename, input_res, output_vertex_count, batch_size,
        INPUT_DYNAMIC_RANGE, augment_dataset=False)

    # Savers
    saver = tf.train.Saver()

    # The op for initializing the variables.
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    with tf.Session() as sess:
        sess.run(init_op)

        # Restore checkpoint if one exists
        checkpoint = tf.train.get_checkpoint_state(CHECKPOINTS_DIR)
        if checkpoint and checkpoint.model_checkpoint_path:
            # First check if the whole model has a checkpoint
            print("Restoring {} checkpoint {}".format(
                model_name, checkpoint.model_checkpoint_path))
            saver.restore(sess, checkpoint.model_checkpoint_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        init_plots()

        print("Model has {} trainable variables".format(
            tf_utils.count_number_trainable_params()))

        i = tf.train.global_step(sess, global_step)
        while i <= max_iter:
            train_image_batch, train_polygon_batch, train_raster_polygon_batch = sess.run(
                [train_images, train_polygons, train_raster_polygons])
            if i % train_loss_accuracy_steps == 0:
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                train_summary, _, train_loss, train_y_image = sess.run(
                    [merged_summaries, train_step, total_loss, y_image],
                    feed_dict={
                        x_image: train_image_batch,
                        y_image_: train_raster_polygon_batch,
                        mode_training: True
                    },
                    options=run_options,
                    run_metadata=run_metadata)
                train_writer.add_summary(train_summary, i)
                train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
                print('step %d, training loss = %g' % (i, train_loss))
                plot_results(1, train_image_batch, train_polygon_batch, train_y_image)
            else:
                _ = sess.run(
                    [train_step],
                    feed_dict={
                        x_image: train_image_batch,
                        y_image_: train_raster_polygon_batch,
                        mode_training: True
                    })

            # Measure validation loss and accuracy
            if i % val_loss_accuracy_steps == 1:
                val_image_batch, val_polygon_batch, val_raster_polygon_batch = sess.run(
                    [val_images, val_polygons, val_raster_polygons])
                val_summary, val_loss, val_y_image = sess.run(
                    [merged_summaries, total_loss, y_image],
                    feed_dict={
                        x_image: val_image_batch,
                        y_image_: val_raster_polygon_batch,
                        mode_training: True
                    })
                val_writer.add_summary(val_summary, i)
                print('step %d, validation loss = %g' % (i, val_loss))
                plot_results(2, val_image_batch, val_polygon_batch, val_y_image)

            # Save checkpoint
            if i % checkpoint_steps == (checkpoint_steps - 1):
                saver.save(sess, os.path.join(CHECKPOINTS_DIR, model_name),
                           global_step=global_step)

            i = tf.train.global_step(sess, global_step)

        coord.request_stop()
        coord.join(threads)
        train_writer.close()
        val_writer.close()
def main(_):
    # Create the input placeholder
    x_image = tf.placeholder(tf.float32,
                             [batch_size, input_res, input_res, input_channels])

    # Define loss and optimizer
    y_image_ = tf.placeholder(tf.float32, [batch_size, input_res, input_res, 1])

    y_image, mode_training = model.make_unet(x_image=x_image)
    total_loss = loss.cross_entropy(y_image, y_image_)

    # Dataset
    test_dataset_filename = os.path.join(TFRECORDS_DIR, "test.tfrecord")
    test_images, test_polygons, test_raster_polygons = dataset.read_and_decode(
        test_dataset_filename, input_res, output_vertex_count, batch_size,
        INPUT_DYNAMIC_RANGE, augment_dataset=False)

    # Saver
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Restore checkpoint if one exists
        checkpoint = tf.train.get_checkpoint_state(CHECKPOINTS_DIR)
        if checkpoint and checkpoint.model_checkpoint_path:
            # First check if the whole model has a checkpoint
            print("Restoring {} checkpoint {}".format(
                model_name, checkpoint.model_checkpoint_path))
            saver.restore(sess, checkpoint.model_checkpoint_path)
        else:
            print("No checkpoint was found, exiting...")
            exit()

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        test_image_batch, test_polygon_batch, test_raster_polygon_batch = sess.run(
            [test_images, test_polygons, test_raster_polygons])
        test_loss, test_y_image_batch = sess.run(
            [total_loss, y_image],
            feed_dict={
                x_image: test_image_batch,
                y_image_: test_raster_polygon_batch,
                mode_training: True
            })
        print("Test loss= {}".format(test_loss))

        # Threshold output
        test_raster_polygon_batch = 0.5 < test_raster_polygon_batch
        test_y_image_batch = 0.5 < test_y_image_batch

        # Polygonize
        print("Polygonizing...")
        y_coord_batch_list = []
        for test_raster_polygon, test_y_image in zip(test_raster_polygon_batch,
                                                     test_y_image_batch):
            test_raster_polygon = test_raster_polygon[:, :, 0]
            test_y_image = test_y_image[:, :, 0]
            # Select only one blob
            seed = np.logical_and(test_raster_polygon, test_y_image)
            test_y_image = skimage.morphology.reconstruction(
                seed, test_y_image, method='dilation', selem=None, offset=None)
            # Vectorize
            test_y_coords = polygon_utils.raster_to_polygon(test_y_image,
                                                            output_vertex_count)
            y_coord_batch_list.append(test_y_coords)
        y_coord_batch = np.array(y_coord_batch_list)

        # Normalize
        y_coord_batch = y_coord_batch / input_res

        if not os.path.exists(SAVE_DIR):
            os.makedirs(SAVE_DIR)
        save_results(test_image_batch, test_polygon_batch, y_coord_batch, SAVE_DIR)

        coord.request_stop()
        coord.join(threads)
if args.model_type == 'capsule-A':
    poses, activations = capsule_model_A(X_embedding, args.num_classes)
if args.model_type == 'capsule-B':
    poses, activations = capsule_model_B(X_embedding, args.num_classes)
if args.model_type == 'CNN':
    poses, activations = baseline_model_cnn(X_embedding, args.num_classes)
if args.model_type == 'KIMCNN':
    poses, activations = baseline_model_kimcnn(X_embedding, args.max_sent,
                                               args.num_classes)

if args.loss_type == 'spread_loss':
    loss = spread_loss(y, activations, margin)
if args.loss_type == 'margin_loss':
    loss = margin_loss(y, activations)
if args.loss_type == 'cross_entropy':
    loss = cross_entropy(y, activations)

y_pred = tf.argmax(activations, axis=1, name="y_proba")
correct = tf.equal(tf.argmax(y, axis=1), y_pred, name="correct")
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss, name="training_op")

gradients, variables = zip(*optimizer.compute_gradients(loss))
grad_check = ([tf.check_numerics(g, message='Gradient NaN Found!')
               for g in gradients if g is not None] +
              [tf.check_numerics(loss, message='Loss NaN Found')])
with tf.control_dependencies(grad_check):
    training_op = optimizer.apply_gradients(zip(gradients, variables),
                                            global_step=global_step)
def loss(self, x, t):
    z = self.predict(x)
    y = softmax(z)
    loss = cross_entropy(y, t)
    return loss
def loss(self, x, t):
    y = self.predict(x)
    return cross_entropy(y, t)
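None of the snippets above include the cross_entropy helper they call. As a point of reference, a minimal NumPy sketch of the softmax plus cross-entropy pattern used in the last two examples might look like the following; the clip_eps argument and the one-hot convention for t are assumptions for illustration, not taken from any of the repositories above.

import numpy as np

def softmax(z):
    # Subtract the row-wise max before exponentiating for numerical stability.
    z = z - np.max(z, axis=-1, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=-1, keepdims=True)

def cross_entropy(y, t, clip_eps=1e-12):
    # y: predicted probabilities, shape (batch, classes)
    # t: one-hot ground-truth labels with the same shape (assumed convention)
    y = np.clip(y, clip_eps, 1.0)                  # avoid log(0)
    return -np.sum(t * np.log(y)) / y.shape[0]     # mean negative log-likelihood

Frameworks differ on argument order and on whether the function expects logits or probabilities, integer labels or one-hot targets, which is why the call sites above vary; check the library in question before reusing this sketch.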