def train_one_epoch(model, optimizer, scheduler, train_loader, logger, args):
    device = torch.device(args.device)
    for data_blob in logger.log_every(train_loader):
        optimizer.zero_grad()
        image1, image2, flow_gt, valid_flow_mask = (x.to(device) for x in data_blob)
        flow_predictions = model(image1, image2, num_flow_updates=args.num_flow_updates)

        loss = utils.sequence_loss(flow_predictions, flow_gt, valid_flow_mask, args.gamma)
        metrics, _ = utils.compute_metrics(flow_predictions[-1], flow_gt, valid_flow_mask)

        metrics.pop("f1")
        logger.update(loss=loss, **metrics)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        optimizer.step()
        scheduler.step()
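# Sketch of what `utils.sequence_loss` above is assumed to compute for RAFT-style
# training: an exponentially weighted sum of per-iteration L1 flow errors, where
# later refinement iterations receive larger weights (gamma < 1). The function name
# and exact weighting/normalization here are illustrative assumptions matching the
# call signature in train_one_epoch, not the library's actual implementation.
import torch


def raft_sequence_loss(flow_preds, flow_gt, valid_mask, gamma=0.8):
    """Weighted sum of L1 losses over the list of iterative flow predictions."""
    num_preds = len(flow_preds)
    total_loss = 0.0
    for i, flow_pred in enumerate(flow_preds):
        weight = gamma ** (num_preds - i - 1)                     # emphasise later iterations
        abs_diff = (flow_pred - flow_gt).abs()                    # per-pixel L1 error, (N, 2, H, W)
        abs_diff = (abs_diff * valid_mask[:, None]).mean(dim=(1, 2, 3))
        total_loss = total_loss + weight * abs_diff
    return total_loss.mean()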
def add_seq2seq(self):
    self.encoder_outputs, self.encoder_states = self.encoder.encoder(
        self.source_embedding, self.source_len, self.keep_prob)
    self.dec_inp_state = self.encoder_states[-1]
    outputs, self.dec_out_state, self.att_dists, self.p_gens = self.decoder.decoder(
        self.decoder_embedding, self.dec_inp_state, self.encoder_outputs, self.source_len)

    with tf.variable_scope('output'):
        w = tf.get_variable('w', [self.hidden_size, self.vcb_size],
                            dtype=tf.float32, initializer=self.init)
        b = tf.get_variable('b', [self.vcb_size],
                            dtype=tf.float32, initializer=self.init)
        vocab_dists = []
        for o in outputs:
            p = tf.nn.xw_plus_b(o, w, b)
            vocab_dists.append(tf.nn.softmax(p))

    final_dists = self.cal_final_dist(vocab_dists, self.att_dists, self.p_gens)
    self.prob = tf.stack(final_dists, axis=1)

    if self.mode == 'train':
        mask = tf.sequence_mask(self.target_len, maxlen=self.tgt_len, dtype=tf.float32)
        self.loss = sequence_loss(final_dists, self.target, mask)
    else:
        topk_probs, topk_ids = tf.nn.top_k(self.prob, self.batch_size * 2)
        self.topk_idx = tf.squeeze(topk_ids, axis=1)
        self.topk_log_prob = tf.log(tf.squeeze(topk_probs, axis=1))
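# Sketch of one possible `cal_final_dist` for the pointer-generator graph above:
# mix each step's vocabulary distribution with its copy (attention) distribution
# using p_gen, scattering attention weights onto the vocabulary ids of the source
# tokens. `source_ids` (the encoder input token ids) is a hypothetical tensor not
# shown in the snippet; this is an assumed implementation, not the author's code.
def cal_final_dist_sketch(vocab_dists, att_dists, p_gens, source_ids,
                          batch_size, vcb_size):
    final_dists = []
    # Build scatter indices (batch index, source token id) once.
    batch_nums = tf.range(0, limit=batch_size)                 # [batch]
    batch_nums = tf.expand_dims(batch_nums, 1)                 # [batch, 1]
    src_len = tf.shape(source_ids)[1]
    batch_nums = tf.tile(batch_nums, [1, src_len])             # [batch, src_len]
    indices = tf.stack((batch_nums, source_ids), axis=2)       # [batch, src_len, 2]
    for vocab_dist, att_dist, p_gen in zip(vocab_dists, att_dists, p_gens):
        # Weight generation vs. copying, then project the copy distribution
        # onto the vocabulary via the source token ids.
        gen_dist = p_gen * vocab_dist
        copy_dist = tf.scatter_nd(indices, (1.0 - p_gen) * att_dist,
                                  [batch_size, vcb_size])
        final_dists.append(gen_dist + copy_dist)
    return final_dists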
def eval(self, batches):
    self.model.eval()
    loss = 0
    for batch in batches:
        logits = self.model(batch.questions, batch.src_lens, batch.actions_in)
        loss += sequence_loss(logits, batch.actions_out, pad_idx=PAD).mean()
    # print(f'loss in validation dataset: {loss / len(batches)}')
    return loss.item() / len(batches)
def train(self, batches):
    self.model.train()
    for i, batch in enumerate(batches):
        logits = self.model(batch.questions, batch.src_lens, batch.actions_in)
        loss = sequence_loss(logits, batch.actions_out, pad_idx=PAD).mean()

        self.optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(
            [p for p in self.model.parameters() if p.requires_grad], self.clip)
        self.optimizer.step()

        self.smooth_loss = loss if self.smooth_loss == 0 else (
            self.smooth_loss * 0.95 + loss * 0.05)
def criterion(logits, targets):
    return sequence_loss(logits, targets, xent_fn=None, pad_idx=PAD)
def criterion(logits, targets):
    return sequence_loss(logits, targets, nll, pad_idx=PAD)
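# Sketch of a token-level `sequence_loss` consistent with the call sites above
# (logits: [batch, seq_len, vocab], targets: [batch, seq_len]); pad positions are
# masked out, and an optional `xent_fn` (e.g. an nll loss on log-probabilities)
# replaces the default cross entropy. This is an assumed reference implementation,
# not the code these snippets actually import.
import torch
import torch.nn.functional as F


def sequence_loss_sketch(logits, targets, xent_fn=None, pad_idx=0):
    mask = targets != pad_idx                              # [batch, seq_len]
    flat_logits = logits.reshape(-1, logits.size(-1))
    flat_targets = targets.reshape(-1)
    if xent_fn is not None:
        # e.g. xent_fn = F.nll_loss when `logits` are already log-probabilities
        losses = xent_fn(flat_logits, flat_targets, reduction='none')
    else:
        losses = F.cross_entropy(flat_logits, flat_targets, reduction='none')
    losses = losses.view_as(targets) * mask                # zero out padding positions
    # Per-sequence mean loss, so callers can take .mean() over the batch.
    return losses.sum(dim=1) / mask.sum(dim=1).clamp(min=1)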
                             padding_idx=vectorizer.char_vocab.mask_index)
optimizer = Adam(generater.parameters(), lr=0.01)

for epoch_index in range(10):
    # Train step
    generater.train()
    loss_epoch = 0
    acc_epoch = 0
    dataset.set_split('train')
    for batch_index, batch_dict in enumerate(
            generate_batches(dataset, batch_size=128)):
        generater.zero_grad()
        y_pred = generater(batch_dict['x_data'])
        loss = utils.sequence_loss(y_pred, batch_dict['y_target'],
                                   vectorizer.char_vocab.mask_index)
        acc = utils.compute_accuracy(y_pred, batch_dict['y_target'],
                                     vectorizer.char_vocab.mask_index)
        loss_epoch += (loss.item() - loss_epoch) / (batch_index + 1)
        acc_epoch += (acc - acc_epoch) / (batch_index + 1)
        loss.backward()
        optimizer.step()

    print("epoch {}: train loss {}, acc: {:.2f}".format(
        epoch_index, loss_epoch, acc_epoch))

    # Validate step
    generater.eval()
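# Sketch of the `utils.compute_accuracy` helper assumed in the loop above:
# token-level accuracy that ignores positions equal to the mask (padding) index.
# The name and the percentage scaling are hypothetical, shown only to make the
# running-average bookkeeping in the training loop concrete.
def compute_accuracy_sketch(y_pred, y_target, mask_index):
    # y_pred: [batch, seq_len, vocab] scores, y_target: [batch, seq_len] token ids
    pred_ids = y_pred.argmax(dim=-1)
    valid = y_target != mask_index
    correct = ((pred_ids == y_target) & valid).sum().item()
    n_valid = valid.sum().item()
    return correct / max(n_valid, 1) * 100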