Example #1
def train_one_epoch(model, optimizer, scheduler, train_loader, logger, args):
    device = torch.device(args.device)
    for data_blob in logger.log_every(train_loader):

        optimizer.zero_grad()

        image1, image2, flow_gt, valid_flow_mask = (x.to(device)
                                                    for x in data_blob)
        flow_predictions = model(image1,
                                 image2,
                                 num_flow_updates=args.num_flow_updates)

        loss = utils.sequence_loss(flow_predictions, flow_gt, valid_flow_mask,
                                   args.gamma)
        metrics, _ = utils.compute_metrics(flow_predictions[-1], flow_gt,
                                           valid_flow_mask)

        metrics.pop("f1")
        logger.update(loss=loss, **metrics)

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        optimizer.step()
        scheduler.step()
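
In this example the body of `utils.sequence_loss` is not shown. In RAFT-style optical-flow training it is typically an exponentially weighted sum of per-iteration flow errors, where `gamma` down-weights earlier refinement iterations. A minimal sketch of that idea (an illustrative assumption, not necessarily the helper used by this training script) could look like this:

import torch

def sequence_loss_sketch(flow_preds, flow_gt, valid_mask, gamma=0.8):
    # Hypothetical illustration: weight the masked L1 error of each intermediate
    # flow prediction by gamma ** (N - 1 - i), so the final prediction counts most.
    num_preds = len(flow_preds)
    total = 0.0
    for i, pred in enumerate(flow_preds):
        weight = gamma ** (num_preds - 1 - i)
        l1 = (pred - flow_gt).abs().mean(dim=1)   # per-pixel L1 over the 2 flow channels
        total = total + weight * (l1 * valid_mask).mean()
    return total
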
Example #2
    def add_seq2seq(self):
        self.encoder_outputs, self.encoder_states = self.encoder.encoder(self.source_embedding,
                                                                         self.source_len,
                                                                         self.keep_prob)
        self.dec_inp_state = self.encoder_states[-1]
        outputs, self.dec_out_state, self.att_dists, self.p_gens = self.decoder.decoder(
            self.decoder_embedding,
            self.dec_inp_state,
            self.encoder_outputs,
            self.source_len)

        with tf.variable_scope('output'):
            w = tf.get_variable('w', [self.hidden_size, self.vcb_size], dtype=tf.float32, initializer=self.init)
            b = tf.get_variable('b', [self.vcb_size], dtype=tf.float32, initializer=self.init)
            vocab_dists = []
            for o in outputs:
                p = tf.nn.xw_plus_b(o, w, b)
                vocab_dists.append(tf.nn.softmax(p))

            final_dists = self.cal_final_dist(vocab_dists, self.att_dists, self.p_gens)
            self.prob = tf.stack(final_dists, axis=1)

            if self.mode == 'train':
                mask = tf.sequence_mask(self.target_len, maxlen=self.tgt_len, dtype=tf.float32)
                self.loss = sequence_loss(final_dists, self.target, mask)
            else:
                topk_probs, topk_ids = tf.nn.top_k(self.prob, self.batch_size * 2)
                self.topk_idx = tf.squeeze(topk_ids, axis=1)
                self.topk_log_prob = tf.log(tf.squeeze(topk_probs, axis=1))
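
This example depends on `self.cal_final_dist`, whose body is not shown. In a pointer-generator decoder that step usually mixes the softmax vocabulary distribution with the copy (attention) distribution via the generation probability p_gen. A rough sketch of that mixing, under the simplifying assumption that both distributions are already defined over the same (extended) vocabulary, might be:

def cal_final_dist_sketch(vocab_dists, att_dists, p_gens):
    # Hypothetical illustration of the pointer-generator mixture per decoder step:
    # final = p_gen * P_vocab + (1 - p_gen) * P_copy
    return [p * v + (1.0 - p) * a
            for v, a, p in zip(vocab_dists, att_dists, p_gens)]
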
Example #3
 def eval(self, batches):
     self.model.eval()
     loss = 0
     # run validation under no_grad so the accumulated losses do not keep autograd graphs alive
     with torch.no_grad():
         for batch in batches:
             logits = self.model(batch.questions, batch.src_lens, batch.actions_in)
             loss += sequence_loss(logits, batch.actions_out, pad_idx=PAD).mean()
     # print(f'loss in validation dataset: {loss / len(batches)}')
     return loss.item() / len(batches)
Example #4
 def train(self, batches):
     self.model.train()
     for i, batch in enumerate(batches):
         logits = self.model(batch.questions, batch.src_lens, batch.actions_in)
         loss = sequence_loss(logits, batch.actions_out, pad_idx=PAD).mean()
         self.optimizer.zero_grad()
         loss.backward()
         nn.utils.clip_grad_norm_([p for p in self.model.parameters() if p.requires_grad], self.clip)
         self.optimizer.step()
         # exponential moving average of the loss; detach so it does not keep the autograd graph alive
         self.smooth_loss = loss.detach() if self.smooth_loss == 0 else \
             self.smooth_loss * 0.95 + loss.detach() * 0.05
Example #5
 def criterion(logits, targets):
     return sequence_loss(logits, targets, xent_fn=None, pad_idx=PAD)
Example #6
 def criterion(logits, targets):
     return sequence_loss(logits, targets, nll, pad_idx=PAD)
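
Examples #3 through #6 call a `sequence_loss(logits, targets, xent_fn=None, pad_idx=PAD)` helper whose implementation is not part of these snippets. Such a helper typically flattens the time dimension, applies token-level cross-entropy (or a user-supplied `xent_fn`), and drops padded positions before reduction. A minimal PyTorch sketch along those lines (names and defaults are illustrative assumptions, not the library's actual code) might be:

import torch
import torch.nn.functional as F

def sequence_loss_sketch(logits, targets, xent_fn=None, pad_idx=0):
    # logits: (batch, seq_len, vocab) scores, targets: (batch, seq_len) token ids
    mask = targets != pad_idx                 # keep only non-padded positions
    logit = logits[mask]                      # (n_valid, vocab)
    target = targets[mask]                    # (n_valid,)
    if xent_fn is not None:
        # a custom criterion, e.g. an NLL over log-probabilities
        return xent_fn(logit, target)
    return F.cross_entropy(logit, target, reduction='none')

The per-token losses are then reduced by the caller, e.g. with `.mean()` as in Examples #3 and #4.
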
Example #7
    padding_idx=vectorizer.char_vocab.mask_index)
optimizer = Adam(generater.parameters(), lr=0.01)

for epoch_index in range(10):
    # Train step
    generater.train()
    loss_epoch = 0
    acc_epoch = 0
    dataset.set_split('train')
    for batch_index, batch_dict in enumerate(
            generate_batches(dataset, batch_size=128)):
        generater.zero_grad()

        y_pred = generater(batch_dict['x_data'])

        loss = utils.sequence_loss(y_pred, batch_dict['y_target'],
                                   vectorizer.char_vocab.mask_index)
        acc = utils.compute_accuracy(y_pred, batch_dict['y_target'],
                                     vectorizer.char_vocab.mask_index)

        loss_epoch += (loss.item() - loss_epoch) / (batch_index + 1)
        acc_epoch += (acc - acc_epoch) / (batch_index + 1)

        loss.backward()

        optimizer.step()

    print("epoch {}: train loss {}, acc: {:.2f}".format(
        epoch_index, loss_epoch, acc_epoch))

    # Validate step
    generater.eval()
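
Example #7's `utils.compute_accuracy` also receives the mask index, suggesting a token-level accuracy that ignores masked/padded positions. A possible sketch (an illustrative assumption, not the original helper) is:

def compute_accuracy_sketch(y_pred, y_true, mask_index):
    # y_pred: (batch, seq_len, vocab) scores, y_true: (batch, seq_len) token ids
    preds = y_pred.argmax(dim=-1)
    valid = y_true != mask_index              # ignore masked positions
    correct = (preds == y_true) & valid
    return correct.sum().item() / valid.sum().item() * 100  # percentage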