def train(self):
    self.model.train()
    best_acc = 0.0
    save_model_prefix = os.path.join(self.model_path, self.config.model_prefix)
    for epoch in range(self.num_epochs):
        self.logger.info("Epoch %d/%d" % (epoch + 1, self.num_epochs))
        start_time = time.time()
        for batch in self.train_data_loader:
            output = self.model(MyDataset.to(batch, self.config.device))
            # clear stale gradients before the backward pass
            self.model.zero_grad()
            loss = self._calc_loss(output, batch)
            loss.backward()
            self.optimizer.step()
            self.writer.add_scalar("scalar/loss", loss.item(), epoch)
        time_diff = time.time() - start_time
        self.logger.info("epoch %d time consumed: %dm%ds." % (epoch + 1, time_diff // 60, time_diff % 60))
        # evaluate on the dev set, then restore training mode
        cur_acc = self.eval_dev(self.dev_data_loader)
        self.model.train()
        self.logger.info("Current accuracy: %.3f" % cur_acc)
        self.writer.add_scalar("scalar/accuracy", cur_acc, epoch)
        # save a checkpoint only when dev accuracy improves on the best so far
        if cur_acc > best_acc:  # and epoch > 10:
            save_filename = save_model_prefix + str(cur_acc)
            torch.save(self.model.state_dict(), save_filename)
            best_acc = cur_acc
def eval_dev(self, dev_data_loader):
    self.model.eval()
    correct_count = 0
    total_count = 0
    # no gradients are needed during evaluation
    with torch.no_grad():
        for batch in dev_data_loader:
            output = self.model(MyDataset.to(batch, self.config.device))
            # predicted answer is the index of the highest-scoring option
            pred = torch.argmax(output, 1)
            correct_count += (pred.cpu().numpy() == batch['answer_index'].numpy()).sum()
            total_count += len(batch['query_length'])
    return float(correct_count) / total_count
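# Note: _calc_loss is called in train() but not shown in this section. The sketch
# below is an assumed implementation, not the original: it takes the model output
# to be per-option logits and batch['answer_index'] to be the gold option index
# (matching how eval_dev computes accuracy), and applies standard cross-entropy.
def _calc_loss(self, output, batch):
    # cross-entropy between the logits and the gold answer indices
    target = batch['answer_index'].to(self.config.device)
    return torch.nn.functional.cross_entropy(output, target)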