def train(self, model: Seq2Seq, discriminator: Discriminator, src_file_names: List[str],
          tgt_file_names: List[str], unsupervised_big_epochs: int, print_every: int,
          save_every: int, num_words_in_batch: int, max_length: int, teacher_forcing: bool,
          save_file: str="model", n_unsupervised_batches: int=None,
          enable_unsupervised_backtranslation: bool=False):
    """Run the unsupervised (adversarial) training loop.

    For each "big epoch" this builds fresh monolingual batch generators for the
    src and tgt sides, then steps through them in lockstep via ``zip``, calling
    ``self.train_batch`` on each pair. Running loss totals are logged every
    ``print_every`` batches and the model is checkpointed every ``save_every``
    batches (and once more at the end of each big epoch).

    Args:
        model: seq2seq translation model being trained.
        discriminator: adversarial discriminator trained alongside the model.
        src_file_names: monolingual corpus files for the "src" language.
        tgt_file_names: monolingual corpus files for the "tgt" language.
        unsupervised_big_epochs: number of full passes (big epochs) to run.
        print_every: log averaged losses every N batches.
        save_every: checkpoint the model every N batches.
        num_words_in_batch: batch size budget, in words, for BatchGenerator.
        max_length: maximum sentence length accepted by the batch generators.
        teacher_forcing: forwarded to ``self.train_batch``.
        save_file: checkpoint filename prefix (".pt" is appended).
        n_unsupervised_batches: optional cap on batches per big epoch.
        enable_unsupervised_backtranslation: if True, after each big epoch the
            current model is frozen as ``self.current_translation_model`` and
            training continues on a deep copy.
    """
    # Lazily create both optimizers on first call so repeated calls resume
    # with the existing optimizer state.
    if self.main_optimizer is None or self.discriminator_optimizer is None:
        logger.info("Initializing optimizers...")
        # Only parameters with requires_grad are given to Adam (frozen
        # parameters, e.g. fixed embeddings, are excluded).
        self.main_optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                         lr=self.main_lr, betas=self.main_betas)
        self.discriminator_optimizer = optim.RMSprop(discriminator.parameters(), lr=self.discriminator_lr)
    for big_epoch in range(unsupervised_big_epochs):
        # Fresh generators each big epoch; each yields monolingual batches
        # for one language from the raw corpus files.
        src_batch_gen = BatchGenerator(src_file_names, num_words_in_batch, max_len=max_length,
                                       vocabulary=self.vocabulary, language="src",
                                       max_batch_count=n_unsupervised_batches)
        tgt_batch_gen = BatchGenerator(tgt_file_names, num_words_in_batch, max_len=max_length,
                                       vocabulary=self.vocabulary, language="tgt",
                                       max_batch_count=n_unsupervised_batches)
        # NOTE(review): this peeks at the first batch for debug logging.
        # If BatchGenerator.__iter__ returns a one-shot iterator (rather than
        # a fresh one per call), this would consume a batch before the zip
        # loop below — confirm against BatchGenerator's implementation.
        logger.debug("Src batch:" + str(next(iter(src_batch_gen))))
        logger.debug("Tgt batch:" + str(next(iter(tgt_batch_gen))))
        timer = time.time()
        main_loss_total = 0        # running sum of seq2seq losses since last print
        discriminator_loss_total = 0  # running sum of discriminator losses since last print
        epoch = 0                  # batch counter within this big epoch
        # zip stops at the shorter of the two generators, so the number of
        # steps per big epoch is bounded by the smaller corpus.
        for src_batch, tgt_batch in zip(src_batch_gen, tgt_batch_gen):
            model.train()
            discriminator_loss, losses = self.train_batch(model, discriminator, src_batch,
                                                          tgt_batch, teacher_forcing)
            # ``losses`` is a collection of per-objective losses; their sum is
            # the scalar tracked as the main loss.
            main_loss = sum(losses)
            main_loss_total += main_loss
            discriminator_loss_total += discriminator_loss
            # Skip epoch 0 so we never save/print on the very first batch.
            if epoch % save_every == 0 and epoch != 0:
                save_model(model, discriminator, self.main_optimizer,
                           self.discriminator_optimizer, save_file + ".pt")
            if epoch % print_every == 0 and epoch != 0:
                main_loss_avg = main_loss_total / print_every
                discriminator_loss_avg = discriminator_loss_total / print_every
                # Reset running totals so the next report averages only the
                # most recent ``print_every`` batches.
                main_loss_total = 0
                discriminator_loss_total = 0
                diff = time.time() - timer
                timer = time.time()
                # Log a fixed probe sentence both as an autoencoding round
                # trip (src->src) and as a translation (src->tgt).
                translator = Translator(model,
                                        self.vocabulary, self.use_cuda)
                logger.debug("Auto: " + translator.translate_sentence("you can prepare your meals here .", "src", "src"))
                logger.debug("Translated: " + translator.translate_sentence("you can prepare your meals here .", "src", "tgt"))
                logger.info('%s big epoch, %s epoch, %s sec, %.4f main loss, '
                            '%.4f discriminator loss, current losses: %s' % (big_epoch, epoch, diff,
                                                                             main_loss_avg, discriminator_loss_avg,
                                                                             losses))
            epoch += 1
        # Always checkpoint at the end of each big epoch.
        save_model(model, discriminator, self.main_optimizer,
                   self.discriminator_optimizer, save_file + ".pt")
        if enable_unsupervised_backtranslation:
            # Freeze the current model as the translation source for
            # back-translation, and keep training a deep copy of it.
            # NOTE(review): the optimizer still holds references to the OLD
            # model's parameters after deepcopy — presumably re-initialized
            # elsewhere; verify before relying on further updates.
            self.current_translation_model = Translator(model, self.vocabulary, self.use_cuda)
            model = copy.deepcopy(model)
def train_supervised(self, model, discriminator, pair_file_names, vocabulary: Vocabulary, *,
                     num_words_in_batch, big_epochs, max_length, max_batch_count=None,
                     save_every=100, print_every=100, save_file="model"):
    """Run the supervised training loop on parallel (src, tgt) data.

    Iterates ``big_epochs`` times over a BilingualBatchGenerator built from
    ``pair_file_names``, calling ``self.train_supervised_batch`` per batch.
    Losses are averaged and logged every ``print_every`` batches; the model is
    checkpointed every ``save_every`` batches and once more at the end.

    Args:
        model: seq2seq translation model being trained.
        discriminator: not trained here, but included in saved checkpoints.
        pair_file_names: parallel corpus files of src/tgt sentence pairs.
        vocabulary: vocabulary used to build the bilingual batch generator.
            NOTE(review): printing and translation below use
            ``self.vocabulary`` instead of this parameter — presumably the
            two are the same object; confirm at the call site.
        num_words_in_batch: batch size budget, in words.
        big_epochs: number of full passes over the parallel data.
        max_length: maximum sentence length accepted by the generator.
        max_batch_count: optional cap on batches per big epoch.
        save_every: checkpoint every N batches.
        print_every: log averaged loss every N batches.
        save_file: checkpoint filename prefix ("_supervised.pt" is appended).
    """
    if self.main_optimizer is None:
        logger.info("Initializing optimizers...")
        # Only parameters with requires_grad are optimized.
        self.main_optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                         lr=self.main_lr, betas=self.main_betas)
        # The discriminator optimizer is created for checkpoint completeness;
        # no discriminator update is performed in this method.
        self.discriminator_optimizer = optim.RMSprop(discriminator.parameters(), lr=self.discriminator_lr)
    for big_epoch in range(big_epochs):
        # Fresh generator per big epoch over the parallel corpus.
        batch_gen = BilingualBatchGenerator(pair_file_names, max_length, num_words_in_batch,
                                            vocabulary, languages=["src", "tgt"],
                                            max_batch_count=max_batch_count)
        timer = time.time()
        loss_total = 0  # running loss sum since the last print
        epoch = 0       # batch counter within this big epoch
        # NOTE(review): model.train() is set once per big epoch here, whereas
        # the unsupervised loop re-asserts it per batch; harmless unless
        # something inside the loop flips the model to eval mode.
        model.train()
        for src_batch, tgt_batch in batch_gen:
            logger.debug("Src batch: " + str(src_batch))
            logger.debug("Tgt batch: " + str(tgt_batch))
            loss = self.train_supervised_batch(model, src_batch, tgt_batch)
            # Debug dump of the aligned pair in human-readable form.
            Batch.print_pair(src_batch, tgt_batch, self.vocabulary, "src-tgt")
            logger.debug("Loss: " + str(loss))
            loss_total += loss
            # Skip epoch 0 so the very first batch never saves/prints.
            if epoch % save_every == 0 and epoch != 0:
                save_model(model, discriminator, self.main_optimizer,
                           self.discriminator_optimizer, save_file + "_supervised.pt")
            if epoch % print_every == 0 and epoch != 0:
                print_loss_avg = loss_total / print_every
                # Reset so the next report averages only recent batches.
                loss_total = 0
                diff = time.time() - timer
                timer = time.time()
                # Log a fixed probe sentence translated src->tgt.
                translator = Translator(model, self.vocabulary, self.use_cuda)
                logger.debug("Translated: "+ translator.translate_sentence("you can prepare your meals here .",
                                                                           "src", "tgt"))
                logger.info('%s big epoch, %s epoch, %s sec, %.4f main loss' %
                            (big_epoch, epoch, diff, print_loss_avg))
            epoch += 1
        # Always checkpoint at the end of each big epoch.
        save_model(model, discriminator, self.main_optimizer,
                   self.discriminator_optimizer, save_file + "_supervised.pt")