def train_one_epoch(self):
    """Run one supervised training epoch and return the mean batch loss.

    For each minibatch: pads source/target sequences, teacher-forces the
    decoder starting from ``<s>`` tokens, backpropagates the loss, clips
    gradients, and steps the encoder/decoder optimizers.
    """
    FloatTensor, LongTensor, ByteTensor = utils.get_pytorch_tensors()
    losses = []
    s_w2i = self.s_w2i
    t_w2i = self.t_w2i
    for i, batch in enumerate(
            utils.get_batch(self.args.batch_size, self.train_data)):
        srcs, tgts, s_len, t_len = utils.pad_to_batch(batch, s_w2i, t_w2i)
        # One <s> start token per example: shape (1, B) -> (B, 1).
        start_decode = Variable(LongTensor([[t_w2i['<s>']] * tgts.size(0)
                                            ])).transpose(0, 1)
        self.encoder.zero_grad()
        self.decoder.zero_grad()
        output, hidden_c = self.encoder(srcs, s_len)
        preds = self.decoder(start_decode, hidden_c, tgts.size(1),
                             output, True)
        loss = self.loss_function(preds, tgts.view(-1))
        losses.append(loss.data.tolist()[0])
        loss.backward()
        # Use the in-place clip_grad_norm_ for consistency with the other
        # epoch trainers in this file; the underscore-less form is deprecated.
        torch.nn.utils.clip_grad_norm_(self.encoder.parameters(), 50.0)
        torch.nn.utils.clip_grad_norm_(self.decoder.parameters(), 50.0)
        self.enc_optim.step()
        self.dec_optim.step()
    return np.mean(losses)
def train_one_epoch_autoencoder(self, obj):
    """Run one denoising-autoencoder epoch for language *obj* ('src'/'tgt').

    Each sentence is noised with ``sent_noise.run`` and the model is trained
    to reconstruct the clean original, using the single embedder for *obj*
    on both the encoder and decoder sides. Returns the mean batch loss.
    """
    print('objective: %s' % obj)
    non_obj = 'src' if obj == 'tgt' else 'tgt'
    w2i = self.converters[obj]['w2i']
    embedder = self.embedders[obj]
    embedder_optim = self.optims[obj]
    losses = []
    for batch in tqdm(self.train_dataloader):
        # Noisy input -> clean reconstruction target (same language).
        org_batch = copy.deepcopy(batch)
        batch[obj] = [sent_noise.run(s) for s in org_batch[obj]]
        batch[non_obj] = org_batch[obj]
        # Convert token strings to ids and pad.
        batch = utils.prepare_batch(batch, w2i, w2i)
        inputs, targets, input_lengths, target_lengths =\
            utils.pad_to_batch(batch, w2i, w2i)
        start_decode =\
            Variable(LT([[w2i['<s>']] * inputs.size(0)])).transpose(0, 1)
        self.encoder.zero_grad()
        self.decoder.zero_grad()
        embedder.zero_grad()
        if self.args.use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()
            start_decode = start_decode.cuda()
        output, hidden_c = self.encoder(embedder, inputs, input_lengths)
        preds = self.decoder(embedder, start_decode, hidden_c,
                             targets.size(1), output, None, True)
        loss = self.loss_func(preds, targets.view(-1))
        losses.append(loss.data[0])
        loss.backward()
        # In-place clip_grad_norm_ for consistency with train_one_epoch;
        # the underscore-less form is deprecated.
        nn.utils.clip_grad_norm_(self.encoder.parameters(), 50.0)
        nn.utils.clip_grad_norm_(self.decoder.parameters(), 50.0)
        self.enc_optim.step()
        self.dec_optim.step()
        embedder_optim.step()
    return np.mean(losses)
def train_one_epoch_translator(self, _from='src', _to='tgt'):
    """Run one supervised translation epoch from *_from* to *_to*.

    Uses separate embedders for source and target languages and steps the
    encoder, decoder, and both embedder optimizers. Returns the mean batch
    loss (previously this method returned None — the missing return made
    it inconsistent with the other epoch trainers).
    """
    print('%s -> %s' % (_from, _to))
    sw2i = self.converters[_from]['w2i']
    tw2i = self.converters[_to]['w2i']
    src_embedder = self.embedders[_from]
    tgt_embedder = self.embedders[_to]
    src_embedder_optim = self.optims['src']
    tgt_embedder_optim = self.optims['tgt']
    losses = []
    for batch in tqdm(self.train_dataloader):
        batch = utils.prepare_batch(batch, sw2i, tw2i)
        inputs, targets, input_lengths, target_lengths =\
            utils.pad_to_batch(batch, sw2i, tw2i)
        start_decode =\
            Variable(LT([[tw2i['<s>']] * targets.size(0)])).transpose(0, 1)
        self.encoder.zero_grad()
        self.decoder.zero_grad()
        src_embedder.zero_grad()
        tgt_embedder.zero_grad()
        if self.args.use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()
            start_decode = start_decode.cuda()
        output, hidden_c = self.encoder(src_embedder, inputs,
                                        input_lengths)
        preds = self.decoder(tgt_embedder, start_decode, hidden_c,
                             targets.size(1), output, None, True)
        loss = self.loss_func(preds, targets.view(-1))
        losses.append(loss.data[0])
        loss.backward()
        # In-place clip_grad_norm_ for consistency; the underscore-less
        # form is deprecated.
        nn.utils.clip_grad_norm_(self.encoder.parameters(), 50.0)
        nn.utils.clip_grad_norm_(self.decoder.parameters(), 50.0)
        self.enc_optim.step()
        self.dec_optim.step()
        src_embedder_optim.step()
        tgt_embedder_optim.step()
    print(np.mean(losses))
    return np.mean(losses)
def translate(self, sents, input_lang):
    """Translate *sents* from *input_lang* into the opposite language.

    Uses the previous-iteration encoder/decoder/embedders (``self.prev_*``)
    and greedy decoding up to 50 tokens. Returns a list of decoded
    space-joined token strings, one per input sentence.
    """
    output_lang = 'src' if input_lang == 'tgt' else 'tgt'
    in_w2i = self.converters[input_lang]['w2i']
    out_w2i = self.converters[output_lang]['w2i']
    out_i2w = self.converters[output_lang]['i2w']
    enc_embedder = self.prev_embedders[input_lang]
    dec_embedder = self.prev_embedders[output_lang]
    encoder = self.prev_encoder
    decoder = self.prev_decoder
    # Target side is a placeholder; only its batch size is used below.
    batch = utils.prepare_batch({'src': sents, 'tgt': sents},
                                in_w2i, out_w2i)
    inputs, targets, input_lengths, target_lengths =\
        utils.pad_to_batch(batch, in_w2i, out_w2i)
    start_decode = Variable(
        LT([[out_w2i['<s>']] * targets.size(0)])).transpose(0, 1)
    if self.args.use_cuda:
        inputs = inputs.cuda()
        targets = targets.cuda()
        start_decode = start_decode.cuda()
    output, hidden_c = encoder(enc_embedder, inputs, input_lengths)
    max_length = 50
    preds = decoder(dec_embedder, start_decode, hidden_c, max_length,
                    output, None, True)
    preds = preds.view(inputs.size(0), max_length, -1)
    # Greedy argmax over the vocabulary dimension.
    best_ids = torch.max(preds, 2)[1]
    result_sents = []
    for row in range(len(sents)):
        tokens = [out_i2w.get(p, '<UNK>')
                  for p in best_ids.data[row].tolist()]
        result_sents.append(' '.join(tokens))
    return result_sents
def calc_test_loss(self, log_dict):
    """Evaluate on the test set and record results into *log_dict*.

    Fills ``log_dict['test_loss']`` with the mean batch loss and
    ``log_dict['sample_translation']`` with the source/target/prediction
    strings for the first example of the last batch. Returns *log_dict*.
    """
    sw2i = self.sw2i
    tw2i = self.tw2i
    losses = []
    for batch in self.test_dataloader:
        batch = utils.prepare_batch(batch, sw2i, tw2i)
        inputs, targets, input_lengths, target_lengths =\
            utils.pad_to_batch(batch, sw2i, tw2i)
        start_decode = Variable(
            LT([[tw2i['<s>']] * targets.size(0)]),
            requires_grad=False).transpose(0, 1)
        if self.args.use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()
            start_decode = start_decode.cuda()
        output, hidden_c = self.encoder(self.src_embedder, inputs,
                                        input_lengths)
        preds = self.decoder(self.tgt_embedder, start_decode, hidden_c,
                             targets.size(1), output, None, True)
        loss = self.loss_func(preds, targets.view(-1))
        losses.append(loss.data[0])
    log_dict['test_loss'] = np.mean(losses)
    # Sample translation from the final batch, first example.
    sample = {}
    sample['src'] = ' '.join(
        self.si2w[p] for p in inputs.data[0].tolist())
    sample['tgt'] = ' '.join(
        self.ti2w[p] for p in targets.data[0].tolist())
    preds = preds.view(inputs.size(0), targets.size(1), -1)
    best_ids = torch.max(preds, 2)[1]
    sample['prediction'] = ' '.join(
        self.ti2w[p] for p in best_ids.data[0].tolist())
    log_dict['sample_translation'] = sample
    return log_dict
def train_one_epoch(self, log_dict):
    """Run one supervised training epoch; store mean loss in *log_dict*.

    Trains encoder, decoder, and both embedders with teacher forcing and
    gradient clipping at norm 50. Writes ``log_dict['train_loss']``.
    """
    sw2i = self.sw2i
    tw2i = self.tw2i
    losses = []
    for batch in tqdm(self.train_dataloader):
        batch = utils.prepare_batch(batch, sw2i, tw2i)
        inputs, targets, input_lengths, target_lengths =\
            utils.pad_to_batch(batch, sw2i, tw2i)
        # (1, B) row of <s> ids, transposed to (B, 1).
        start_decode = Variable(
            LT([[tw2i['<s>']] * targets.size(0)])).transpose(0, 1)
        for module in (self.encoder, self.decoder,
                       self.src_embedder, self.tgt_embedder):
            module.zero_grad()
        if self.args.use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()
            start_decode = start_decode.cuda()
        output, hidden_c = self.encoder(self.src_embedder, inputs,
                                        input_lengths)
        preds = self.decoder(self.tgt_embedder, start_decode, hidden_c,
                             targets.size(1), output, None, True)
        loss = self.loss_func(preds, targets.view(-1))
        losses.append(loss.data[0])
        loss.backward()
        nn.utils.clip_grad_norm_(self.encoder.parameters(), 50.0)
        nn.utils.clip_grad_norm_(self.decoder.parameters(), 50.0)
        for optim in (self.enc_optim, self.dec_optim,
                      self.src_embedder_optim, self.tgt_embedder_optim):
            optim.step()
    log_dict['train_loss'] = np.mean(losses)
def train_one_epoch_adversarial(self):
    """Run one adversarial epoch: discriminator phase then generator phase.

    The discriminator learns to tell src-encoder states from tgt-encoder
    states (with label smoothing 0.9/0.1); the encoder+embedders are then
    trained to fool it by optimizing against flipped labels.
    Returns ``(mean_disc_loss, mean_gen_loss)``.
    """
    dataset = self.train_dataloader.dataset
    batch_size = self.args.batch_size // 2
    disc_losses = []
    gen_losses = []

    # ---- Train Discriminator (encoder/embedders frozen in eval mode) ----
    self.encoder.eval()
    self.src_embedder.eval()
    self.tgt_embedder.eval()
    self.discriminator.train()
    for _ in range(50):
        indxs = np.random.permutation(len(dataset))[:batch_size]
        srcs = np.array(dataset.src)[indxs]
        indxs = np.random.permutation(len(dataset))[:batch_size]
        tgts = np.array(dataset.tgt)[indxs]
        # For src lang
        batch = {'src': srcs, 'tgt': srcs}
        batch = utils.prepare_batch(batch, self.sw2i, self.sw2i)
        src_inputs, _, src_input_lengths, _ =\
            utils.pad_to_batch(batch, self.sw2i, self.sw2i)
        if self.args.use_cuda:
            src_inputs = src_inputs.cuda()
        _, src_output = self.encoder(self.src_embedder, src_inputs,
                                     src_input_lengths)
        # For tgt lang
        batch = {'src': tgts, 'tgt': tgts}
        batch = utils.prepare_batch(batch, self.tw2i, self.tw2i)
        tgt_inputs, _, tgt_input_lengths, _ =\
            utils.pad_to_batch(batch, self.tw2i, self.tw2i)
        if self.args.use_cuda:
            tgt_inputs = tgt_inputs.cuda()
        _, tgt_output = self.encoder(self.tgt_embedder, tgt_inputs,
                                     tgt_input_lengths)
        # Stack both halves; first half is src (label ~1), second tgt (~0).
        disc_input =\
            torch.cat([src_output.contiguous().view(batch_size, -1),
                       tgt_output.contiguous().view(batch_size, -1)])
        y = torch.FloatTensor(batch_size * 2).zero_()
        y[:batch_size] = 1 - 0.1  # label smoothing
        y[batch_size:] = 0.1
        y = Variable(y)
        if self.args.use_cuda:
            y = y.cuda()
        self.discriminator.zero_grad()
        self.disc_optim.zero_grad()
        preds = self.discriminator(disc_input)
        loss = self.disc_loss_func(preds, y)
        loss.backward()
        disc_losses.append(loss.data[0])
        self.disc_optim.step()

    # ---- Train Generator / Encoder (discriminator frozen) ----
    self.encoder.train()
    self.src_embedder.train()
    self.tgt_embedder.train()
    self.discriminator.eval()
    for _ in range(50):
        indxs = np.random.permutation(len(dataset))[:batch_size]
        srcs = np.array(dataset.src)[indxs]
        indxs = np.random.permutation(len(dataset))[:batch_size]
        tgts = np.array(dataset.tgt)[indxs]
        # For src lang
        batch = {'src': srcs, 'tgt': srcs}
        batch = utils.prepare_batch(batch, self.sw2i, self.sw2i)
        src_inputs, _, src_input_lengths, _ =\
            utils.pad_to_batch(batch, self.sw2i, self.sw2i)
        if self.args.use_cuda:
            src_inputs = src_inputs.cuda()
        _, src_output = self.encoder(self.src_embedder, src_inputs,
                                     src_input_lengths)
        # For tgt lang
        batch = {'src': tgts, 'tgt': tgts}
        batch = utils.prepare_batch(batch, self.tw2i, self.tw2i)
        tgt_inputs, _, tgt_input_lengths, _ =\
            utils.pad_to_batch(batch, self.tw2i, self.tw2i)
        if self.args.use_cuda:
            tgt_inputs = tgt_inputs.cuda()
        _, tgt_output = self.encoder(self.tgt_embedder, tgt_inputs,
                                     tgt_input_lengths)
        # Prepare input for discriminator
        disc_input =\
            torch.cat([src_output.contiguous().view(batch_size, -1),
                       tgt_output.contiguous().view(batch_size, -1)])
        y = torch.FloatTensor(batch_size * 2).zero_()
        y[:batch_size] = 1 - 0.1
        y[batch_size:] = 0.1
        y = Variable(y)
        if self.args.use_cuda:
            y = y.cuda()
        self.encoder.zero_grad()
        self.src_embedder.zero_grad()
        self.tgt_embedder.zero_grad()
        preds = self.discriminator(disc_input)
        # Flipped labels: the encoder is rewarded for fooling the
        # discriminator.
        loss = self.disc_loss_func(preds, 1 - y)
        gen_losses.append(loss.data[0])
        loss.backward()
        # BUG FIX: this previously called self.enc_optim.zero_grad(),
        # which erased the freshly computed encoder gradients so the
        # encoder was never updated. It must step instead.
        self.enc_optim.step()
        self.src_embedder_optim.step()
        self.tgt_embedder_optim.step()
    return np.mean(disc_losses), np.mean(gen_losses)
def train_one_epoch_cross_domain(self, obj, first_iter=False):
    """Run one back-translation (cross-domain) epoch for language *obj*.

    Each *obj* sentence is translated into the other language (word-by-word
    via the bilingual dictionary on the first iteration, otherwise with the
    previous-iteration model), noised, and used as input to reconstruct the
    original *obj* sentence. Returns the mean batch loss.
    """
    non_obj = 'src' if obj == 'tgt' else 'tgt'
    print('Calculating cross domain loss %s to %s...' % (obj, non_obj))
    obj_w2i = self.converters[obj]['w2i']
    non_obj_w2i = self.converters[non_obj]['w2i']
    obj_embedder = self.embedders[obj]
    obj_embedder_optim = self.optims[obj]
    non_obj_embedder = self.embedders[non_obj]
    non_obj_embedder_optim = self.optims[non_obj]
    losses = []
    for batch in tqdm(self.train_dataloader):
        # Translate obj -> non_obj with the previous-iteration model
        # (naive dictionary lookup on the very first iteration).
        if first_iter:
            src_to_tgt = True if obj == 'src' else False
            naive_y =\
                [' '.join(self.bi_dict.translate(sent.split(),
                                                 src_to_tgt=src_to_tgt))
                 for sent in batch[obj]]
        else:
            naive_y = self.translate(batch[obj], obj)
        noised_y = [sent_noise.run(sent) for sent in naive_y]
        batch['tgt'] = batch[obj]
        batch['src'] = noised_y
        # Convert token strings to ids and pad.
        batch = utils.prepare_batch(batch, non_obj_w2i, obj_w2i)
        inputs, targets, input_lengths, target_lengths =\
            utils.pad_to_batch(batch, non_obj_w2i, obj_w2i)
        start_decode =\
            Variable(LT([[non_obj_w2i['<s>']] * inputs.size(0)]))\
            .transpose(0, 1)
        self.encoder.zero_grad()
        self.decoder.zero_grad()
        obj_embedder.zero_grad()
        non_obj_embedder.zero_grad()
        if self.args.use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()
            start_decode = start_decode.cuda()
        output, hidden_c = self.encoder(non_obj_embedder, inputs,
                                        input_lengths)
        preds = self.decoder(obj_embedder, start_decode, hidden_c,
                             targets.size(1), output, None, True)
        loss = self.loss_func(preds, targets.view(-1))
        losses.append(loss.data[0])
        loss.backward()
        # In-place clip_grad_norm_ for consistency; the underscore-less
        # form is deprecated.
        nn.utils.clip_grad_norm_(self.encoder.parameters(), 50.0)
        nn.utils.clip_grad_norm_(self.decoder.parameters(), 50.0)
        self.enc_optim.step()
        self.dec_optim.step()
        obj_embedder_optim.step()
        non_obj_embedder_optim.step()
    return np.mean(losses)