def eval(self):
    """Evaluate pair-classification accuracy on the validation set.

    Runs the model over ``self.dataloader['valid']`` and compares argmax
    predictions against the gold labels ``y``.

    Returns:
        dict: ``{'acc': accuracy in percent over all validation pairs}``;
        0.0 if the validation loader is empty.
    """
    params = self.params
    self.model.eval()
    lang_id1 = params.lang2id[params.src_lang]
    lang_id2 = params.lang2id[params.trg_lang]
    valid = 0
    total = 0
    # inference only: disable autograd even when called outside train()'s
    # own no_grad() wrapper (saves memory, no behavior change for eval)
    with torch.no_grad():
        for sent1, len1, sent2, len2, y, _, _ in tqdm(
                self.dataloader['valid']):
            # clip overly long sentences, keeping the trailing EOS token
            sent1, len1 = truncate(sent1, len1, params.max_len,
                                   params.eos_index)
            sent2, len2 = truncate(sent2, len2, params.max_len,
                                   params.eos_index)
            # merge the pair into a single XLM input with per-sentence
            # position/language ids
            x, lengths, positions, langs = concat_batches(
                sent1, len1, lang_id1, sent2, len2, lang_id2,
                params.pad_index, params.eos_index, reset_positions=True)
            # cuda
            x, y, lengths, positions, langs = to_cuda(
                x, y, lengths, positions, langs, gpu=self.gpu)
            # forward
            output = self.model(x, lengths, positions, langs)
            predictions = output.data.max(1)[1]
            # update statistics
            valid += predictions.eq(y).sum().item()
            total += len(len1)
    # compute accuracy; guard against an empty validation loader
    acc = 100.0 * valid / total if total > 0 else 0.0
    scores = {}
    scores['acc'] = acc
    return scores
def run(model, params, dico, data, split, src_lang, trg_lang,
        gen_type="src2trg", alpha=1., beta=1., gamma=0., uniform=False,
        iter_mult=1, mask_schedule="constant", constant_k=1, batch_size=8,
        gpu_id=0):
    """Decode one split with iterative mask-predict generation and score BLEU.

    For every batch, the target side (or source side, for ``trg2src``) is
    replaced by an all-mask sequence and filled in by ``_evaluate_batch``.
    When length statistics (``params.de2en_lengths`` / ``en2de_lengths``)
    are available, the top ``params.num_topk_lengths`` candidate output
    lengths are each decoded and the candidate with the best model score is
    kept. Hypotheses are written to ``params.hyp_path`` and scored with
    ``eval_moses_bleu`` against the reference file for (src, trg, split).

    Args:
        model, params, dico, data: trained model, run params, dictionary,
            and dataset dict as produced by the surrounding loading code.
        split: dataset split name used to look up references / iterator.
        src_lang, trg_lang: language codes used for language-id embeddings.
        gen_type: "src2trg" or "trg2src" — which side is generated.
        alpha, beta, gamma, uniform, iter_mult, mask_schedule, constant_k:
            decoding hyper-parameters, passed through to ``_evaluate_batch``.
        batch_size: number of sentences per batch expected at extraction time.
        gpu_id: CUDA device id; negative disables the to_cuda move.
    """
    #n_batches = math.ceil(len(srcs) / batch_size)
    # pick the reference file matching the generation direction
    if gen_type == "src2trg":
        ref_path = params.ref_paths[(src_lang, trg_lang, split)]
    elif gen_type == "trg2src":
        ref_path = params.ref_paths[(trg_lang, src_lang, split)]
    refs = [s.strip() for s in open(ref_path, encoding="utf-8").readlines()]
    hypothesis = []
    #hypothesis_selected_pos = []
    # NOTE(review): iterator languages are hard-coded to "de"/"en" even
    # though src_lang/trg_lang are parameters — confirm this is intended.
    for batch_n, batch in enumerate(
            get_iterator(params, data, split, "de", "en")):
        (src_x, src_lens), (trg_x, trg_lens) = batch
        # one entry per candidate output length; best-scoring one is kept
        batches, batches_src_lens, batches_trg_lens, total_scores = [], [], [], []
        #batches_selected_pos = []
        for i_topk_length in range(params.num_topk_lengths):
            # overwrite source/target lengths according to dataset stats if necessary
            # NOTE(review): src_lens[0]/trg_lens[0] implies the stats path
            # assumes batch size 1 — confirm against the caller.
            if params.de2en_lengths != None and params.en2de_lengths != None:
                src_lens_item = src_lens[0].item() - 2  # remove BOS, EOS
                trg_lens_item = trg_lens[0].item() - 2  # remove BOS, EOS
                if gen_type == "src2trg":
                    # stop once fewer than i_topk_length+1 candidate lengths exist
                    if len(params.de2en_lengths[src_lens_item].keys()
                           ) < i_topk_length + 1:
                        break
                    # sort candidate target lengths by frequency (ascending)
                    data_trg_lens = sorted(
                        params.de2en_lengths[src_lens_item].items(),
                        key=operator.itemgetter(1))
                    data_trg_lens_item = data_trg_lens[-1 - i_topk_length][0] + 2
                    # overwrite trg_lens
                    trg_lens = torch.ones_like(trg_lens) * data_trg_lens_item
                elif gen_type == "trg2src":
                    if len(params.en2de_lengths[trg_lens_item].keys()
                           ) < i_topk_length + 1:
                        break
                    data_src_lens = sorted(
                        params.en2de_lengths[trg_lens_item].items(),
                        key=operator.itemgetter(1))
                    # take i_topk_length most likely length and add BOS, EOS
                    data_src_lens_item = data_src_lens[-1 - i_topk_length][0] + 2
                    # overwrite src_lens
                    src_lens = torch.ones_like(src_lens) * data_src_lens_item
            # build the decoder input: known side as-is, generated side all-mask
            if gen_type == "src2trg":
                sent1_input = src_x
                sent2_input = create_masked_batch(trg_lens, params, dico)
                dec_len = torch.max(trg_lens).item() - 2  # cut BOS, EOS
            elif gen_type == "trg2src":
                sent1_input = create_masked_batch(src_lens, params, dico)
                sent2_input = trg_x
                dec_len = torch.max(src_lens).item() - 2  # cut BOS, EOS
            batch, lengths, positions, langs = concat_batches(sent1_input, src_lens, params.lang2id[src_lang], \
                                                              sent2_input, trg_lens, params.lang2id[trg_lang], \
                                                              params.pad_index, params.eos_index, \
                                                              reset_positions=True, assert_eos=True)  # not sure about it
            if gpu_id >= 0:
                batch, lengths, positions, langs, src_lens, trg_lens = \
                    to_cuda(batch, lengths, positions, langs, src_lens, trg_lens)
            # iterative mask-predict refinement; returns the filled-in batch
            # and the model score of the argmax tokens
            with torch.no_grad():
                batch, total_score_argmax_toks = \
                    _evaluate_batch(model, params, dico, batch, lengths,
                                    positions, langs, src_lens, trg_lens,
                                    gen_type, alpha, beta, gamma, uniform,
                                    dec_len, iter_mult, mask_schedule,
                                    constant_k)
            batches.append(batch.clone())
            batches_src_lens.append(src_lens.clone())
            batches_trg_lens.append(trg_lens.clone())
            total_scores.append(total_score_argmax_toks)
            #batches_selected_pos.append(selected_pos)
        # keep the candidate length whose decode scored best
        best_score_idx = np.array(total_scores).argmax()
        batch, src_lens, trg_lens = batches[best_score_idx], batches_src_lens[
            best_score_idx], batches_trg_lens[best_score_idx]
        #selected_pos = batches_selected_pos[best_score_idx]
        #if gen_type == "src2trg":
        #    hypothesis_selected_pos.append([selected_pos, trg_lens.item()-2])
        #elif gen_type == "trg2src":
        #    hypothesis_selected_pos.append([selected_pos, src_lens.item()-2])
        # NOTE(review): assumes every batch holds exactly batch_size
        # sentences; a final partial batch would index out of range.
        for batch_idx in range(batch_size):
            src_len = src_lens[batch_idx].item()
            tgt_len = trg_lens[batch_idx].item()
            # slice out the generated side of the concatenated sequence
            if gen_type == "src2trg":
                generated = batch[src_len:src_len + tgt_len, batch_idx]
            else:
                generated = batch[:src_len, batch_idx]
            # extra <eos>: truncate after the second EOS if more were produced
            eos_pos = (generated == params.eos_index).nonzero()
            if eos_pos.shape[0] > 2:
                generated = generated[:(eos_pos[1, 0].item() + 1)]
            hypothesis.extend(convert_to_text(generated.unsqueeze(1), \
                                              torch.Tensor([generated.shape[0]]).int(), \
                                              dico, params))
            print("Ex {0}\nRef: {1}\nHyp: {2}\n".format(
                batch_n, refs[batch_n].encode("utf-8"),
                hypothesis[-1].encode("utf-8")))
    hyp_path = os.path.join(params.hyp_path, 'decoding.txt')
    hyp_path_tok = os.path.join(params.hyp_path, 'decoding.tok.txt')
    #hyp_selected_pos_path = os.path.join(params.hyp_path, "selected_pos.pkl")
    # export sentences to hypothesis file / restore BPE segmentation
    with open(hyp_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(hypothesis) + '\n')
    with open(hyp_path_tok, 'w', encoding='utf-8') as f:
        f.write('\n'.join(hypothesis) + '\n')
    #with open(hyp_selected_pos_path, 'wb') as f:
    #    pkl.dump(hypothesis_selected_pos, f)
    restore_segmentation(hyp_path)
    # evaluate BLEU score
    bleu = eval_moses_bleu(ref_path, hyp_path)
    print("BLEU %s-%s; %s %s : %f" %
          (src_lang, trg_lang, hyp_path, ref_path, bleu))
    # write BLEU score result to file
    result_path = os.path.join(params.hyp_path, "result.txt")
    with open(result_path, 'w', encoding='utf-8') as f:
        f.write("BLEU %s-%s; %s %s : %f\n" %
                (src_lang, trg_lang, hyp_path, ref_path, bleu))
def main(args):
    """Score parallel sentence pairs with a pretrained XLM encoder.

    Loads model params and checkpoint from the directory containing
    ``args.load_model``, rebuilds the dictionary and data, then writes one
    sigmoid score per sentence pair to ``<dump_path>/<src>-<tgt>.pred``.
    Raises ValueError if ``args.dump_path`` exists and is non-empty.
    """
    # NOTE(review): rng is never used below — dead assignment.
    rng = np.random.RandomState(0)
    # Make dump path
    if not os.path.exists(args.dump_path):
        subprocess.Popen("mkdir -p %s" % args.dump_path, shell=True).wait()
    else:
        if os.listdir(args.dump_path):
            m = "Directory {} is not empty.".format(args.dump_path)
            raise ValueError(m)
    # optionally mirror progress messages into args.log_file (append mode)
    if len(args.log_file):
        write_log = True
    else:
        write_log = False
    # load model parameters
    model_dir = os.path.dirname(args.load_model)
    params_path = os.path.join(model_dir, 'params.pkl')
    with open(params_path, "rb") as f:
        # NOTE(review): pickle.load executes arbitrary code — only point
        # this at trusted checkpoints.
        params = pickle.load(f)
    # load data parameters and model parameters from checkpoint
    checkpoint_path = os.path.join(model_dir, 'checkpoint.pth')
    assert os.path.isfile(checkpoint_path)
    data = torch.load(
        checkpoint_path,
        map_location=lambda storage, loc: storage.cuda(params.local_rank))
    # checkpoint params override the pickled ones
    for k, v in data["params"].items():
        params.__dict__[k] = v
    dico = Dictionary(data["dico_id2word"], data["dico_word2id"],
                      data["dico_counts"])
    # Print score
    for k, v in data["best_metrics"].items():
        print("- {}: {}".format(k, v))
    # Fix some of the params we pass to load_data
    params.debug_train = False
    params.max_vocab = -1
    params.min_count = 0
    params.tokens_per_batch = -1
    params.max_batch_size = args.batch_size
    params.batch_size = args.batch_size
    # load data (rebinds `data`; the checkpoint dict is no longer needed)
    data = load_data(args.data_path, params)
    # Print data summary
    for (src, tgt), dataset in data['para'].items():
        datatype = "Para data (%s)" % (
            "WITHOUT labels" if dataset.labels is None else "WITH labels")
        m = '{: <27} - {: >12}:{: >10}'.format(datatype, '%s-%s' % (src, tgt),
                                               len(dataset))
        print(m)
    # Fix some of the params we pass to the model builder
    params.reload_model = args.load_model
    # build model; only the encoder is needed for scoring
    if params.encoder_only:
        model = build_model(params, dico)
    else:
        encoder, decoder = build_model(params, dico)
        model = encoder
    # Predict
    model = model.module if params.multi_gpu else model
    model.eval()
    start = time.time()
    for (src, tgt), dataset in data['para'].items():
        path = os.path.join(args.dump_path, "{}-{}.pred".format(src, tgt))
        scores_file = open(path, "w")
        lang1_id = params.lang2id[src]
        lang2_id = params.lang2id[tgt]
        diffs = []  # score(l1,l2) - score(l2,l1), only filled under TEST_REVERSE
        nb_written = 0
        for batch in dataset.get_iterator(False,
                                          group_by_size=False,
                                          n_sentences=-1,
                                          return_indices=False):
            (sent1, len1), (sent2, len2), labels = batch
            # clip overly long sentences, keeping the trailing EOS token
            sent1, len1 = truncate(sent1, len1, params.max_len,
                                   params.eos_index)
            sent2, len2 = truncate(sent2, len2, params.max_len,
                                   params.eos_index)
            # merge the pair into a single XLM input
            x, lengths, positions, langs = concat_batches(
                sent1, len1, lang1_id, sent2, len2, lang2_id,
                params.pad_index, params.eos_index, reset_positions=True)
            x, lengths, positions, langs = to_cuda(x, lengths, positions,
                                                   langs)
            with torch.no_grad():
                # Get sentence pair embedding
                # NOTE(review): [0] presumably selects the first position's
                # hidden state ([CLS]-like token) — confirm against the
                # transformer's output layout (slen, bs, dim).
                h = model('fwd',
                          x=x,
                          lengths=lengths,
                          positions=positions,
                          langs=langs,
                          causal=False)[0]
                CLF_ID1, CLF_ID2 = 8, 9  # very hacky, use embeddings to make weights for the classifier
                emb = (model.module
                       if params.multi_gpu else model).embeddings.weight
                # NOTE(review): bias is the 0-dim scalar emb[CLF_ID2, 0];
                # verify this is intended rather than emb[CLF_ID2].
                pred = F.linear(h, emb[CLF_ID1].unsqueeze(0), emb[CLF_ID2, 0])
                pred = torch.sigmoid(pred)
            pred = pred.view(-1).cpu().numpy().tolist()
            # zero-length pairs get a fixed 0 score instead of the model's
            for p, l1, l2 in zip(pred, len1, len2):
                if l1.item() == 0 and l2.item() == 0:
                    scores_file.write("0.00000000\n")
                else:
                    scores_file.write("{:.8f}\n".format(p))
            nb_written += len(pred)
            # NOTE(review): this fires only when nb_written is an exact
            # multiple of 1000, which batch-sized increments may skip.
            if nb_written % 1000 == 0:
                elapsed = int(time.time() - start)
                lpss = elapsed % 60
                lpsm = elapsed // 60
                lpsh = lpsm // 60
                lpsm = lpsm % 60
                msg = "[{:02d}:{:02d}:{:02d} {}-{}]".format(
                    lpsh, lpsm, lpss, src, tgt)
                msg += " {}/{} ({:.2f}%) sentences processed".format(
                    nb_written, len(dataset), 100 * nb_written / len(dataset))
                print(msg)
                if write_log:
                    with open(args.log_file, "a") as fout:
                        fout.write(msg + "\n")
            # Try reversing order
            # TEST_REVERSE: module-level debug flag (defined elsewhere);
            # re-scores each pair with the sentences swapped.
            if TEST_REVERSE:
                x, lengths, positions, langs = concat_batches(
                    sent2, len2, lang2_id, sent1, len1, lang1_id,
                    params.pad_index, params.eos_index, reset_positions=True)
                x, lengths, positions, langs = to_cuda(
                    x, lengths, positions, langs)
                with torch.no_grad():
                    # Get sentence pair embedding
                    h = model('fwd',
                              x=x,
                              lengths=lengths,
                              positions=positions,
                              langs=langs,
                              causal=False)[0]
                    CLF_ID1, CLF_ID2 = 8, 9  # very hacky, use embeddings to make weights for the classifier
                    emb = (model.module
                           if params.multi_gpu else model).embeddings.weight
                    pred_rev = F.linear(h, emb[CLF_ID1].unsqueeze(0),
                                        emb[CLF_ID2, 0])
                    pred_rev = torch.sigmoid(pred_rev)
                pred_rev = pred_rev.view(-1).cpu().numpy().tolist()
                for p, pp in zip(pred, pred_rev):
                    diffs.append(p - pp)
        if TEST_REVERSE:
            print(
                "Average absolute diff between score(l1,l2) and score(l2,l1): {}"
                .format(np.mean(np.abs(diffs))))
        scores_file.close()
def train(self):
    """Run one training epoch over ``self.dataloader['train']``.

    Performs a forward/backward pass per batch with the two optimizers
    (``optimizer_e`` for the encoder, ``optimizer_p`` for the projection),
    logs the running loss every ``report_interval`` steps, and evaluates /
    checkpoints on GPU 0 every ``eval_interval`` steps.
    """
    params = self.params
    self.model.train()
    # running loss buffer, cleared at every report
    # (removed dead locals ns / nw / t / bs: assigned but never read)
    losses = []
    lang_id1 = params.lang2id[params.src_lang]
    lang_id2 = params.lang2id[params.trg_lang]
    for sent1, len1, sent2, len2, y, _, _ in self.dataloader['train']:
        self.global_step += 1
        # clip overly long sentences, keeping the trailing EOS token
        sent1, len1 = truncate(sent1, len1, params.max_len, params.eos_index)
        sent2, len2 = truncate(sent2, len2, params.max_len, params.eos_index)
        # merge the pair into a single XLM input with per-sentence
        # position/language ids
        x, lengths, positions, langs = concat_batches(
            sent1, len1, lang_id1, sent2, len2, lang_id2, params.pad_index,
            params.eos_index, reset_positions=True)
        # cuda
        x, y, lengths, positions, langs = to_cuda(x, y, lengths, positions,
                                                  langs, gpu=self.gpu)
        # loss
        output = self.model(x, lengths, positions, langs)
        loss = self.criterion(output, y)
        # backward / optimization (both optimizers step on every batch)
        self.optimizer_e.zero_grad()
        self.optimizer_p.zero_grad()
        loss.backward()
        self.optimizer_e.step()
        self.optimizer_p.step()
        losses.append(loss.item())
        # log
        if self.global_step % self.params.report_interval == 0:
            logger.info("GPU %i - Epoch %i - Global_step %i - Loss: %.4f" %
                        (self.gpu, self.epoch, self.global_step,
                         sum(losses) / len(losses)))
            losses = []
        if self.global_step % params.eval_interval == 0:
            if self.gpu == 0:
                logger.info("XLM - Evaluating")
                with torch.no_grad():
                    scores = self.eval()
                    if scores['acc'] > self.best_acc:
                        self.best_acc = scores['acc']
                        torch.save(
                            self.model.module,
                            os.path.join(params.save_model,
                                         'best_acc_model.pkl'))
                        with open(
                                os.path.join(params.save_model,
                                             'best_acc.note'), 'a') as f:
                            f.write(str(self.best_acc) + '\n')
                    with open(os.path.join(params.save_model, 'acc.note'),
                              'a') as f:
                        f.write(str(scores['acc']) + '\n')
                    # was "%i", which silently truncated the float accuracy
                    logger.info("acc - %.4f " % scores['acc'])
                    self.model.train()
def run_test(self):
    """Score every pair in the test set and append results to a file.

    Writes ``src<delim>trg<delim>probability`` lines (probability of the
    positive class) to ``params.test_result_path + '_<gpu>'`` — one file
    per GPU to avoid concurrent writes. Results are buffered and flushed
    to disk every ``params.flush_frequency`` pairs.
    """
    params = self.params
    result_path = params.test_result_path + '_{}'.format(self.gpu)
    self.model.eval()
    lang_id1 = params.lang2id[params.src_lang]
    lang_id2 = params.lang2id[params.trg_lang]
    proba_result = []
    src_text_list = []
    trg_text_list = []

    def _flush_scores():
        # Append the buffered (src, trg, score) triples to the result file.
        # Extracted because the original duplicated this block verbatim
        # for the in-loop flush and the final flush.
        logger.info(" GPU %i - write out score..." % self.gpu)
        with open(result_path, 'a') as f:
            for src, trg, proba in zip(src_text_list, trg_text_list,
                                       proba_result):
                f.write('{}{}{}{}{}'.format(src, params.delimeter, trg,
                                            params.delimeter, str(proba)) +
                        os.linesep)

    with torch.no_grad():
        for sent1, len1, sent2, len2, _, src_text, trg_text in tqdm(
                self.dataloader['test']):
            # clip overly long sentences, keeping the trailing EOS token
            sent1, len1 = truncate(sent1, len1, params.max_len,
                                   params.eos_index)
            sent2, len2 = truncate(sent2, len2, params.max_len,
                                   params.eos_index)
            # merge the pair into a single XLM input
            x, lengths, positions, langs = concat_batches(
                sent1, len1, lang_id1, sent2, len2, lang_id2,
                params.pad_index, params.eos_index, reset_positions=True)
            # cuda
            x, lengths, positions, langs = to_cuda(x, lengths, positions,
                                                   langs, gpu=self.gpu)
            # forward: probability of the positive class
            output = self.model(x, lengths, positions, langs)
            proba = F.softmax(output, 1)[:, 1]
            proba_result.extend(proba.cpu().numpy())
            src_text_list.extend(src_text)
            trg_text_list.extend(trg_text)
            assert len(proba_result) == len(src_text_list) == len(trg_text_list)
            # flush the buffers periodically to bound memory use
            if len(proba_result) > params.flush_frequency:
                _flush_scores()
                proba_result = []
                src_text_list = []
                trg_text_list = []
    # write out the remainings
    _flush_scores()