# --- Export mapped embeddings, then run optional Procrustes refinement. ---
# NOTE(review): this chunk continues a `with io.open(...)` block that starts
# before the visible region — `f`, `tgt_dict`, `mapped_tgt_emb`, `s2t`, `t2s`,
# `best_valid_s2t_metric`, `save_path`, etc. are defined upstream; confirm there.

# Finish writing the mapped target embeddings (word2vec text format).
for i in range(len(tgt_dict)):
    vec_txt = " ".join('%.5f' % x for x in mapped_tgt_emb[i])
    f.write(u"%s %s\n" % (tgt_dict[i], vec_txt))

# Write the (unmapped) source embeddings alongside, same text format:
# header line "<vocab_size> <dim>", then one "<word> <vector>" line per entry.
print(f'Writing corresponding source embeddings to {pre_src_path}')
with io.open(pre_src_path, 'w', encoding='utf-8') as f:
    f.write(u"%i %i\n" % src_emb.shape)
    for i in range(len(src_dict)):
        vec_txt = " ".join('%.5f' % x for x in src_emb[i])
        f.write(u"%s %s\n" % (src_dict[i], vec_txt))

save_s2t = False
save_t2s = False

# Force CSLS-kNN dictionary induction for the refinement phase.
args.dico_method = "csls_knn_10"
evaluator = Evaluator(model, src_emb, tgt_emb)

# Baseline evaluation before any refinement iterations.
to_log = OrderedDict()
print("--------------------------------- Before refinement ---------------------------------")
evaluator.all_eval(to_log, s2t=s2t, t2s=t2s)

if args.n_refinement > 0:
    # Iteratively: induce a bilingual dictionary, solve Procrustes on it,
    # re-evaluate, and checkpoint whenever the s2t metric improves.
    print("--------------------------------- Starting Procrustes Refinement ---------------------------------")
    for n_iter in range(args.n_refinement):
        print("Refinement iteration %d" % (n_iter + 1))
        model.build_dictionary(src_emb, tgt_emb, s2t=s2t, t2s=t2s)
        model.procrustes(src_emb, tgt_emb, s2t=s2t, t2s=t2s)
        to_log["iters"] = n_iter
        evaluator.all_eval(to_log, s2t=s2t, t2s=t2s)
        if s2t and to_log[VALIDATION_METRIC_SUP_s2t] > best_valid_s2t_metric:
            ckpt_path = save_path + "best_" + args.src_lang + "2" + args.tgt_lang + "_params.bin"
            model.set_save_s2t_path(ckpt_path)
            model.save_best_s2t()
# --- One training step of the flow model, with periodic validation. ---
# NOTE(review): appears to be the body of a training loop defined outside the
# visible region; `train_step`, `src_idx`, `base_src_idx`, `best_valid_*`, etc.
# come from that enclosing scope — confirm against the full file.
tic = time.time()

# Optionally feed supervised (in-dictionary) pairs into the flow step.
if args.supervise_id:
    sup_src_batch, sup_tgt_batch = src_in_dict, tgt_in_dict
else:
    sup_src_batch = sup_tgt_batch = None

model.flow_step(base_src_idx, base_tgt_idx, src_idx, tgt_idx,
                training_stats, sup_src_batch, sup_tgt_batch)
# A step consumes one source and one target batch of equal length.
n_words_proc += len(src_idx) * 2

# Every `valid_steps` steps: evaluate and checkpoint the best models.
if train_step > 0 and train_step % args.valid_steps == 0:
    gc.collect()
    to_log = OrderedDict({'train_iters': train_step, 'exp_path': exp_path})
    evaluator.all_eval(to_log, train=True,
                       unsup_eval=args.valid_option == "unsup")

    # Source-to-target direction: save if the validation metric improved.
    if to_log[VALIDATION_METRIC_s2t] > best_valid_s2t_metric:
        model.set_save_s2t_path(exp_path + "best_s2t_params.bin")
        model.save_best_s2t()
        best_valid_s2t_metric = to_log[VALIDATION_METRIC_s2t]
        best_valid_csls_s2t_metric = to_log[VALIDATION_METRIC_SUP_s2t]
        best_valid_density_s2t_metric = to_log[DENSITY_METRIC_SUP_s2t]

    # Target-to-source direction: same policy, separate checkpoint.
    if to_log[VALIDATION_METRIC_t2s] > best_valid_t2s_metric:
        model.set_save_t2s_path(exp_path + "best_t2s_params.bin")
        model.save_best_t2s()
        best_valid_t2s_metric = to_log[VALIDATION_METRIC_t2s]
        best_valid_csls_t2s_metric = to_log[VALIDATION_METRIC_SUP_t2s]
        best_valid_density_t2s_metric = to_log[DENSITY_METRIC_SUP_t2s]
# --- End-of-epoch logging, evaluation, and learning-rate update. ---
# NOTE(review): `stats`, `stats_str`, `n_epoch`, `evaluator`, `trainer`, and
# `logger` are defined outside the visible region — confirm against full file.

# Average each tracked statistic over the epoch; skip empty trackers.
stats_log = ['%s: %.4f' % (label, np.mean(stats[key]))
             for key, label in stats_str
             if stats[key]]
# stats_log.append('%i samples/s' % int(n_words_proc / (time.time() - tic)))
logger.info(('%06i - ' % n_epoch) + ' - '.join(stats_log))

# Reset the per-epoch trackers (deliberately disabled lines kept for reference).
tic = time.time()
# n_words_proc = 0
for key, _ in stats_str:
    del stats[key][:]

# Embeddings / discriminator evaluation.
to_log = OrderedDict({'n_epoch': n_epoch})
evaluator.all_eval(to_log)
evaluator.eval_dis(to_log)


def default(o):
    """json.dumps fallback serializer: convert numpy int64 to plain int.

    Raises TypeError for anything else, as the json module expects.
    """
    if isinstance(o, np.int64):
        return int(o)
    raise TypeError


# json.dumps({'value': np.int64(42)}, default=default)
# JSON log / save best model / end of epoch
# logger.info("__log__:%s" % json.dumps(to_log, default=default))
# trainer.save_best(to_log, VALIDATION_METRIC)
# logger.info('End of epoch %i.\n\n' % n_epoch)

# Update the learning rate (trainer may stop the run if it gets too small).
trainer.update_lr(to_log, VALIDATION_METRIC)