def main(params):
    mp = torch.multiprocessing.get_context('spawn')
    error_queue = mp.SimpleQueue()
    error_handler = ErrorHandler(error_queue)

    port = random.randint(10000, 20000)
    params.init_method = 'tcp://localhost:{port}'.format(port=port)

    processes = []
    data = load_data(params, 'train')
    test_data = load_data(params, 'test')
    for rank in range(params.gpu_num):
        params.rank = rank
        p = mp.Process(target=init_processes,
                       args=(params, data, test_data, run, error_queue),
                       daemon=True)
        p.start()
        error_handler.add_child(p.pid)
        processes.append(p)
    for p in processes:
        p.join()
def main(params):
    data = load_data(params, name='train')
    test_data = load_data(params, name='test')
    encoder, decoder, num_updates = build_mt_model(params)
    trainer = TrainerMT(encoder, decoder, data, test_data, params, num_updates)
    for i in range(trainer.epoch, params.max_epoch):
        logger.info("==== Starting epoch %i ... ====" % trainer.epoch)
        trainer.train_epoch()
        tqdm.write('Finished epoch %i.' % i)
def run(params, error_queue):
    try:
        # start training
        logger.info(params)
        if not torch.cuda.is_available():
            raise NotImplementedError('Training on CPU is not supported')
        torch.cuda.set_device(params.rank)
        torch.manual_seed(params.seed)
        logger.info('Process %s is now running on gpu:%s',
                    os.getpid(), torch.cuda.current_device())
        data = load_data(params, 'train')
        print(data.get_iterator(shuffle=True, group_by_size=True,
                                partition=params.rank))
        encoder, decoder, num_updates = build_mt_model(params)
        trainer = TrainerMT(encoder, decoder, data, params, num_updates)
        for i in range(trainer.epoch, params.max_epoch):
            logger.info("==== Starting epoch %i ... ====" % trainer.epoch)
            trainer.train_epoch()
            tqdm.write('Finished epoch %i.' % i)
    except KeyboardInterrupt:
        pass  # killed by parent, do nothing
    except Exception:
        # propagate exception to parent process, keeping the original traceback
        import traceback
        error_queue.put((params.rank, traceback.format_exc()))
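# `ErrorHandler` is used by the spawn launcher above but is not defined in this
# file. The sketch below shows what the parent-side handler is assumed to do,
# given how it is called (`add_child`, and workers pushing (rank, traceback)
# onto the error queue): the class body is an assumption, not the original
# implementation. On Unix, SIGINT surfaces as the KeyboardInterrupt that the
# worker's `run()` deliberately swallows.
import os
import signal
import threading


class ErrorHandler:
    """Minimal sketch (assumption): listen on an error queue and kill all
    registered child processes as soon as one of them reports a traceback."""

    def __init__(self, error_queue):
        self.error_queue = error_queue
        self.children_pids = []
        # daemon thread so it does not block interpreter shutdown
        t = threading.Thread(target=self._listen, daemon=True)
        t.start()

    def add_child(self, pid):
        self.children_pids.append(pid)

    def _listen(self):
        # blocks until a worker puts (rank, traceback_str) on the queue
        rank, original_trace = self.error_queue.get()
        for pid in self.children_pids:
            os.kill(pid, signal.SIGINT)  # children catch KeyboardInterrupt
        raise RuntimeError(
            "Process %i terminated with:\n%s" % (rank, original_trace))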
def clts_xencoder_main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # load data
    data = load_data(params)

    # cross-lingual text summarization encoder / text summarization decoder
    xencoder, ts_encoder, ts_decoder = build_clts_xencoder_model(
        params, data['dico'])
    emb_weights = xencoder.embeddings.weight.data.cpu().numpy()

    # with open(f'./dumped/{params.exp_name}/{params.exp_id}/embeddings.tsv', 'w', encoding='utf-8') as out:
    metadata = open("./pretrained_models/mlm_xnli15_1024/xlm15-metadata.txt",
                    "w", encoding='utf-8')
    embeddings = open("./pretrained_models/mlm_xnli15_1024/embeddings.tsv",
                      "w", encoding='utf-8')
    with open("./pretrained_models/mlm_xnli15_1024/token_embeddings.tsv",
              "w", encoding='utf-8') as out:
        for i in range(len(data['dico'])):
            word = data['dico'][i]
            emb = '\t'.join([str(v) for v in emb_weights[i]])
            out.write(f"{word}\t{emb}\n")
            metadata.write(f"{word}\n")
            embeddings.write(f"{emb}\n")
    metadata.close()
    embeddings.close()
def main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecGenerator(trainer, data, params)

    # evaluation
    if params.eval_only:
        evaluator.generate(trainer)
        exit()
def main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    hidden_size = 1024
    encoder = EncoderRNN.EncoderRNN(params.n_words, hidden_size).cuda()
    decoder = Attention_decoder.Attention_decoder(hidden_size, params.n_words,
                                                  dropout_p=0.1).cuda()
    trainer = LSTM_Trainer(encoder, decoder, data, params)
    evaluator = LSTM_Evaluator(trainer, data, params)

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for count in range(params.max_epoch):
        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)
        trainer.n_sentences = 0
        while trainer.n_sentences < trainer.epoch_size:
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.try_lstm(lang1, lang2, params.lambda_mt)
        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)

    # save the output of softmax
    # NOTE: clm_temp, ml_temp and bt_temp are not defined in this snippet;
    # they are assumed to come from elsewhere in the original script.
    trainer.save_softmax_output(clm_temp, 'clm_temp')
    trainer.save_softmax_output(ml_temp, 'ml_temp')
    trainer.save_softmax_output(bt_temp, 'bt_temp')
def perform_translation(input_file_path, translation_directory,
                        cloze_train_path, question_train_path,
                        fasttext_vectors_path, checkpoint_path):
    params = get_params(
        exp_name='translation',
        dump_path=translation_directory,
        cloze_train_path=cloze_train_path,
        question_train_path=question_train_path,
        cloze_test_path=input_file_path,
        fasttext_vectors_path=fasttext_vectors_path,
        checkpoint_path=checkpoint_path,
    )

    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    data = load_data(params, mono_only=True)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)

    with torch.no_grad():
        lang1, lang2 = 'cloze', 'question'
        evaluator.encoder.eval()
        evaluator.decoder.eval()
        lang1_id = evaluator.params.lang2id[lang1]
        lang2_id = evaluator.params.lang2id[lang2]

        translations = []
        dataset = evaluator.data['mono'][lang1]['test']
        dataset.batch_size = params.batch_size
        for i, (sent1, len1) in enumerate(
                dataset.get_iterator(shuffle=False, group_by_size=False)()):
            encoded = evaluator.encoder(sent1.cuda(), len1, lang1_id)
            sent2_, len2_, _ = evaluator.decoder.generate(encoded, lang2_id)
            lang1_text = convert_to_text(sent1, len1, evaluator.dico[lang1],
                                         lang1_id, evaluator.params)
            lang2_text = convert_to_text(sent2_, len2_, evaluator.dico[lang2],
                                         lang2_id, evaluator.params)
            translations += zip(lang1_text, lang2_text)

    # export sentences to hypothesis file and restore BPE segmentation
    out_name = os.path.join(translation_directory, 'output_translations.txt')
    with open(out_name, 'w', encoding='utf-8') as f:
        f.write('\n'.join(['\t'.join(st) for st in translations]) + '\n')
    restore_segmentation(out_name)
    return out_name
def load(params):
    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # initialize experiment / load data / build model
    data = load_data(params)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)
    return trainer, evaluator
def clts_elmo_main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # load data
    data = load_data(params)

    # cross-lingual text summarization encoder / text summarization decoder
    elmo, ts_encoder, ts_decoder = build_clts_elmo_model(params, data['dico'])
    emb_weights = elmo.language_model.embeddings.weight.data.cpu().numpy()
    with open(f'./dumped/{params.exp_name}/{params.exp_id}/embeddings.tsv',
              'w', encoding='utf-8') as out:
        for i in range(len(data['dico'])):
            word = data['dico'][i]
            emb = '\t'.join([str(v) for v in emb_weights[i]])
            out.write(f"{word}\t{emb}\n")
def inference(params):
    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # initialize experiment / load data / build model
    logger = initialize_exp(params)
    data = load_data(params)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_best_model()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)

    # evaluation mode
    evaluator.eval_inference()
    exit()
def main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    _lang1, _lang2 = ((params.langs[0], params.langs[1])
                      if params.langs[0] < params.langs[1]
                      else (params.langs[1], params.langs[0]))
    dataset = data['para'][(_lang1, _lang2)]['test']
    print(params.n_words)
    print("ref_paths" + str(params.ref_paths))
    for i, ((x1, len1, id1, lenid1), (x2, len2, id2, lenid2)) in enumerate(
            dataset.get_iterator(shuffle=False, group_by_size=True,
                                 n_sentences=-1, tokens_per_batch=2000)):
        print('x2' + str(x2.size()))
        print("len2[None] - 1" + str(len2[None] - 1) + " " + str(len2[None]))
        print(str(len2[0]))
        print('len2' + str(len2))
        alen = torch.arange(len2.max(), dtype=torch.long, device=len2.device)
        # do not predict anything given the last target word
        pred_mask = alen[:, None] < len2[None] - 1
        print("pred_mask" + str(pred_mask))
        print(str(pred_mask.size()))
        y = x2[1:].masked_select(pred_mask[:-1])
        print("yyyy" + str(y))
        print(str(y.size()))
        assert len(y) == (len2 - 1).sum().item()
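# The broadcasting trick above builds a (max_len, batch_size) prediction mask
# from a vector of sentence lengths. A minimal self-contained illustration with
# toy values (independent of the training data above):
import torch

# toy batch: two target sequences of lengths 4 and 2, padded to length 4
len2 = torch.tensor([4, 2])
alen = torch.arange(len2.max(), dtype=torch.long)

# position t is predicted iff t < length - 1, i.e. nothing is predicted
# once the last target word has been produced
pred_mask = alen[:, None] < len2[None] - 1
print(pred_mask)
# tensor([[ True,  True],
#         [ True, False],
#         [ True, False],
#         [False, False]])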
def main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    params.lgs = lgs = params.lgs.split("-")
    if len(lgs) == 1:
        lgs.append(lgs[0])

    # language model training
    for _ in range(params.max_epoch):
        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)
        trainer.n_sentences = 0
        while trainer.n_sentences < trainer.epoch_size:
            # replace the original MLM steps
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                if params.do_meta_update:
                    trainer.meta_mlm_step(lang1)
                else:
                    trainer.mlm_step(lang1, lang2, params.lambda_mlm)
            trainer.iter()
        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
def main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # float16
    if params.fp16:
        assert torch.backends.cudnn.enabled
        if params.encoder_only:
            model = network_to_half(model)
        else:
            encoder = network_to_half(encoder)
            decoder = network_to_half(decoder)

    # distributed
    # if params.multi_gpu:
    #     logger.info("Using nn.parallel.DistributedDataParallel ...")
    #     if params.encoder_only:
    #         model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)
    #     else:
    #         encoder = apex.parallel.DistributedDataParallel(encoder, delay_allreduce=True)
    #         decoder = apex.parallel.DistributedDataParallel(decoder, delay_allreduce=True)

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):
        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)
        trainer.n_sentences = 0
        while trainer.n_sentences < trainer.epoch_size:
            # CLM steps
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                trainer.clm_step(lang1, lang2, params.lambda_clm)

            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # parallel classification steps
            for lang1, lang2 in shuf_order(params.pc_steps, params):
                trainer.pc_step(lang1, lang2, params.lambda_pc)

            # denoising auto-encoder steps
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # mass prediction steps
            for lang in shuf_order(params.mass_steps):
                trainer.mass_step(lang, params.lambda_mass)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            # back-parallel steps
            for lang1, lang2 in shuf_order(params.bmt_steps, params):
                trainer.bmt_step(lang1, lang2, params.lambda_bmt)

            trainer.iter()

        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
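# `shuf_order` is called by every training loop in these scripts but is not
# defined here. In XLM-style code it visits the configured step tuples in a
# random order each pass; the sketch below is an assumption about its minimal
# behavior, not the original helper (which may also weight languages by their
# sampling probabilities).
import random


def shuf_order(steps, params=None):
    """Minimal sketch (assumption): return a shuffled copy of the list of
    step tuples, e.g. [('en', None), ('en', 'fr'), ...]."""
    steps = list(steps)
    random.shuffle(steps)
    return steps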
def main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # build model (reload-model options are handled in here)
    if params.encoder_only:
        model = build_model(params, data['dico'])
        if params.use_adapters:
            logger.info("Using adapters")
            # freeze everything except the adapters ...
            for param in model.named_parameters():
                if param[0][:8] != "adapters":
                    param[1].requires_grad = False
            # ... and the embeddings, position embeddings, prediction layer
            # and embedding layer norm
            for param_name, param in model.embeddings.named_parameters():
                param.requires_grad = True
            for param_name, param in model.position_embeddings.named_parameters():
                param.requires_grad = True
            for param_name, param in model.pred_layer.named_parameters():
                param.requires_grad = True
            for param in model.layer_norm_emb.parameters():
                param.requires_grad = True
            for param in model.named_parameters():
                logger.info(param[0] + ' requires grad = ' + str(param[1].requires_grad))
    else:
        encoder, decoder = build_model(params, data['dico'])

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
        logger.info("Number of trainable parameters (encoder): %i" % sum(
            [p.numel() for p in trainer.model.parameters() if p.requires_grad]))
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)
        logger.info("Number of trainable parameters (encoder): %i" %
                    sum([p.numel() for p in encoder.parameters() if p.requires_grad]))
        logger.info("Number of trainable parameters (decoder): %i" %
                    sum([p.numel() for p in decoder.parameters() if p.requires_grad]))

    # evaluation
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for epoch in range(params.max_epoch):
        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)
        trainer.n_sentences = 0
        while trainer.n_sentences < trainer.epoch_size:
            # CLM steps
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                trainer.clm_step(lang1, lang2, params.lambda_clm)

            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # parallel classification steps
            for lang1, lang2 in shuf_order(params.pc_steps, params):
                trainer.pc_step(lang1, lang2, params.lambda_pc)

            # denoising auto-encoder steps
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            trainer.iter()

        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
def main(params):
    # check_data_params(params)
    check_model_params(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # load data
    data = load_data(params)
    # check_vocab(data)

    # build model
    if params.encoder_only:
        model = build_model(params, data['source_dico'])
    else:
        encoder, decoder = build_model(params, data['source_dico'],
                                       data['target_dico'])

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # language model training
    for _ in range(params.max_epoch):
        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)
        trainer.n_iter = 0
        while trainer.n_iter < trainer.epoch_size:
            if params.cs_step:
                trainer.content_selection_step(params.lambda_cs)
            if params.sm_step:
                trainer.summarization_step(params.lambda_sm)
            if params.lm_step:
                trainer.clm_step(params.lambda_lm)
            trainer.iter()
        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch()
def main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    meta_params = copy.deepcopy(params).meta_params
    params.meta_params = "..."  # too long to log
    logger = initialize_exp(params)
    params.meta_params = meta_params

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # TODO: choose a proper params.n_words (for now we take the one from the
    # first task that has this parameter). If all task data are built on the
    # same vocabulary, these parameters are identical across tasks, so any
    # choice works.
    p = params.meta_params[data['key']]

    # build model
    if params.encoder_only:
        model = build_model(params=p, dico=data['dico'])
    else:
        encoder, decoder = build_model(params=p, dico=data['dico'])

    # TODO: choose proper pad_index, eos_index, ... (for now we take the ones
    # from the first task). Same remark as above: with a shared vocabulary
    # these values are identical across tasks.
    params.pad_index = p.pad_index
    params.eos_index = p.eos_index

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        if not params.meta_learning:
            for k, v in scores.items():
                logger.info("%s -> %.6f" % (k, v))
        else:
            for lgs in params.meta_params.keys():
                logger.info("============ task : %s" % lgs)
                for k, v in scores[lgs].items():
                    if k != "epoch":
                        logger.info("%s -> %.6f" % (k, v))
            logger.info("============ all")
            for k, v in scores.items():
                if k not in (list(params.meta_params.keys()) + ['epoch']):
                    logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):
        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)

        if not params.meta_learning:
            trainer.n_sentences = 0
            while trainer.n_sentences < trainer.epoch_size:
                # CLM steps
                for lang1, lang2 in shuf_order(params.clm_steps, params):
                    trainer.clm_step(lang1, lang2, params.lambda_clm)

                # MLM steps (also includes TLM if lang2 is not None)
                for lang1, lang2 in shuf_order(params.mlm_steps, params):
                    trainer.mlm_step(lang1, lang2, params.lambda_mlm)

                # parallel classification steps
                for lang1, lang2 in shuf_order(params.pc_steps, params):
                    trainer.pc_step(lang1, lang2, params.lambda_pc)

                # denoising auto-encoder steps
                for lang in shuf_order(params.ae_steps):
                    trainer.mt_step(lang, lang, params.lambda_ae)

                # machine translation steps
                for lang1, lang2 in shuf_order(params.mt_steps, params):
                    trainer.mt_step(lang1, lang2, params.lambda_mt)

                # back-translation steps
                for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                    trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

                trainer.iter()
        else:
            # ours
            trainer.n_sentences = {}

            # Here we build language lists for each of our meta-tasks: for two
            # language lists l1 and l2, the objective is applied to the pair
            # (l1[i], l2[i]) for each index i of the two lists.
            lang1_dic, lang2_dic, lang3_dic = {}, {}, {}

            # In the meta-learning case we have one (meta-)data dictionary per
            # (meta-)task, so the keys are the languages kept by each task.
            data_keys_dic = {}

            # equivalent to "for task in list of tasks" in the original
            # algorithm, except that here we prepare all the tasks beforehand
            for lgs in params.meta_params.keys():
                trainer.n_sentences[lgs] = 0

                # CLM
                try:
                    lang1_dic['clm_step']
                except KeyError:
                    lang1_dic['clm_step'], lang2_dic['clm_step'], data_keys_dic['clm_step'] = [], [], []
                for lang1, lang2 in shuf_order(params.meta_params[lgs].clm_steps, params):
                    lang1_dic['clm_step'].append(lang1)
                    lang2_dic['clm_step'].append(lang2)
                    data_keys_dic['clm_step'].append(lgs)

                # MLM
                try:
                    lang1_dic['mlm_step']
                except KeyError:
                    lang1_dic['mlm_step'], lang2_dic['mlm_step'], data_keys_dic['mlm_step'] = [], [], []
                for lang1, lang2 in shuf_order(params.meta_params[lgs].mlm_steps, params):
                    lang1_dic['mlm_step'].append(lang1)
                    lang2_dic['mlm_step'].append(lang2)
                    data_keys_dic['mlm_step'].append(lgs)

                # parallel classification
                try:
                    lang1_dic['pc_step']
                except KeyError:
                    lang1_dic['pc_step'], lang2_dic['pc_step'], data_keys_dic['pc_step'] = [], [], []
                for lang1, lang2 in shuf_order(params.meta_params[lgs].pc_steps, params):
                    lang1_dic['pc_step'].append(lang1)
                    lang2_dic['pc_step'].append(lang2)
                    data_keys_dic['pc_step'].append(lgs)

                # denoising auto-encoder
                try:
                    lang1_dic['ae_step']
                except KeyError:
                    lang1_dic['ae_step'], data_keys_dic['ae_step'] = [], []
                for lang1 in shuf_order(params.meta_params[lgs].ae_steps):
                    lang1_dic['ae_step'].append(lang1)
                    data_keys_dic['ae_step'].append(lgs)

                # machine translation
                try:
                    lang1_dic['mt_step']
                except KeyError:
                    lang1_dic['mt_step'], lang2_dic['mt_step'], data_keys_dic['mt_step'] = [], [], []
                for lang1, lang2 in shuf_order(params.meta_params[lgs].mt_steps, params):
                    lang1_dic['mt_step'].append(lang1)
                    lang2_dic['mt_step'].append(lang2)
                    data_keys_dic['mt_step'].append(lgs)

                # back-translation
                try:
                    lang1_dic['bt_step']
                except KeyError:
                    lang1_dic['bt_step'], lang2_dic['bt_step'], lang3_dic['bt_step'], data_keys_dic['bt_step'] = [], [], [], []
                for lang1, lang2, lang3 in shuf_order(params.meta_params[lgs].bt_steps):
                    lang1_dic['bt_step'].append(lang1)
                    lang2_dic['bt_step'].append(lang2)
                    lang3_dic['bt_step'].append(lang3)
                    data_keys_dic['bt_step'].append(lgs)

            flag = True

            # equivalent to "while not done do" in the original algorithm
            while flag:
                # CLM steps
                # print("clm_step", flag)
                a = trainer.clm_step(lang1_dic['clm_step'], lang2_dic['clm_step'],
                                     params.lambda_clm, data_keys_dic['clm_step'])

                # MLM steps (also includes TLM if lang2 is not None)
                # print("mlm_step", flag)
                b = trainer.mlm_step(lang1_dic['mlm_step'], lang2_dic['mlm_step'],
                                     params.lambda_mlm, data_keys_dic['mlm_step'])

                # parallel classification steps
                c = trainer.pc_step(lang1_dic['pc_step'], lang2_dic['pc_step'],
                                    params.lambda_pc, data_keys_dic['pc_step'])

                if isinstance(trainer, EncDecTrainer):
                    # denoising auto-encoder steps
                    d = trainer.mt_step(lang1_dic['ae_step'], lang1_dic['ae_step'],
                                        params.lambda_ae, data_keys_dic['ae_step'])

                    # machine translation steps
                    e = trainer.mt_step(lang1_dic['mt_step'], lang2_dic['mt_step'],
                                        params.lambda_mt, data_keys_dic['mt_step'])

                    # back-translation steps
                    f = trainer.bt_step(lang1_dic['bt_step'], lang2_dic['bt_step'],
                                        lang3_dic['bt_step'], params.lambda_bt,
                                        data_keys_dic['bt_step'])

                    # TODO: handle this more cleanly
                    if (not a) and (not b) and (not c) and (not d) and (not e) and (not f):
                        flag = False  # end of epoch
                    else:
                        flag = True
                else:
                    # TODO: handle this more cleanly
                    if (not a) and (not b) and (not c):
                        flag = False  # end of epoch
                    else:
                        flag = True

                trainer.iter()

        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        if not params.meta_learning:
            for k, v in scores.items():
                logger.info("%s -> %.6f" % (k, v))
        else:
            for lgs in params.meta_params.keys():
                logger.info("============ task : %s" % lgs)
                for k, v in scores[lgs].items():
                    if k != "epoch":
                        logger.info("%s -> %.6f" % (k, v))
            logger.info("============ all")
            for k, v in scores.items():
                if k not in (list(params.meta_params.keys()) + ['epoch']):
                    logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)

        # ours
        logger.info("============ garbage collector collecting %d ..." % gc.collect())
def main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # # initialize SLURM signal handler for time limit / pre-emption
    # init_signal_handler()

    # load data
    data = load_data(params)

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # # float16
    # if params.fp16:
    #     assert torch.backends.cudnn.enabled
    #     if params.encoder_only:
    #         model = network_to_half(model)
    #     else:
    #         encoder = network_to_half(encoder)
    #         decoder = network_to_half(decoder)

    # # distributed
    # if params.multi_gpu:
    #     logger.info("Using nn.parallel.DistributedDataParallel ...")
    #     if params.fp16:
    #         if params.encoder_only:
    #             model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)
    #         else:
    #             encoder = apex.parallel.DistributedDataParallel(encoder, delay_allreduce=True)
    #             decoder = apex.parallel.DistributedDataParallel(decoder, delay_allreduce=True)
    #     else:
    #         if params.encoder_only:
    #             model = nn.parallel.DistributedDataParallel(model, device_ids=[params.local_rank], output_device=params.local_rank, broadcast_buffers=True)
    #         else:
    #             encoder = nn.parallel.DistributedDataParallel(encoder, device_ids=[params.local_rank], output_device=params.local_rank, broadcast_buffers=True)
    #             decoder = nn.parallel.DistributedDataParallel(decoder, device_ids=[params.local_rank], output_device=params.local_rank, broadcast_buffers=True)

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # # evaluation
    # if params.eval_only:
    #     scores = evaluator.run_all_evals(trainer)
    #     for k, v in scores.items():
    #         logger.info("%s -> %.6f" % (k, v))
    #     logger.info("__log__:%s" % json.dumps(scores))
    #     exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):
        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)
        trainer.n_sentences = 0
        trainer.n_images = 0
        while trainer.n_sentences < trainer.epoch_size or trainer.n_images < trainer.epoch_size:
            # CLM steps
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                trainer.clm_step(lang1, lang2, params.lambda_clm)

            # MLM steps (also includes TLM if lang2 is not None)
            # shuf_order's result could be ['fr', 'fr'], ['en', 'fr'],
            # ['fr', 'en'] or ['en', 'en']
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # parallel classification steps
            for lang1, lang2 in shuf_order(params.pc_steps, params):
                trainer.pc_step(lang1, lang2, params.lambda_pc)

            # image-language pretraining steps
            trainer.ipm_step("coco36", params.lambda_ipm)

            # CMLM steps
            for m1, m2 in shuf_order(params.cmlm_steps, params):
                trainer.cmlm_step(m1, m2, params.lambda_cmlm)

            # denoising auto-encoder steps
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            trainer.iter()

        logger.info("============ End of epoch %i ============" % trainer.epoch)
def clts_elmo_main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # cross-lingual text summarization encoder / text summarization decoder
    elmo, ts_encoder, ts_decoder = build_clts_elmo_model(params, data['dico'])
    trainer = XLMCLTSEncDecTrainer(elmo, ts_encoder, ts_decoder, data, params)
    evaluator = XLMCLTSEncDecEvaluator(trainer, data, params)

    # evaluation
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):
        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)
        trainer.n_sentences = 0
        while trainer.n_sentences < trainer.epoch_size:
            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)
            trainer.iter()
        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
def main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # float16
    if params.fp16:
        assert torch.backends.cudnn.enabled
        if params.encoder_only:
            model = network_to_half(model)
        else:
            encoder = network_to_half(encoder)
            decoder = network_to_half(decoder)

    # distributed
    if params.multi_gpu:
        logger.info("Using nn.parallel.DistributedDataParallel ...")
        if params.encoder_only:
            model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)
        else:
            encoder = apex.parallel.DistributedDataParallel(encoder, delay_allreduce=True)
            decoder = apex.parallel.DistributedDataParallel(decoder, delay_allreduce=True)

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation
    if params.eval_only:
        logger.info('Evaluating and saving new result file')
        scores = evaluator.run_all_evals_match(trainer)
        for k, v in scores.items():
            if 'likelihood' in k:
                logger.info("%s -> %.6f" % (k, np.mean(v)))
            elif 'scores' in k:
                logger.info("%s -> %s" % (k, v.shape))
            else:
                logger.info("%s -> %.6f" % (k, v))
        np.savetxt(os.path.join(params.dump_path, 'best-fwd-prediction.txt'),
                   scores['%s_%s_fwd_scores' % ('test', params.mass_steps[0])],
                   fmt='%f')
        for match in params.match_files.split(','):
            np.savetxt(os.path.join(params.dump_path,
                                    'best-match-prediction{}.txt'.format(match.split('.')[-1])),
                       scores['%s_%s_sentence_likelihood' % (match, params.mass_steps[0])],
                       fmt='%f')
        labels = np.loadtxt(os.path.join(params.data_path, 'labels'))
        targets = np.loadtxt(os.path.join(params.data_path, 'suffix'))
        preds = scores['%s_%s_sentence_likelihood' % ('match', params.mass_steps[0])]
        results = pd.DataFrame({'label': labels, 'target': targets, 'pred': preds})
        results.to_pickle(os.path.join(params.dump_path, 'best-matching-prediction.pkl'))
        # logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):
        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)
        trainer.n_sentences = 0
        while trainer.n_sentences < trainer.epoch_size:
            # mass prediction steps
            for lang in shuf_order(params.mass_steps):
                trainer.mass_step(lang, params.lambda_mass)
            trainer.iter()
        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_epoch_evals_match(trainer)

        # print / JSON log
        for k, v in scores.items():
            if 'likelihood' in k:
                logger.info("%s -> %.6f" % (k, np.mean(v)))
            elif 'scores' in k:
                logger.info("%s -> %s" % (k, v.shape))
            else:
                logger.info("%s -> %.6f" % (k, v))
        # if params.is_master:
        #     logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
"--length_penalty", type=float, default=1.0, help="Length penalty: <1.0 favors shorter, >1.0 favors longer sentences") params = parser.parse_args() if __name__ == '__main__': # check parameters assert params.exp_name check_all_data_params(params) check_mt_model_params(params) # initialize experiment / load data / build model logger = initialize_exp(params) data = load_data(params) encoder, decoder, discriminator, lm = build_mt_model(params, data) # initialize trainer / reload checkpoint / initialize evaluator encoder = nn.DataParallel(encoder, device_ids=[0, 1, 2], output_device=[0, 1, 2]) decoder = nn.DataParallel(decoder, device_ids=[0, 1, 2], output_device=[0, 1, 2]) if discriminator != None: discriminator = nn.DataParallel(discriminator, device_ids=[0, 1, 2], output_device=[0, 1, 2]) if lm != None: lm = nn.DataParallel(lm, device_ids=[0, 1, 2], output_device=[0, 1, 2])
def main(args):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(args, make_communication_groups=False)

    # initialize the experiment
    logger, training_stats = initialize_exp(args, 'epoch', 'iter', 'prec',
                                            'loss', 'prec_val', 'loss_val')

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    main_data_path = args.data_path
    if args.debug:
        args.data_path = os.path.join(main_data_path, 'val')
    else:
        args.data_path = os.path.join(main_data_path, 'train')
    train_dataset = load_data(args)

    args.data_path = os.path.join(main_data_path, 'val')
    val_dataset = load_data(args)

    # prepare the different data transformations
    tr_val, tr_train = get_data_transformations()
    train_dataset.transform = tr_train
    val_dataset.transform = tr_val
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=True,
    )

    # build model skeleton
    fix_random_seeds(args.seed)
    nmb_classes = 205 if 'places' in args.data_path else 1000
    model = model_factory(args, relu=True, num_classes=nmb_classes)

    # load pretrained weights
    load_pretrained(model, args)

    # merge sobel layers with the first convolution layer
    if args.sobel2RGB:
        sobel2RGB(model)

    # re-initialize the classifier
    if hasattr(model.body, 'classifier'):
        for m in model.body.classifier.modules():
            if isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.fill_(0.1)

    # distributed training wrapper
    model = to_cuda(model, [args.gpu_to_work_on], apex=True)
    logger.info('model to cuda')

    # set optimizer
    optimizer = sgd_optimizer(model, args.lr, args.wd)

    # variables to fetch from the checkpoint when reloading
    to_restore = {'epoch': 0, 'start_iter': 0}

    # restart from checkpoint
    restart_from_checkpoint(
        args,
        run_variables=to_restore,
        state_dict=model,
        optimizer=optimizer,
    )
    args.epoch = to_restore['epoch']
    args.start_iter = to_restore['start_iter']

    if args.evaluate:
        validate_network(val_loader, [model], args)
        return

    # supervised training
    for _ in range(args.epoch, args.nepochs):
        logger.info("============ Starting epoch %i ... ============" % args.epoch)
        fix_random_seeds(args.seed + args.epoch)

        # train the network for one epoch
        adjust_learning_rate(optimizer, args)
        scores = train_network(args, model, optimizer, train_dataset)
        scores_val = validate_network(val_loader, [model], args)

        # save training statistics
        logger.info(scores + scores_val)
        training_stats.update(scores + scores_val)
def main(args):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(args, make_communication_groups=False)

    # initialize the experiment
    logger, training_stats = initialize_exp(args, 'epoch', 'iter', 'prec',
                                            'loss', 'prec_val', 'loss_val')

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    if 'pascal' not in args.data_path:
        main_data_path = args.data_path
        args.data_path = os.path.join(main_data_path, 'train')
        train_dataset = load_data(args)
    else:
        train_dataset = VOC2007_dataset(args.data_path, split=args.split)
        args.test = 'val' if args.split == 'train' else 'test'

    if 'pascal' not in args.data_path:
        if args.cross_valid is None:
            args.data_path = os.path.join(main_data_path, 'val')
            val_dataset = load_data(args)
    else:
        val_dataset = VOC2007_dataset(args.data_path, split=args.test)

    if args.cross_valid is not None:
        kfold = KFold(per_target(train_dataset.imgs), args.cross_valid, args.kfold)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            sampler=kfold.train,
            num_workers=args.workers,
            pin_memory=True)
        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=args.batch_size,
            sampler=kfold.val,
            num_workers=args.workers)
    else:
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=True)
        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers)

    # prepare the different data transformations
    tr_val, tr_train = get_data_transformations()
    train_dataset.transform = tr_train
    val_dataset.transform = tr_val

    # build model skeleton
    fix_random_seeds(args.seed)
    model = model_factory(args.arch, args.sobel)
    load_pretrained(model, args)

    # keep only conv layers
    model.body.classifier = None
    model.conv = args.conv

    if 'places' in args.data_path:
        nmb_classes = 205
    elif 'pascal' in args.data_path:
        nmb_classes = 20
    else:
        nmb_classes = 1000
    reglog = RegLog(args.arch, nmb_classes, args.conv)

    # distributed training wrapper
    model = to_cuda(model, [args.gpu_to_work_on], apex=False)
    reglog = to_cuda(reglog, [args.gpu_to_work_on], apex=False)
    logger.info('model to cuda')

    # set optimizer
    optimizer = sgd_optimizer(reglog, args.lr, args.wd)

    # variables to fetch from the checkpoint when reloading
    to_restore = {'epoch': 0, 'start_iter': 0}

    # restart from checkpoint
    restart_from_checkpoint(
        args,
        run_variables=to_restore,
        state_dict=reglog,
        optimizer=optimizer,
    )
    args.epoch = to_restore['epoch']
    args.start_iter = to_restore['start_iter']

    model.eval()
    reglog.train()

    # linear training
    for _ in range(args.epoch, args.nepochs):
        logger.info("============ Starting epoch %i ... ============" % args.epoch)

        # train the network for one epoch
        scores = train_network(args, model, reglog, optimizer, train_loader)

        if 'pascal' not in args.data_path:
            scores_val = validate_network(val_loader, [model, reglog], args)
        else:
            scores_val = evaluate_pascal(val_dataset, [model, reglog])
        scores = scores + scores_val

        # save training statistics
        logger.info(scores)
        training_stats.update(scores)
def main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)
    print(data)

    # build model
    if params.encoder_only:
        model = build_model(params)
    else:
        encoder, decoder = build_model(params)

    # build trainer, reload potential checkpoints / build evaluator
    trainer = XTrainer(model, data, params)
    evaluator = XEvaluator(trainer, data, params)

    # evaluation
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):
        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)
        trainer.n_sentences = 0
        while trainer.n_sentences < trainer.epoch_size:
            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                if params.is_understanding:
                    trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            for lang1, lang2 in shuf_order(params.text_steps, params):
                if params.is_ntg:
                    trainer.ntg_step(lang1, None, params.lambda_mlm)

            # cross-modal caption steps
            for lang1, lang2 in shuf_order(params.cross_modal_steps, params):
                if params.is_mt:
                    trainer.mt_ic_step(lang1, lang2, params.lambda_ic)
                else:
                    trainer.ic_step(lang1, lang2, params.lambda_ic)
                if params.is_freelb:
                    trainer.free_lb_ic_step(lang1, lang2, params.lambda_ic)

            for lang1, lang2 in shuf_order(params.mlm_steps, params, n=3):
                if params.is_generation:
                    trainer.bart_mlm_step(lang1, lang2, params.lambda_imlm)
                    trainer.bart_mass_step(lang1, lang2, params.lambda_imlm)

            for lang1, lang2 in shuf_order(params.cross_ae_steps, params):
                trainer.bart_img_step(lang1, lang2, params.lambda_ida)

            for lang1, lang2 in shuf_order(params.cross_rel_steps, params):
                if params.is_pretrain:
                    trainer.pretrain_rel_step(lang1, lang2)
                else:
                    if params.is_slide:
                        trainer.slide_step(lang1, lang2, params.lambda_t2i)
                    else:
                        # supports multiple languages
                        trainer.rel_step(lang1, lang2, params.lambda_t2i,
                                         params.lambda_i2t)

            # for lang1, lang2 in shuf_order(params.cross_mlm_steps, params):
            #     trainer.mlm_step(lang1, lang2, params.lambda_mlm)
            #
            # for lang1, lang2 in shuf_order(params.cross_mrm_steps, params):
            #     trainer.mrm_step(lang1, lang2, params.lambda_mrm)
            #
            # for lang1, lang2 in shuf_order(params.cross_mrfr_steps, params):
            #     trainer.mrfr_step(lang1, lang2, params.lambda_mrfr)

            trainer.iter()

        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))

        evaluate_results = []
        import os
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))
            evaluate_results.append(json.dumps(scores))
            with open(os.path.join(params.dump_path,
                                   "epoch_{0}.eval_log".format(trainer.epoch)),
                      'w') as writer:
                for line in evaluate_results:
                    writer.write(line + '\n')

        # end of epoch
        trainer.save_best_model(scores)
        if trainer.epoch % params.save_every_epoch == 0 and params.is_master:
            trainer.save_model('model_pretrain_%i' % trainer.epoch)
        trainer.save_periodic()
        trainer.end_epoch(scores)
def main(arguments):
    """ """
    parser = argparse.ArgumentParser(
        description='Enumerate over all possible positions to pick the best one')
    parser.add_argument(
        '--model_path', type=str,
        default='/misc/kcgscratch1/ChoGroup/mansimov/XLM-data/exp_elman/finetune_deen_tlm_uniform_4gpu_128batch_pickside_lr_debug/912lweev6s/best-valid_de-en_mt_bleu.pth',
        help='path to pretrained TLM model')
    parser.add_argument('--src_lang', type=str, default='de',
                        help='source language')
    parser.add_argument('--trg_lang', type=str, default='en',
                        help='target language')
    parser.add_argument('--split', type=str, default='valid',
                        help='use valid/test split of dataset',
                        choices=['valid', 'test'])
    parser.add_argument('--use_data_length', action='store_true',
                        help='use lengths according to dataset statistics')
    parser.add_argument(
        '--num_topk_lengths', type=int, default=1,
        help='number of topk lengths to use when using dataset statistics')
    parser.add_argument('--beam_size', type=int, default=1,
                        help='beam size to use in the experiments')
    parser.add_argument('--length_penalty', type=int, default=1,
                        help='length penalty to use')
    parser.add_argument('--batch_size', type=int, default=1,
                        help='batch size to use')
    parser.add_argument('--gen_type', type=str, default="src2trg",
                        choices=['src2trg', 'trg2src'],
                        help='generation type to use: src2trg (de->en) or trg2src (en->de)')
    parser.add_argument('--print_every', type=int, default=10,
                        help='how often to log progress')
    parser.add_argument('--alpha', type=float, default=1.,
                        help='weight to put on entropy')
    parser.add_argument('--beta', type=float, default=1.,
                        help='weight to put on log prob')
    parser.add_argument('--gamma', type=float, default=0.,
                        help='weight to put on left-to-right decoding')
    parser.add_argument('--uniform', action='store_true',
                        help='do uniform sampling of positions')
    parser.add_argument(
        '--iter_mult', type=int, default=1,
        help='iteration multiplier (multiply this number by target length)')
    parser.add_argument(
        '--mask_schedule', type=str,
        choices=["constant", "linear", "all"], default="linear",
        help='schedule for number of masks to predict at each iteration')
    parser.add_argument(
        '--constant_k', type=int, default=1,
        help="if constant mask schedule, number of masks at each iteration")
    parser.add_argument('--gpu_id', type=int, default=0,
                        help='GPU ID, use -1 for CPU')
    args = parser.parse_args(arguments)

    if args.uniform:
        args.alpha, args.beta, args.gamma = 0, 0, 0

    # set GPU
    if args.gpu_id >= 0:
        torch.cuda.set_device(args.gpu_id)

    print("Evaluating model at {0}".format(args.model_path))

    # load everything from checkpoint
    params, dico, model = reload_checkpoint(args.model_path)

    # put on GPU
    model = model.cuda() if args.gpu_id >= 0 else model

    # put in eval mode
    model = model.eval()

    if args.use_data_length:
        params.de2en_lengths = pkl.load(
            open(os.path.join(params.data_path, 'de2en_lengths.pkl'), 'rb'))
        params.en2de_lengths = pkl.load(
            open(os.path.join(params.data_path, 'en2de_lengths.pkl'), 'rb'))
        params.num_topk_lengths = args.num_topk_lengths
    else:
        params.de2en_lengths = None
        params.en2de_lengths = None
        params.num_topk_lengths = 1

    # load data
    params.eval_only = True
    params.batch_size = args.batch_size
    data = load_data(params)

    # create reference files for BLEU evaluation
    prepare_data(params, data, args.split, args.gen_type, args.alpha,
                 args.beta, args.gamma, args.uniform, args.iter_mult,
                 args.use_data_length, args.num_topk_lengths,
                 args.mask_schedule, args.constant_k)

    # evaluate
    run(model, params, dico, data, args.split, args.src_lang, args.trg_lang,
        args.gen_type, args.alpha, args.beta, args.gamma, args.uniform,
        args.iter_mult, args.mask_schedule, args.constant_k, args.batch_size,
        args.gpu_id)
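# --mask_schedule above controls how many masked positions are re-predicted at
# each refinement iteration. The exact formula lives elsewhere in the original
# code, so the helper below is only a hedged sketch of one common choice for
# each schedule name ("linear" decaying from the full target length,
# "constant" fixed at k, "all" re-predicting everything); the function name
# and signature are assumptions.
def num_masks(schedule, it, num_iters, trg_len, constant_k=1):
    """Sketch (assumption): number of tokens to mask at iteration `it`
    (0-based) out of `num_iters` total, for a target of length `trg_len`."""
    if schedule == "constant":
        return constant_k
    if schedule == "linear":
        # decay linearly from trg_len towards 1 over the iterations
        return max(1, int(trg_len * (1 - it / num_iters)))
    if schedule == "all":
        return trg_len
    raise ValueError(schedule)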
help="clip grad norm") parser.add_argument("--id", type=int, default=0) parser.add_argument("--checkpoint_dir", type=str, default='/data2/twang/simple-fairseq/all_models/big') params = parser.parse_args() params.gpu_num = 1 params.seed = 1234 params.reload_model = '{}/model_epoch{}.pt'.format(params.checkpoint_dir, params.id) params.translate_file = 'data/valid.bpe.zh' params.src_dico_file = 'data/dict.bpe.zh' params.tgt_dico_file = 'data/dict.bpe.en' params.out_file = '{}/predict_{}.en'.format(params.checkpoint_dir, params.id) if __name__ == '__main__': data = load_data(params, name='test') encoder, decoder, _ = build_mt_model(params) encoder.eval() decoder.eval() iterator = data.get_iterator(shuffle=False, group_by_size=False)() file = open(params.out_file, 'w', encoding='utf-8') total = 0 with torch.no_grad(): for (sen1, len1) in iterator: len1, bak_order = len1.sort(descending=True) sen1 = sen1[:, bak_order] sen1 = sen1.cuda() encoded = encoder(sen1, len1) sent2, len2, _ = decoder.generate(encoded) total += len2.size(0) logger.info('Translating %i sentences.' % total)
def main(params):
    init_distributed_mode(params)

    # load data
    data = load_data(params)

    emb_weights = None

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
        emb_weights = model.embeddings.weight.data.cpu().numpy()
    else:
        encoder, decoder = build_model(params, data['dico'])
        emb_weights = encoder.embeddings.weight.data.cpu().numpy()

    if params.gen_word_emb:
        # metadata = open("./pretrained_models/mlm_xnli15_1024/xlm15-metadata.txt", "w", encoding='utf-8')
        # embeddings = open("./pretrained_models/mlm_xnli15_1024/embeddings.tsv", "w", encoding='utf-8')
        # with open("./pretrained_models/mlm_xnli15_1024/token_embeddings.tsv", "w", encoding='utf-8') as out:
        with open(f'./dumped/{params.exp_name}/{params.exp_id}/embeddings.tsv',
                  'w', encoding='utf-8') as out:
            for i in range(len(data['dico'])):
                word = data['dico'][i]
                emb = '\t'.join([str(v) for v in emb_weights[i]])
                out.write(f"{word}\t{emb}\n")
                # metadata.write(f"{word}\n")
                # embeddings.write(f"{emb}\n")
        # metadata.close()
        # embeddings.close()

    if params.gen_sent_emb:
        evaluator = SingleEvaluator(None, data, params)
        sents = {}
        sent_embs = {}
        with torch.no_grad():
            data_set = 'test'
            for lang1, lang2 in params.mlm_steps:
                # lang2 is None
                _sents, _sent_embs = evaluator.generate_sent_emb(data_set, lang1, lang2)
                sents[lang1] = _sents
                sent_embs[lang1] = _sent_embs
        for lang1, lang2 in params.mlm_steps:
            out = open(f'./dumped/{params.exp_name}/{params.exp_id}/sent_embs-{lang1}.tsv',
                       'w', encoding='utf-8')
            for sent, emb in zip(sents[lang1], sent_embs[lang1]):
                emb = '\t'.join([str(v) for v in emb])
                out.write(f"{sent.strip()}\t{emb}\n")
            out.close()
def main(params):
    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # initialize experiment / load data / build model
    logger = initialize_exp(params)
    data = load_data(params)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)

    # evaluation mode
    if params.eval_only:
        evaluator.run_all_evals(0)
        exit()

    # language model pretraining
    if params.lm_before > 0:
        logger.info("Pretraining language model for %i iterations ..." %
                    params.lm_before)
        trainer.n_sentences = 0
        for _ in range(params.lm_before):
            for lang in params.langs:
                trainer.lm_step(lang)
            trainer.iter()

    # define epoch size
    if params.epoch_size == -1:
        params.epoch_size = params.n_para
    assert params.epoch_size > 0

    # start training
    for _ in range(trainer.epoch, params.max_epoch):
        logger.info("====================== Starting epoch %i ... ======================" % trainer.epoch)

        trainer.n_sentences = 0
        while trainer.n_sentences < params.epoch_size:
            # discriminator training
            for _ in range(params.n_dis):
                trainer.discriminator_step()

            # language model training
            if params.lambda_lm > 0:
                for _ in range(params.lm_after):
                    for lang in params.langs:
                        trainer.lm_step(lang)

            # MT training (parallel data)
            if params.lambda_xe_para > 0:
                for lang1, lang2 in params.para_directions:
                    trainer.enc_dec_step(lang1, lang2, params.lambda_xe_para)

            # MT training (back-parallel data)
            if params.lambda_xe_back > 0:
                for lang1, lang2 in params.back_directions:
                    trainer.enc_dec_step(lang1, lang2, params.lambda_xe_back, back=True)

            # autoencoder training (monolingual data)
            if params.lambda_xe_mono > 0:
                for lang in params.mono_directions:
                    trainer.enc_dec_step(lang, lang, params.lambda_xe_mono)

            # AE - MT training (on-the-fly back-translation)
            if params.lambda_xe_otfd > 0 or params.lambda_xe_otfa > 0:
                # start on-the-fly batch generation
                if not getattr(params, 'started_otf_batch_gen', False):
                    otf_iterator = trainer.otf_bt_gen_async()
                    params.started_otf_batch_gen = True

                # update model parameters on subprocesses
                if trainer.n_iter % params.otf_sync_params_every == 0:
                    trainer.otf_sync_params()

                # get training batch from CPU
                before_gen = time.time()
                batches = next(otf_iterator)
                trainer.gen_time += time.time() - before_gen

                # training
                for batch in batches:
                    lang1, lang2, lang3 = batch['lang1'], batch['lang2'], batch['lang3']
                    # 2-lang back-translation - autoencoding
                    if lang1 != lang2 == lang3:
                        trainer.otf_bt(batch, params.lambda_xe_otfa,
                                       params.otf_backprop_temperature)
                    # 2-lang back-translation - parallel data
                    elif lang1 == lang3 != lang2:
                        trainer.otf_bt(batch, params.lambda_xe_otfd,
                                       params.otf_backprop_temperature)
                    # 3-lang back-translation - parallel data
                    elif lang1 != lang2 and lang2 != lang3 and lang1 != lang3:
                        trainer.otf_bt(batch, params.lambda_xe_otfd,
                                       params.otf_backprop_temperature)

            trainer.iter()

        # end of epoch
        logger.info("====================== End of epoch %i ======================" % trainer.epoch)

        # evaluate discriminator / perplexity / BLEU
        scores = evaluator.run_all_evals(trainer.epoch)

        # print / JSON log
        for k, v in scores.items():
            logger.info('%s -> %.6f' % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))

        # save best / save periodic / end epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
        trainer.test_sharing()
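# The three branches at the end of the on-the-fly loop above dispatch on the
# shape of the (lang1, lang2, lang3) triple of a generated batch. A small
# self-contained illustration of which case each triple falls into (the
# classification mirrors the conditions above; the helper itself is only an
# illustration, not part of the original code):
def bt_case(lang1, lang2, lang3):
    """Classify a back-translation triple the same way the loop above does."""
    if lang1 != lang2 == lang3:
        return '2-lang back-translation - autoencoding'    # e.g. ('en', 'fr', 'fr')
    if lang1 == lang3 != lang2:
        return '2-lang back-translation - parallel data'   # e.g. ('en', 'fr', 'en')
    if lang1 != lang2 != lang3 != lang1:
        return '3-lang back-translation - parallel data'   # e.g. ('en', 'fr', 'de')
    return 'not handled by the loop above'


print(bt_case('en', 'fr', 'en'))  # 2-lang back-translation - parallel data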
def main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    if params.other_seed > -1:
        # deterministic
        torch.manual_seed(params.other_seed)
        torch.cuda.manual_seed(params.other_seed)
        np.random.seed(params.other_seed)
        random.seed(params.other_seed)

    if params.iter_seed == -1:
        # non-deterministic
        params.iter_seed = None

    # load data
    data = load_data(params)

    writer = SummaryWriter(params.dump_path + "/" + params.exp_name + "_log")

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        sys.exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    _iter = 0

    # dump initial weights
    if params.save_initial:
        trainer.save_checkpoint('initial', include_optimizers=False)

    # language model training
    for _ in range(params.max_epoch):
        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)
        trainer.n_sentences = 0
        while trainer.n_sentences < trainer.epoch_size:
            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                if params.only_vlm:
                    # with visual features
                    trainer.vlm_step(lang1, lang2, params.lambda_mlm, _iter)
                else:
                    trainer.mlm_step(lang1, lang2, params.lambda_mlm, _iter)

            # parallel classification steps
            for lang1, lang2 in shuf_order(params.pc_steps, params):
                trainer.pc_step(lang1, lang2, params.lambda_pc)

            # denoising auto-encoder steps
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            for lang1, lang2 in shuf_order(params.mmt_steps, params):
                trainer.mmt_step(lang1, lang2, params.lambda_mt)

            trainer.iter()
            _iter += 1

        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            writer.add_scalar(k, v, _iter)
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
def main(params):
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):
        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)
        trainer.n_sentences = 0
        while trainer.n_sentences < trainer.epoch_size:
            # CLM steps (causal language model)
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                trainer.clm_step(lang1, lang2, params.lambda_clm)

            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # denoising auto-encoder steps
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt,
                                params.bt_sample_temperature)

            trainer.iter()

        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        if params.validation_metrics != '':
            trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
# %autoreload 2
# %%
import numpy as np
from plotly.offline import iplot
import plotly.io as pio
from src.data.loader import load_data
from src.utility import Utility
import diofant
from pprint import pprint as print
import cufflinks as cf

cf.go_offline()

# %%
data = load_data()

# %%
data.set_index("governorate")[[
    "Number of institutes, centers and specialized hospitals in public sector",
    "Number of public district hospitals",
    "Number of regional public hospitals",
    "The number of public hospitals",
]].fillna(0)

# %% [markdown]
# ## Indicators

# %% [markdown]
# 1. Access to professionals
#    * Number of doctors / 1000 inhabitants