示例#1
0
def main(params):
    """Launch one training process per GPU and wait for all of them.

    The parent loads the train and test data once, stores a
    ``tcp://localhost:<port>`` URL (random port in [10000, 20000]) in
    ``params.init_method`` and then starts ``params.gpu_num`` daemon
    processes that execute ``init_processes``.  Every child pid is
    registered with an ``ErrorHandler`` built around a shared queue.

    Args:
        params: experiment parameters; ``init_method`` and ``rank`` are
            written on this object.
    """
    ctx = torch.multiprocessing.get_context('spawn')
    error_queue = ctx.SimpleQueue()
    error_handler = ErrorHandler(error_queue)

    params.init_method = 'tcp://localhost:%d' % random.randint(10000, 20000)

    workers = []
    train_data = load_data(params, 'train')
    test_data = load_data(params, 'test')

    for rank in range(params.gpu_num):
        # The rank is written on the shared params object right before the
        # child is started, so each child is handed its own rank value.
        params.rank = rank
        worker_args = (params, train_data, test_data, run, error_queue)
        worker = ctx.Process(target=init_processes,
                             args=worker_args,
                             daemon=True)
        worker.start()
        error_handler.add_child(worker.pid)
        workers.append(worker)

    # Block until every child has terminated.
    for worker in workers:
        worker.join()
示例#2
0
def main(params):
    """Train an MT model from the trainer's current epoch to ``max_epoch``.

    Loads the train and test splits, builds the encoder/decoder with
    ``build_mt_model`` and runs ``trainer.train_epoch()`` once for every
    epoch index in ``[trainer.epoch, params.max_epoch)``.

    Args:
        params: experiment parameters (must provide ``max_epoch``).
    """
    train_data = load_data(params, name='train')
    test_data = load_data(params, name='test')
    encoder, decoder, num_updates = build_mt_model(params)
    trainer = TrainerMT(encoder, decoder, train_data, test_data, params,
                        num_updates)

    # The start index is read once, before the first epoch runs.
    epoch_idx = trainer.epoch
    last_epoch = params.max_epoch
    while epoch_idx < last_epoch:
        banner = "==== Starting epoch %i ...====" % trainer.epoch
        logger.info(banner)
        trainer.train_epoch()
        tqdm.write('Finish epcoh %i.' % epoch_idx)
        epoch_idx += 1
def run(params, error_queue):
    """Per-process training entry point.

    Pins the process to GPU ``params.rank``, seeds torch with
    ``params.seed``, loads the training data, builds the model and trains
    from ``trainer.epoch`` up to ``params.max_epoch``.

    Args:
        params: experiment parameters (``rank``, ``seed``, ``max_epoch``).
        error_queue: queue receiving ``(rank, traceback_text)`` when an
            exception other than ``KeyboardInterrupt`` escapes training.

    ``NotImplementedError`` is raised inside the guarded section when CUDA
    is unavailable, so it is reported through ``error_queue`` as well.
    """
    try:
        logger.info(params)

        cuda_ready = torch.cuda.is_available()
        if not cuda_ready:
            raise NotImplementedError('Training on CPU is not supported')

        torch.cuda.set_device(params.rank)
        torch.manual_seed(params.seed)
        logger.info('Process %s is now running in gpu:%s', os.getpid(),
                    torch.cuda.current_device())

        train_data = load_data(params, 'train')
        # Debug output: shows the iterator built for this rank's partition.
        rank_iterator = train_data.get_iterator(shuffle=True,
                                                group_by_size=True,
                                                partition=params.rank)
        print(rank_iterator)

        encoder, decoder, num_updates = build_mt_model(params)
        # NOTE(review): no test data is passed to TrainerMT here - confirm
        # this matches the TrainerMT signature in use.
        trainer = TrainerMT(encoder, decoder, train_data, params, num_updates)

        for epoch_idx in range(trainer.epoch, params.max_epoch):
            banner = "==== Starting epoch %i ...====" % trainer.epoch
            logger.info(banner)
            trainer.train_epoch()
            tqdm.write('Finish epcoh %i.' % epoch_idx)

    except KeyboardInterrupt:
        # Killed by the parent process: nothing to report.
        pass
    except Exception:
        # Forward the formatted traceback so the parent can show the
        # original failure.
        import traceback
        failure = (params.rank, traceback.format_exc())
        error_queue.put(failure)
示例#4
0
def clts_xencoder_main(params):
    """Export the cross-lingual encoder's embedding matrix to text files.

    Three files are written under ``./pretrained_models/mlm_xnli15_1024``:

    * ``xlm15-metadata.txt``   - one dictionary entry per line
    * ``embeddings.tsv``       - one tab-separated embedding row per line
    * ``token_embeddings.tsv`` - entry and embedding row on the same line

    All three files are managed by a single ``with`` statement so that
    every handle is closed even when the export loop raises (previously the
    metadata and embeddings handles leaked on error).

    Args:
        params: experiment parameters forwarded to the distributed-mode
            initialisation, the data loader and the model builder.
    """
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # load data
    data = load_data(params)
    dico = data['dico']

    # Only the cross-lingual encoder is needed; the two summarization
    # modules returned by the builder are not used here.
    xencoder, _, _ = build_clts_xencoder_model(params, dico)
    emb_weights = xencoder.embeddings.weight.data.cpu().numpy()

    # Files are opened in the same order as before: metadata, embeddings,
    # then the combined token/embedding file.
    with open("./pretrained_models/mlm_xnli15_1024/xlm15-metadata.txt",
              "w",
              encoding='utf-8') as metadata, \
            open("./pretrained_models/mlm_xnli15_1024/embeddings.tsv",
                 "w",
                 encoding='utf-8') as embeddings, \
            open("./pretrained_models/mlm_xnli15_1024/token_embeddings.tsv",
                 "w",
                 encoding='utf-8') as out:
        # The dictionary is addressed by integer id, so iterate over ids.
        for i in range(len(dico)):
            word = dico[i]
            emb = '\t'.join(str(v) for v in emb_weights[i])

            out.write(f"{word}\t{emb}\n")
            metadata.write(f"{word}\n")
            embeddings.write(f"{emb}\n")
示例#5
0
def main(params):
    """Set up model, trainer and evaluator; run generation in eval mode.

    When ``params.eval_only`` is set, ``evaluator.generate(trainer)`` is
    called and the interpreter exits.  Otherwise the function returns after
    the setup phase.

    Args:
        params: experiment parameters (``encoder_only``, ``eval_only``).
    """
    # Distributed setup, experiment bootstrap and SLURM signal handling.
    init_distributed_mode(params)
    logger = initialize_exp(params)
    init_signal_handler()

    data = load_data(params)

    # Build the model(s) and the matching trainer / evaluator pair.
    if params.encoder_only:
        model = build_model(params, data['dico'])
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        encoder, decoder = build_model(params, data['dico'])
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecGenerator(trainer, data, params)

    # Nothing else to do outside evaluation mode.
    if not params.eval_only:
        return

    evaluator.generate(trainer)
    exit()
def main(params):
    """Train an ``EncoderRNN`` / ``Attention_decoder`` pair.

    For each of ``params.max_epoch`` epochs, ``trainer.try_lstm`` is run
    over the shuffled ``params.mt_steps`` language pairs until
    ``trainer.n_sentences`` reaches ``trainer.epoch_size``.  The evaluator
    is then run, the scores are logged and the trainer's end-of-epoch hooks
    are called.  After the last epoch three softmax outputs are saved.

    Args:
        params: experiment parameters (``n_words``, ``max_epoch``,
            ``mt_steps``, ``lambda_mt``, ``is_master``).
    """
    # Distributed setup, experiment bootstrap and SLURM signal handling.
    init_distributed_mode(params)
    logger = initialize_exp(params)
    init_signal_handler()

    data = load_data(params)

    # Both modules use the same hidden size and live on the GPU.
    hidden_size = 1024
    encoder = EncoderRNN.EncoderRNN(params.n_words, hidden_size).cuda()
    decoder = Attention_decoder.Attention_decoder(
        hidden_size, params.n_words, dropout_p=0.1).cuda()

    trainer = LSTM_Trainer(encoder, decoder, data, params)
    evaluator = LSTM_Evaluator(trainer, data, params)

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    for _ in range(params.max_epoch):
        start_banner = ("============ Starting epoch %i ... ============" %
                        trainer.epoch)
        logger.info(start_banner)

        trainer.n_sentences = 0

        # NOTE(review): presumably try_lstm advances trainer.n_sentences,
        # otherwise this loop would not terminate - confirm.
        while trainer.n_sentences < trainer.epoch_size:
            for src_lang, tgt_lang in shuf_order(params.mt_steps, params):
                trainer.try_lstm(src_lang, tgt_lang, params.lambda_mt)

        end_banner = ("============ End of epoch %i ============" %
                      trainer.epoch)
        logger.info(end_banner)

        # Evaluate, then emit one line per metric plus a JSON summary on
        # the master process.
        scores = evaluator.run_all_evals(trainer)
        for metric, value in scores.items():
            logger.info("%s -> %.6f" % (metric, value))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)

    # NOTE(review): clm_temp, ml_temp and bt_temp are not defined in this
    # function; they must exist at module level - confirm.
    for tensor, label in ((clm_temp, 'clm_temp'),
                          (ml_temp, 'ml_temp'),
                          (bt_temp, 'bt_temp')):
        trainer.save_softmax_output(tensor, label)
示例#7
0
def perform_translation(input_file_path, translation_directory,
                        cloze_train_path, question_train_path,
                        fasttext_vectors_path, checkpoint_path):
    """Translate the 'cloze' test set into 'question' and write it to disk.

    Builds the parameters, reloads the MT model from its checkpoint, runs
    the encoder and ``decoder.generate`` over the monolingual 'cloze' test
    set, and writes tab-separated ``source<TAB>translation`` lines to
    ``output_translations.txt`` inside ``translation_directory``.
    ``restore_segmentation`` is then applied to that file.

    Args:
        input_file_path: passed to ``get_params`` as ``cloze_test_path``.
        translation_directory: dump path and output directory.
        cloze_train_path: passed through to ``get_params``.
        question_train_path: passed through to ``get_params``.
        fasttext_vectors_path: passed through to ``get_params``.
        checkpoint_path: passed through to ``get_params``.

    Returns:
        Path of the written translations file.
    """
    params = get_params(
        exp_name='translation',
        dump_path=translation_directory,
        cloze_train_path=cloze_train_path,
        question_train_path=question_train_path,
        cloze_test_path=input_file_path,
        fasttext_vectors_path=fasttext_vectors_path,
        checkpoint_path=checkpoint_path,
    )

    # Validate the parameters before anything expensive happens.
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    data = load_data(params, mono_only=True)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # Trainer (checkpoint reload + parameter sharing check) and evaluator.
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()
    evaluator = EvaluatorMT(trainer, data, params)

    src_lang, tgt_lang = 'cloze', 'question'
    out_name = os.path.join(translation_directory, 'output_translations.txt')

    with torch.no_grad():
        evaluator.encoder.eval()
        evaluator.decoder.eval()
        src_id = evaluator.params.lang2id[src_lang]
        tgt_id = evaluator.params.lang2id[tgt_lang]

        pairs = []
        dataset = evaluator.data['mono'][src_lang]['test']
        dataset.batch_size = params.batch_size

        # get_iterator returns a callable that yields the batches.
        batches = dataset.get_iterator(shuffle=False, group_by_size=False)()
        for sent1, len1 in batches:
            encoded = evaluator.encoder(sent1.cuda(), len1, src_id)
            sent2_, len2_, _ = evaluator.decoder.generate(encoded, tgt_id)
            src_text = convert_to_text(sent1, len1, evaluator.dico[src_lang],
                                       src_id, evaluator.params)
            tgt_text = convert_to_text(sent2_, len2_,
                                       evaluator.dico[tgt_lang], tgt_id,
                                       evaluator.params)
            pairs.extend(zip(src_text, tgt_text))

        # Export the sentence pairs and restore the BPE segmentation.
        lines = ['\t'.join(pair) for pair in pairs]
        with open(out_name, 'w', encoding='utf-8') as f:
            f.write('\n'.join(lines) + '\n')
        restore_segmentation(out_name)

    return out_name
示例#8
0
def load(params):
    """Build a checkpoint-restored trainer and its evaluator.

    Args:
        params: experiment parameters; ``exp_name`` must be truthy.

    Returns:
        Tuple ``(trainer, evaluator)``.
    """
    # Parameter validation.
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # Data and model components.
    data = load_data(params)
    enc, dec, disc, lang_model = build_mt_model(params, data)

    # Trainer: restore the checkpoint, then verify parameter sharing.
    mt_trainer = TrainerMT(enc, dec, disc, lang_model, data, params)
    mt_trainer.reload_checkpoint()
    mt_trainer.test_sharing()

    mt_evaluator = EvaluatorMT(mt_trainer, data, params)
    return mt_trainer, mt_evaluator
示例#9
0
def clts_elmo_main(params):
    """Dump the ELMo language model's embedding matrix to a TSV file.

    The file ``./dumped/<exp_name>/<exp_id>/embeddings.tsv`` receives one
    line per dictionary entry: the entry followed by its tab-separated
    embedding values.

    Args:
        params: experiment parameters (``exp_name`` and ``exp_id`` select
            the output directory).
    """
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    data = load_data(params)

    # Only the first returned module is used below.
    elmo, ts_encoder, ts_decoder = build_clts_elmo_model(params, data['dico'])
    emb_weights = elmo.language_model.embeddings.weight.data.cpu().numpy()

    dump_file = f'./dumped/{params.exp_name}/{params.exp_id}/embeddings.tsv'
    with open(dump_file, 'w', encoding='utf-8') as out:
        vocab = data['dico']
        # The dictionary is addressed by integer id.
        for idx in range(len(vocab)):
            token = vocab[idx]
            row_text = '\t'.join(map(str, emb_weights[idx]))
            out.write(f"{token}\t{row_text}\n")
示例#10
0
def inference(params):
    """Reload the best model and run the evaluator's inference routine.

    The interpreter exits once ``evaluator.eval_inference()`` returns.

    Args:
        params: experiment parameters; ``exp_name`` must be truthy.
    """
    # Parameter validation.
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # Experiment bootstrap, data and model components.
    logger = initialize_exp(params)
    data = load_data(params)
    enc, dec, disc, lang_model = build_mt_model(params, data)

    # Trainer: restore the best model, then verify parameter sharing.
    mt_trainer = TrainerMT(enc, dec, disc, lang_model, data, params)
    mt_trainer.reload_best_model()
    mt_trainer.test_sharing()

    mt_evaluator = EvaluatorMT(mt_trainer, data, params)

    # Evaluation mode: run inference and stop.
    mt_evaluator.eval_inference()
    exit()
示例#11
0
def main(params):
    """Debug helper: print target prediction masks for the parallel test set.

    Iterates over the 'test' split of the parallel data for the two
    languages in ``params.langs`` (ordered so the smaller name comes
    first).  For every batch it prints the target tensor sizes, builds the
    prediction mask that excludes the last target word, and asserts that
    the number of selected targets equals ``(len2 - 1).sum()``.

    Args:
        params: experiment parameters (``langs``, ``n_words``,
            ``ref_paths``).
    """
    # Distributed setup, experiment bootstrap and SLURM signal handling.
    init_distributed_mode(params)
    logger = initialize_exp(params)
    init_signal_handler()

    data = load_data(params)

    # The parallel data is keyed by the ordered language pair.
    first, second = params.langs[0], params.langs[1]
    pair = (first, second) if first < second else (second, first)
    dataset = data['para'][pair]['test']

    print(params.n_words)
    print("ref_paths" + str(params.ref_paths))

    batches = dataset.get_iterator(shuffle=False,
                                   group_by_size=True,
                                   n_sentences=-1,
                                   tokens_per_batch=2000)
    for (x1, len1, id1, lenid1), (x2, len2, id2, lenid2) in batches:
        print('x2' + str(x2.size()))
        print("len2[None] - 1" + str(len2[None] - 1) + " " + str(len2[None]))
        print(str(len2[0]))
        print('len2' + str(len2))

        positions = torch.arange(len2.max(),
                                 dtype=torch.long,
                                 device=len2.device)
        # do not predict anything given the last target word
        pred_mask = positions[:, None] < len2[None] - 1
        print("pred_mask" + str(pred_mask))
        print(str(pred_mask.size()))

        # Targets are the inputs shifted by one position.
        y = x2[1:].masked_select(pred_mask[:-1])
        print("yyyy" + str(y))
        print(str(y.size()))
        assert len(y) == (len2 - 1).sum().item()
示例#12
0
def main(params):
    """Run MLM (or meta-MLM) training with evaluation after each epoch.

    In ``eval_only`` mode the evaluator is run once, the scores are logged
    and the interpreter exits.  Otherwise ``params.lgs`` is split on ``-``
    (a single language is duplicated into a pair) and, for each of
    ``params.max_epoch`` epochs, the MLM steps are run until
    ``trainer.n_sentences`` reaches ``trainer.epoch_size``.

    Args:
        params: experiment parameters (``encoder_only``, ``eval_only``,
            ``lgs``, ``mlm_steps``, ``do_meta_update``, ``lambda_mlm``,
            ``max_epoch``, ``is_master``).
    """
    # Distributed setup, experiment bootstrap and SLURM signal handling.
    init_distributed_mode(params)
    logger = initialize_exp(params)
    init_signal_handler()

    data = load_data(params)

    def report(scores, with_json):
        """Log one line per metric, plus the JSON summary when requested."""
        for metric, value in scores.items():
            logger.info("%s -> %.6f" % (metric, value))
        if with_json:
            logger.info("__log__:%s" % json.dumps(scores))

    # Build the model(s) and the matching trainer / evaluator pair;
    # the trainer reloads potential checkpoints.
    if params.encoder_only:
        model = build_model(params, data['dico'])
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        encoder, decoder = build_model(params, data['dico'])
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # Evaluation-only mode: the JSON line is logged unconditionally.
    if params.eval_only:
        report(evaluator.run_all_evals(trainer), True)
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # Store the language list on params; a single language becomes a pair.
    lgs = params.lgs.split("-")
    params.lgs = lgs
    if len(lgs) == 1:
        lgs.append(lgs[0])

    for _ in range(params.max_epoch):
        start_banner = ("============ Starting epoch %i ... ============" %
                        trainer.epoch)
        logger.info(start_banner)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:
            # These steps replace the original MLM steps.
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                if not params.do_meta_update:
                    trainer.mlm_step(lang1, lang2, params.lambda_mlm)
                else:
                    trainer.meta_mlm_step(lang1)

            trainer.iter()

        end_banner = ("============ End of epoch %i ============" %
                      trainer.epoch)
        logger.info(end_banner)

        # Evaluate; only the master process emits the JSON line.
        scores = evaluator.run_all_evals(trainer)
        report(scores, params.is_master)

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
示例#13
0
def main(params):
    """Run multi-objective training with evaluation after each epoch.

    Supports optional fp16 conversion of the model(s).  In ``eval_only``
    mode the evaluator is run once, the scores are logged and the
    interpreter exits.  Otherwise every training iteration runs, in order,
    the CLM, MLM/TLM, parallel classification, denoising auto-encoder,
    MASS, machine translation, back-translation and back-parallel steps.

    Args:
        params: experiment parameters (``encoder_only``, ``fp16``,
            ``eval_only``, ``max_epoch``, ``is_master``, the ``*_steps``
            lists and their ``lambda_*`` coefficients).
    """
    # Distributed setup, experiment bootstrap and SLURM signal handling.
    init_distributed_mode(params)
    logger = initialize_exp(params)
    init_signal_handler()

    data = load_data(params)

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # float16: convert whichever model(s) were built above.
    if params.fp16:
        assert torch.backends.cudnn.enabled
        if params.encoder_only:
            model = network_to_half(model)
        else:
            encoder = network_to_half(encoder)
            decoder = network_to_half(decoder)

    # distributed
    # if params.multi_gpu:
    #     logger.info("Using nn.parallel.DistributedDataParallel ...")
    #     if params.encoder_only:
    #         model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)
    #     else:
    #         encoder = apex.parallel.DistributedDataParallel(encoder, delay_allreduce=True)
    #         decoder = apex.parallel.DistributedDataParallel(decoder, delay_allreduce=True)

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    def report(scores, with_json):
        """Log one line per metric, plus the JSON summary when requested."""
        for metric, value in scores.items():
            logger.info("%s -> %.6f" % (metric, value))
        if with_json:
            logger.info("__log__:%s" % json.dumps(scores))

    def run_one_iteration():
        """Run every configured training objective once, then step."""
        # CLM steps
        for lang1, lang2 in shuf_order(params.clm_steps, params):
            trainer.clm_step(lang1, lang2, params.lambda_clm)

        # MLM steps (also includes TLM if lang2 is not None)
        for lang1, lang2 in shuf_order(params.mlm_steps, params):
            trainer.mlm_step(lang1, lang2, params.lambda_mlm)

        # parallel classification steps
        for lang1, lang2 in shuf_order(params.pc_steps, params):
            trainer.pc_step(lang1, lang2, params.lambda_pc)

        # denoising auto-encoder steps (same language on both sides)
        for lang in shuf_order(params.ae_steps):
            trainer.mt_step(lang, lang, params.lambda_ae)

        # mass prediction steps
        for lang in shuf_order(params.mass_steps):
            trainer.mass_step(lang, params.lambda_mass)

        # machine translation steps
        for lang1, lang2 in shuf_order(params.mt_steps, params):
            trainer.mt_step(lang1, lang2, params.lambda_mt)

        # back-translation steps
        for lang1, lang2, lang3 in shuf_order(params.bt_steps):
            trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

        # back-parallel steps
        for lang1, lang2 in shuf_order(params.bmt_steps, params):
            trainer.bmt_step(lang1, lang2, params.lambda_bmt)

        trainer.iter()

    # Evaluation-only mode: the JSON line is logged unconditionally.
    if params.eval_only:
        report(evaluator.run_all_evals(trainer), True)
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    for _ in range(params.max_epoch):
        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)

        trainer.n_sentences = 0
        while trainer.n_sentences < trainer.epoch_size:
            run_one_iteration()

        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # Evaluate; only the master process emits the JSON line.
        scores = evaluator.run_all_evals(trainer)
        report(scores, params.is_master)

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
示例#14
0
def main(params):
    """Run multi-objective training, optionally training only adapters.

    With ``params.encoder_only`` and ``params.use_adapters`` set, every
    parameter whose name does not start with ``adapters`` is frozen, after
    which the parameters of ``embeddings``, ``position_embeddings``,
    ``pred_layer`` and ``layer_norm_emb`` are made trainable again.  The
    number of trainable parameters is logged once the trainer is built.

    In ``eval_only`` mode the evaluator is run once, the scores are logged
    and the interpreter exits; otherwise training runs for
    ``params.max_epoch`` epochs with evaluation after each one.

    Args:
        params: experiment parameters (``encoder_only``, ``use_adapters``,
            ``eval_only``, ``max_epoch``, ``is_master``, the ``*_steps``
            lists and their ``lambda_*`` coefficients).
    """
    # Distributed setup, experiment bootstrap and SLURM signal handling.
    init_distributed_mode(params)
    logger = initialize_exp(params)
    init_signal_handler()

    data = load_data(params)

    def count_trainable(module):
        """Number of elements in the module's trainable parameters."""
        return sum(p.numel() for p in module.parameters() if p.requires_grad)

    def report(scores, with_json):
        """Log one line per metric, plus the JSON summary when requested."""
        for metric, value in scores.items():
            logger.info("%s -> %.6f" % (metric, value))
        if with_json:
            logger.info("__log__:%s" % json.dumps(scores))

    # Build the model(s) - reload-model options are handled in there - and
    # the matching trainer / evaluator pair.
    if params.encoder_only:
        model = build_model(params, data['dico'])

        if params.use_adapters:
            logger.info("Using adapters")

            # Freeze everything that is not an adapter parameter.
            for name, tensor in model.named_parameters():
                if not name.startswith("adapters"):
                    tensor.requires_grad = False

            # Re-enable gradients for these sub-modules.
            for attr in ("embeddings", "position_embeddings", "pred_layer",
                         "layer_norm_emb"):
                for tensor in getattr(model, attr).parameters():
                    tensor.requires_grad = True

            # Log the final trainable state of every parameter.
            for name, tensor in model.named_parameters():
                logger.info(name + ' required grad = ' +
                            str(tensor.requires_grad))

        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
        logger.info("Number of trainable parameters (encoder): %i" %
                    count_trainable(trainer.model))
    else:
        encoder, decoder = build_model(params, data['dico'])
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)
        logger.info("Number of trainable parameters (encoder): %i" %
                    count_trainable(encoder))
        logger.info("Number of trainable parameters (decoder): %i" %
                    count_trainable(decoder))

    # Evaluation-only mode: the JSON line is logged unconditionally.
    if params.eval_only:
        report(evaluator.run_all_evals(trainer), True)
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    for _ in range(params.max_epoch):
        start_banner = ("============ Starting epoch %i ... ============" %
                        trainer.epoch)
        logger.info(start_banner)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:
            # CLM steps
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                trainer.clm_step(lang1, lang2, params.lambda_clm)

            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # parallel classification steps
            for lang1, lang2 in shuf_order(params.pc_steps, params):
                trainer.pc_step(lang1, lang2, params.lambda_pc)

            # denoising auto-encoder (same language on both sides)
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # machine translation
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            trainer.iter()

        end_banner = ("============ End of epoch %i ============" %
                      trainer.epoch)
        logger.info(end_banner)

        # Evaluate; only the master process emits the JSON line.
        scores = evaluator.run_all_evals(trainer)
        report(scores, params.is_master)

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
示例#15
0
def main(params):
    """Train with content-selection, summarization and LM objectives.

    In ``eval_only`` mode the evaluator is run once, the scores are logged
    and the interpreter exits.  Otherwise, for each of ``params.max_epoch``
    epochs, the enabled steps (``cs_step``, ``sm_step``, ``lm_step``) are
    run until ``trainer.n_iter`` reaches ``trainer.epoch_size``; evaluation
    and the end-of-epoch hooks follow.

    Args:
        params: experiment parameters (``encoder_only``, ``eval_only``,
            ``max_epoch``, the step flags and their ``lambda_*``
            coefficients).
    """
    # check_data_params(params)
    check_model_params(params)

    # initialize the experiment
    logger = initialize_exp(params)

    data = load_data(params)
    # check_vocab(data)

    def report(scores):
        """Log one line per metric followed by the JSON summary."""
        for metric, value in scores.items():
            logger.info("%s -> %.6f" % (metric, value))
        logger.info("__log__:%s" % json.dumps(scores))

    # Build the model(s) and the matching trainer / evaluator pair;
    # the trainer reloads potential checkpoints.
    if params.encoder_only:
        model = build_model(params, data['source_dico'])
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        encoder, decoder = build_model(params, data['source_dico'],
                                       data['target_dico'])
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # Evaluation-only mode.
    if params.eval_only:
        report(evaluator.run_all_evals(trainer))
        exit()

    for _ in range(params.max_epoch):
        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)

        trainer.n_iter = 0
        while trainer.n_iter < trainer.epoch_size:
            # Each objective runs only when its flag is set.
            if params.cs_step:
                trainer.content_selection_step(params.lambda_cs)
            if params.sm_step:
                trainer.summarization_step(params.lambda_sm)
            if params.lm_step:
                trainer.clm_step(params.lambda_lm)
            trainer.iter()

        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # Evaluate and log the scores.
        scores = evaluator.run_all_evals(trainer)
        report(scores)

        # end of epoch (end_epoch takes no scores here)
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch()
示例#16
0
def main(params):

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    meta_params = copy.deepcopy(params).meta_params
    params.meta_params = "..."  # to long to be log
    logger = initialize_exp(params)
    params.meta_params = meta_params

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # todo : good params.n_words (We take the one from the first task have this parameter for the moment.)
    """
    But we think that if all the task data are based on the same vocabulary, all these parameters will be the same, 
    and therefore no problem if we choose one at random.
    """
    p = params.meta_params[data['key']]

    # build model
    if params.encoder_only:
        model = build_model(params=p, dico=data['dico'])
    else:
        encoder, decoder = build_model(params=p, dico=data['dico'])

    # todo : good pad_index and eos_index and ... (I'll take the one from the first task for the moment.)
    """
    But we think that if all the task data are based on the same vocabulary, all these parameters will be the same, 
    and therefore no problem if we choose one at random.
    """
    params.pad_index = p.pad_index
    params.eos_index = p.eos_index

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        if not params.meta_learning:
            for k, v in scores.items():
                logger.info("%s -> %.6f" % (k, v))
        else:
            for lgs in params.meta_params.keys():
                logger.info("============ task : %s " % lgs)
                for k, v in scores[lgs].items():
                    if k != "epoch":
                        logger.info("%s -> %.6f" % (k, v))
            logger.info("============ all")
            for k, v in scores.items():
                if not (k in (list(params.meta_params.keys()) + ['epoch'])):
                    logger.info("%s -> %.6f" % (k, v))

        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        if not params.meta_learning:
            trainer.n_sentences = 0
            while trainer.n_sentences < trainer.epoch_size:
                # CLM steps
                for lang1, lang2 in shuf_order(params.clm_steps, params):
                    trainer.clm_step(lang1, lang2, params.lambda_clm)

                # MLM steps (also includes TLM if lang2 is not None)
                for lang1, lang2 in shuf_order(params.mlm_steps, params):
                    trainer.mlm_step(lang1, lang2, params.lambda_mlm)

                # parallel classification steps
                for lang1, lang2 in shuf_order(params.pc_steps, params):
                    trainer.pc_step(lang1, lang2, params.lambda_pc)

                # denoising auto-encoder steps
                for lang in shuf_order(params.ae_steps):
                    trainer.mt_step(lang, lang, params.lambda_ae)

                # machine translation steps
                for lang1, lang2 in shuf_order(params.mt_steps, params):
                    trainer.mt_step(lang1, lang2, params.lambda_mt)

                # back-translation steps
                for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                    trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

                trainer.iter()
        else:
            # our
            trainer.n_sentences = {}
            """
            Here we build language lists for each of our meta-taks. Indeed, for two language lists l1 and l2, 
            the objective will be done with l1[i] and l2[i] respectively, this for each index i of the two lists. 
            """
            lang1_dic, lang2_dic, lang3_dic = {}, {}, {}
            """
            In the case of meta-learning, we have a (meta-)data dictionary for each (meta-)task, 
            so the keys are the languages conserved by the task. 
            """
            data_keys_dic = {}

            # equivalent to "for task in list of task" in the original algorithm,  except here we prepare all the tasks beforehand.
            for lgs in params.meta_params.keys():
                trainer.n_sentences[lgs] = 0

                # CLM
                try:
                    lang1_dic['clm_step']
                except KeyError:
                    lang1_dic['clm_step'], lang2_dic[
                        'clm_step'], data_keys_dic['clm_step'] = [], [], []
                for lang1, lang2 in shuf_order(
                        params.meta_params[lgs].clm_steps, params):
                    lang1_dic['clm_step'].append(lang1)
                    lang2_dic['clm_step'].append(lang2)
                    data_keys_dic['clm_step'].append(lgs)

                # MLM
                try:
                    lang1_dic['mlm_step']
                except KeyError:
                    lang1_dic['mlm_step'], lang2_dic[
                        'mlm_step'], data_keys_dic['mlm_step'] = [], [], []
                for lang1, lang2 in shuf_order(
                        params.meta_params[lgs].mlm_steps, params):
                    lang1_dic['mlm_step'].append(lang1)
                    lang2_dic['mlm_step'].append(lang2)
                    data_keys_dic['mlm_step'].append(lgs)

                # parallel classification
                try:
                    lang1_dic['pc_step']
                except KeyError:
                    lang1_dic['pc_step'], lang2_dic['pc_step'], data_keys_dic[
                        'pc_step'] = [], [], []
                for lang1, lang2 in shuf_order(
                        params.meta_params[lgs].pc_steps, params):
                    lang1_dic['pc_step'].append(lang1)
                    lang2_dic['pc_step'].append(lang2)
                    data_keys_dic['pc_step'].append(lgs)

                # denoising auto-encoder
                try:
                    lang1_dic['ae_step']
                except KeyError:
                    lang1_dic['ae_step'], data_keys_dic['ae_step'] = [], []
                for lang1 in shuf_order(params.meta_params[lgs].ae_steps):
                    lang1_dic['ae_step'].append(lang1)
                    data_keys_dic['ae_step'].append(lgs)

                # machine translation
                try:
                    lang1_dic['mt_step']
                except KeyError:
                    lang1_dic['mt_step'], lang2_dic['mt_step'], data_keys_dic[
                        'mt_step'] = [], [], []
                for lang1, lang2 in shuf_order(
                        params.meta_params[lgs].mt_steps, params):
                    lang1_dic['mt_step'].append(lang1)
                    lang2_dic['mt_step'].append(lang2)
                    data_keys_dic['mt_step'].append(lgs)

                # back-translation
                try:
                    lang1_dic['bt_step']
                except KeyError:
                    lang1_dic['bt_step'], lang2_dic['bt_step'], lang3_dic[
                        'bt_step'], data_keys_dic['bt_step'] = [], [], [], []
                for lang1, lang2, lang3 in shuf_order(
                        params.meta_params[lgs].bt_steps):
                    lang1_dic['bt_step'].append(lang1)
                    lang2_dic['bt_step'].append(lang2)
                    lang3_dic['bt_step'].append(lang3)
                    data_keys_dic['bt_step'].append(lgs)

            flag = True

            # equivalent to "while not done do" in the original algorithm
            while flag:

                # CLM steps
                #print("clm_step", flag)
                a = trainer.clm_step(lang1_dic['clm_step'],
                                     lang2_dic['clm_step'], params.lambda_clm,
                                     data_keys_dic['clm_step'])

                #print("mlm_step", flag)
                # MLM steps (also includes TLM if lang2 is not None)
                b = trainer.mlm_step(lang1_dic['mlm_step'],
                                     lang2_dic['mlm_step'], params.lambda_mlm,
                                     data_keys_dic['mlm_step'])

                # parallel classification steps
                c = trainer.pc_step(lang1_dic['pc_step'], lang2_dic['pc_step'],
                                    params.lambda_pc, data_keys_dic['pc_step'])

                if isinstance(trainer, EncDecTrainer):

                    # denoising auto-encoder steps
                    d = trainer.mt_step(lang1_dic['ae_step'],
                                        lang1_dic['ae_step'], params.lambda_ae,
                                        data_keys_dic['ae_step'])

                    # machine translation steps
                    e = trainer.mt_step(lang1_dic['mt_step'],
                                        lang2_dic['mt_step'], params.lambda_mt,
                                        data_keys_dic['mt_step'])

                    # back-translation steps
                    f = trainer.bt_step(lang1_dic['bt_step'],
                                        lang2_dic['bt_step'],
                                        lang3_dic['bt_step'], params.lambda_bt,
                                        data_keys_dic['bt_step'])

                    # do things better
                    if (not a) and (not b) and (not c) and (not d) and (
                            not e) and (not f):
                        flag = False  # End of epoch
                    else:
                        flag = True
                else:
                    # do things better
                    if (not a) and (not b) and (not c):
                        flag = False  # End of epoch
                    else:
                        flag = True

                trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        if not params.meta_learning:
            for k, v in scores.items():
                logger.info("%s -> %.6f" % (k, v))
        else:
            for lgs in params.meta_params.keys():
                logger.info("============ task : %s " % lgs)
                for k, v in scores[lgs].items():
                    if k != "epoch":
                        logger.info("%s -> %.6f" % (k, v))
            logger.info("============ all")
            for k, v in scores.items():
                if not (k in (list(params.meta_params.keys()) + ['epoch'])):
                    logger.info("%s -> %.6f" % (k, v))

        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)

        # our
        logger.info("============ garbage collector collecting %d ..." %
                    gc.collect())
示例#17
0
def main(params):
    """Run the language / image-language pretraining loop.

    Sets up the distributed mode and the experiment logger, loads the data,
    builds the network(s) together with the matching trainer and evaluator,
    then trains for ``params.max_epoch`` epochs. Within an epoch the training
    objectives are run in a fixed order until both ``trainer.n_sentences`` and
    ``trainer.n_images`` have reached ``trainer.epoch_size``.

    NOTE(review): this variant does not install the SLURM signal handler,
    does not apply fp16 conversion or a distributed wrapper, and has no
    ``eval_only`` shortcut; the evaluator is built but never run here.
    """
    # multi-GPU / multi-node setup, then experiment / logger
    init_distributed_mode(params)
    logger = initialize_exp(params)

    data = load_data(params)

    # a single network for encoder-only setups, an encoder/decoder pair otherwise
    if params.encoder_only:
        model = build_model(params, data['dico'])
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        encoder, decoder = build_model(params, data['dico'])
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    def run_objectives():
        """Run every configured training objective once, in a fixed order."""
        # causal language modeling
        for src, tgt in shuf_order(params.clm_steps, params):
            trainer.clm_step(src, tgt, params.lambda_clm)

        # masked language modeling (also covers TLM when the second language
        # is not None); a pair may repeat a language, e.g. ('en', 'en')
        for src, tgt in shuf_order(params.mlm_steps, params):
            trainer.mlm_step(src, tgt, params.lambda_mlm)

        # parallel classification
        for src, tgt in shuf_order(params.pc_steps, params):
            trainer.pc_step(src, tgt, params.lambda_pc)

        # image-language pretraining on the hard-coded "coco36" dataset key
        trainer.ipm_step("coco36", params.lambda_ipm)

        # CMLM
        for first, second in shuf_order(params.cmlm_steps, params):
            trainer.cmlm_step(first, second, params.lambda_cmlm)

        # denoising auto-encoder: same language on both sides of mt_step
        for single in shuf_order(params.ae_steps):
            trainer.mt_step(single, single, params.lambda_ae)

        # machine translation
        for src, tgt in shuf_order(params.mt_steps, params):
            trainer.mt_step(src, tgt, params.lambda_mt)

        # back-translation
        for src, pivot, tgt in shuf_order(params.bt_steps):
            trainer.bt_step(src, pivot, tgt, params.lambda_bt)

    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)

        # reset both per-epoch counters
        trainer.n_sentences = 0
        trainer.n_images = 0

        # keep going while either counter is still below the epoch size
        while trainer.n_sentences < trainer.epoch_size or trainer.n_images < trainer.epoch_size:
            run_objectives()
            trainer.iter()

        logger.info("============ End of epoch %i ============" % trainer.epoch)
示例#18
0
def clts_elmo_main(params):
    """Train (or only evaluate) the CLTS model built by ``build_clts_elmo_model``.

    Initializes the distributed mode, the experiment logger and the SLURM
    signal handler, loads the data and builds the three networks plus their
    trainer / evaluator. With ``params.eval_only`` set, all evaluations are run
    once, logged, and the process exits. Otherwise the model is trained for
    ``params.max_epoch`` epochs of machine-translation steps, with an
    evaluation and checkpointing pass at the end of each epoch.
    """
    init_distributed_mode(params)
    logger = initialize_exp(params)
    init_signal_handler()

    data = load_data(params)

    # cross-lingual encoder, text summarization encoder and decoder
    elmo, ts_encoder, ts_decoder = build_clts_elmo_model(params, data['dico'])
    trainer = XLMCLTSEncDecTrainer(elmo, ts_encoder, ts_decoder, data, params)
    evaluator = XLMCLTSEncDecEvaluator(trainer, data, params)

    def report(results, master_only):
        """Log each score, then the JSON dump (optionally on the master only)."""
        for name, value in results.items():
            logger.info("%s -> %.6f" % (name, value))
        if not master_only or params.is_master:
            logger.info("__log__:%s" % json.dumps(results))

    # evaluation-only run: report once and stop
    if params.eval_only:
        report(evaluator.run_all_evals(trainer), master_only=False)
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:
            # machine translation steps
            for src, tgt in shuf_order(params.mt_steps, params):
                trainer.mt_step(src, tgt, params.lambda_mt)
            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate, then print / JSON log
        scores = evaluator.run_all_evals(trainer)
        report(scores, master_only=True)

        # end of epoch bookkeeping
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
示例#19
0
def main(params):
    """Train (or only evaluate) a model on the ``mass_steps`` objective.

    After the usual setup (distributed mode, experiment logger, SLURM signal
    handler, data loading) the network(s) are built, optionally converted to
    half precision and wrapped for distributed training, and handed to the
    matching trainer / evaluator.

    With ``params.eval_only`` set, the matching evaluation is run once, the
    prediction files are written under ``params.dump_path`` and the process
    exits. Otherwise the model is trained for ``params.max_epoch`` epochs with
    an evaluation and checkpointing pass after each one.
    """
    init_distributed_mode(params)
    logger = initialize_exp(params)
    init_signal_handler()

    data = load_data(params)

    # Keep the network(s) in a list so the wrappers below apply uniformly:
    # [model] when encoder-only, [encoder, decoder] otherwise.
    if params.encoder_only:
        networks = [build_model(params, data['dico'])]
    else:
        encoder, decoder = build_model(params, data['dico'])
        networks = [encoder, decoder]

    # float16
    if params.fp16:
        assert torch.backends.cudnn.enabled
        networks = [network_to_half(net) for net in networks]

    # distributed
    if params.multi_gpu:
        logger.info("Using nn.parallel.DistributedDataParallel ...")
        networks = [
            apex.parallel.DistributedDataParallel(net, delay_allreduce=True)
            for net in networks
        ]

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(networks[0], data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(networks[0], networks[1], data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    def log_scores(results):
        """Log one line per score; the format depends on the key name."""
        for key, value in results.items():
            if 'likelihood' in key:
                # logged as the mean of the values
                logger.info("%s -> %.6f" % (key, np.mean(value)))
            elif 'scores' in key:
                # only the shape is logged
                logger.info("%s -> %s" % (key, value.shape))
            else:
                logger.info("%s -> %.6f" % (key, value))

    # evaluation
    if params.eval_only:
        logger.info('Evaluating and saving new result file')
        scores = evaluator.run_all_evals_match(trainer)
        log_scores(scores)

        # forward scores of the test split for the first mass step
        fwd_path = os.path.join(params.dump_path, 'best-fwd-prediction.txt')
        np.savetxt(fwd_path,
                   scores['%s_%s_fwd_scores' % ('test', params.mass_steps[0])],
                   fmt='%f')

        # one likelihood file per match file, named after its last extension
        for match in params.match_files.split(','):
            match_path = os.path.join(
                params.dump_path,
                'best-match-prediction{}.txt'.format(match.split('.')[-1]))
            np.savetxt(
                match_path,
                scores['%s_%s_sentence_likelihood' % (match, params.mass_steps[0])],
                fmt='%f')

        # bundle labels, targets and predictions into one pickled DataFrame
        labels = np.loadtxt(os.path.join(params.data_path, 'labels'))
        targets = np.loadtxt(os.path.join(params.data_path, 'suffix'))
        preds = scores['%s_%s_sentence_likelihood' % ('match', params.mass_steps[0])]
        results = pd.DataFrame({'label': labels, 'target': targets, 'pred': preds})
        results.to_pickle(os.path.join(params.dump_path, 'best-matching-prediction.pkl'))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:
            # mass prediction steps
            for step_lang in shuf_order(params.mass_steps):
                trainer.mass_step(step_lang, params.lambda_mass)
            trainer.iter()

        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # per-epoch evaluation and logging
        scores = evaluator.run_epoch_evals_match(trainer)
        log_scores(scores)

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
示例#20
0
    "--length_penalty",
    type=float,
    default=1.0,
    help="Length penalty: <1.0 favors shorter, >1.0 favors longer sentences")
params = parser.parse_args()

if __name__ == '__main__':

    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # initialize experiment / load data / build model
    logger = initialize_exp(params)
    data = load_data(params)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # Replicate every network over GPUs 0, 1 and 2.
    # `output_device` must be a single device (int or torch.device), not a
    # list, so outputs are gathered on GPU 0 -- the same device DataParallel
    # picks by default (device_ids[0]).
    encoder = nn.DataParallel(encoder,
                              device_ids=[0, 1, 2],
                              output_device=0)
    decoder = nn.DataParallel(decoder,
                              device_ids=[0, 1, 2],
                              output_device=0)
    # the discriminator and the language model may be None and are only
    # wrapped when present
    if discriminator is not None:
        discriminator = nn.DataParallel(discriminator,
                                        device_ids=[0, 1, 2],
                                        output_device=0)
    if lm is not None:
        lm = nn.DataParallel(lm, device_ids=[0, 1, 2], output_device=0)
示例#21
0
def main(args):
    """Fine-tune a pretrained model with supervision, or only validate it.

    Loads a training and a validation dataset from sub-directories of
    ``args.data_path``, builds the model, loads pretrained weights,
    re-initializes the classifier's linear layers, restores a checkpoint if
    any, and then either validates once (``args.evaluate``) or trains from
    ``args.epoch`` up to ``args.nepochs``, validating after every epoch.

    Side effects visible here: ``args.data_path`` is overwritten (it ends up
    pointing at the 'val' sub-directory), and ``args.epoch`` /
    ``args.start_iter`` are set from the restored checkpoint variables.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(args, make_communication_groups=False)

    # initialize the experiment
    logger, training_stats = initialize_exp(args, 'epoch', 'iter', 'prec',
                                            'loss', 'prec_val', 'loss_val')

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load_data reads args.data_path, so it is repointed before each call;
    # in debug mode the 'val' sub-directory is also used as training data
    main_data_path = args.data_path
    if args.debug:
        args.data_path = os.path.join(main_data_path, 'val')
    else:
        args.data_path = os.path.join(main_data_path, 'train')
    train_dataset = load_data(args)

    args.data_path = os.path.join(main_data_path, 'val')
    val_dataset = load_data(args)

    # prepare the different data transformations
    tr_val, tr_train = get_data_transformations()
    train_dataset.transform = tr_train
    val_dataset.transform = tr_val
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=True,
    )

    # build model skeleton
    fix_random_seeds(args.seed)
    # 205 classes when the (already rewritten) data path contains 'places',
    # 1000 otherwise
    nmb_classes = 205 if 'places' in args.data_path else 1000
    model = model_factory(args, relu=True, num_classes=nmb_classes)

    # load pretrained weights
    load_pretrained(model, args)

    # merge sobel layers with first convolution layer
    if args.sobel2RGB:
        sobel2RGB(model)

    # re-initialize the classifier: every nn.Linear gets weights drawn with
    # normal_(0, 0.01) and biases filled with 0.1
    if hasattr(model.body, 'classifier'):
        for m in model.body.classifier.modules():
            if isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.fill_(0.1)

    # distributed training wrapper
    model = to_cuda(model, [args.gpu_to_work_on], apex=True)
    logger.info('model to cuda')

    # set optimizer
    optimizer = sgd_optimizer(model, args.lr, args.wd)

    # variables to fetch from the checkpoint; the defaults below are kept
    # unless restart_from_checkpoint overwrites them
    to_restore = {'epoch': 0, 'start_iter': 0}

    # restart from checkpoint
    restart_from_checkpoint(
        args,
        run_variables=to_restore,
        state_dict=model,
        optimizer=optimizer,
    )
    args.epoch = to_restore['epoch']
    args.start_iter = to_restore['start_iter']

    # evaluation-only run: validate once and stop
    if args.evaluate:
        validate_network(val_loader, [model], args)
        return

    # Supervised training
    # NOTE(review): the loop variable is ignored and args.epoch is never
    # incremented in this block -- presumably train_network advances it;
    # verify, since it drives the log line and the per-epoch seed below.
    for _ in range(args.epoch, args.nepochs):

        logger.info("============ Starting epoch %i ... ============" %
                    args.epoch)

        # per-epoch seed derived from the base seed and the epoch counter
        fix_random_seeds(args.seed + args.epoch)

        # train the network for one epoch
        adjust_learning_rate(optimizer, args)
        scores = train_network(args, model, optimizer, train_dataset)

        scores_val = validate_network(val_loader, [model], args)

        # save training statistics (training and validation scores concatenated)
        logger.info(scores + scores_val)
        training_stats.update(scores + scores_val)
示例#22
0
def main(args):
    """Train a ``RegLog`` classifier on top of a pretrained model.

    Loads the training / validation data (a ``VOC2007_dataset`` when
    ``args.data_path`` contains 'pascal', ``load_data`` otherwise), optionally
    splits it with ``KFold``, builds the pretrained model with its classifier
    removed, and trains the ``RegLog`` module from ``args.epoch`` up to
    ``args.nepochs``, evaluating after every epoch. Only ``reglog`` is handed
    to the optimizer and to the checkpoint restore.

    Side effects visible here: ``args.data_path`` may be rewritten to a
    'train' / 'val' sub-directory, and ``args.test``, ``args.epoch`` and
    ``args.start_iter`` are set.
    """
    # multi-GPU / multi-node setup, experiment logger, SLURM signal handler
    init_distributed_mode(args, make_communication_groups=False)
    logger, training_stats = initialize_exp(args, 'epoch', 'iter', 'prec',
                                            'loss', 'prec_val', 'loss_val')
    init_signal_handler()

    # training set
    if 'pascal' in args.data_path:
        train_dataset = VOC2007_dataset(args.data_path, split=args.split)
    else:
        main_data_path = args.data_path
        args.data_path = os.path.join(main_data_path, 'train')
        train_dataset = load_data(args)

    # evaluation split name: 'val' when training on 'train', 'test' otherwise
    if args.split == 'train':
        args.test = 'val'
    else:
        args.test = 'test'

    # validation set; with cross-validation the data path is left on 'train'
    if 'pascal' in args.data_path:
        val_dataset = VOC2007_dataset(args.data_path, split=args.test)
    else:
        if args.cross_valid is None:
            args.data_path = os.path.join(main_data_path, 'val')
        val_dataset = load_data(args)

    # data loaders: plain shuffling by default, k-fold samplers otherwise
    loader_kwargs = dict(batch_size=args.batch_size, num_workers=args.workers)
    if args.cross_valid is None:
        train_loader = torch.utils.data.DataLoader(
            train_dataset, shuffle=True, pin_memory=True, **loader_kwargs)
        val_loader = torch.utils.data.DataLoader(
            val_dataset, shuffle=False, **loader_kwargs)
    else:
        kfold = KFold(per_target(train_dataset.imgs), args.cross_valid, args.kfold)
        train_loader = torch.utils.data.DataLoader(
            train_dataset, sampler=kfold.train, pin_memory=True, **loader_kwargs)
        val_loader = torch.utils.data.DataLoader(
            val_dataset, sampler=kfold.val, **loader_kwargs)

    # the transforms are attached after the loaders are built; the loaders
    # were given these same dataset objects
    tr_val, tr_train = get_data_transformations()
    train_dataset.transform = tr_train
    val_dataset.transform = tr_val

    # build model skeleton and load the pretrained weights
    fix_random_seeds(args.seed)
    model = model_factory(args.arch, args.sobel)
    load_pretrained(model, args)

    # keep only conv layers
    model.body.classifier = None
    model.conv = args.conv

    # number of target classes, chosen from the data path ('places' is
    # checked first)
    nmb_classes = (205 if 'places' in args.data_path else
                   20 if 'pascal' in args.data_path else
                   1000)

    reglog = RegLog(args.arch, nmb_classes, args.conv)

    # distributed training wrapper
    model = to_cuda(model, [args.gpu_to_work_on], apex=False)
    reglog = to_cuda(reglog, [args.gpu_to_work_on], apex=False)
    logger.info('model to cuda')

    # the optimizer is built from the RegLog module only
    optimizer = sgd_optimizer(reglog, args.lr, args.wd)

    # variables to fetch from the checkpoint, with their defaults
    to_restore = {'epoch': 0, 'start_iter': 0}
    restart_from_checkpoint(
        args,
        run_variables=to_restore,
        state_dict=reglog,
        optimizer=optimizer,
    )
    args.epoch = to_restore['epoch']
    args.start_iter = to_restore['start_iter']

    # pretrained model in eval mode, classifier in train mode
    model.eval()
    reglog.train()

    # Linear training
    for _ in range(args.epoch, args.nepochs):

        logger.info("============ Starting epoch %i ... ============" % args.epoch)

        # train the network for one epoch
        train_scores = train_network(args, model, reglog, optimizer, train_loader)

        if 'pascal' in args.data_path:
            scores_val = evaluate_pascal(val_dataset, [model, reglog])
        else:
            scores_val = validate_network(val_loader, [model, reglog], args)

        # save training statistics (training and validation scores concatenated)
        epoch_scores = train_scores + scores_val
        logger.info(epoch_scores)
        training_stats.update(epoch_scores)
示例#23
0
文件: train_x.py 项目: microsoft/M3P
def main(params):
    """Train (or only evaluate) a model through ``XTrainer`` / ``XEvaluator``.

    Performs the usual setup (distributed mode, experiment logger, SLURM
    signal handler, data loading), builds the model and either runs all
    evaluations once (``params.eval_only``) or trains for ``params.max_epoch``
    epochs. After each epoch the scores are logged; on the master process they
    are also written to ``epoch_<n>.eval_log`` under ``params.dump_path``, and
    a ``model_pretrain_<n>`` snapshot is saved whenever the epoch number is a
    multiple of ``params.save_every_epoch``.
    """
    import os

    init_distributed_mode(params)
    logger = initialize_exp(params)
    init_signal_handler()

    data = load_data(params)
    print(data)

    # build model
    if params.encoder_only:
        model = build_model(params)
    else:
        encoder, decoder = build_model(params)

    # NOTE(review): `model` is only bound in the encoder-only branch above, so
    # the next line raises UnboundLocalError when params.encoder_only is
    # false -- confirm whether the encoder/decoder setup is meant to be
    # supported by this entry point.
    trainer = XTrainer(model, data, params)
    evaluator = XEvaluator(trainer, data, params)

    # evaluation-only run: log the scores once and stop
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for name, value in scores.items():
            logger.info("%s -> %.6f" % (name, value))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    def run_objectives():
        """Run every configured training objective once, in a fixed order."""
        # MLM steps (also includes TLM if the second language is not None)
        for src, tgt in shuf_order(params.mlm_steps, params):
            if params.is_understanding:
                trainer.mlm_step(src, tgt, params.lambda_mlm)

        # text steps: only the first language is used; weighted by lambda_mlm
        for src, _unused in shuf_order(params.text_steps, params):
            if params.is_ntg:
                trainer.ntg_step(src, None, params.lambda_mlm)

        # cross-modal caption steps
        for src, tgt in shuf_order(params.cross_modal_steps, params):
            if params.is_mt:
                trainer.mt_ic_step(src, tgt, params.lambda_ic)
            else:
                trainer.ic_step(src, tgt, params.lambda_ic)

            if params.is_freelb:
                trainer.free_lb_ic_step(src, tgt, params.lambda_ic)

        # generation objectives over the MLM steps (shuf_order called with n=3)
        for src, tgt in shuf_order(params.mlm_steps, params, n=3):
            if params.is_generation:
                trainer.bart_mlm_step(src, tgt, params.lambda_imlm)
                trainer.bart_mass_step(src, tgt, params.lambda_imlm)

        for src, tgt in shuf_order(params.cross_ae_steps, params):
            trainer.bart_img_step(src, tgt, params.lambda_ida)

        for src, tgt in shuf_order(params.cross_rel_steps, params):
            if params.is_pretrain:
                trainer.pretrain_rel_step(src, tgt)
            elif params.is_slide:
                trainer.slide_step(src, tgt, params.lambda_t2i)
            else:
                # support multi languages
                trainer.rel_step(src, tgt, params.lambda_t2i, params.lambda_i2t)

    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:
            run_objectives()
            trainer.iter()

        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate, then print / JSON log
        scores = evaluator.run_all_evals(trainer)
        for name, value in scores.items():
            logger.info("%s -> %.6f" % (name, value))

        # only the master process writes the per-epoch evaluation log file
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))
            eval_line = json.dumps(scores) + '\n'
            eval_log_path = os.path.join(
                params.dump_path, "epoch_{0}.eval_log".format(trainer.epoch))
            with open(eval_log_path, 'w') as writer:
                writer.write(eval_line)

        # end of epoch
        trainer.save_best_model(scores)
        if trainer.epoch % params.save_every_epoch == 0 and params.is_master:
            trainer.save_model('model_pretrain_%i' % trainer.epoch)
        trainer.save_periodic()
        trainer.end_epoch(scores)
def main(arguments):
    """Parse the command line, reload a checkpoint and run the evaluation.

    Args:
        arguments: list of command-line tokens handed to
            ``ArgumentParser.parse_args``.

    The model, dictionary and parameters are restored from ``--model_path``
    via ``reload_checkpoint``; the model is moved to the selected GPU (unless
    ``--gpu_id`` is negative) and put in eval mode. The data is then loaded,
    ``prepare_data`` is called and ``run`` performs the evaluation.

    NOTE(review): ``--beam_size``, ``--length_penalty`` and ``--print_every``
    are parsed but not used anywhere in this function.
    """
    parser = argparse.ArgumentParser(
        description='Enumerate over all possible positions to pick the best one'
    )

    parser.add_argument(
        '--model_path',
        type=str,
        default=
        '/misc/kcgscratch1/ChoGroup/mansimov/XLM-data/exp_elman/finetune_deen_tlm_uniform_4gpu_128batch_pickside_lr_debug/912lweev6s/best-valid_de-en_mt_bleu.pth',
        help='path to pretrained TLM model')
    parser.add_argument('--src_lang',
                        type=str,
                        default='de',
                        help='source language')
    parser.add_argument('--trg_lang',
                        type=str,
                        default='en',
                        help='target language')
    parser.add_argument('--split',
                        type=str,
                        default='valid',
                        help='use valid/test split of dataset',
                        choices=['valid', 'test'])
    parser.add_argument('--use_data_length',
                        action='store_true',
                        help='use lengths according to dataset statistics')
    parser.add_argument(
        '--num_topk_lengths',
        type=int,
        default=1,
        help='number of topk lengths to use when using dataset statistics')
    parser.add_argument('--beam_size',
                        type=int,
                        default=1,
                        help='beam size to use in the experiments')
    parser.add_argument('--length_penalty',
                        type=int,
                        default=1,
                        help='length penalty to use')
    parser.add_argument('--batch_size',
                        type=int,
                        default=1,
                        help='batch size to use')
    parser.add_argument('--gen_type', type=str, default="src2trg", \
                        choices=['src2trg', 'trg2src'], \
                        help='generation type to use src2trg (de->en) or trg2src (en->de)')
    parser.add_argument('--print_every',
                        type=int,
                        default=10,
                        help='how often to log progress')
    parser.add_argument('--alpha',
                        type=float,
                        default=1.,
                        help='weight to put on entropy')
    parser.add_argument('--beta',
                        type=float,
                        default=1.,
                        help='weight to put on log prob')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.,
                        help='weight to put on left to right decoding')
    parser.add_argument('--uniform',
                        action='store_true',
                        help='do uniform sampling of positions')
    parser.add_argument(
        '--iter_mult',
        type=int,
        default=1,
        help='iteration multipler (multiply this number by target length)')
    parser.add_argument(
        '--mask_schedule',
        type=str,
        choices=["constant", "linear", "all"],
        default="linear",
        help='schedule for number of masks to predict at each iteration')
    parser.add_argument(
        '--constant_k',
        type=int,
        default=1,
        help="If constant mask schedule, number of masks at each iteration")
    parser.add_argument('--gpu_id',
                        type=int,
                        default=0,
                        help='GPU ID, use -1 for CPU')
    args = parser.parse_args(arguments)

    # uniform sampling overrides all three weights with zero
    if args.uniform:
        args.alpha, args.beta, args.gamma = 0, 0, 0

    # set GPU
    if args.gpu_id >= 0:
        torch.cuda.set_device(args.gpu_id)

    print("Evaluating model at {0}".format(args.model_path))
    # load everything from checkpoint
    params, dico, model = reload_checkpoint(args.model_path)
    # put on gpu
    model = model.cuda() if args.gpu_id >= 0 else model
    # put in eval mode
    model = model.eval()

    if args.use_data_length:
        # Length statistics live next to the data. The files are opened with
        # context managers so the handles are closed right after loading.
        # NOTE: pickle executes arbitrary code on load -- only point
        # params.data_path at trusted data.
        with open(os.path.join(params.data_path, 'de2en_lengths.pkl'),
                  'rb') as lengths_file:
            params.de2en_lengths = pkl.load(lengths_file)
        with open(os.path.join(params.data_path, 'en2de_lengths.pkl'),
                  'rb') as lengths_file:
            params.en2de_lengths = pkl.load(lengths_file)
        params.num_topk_lengths = args.num_topk_lengths
    else:
        params.de2en_lengths = None
        params.en2de_lengths = None
        params.num_topk_lengths = 1

    # load data
    params.eval_only = True
    params.batch_size = args.batch_size
    data = load_data(params)

    # creates reference files for BLEU eval
    prepare_data(params, data, args.split, args.gen_type, args.alpha,
                 args.beta, args.gamma, args.uniform, args.iter_mult,
                 args.use_data_length, args.num_topk_lengths,
                 args.mask_schedule, args.constant_k)

    # evaluate
    run(model, params, dico, data, args.split, args.src_lang, args.trg_lang,
        args.gen_type, args.alpha, args.beta, args.gamma, args.uniform,
        args.iter_mult, args.mask_schedule, args.constant_k, args.batch_size,
        args.gpu_id)
示例#25
0
                    help="clip grad norm")
parser.add_argument("--id", type=int, default=0)
parser.add_argument("--checkpoint_dir",
                    type=str,
                    default='/data2/twang/simple-fairseq/all_models/big')
params = parser.parse_args()
# settings that are fixed here rather than read from the command line
params.gpu_num = 1
params.seed = 1234
# the checkpoint to reload and the output file are both derived from
# --checkpoint_dir and --id
params.reload_model = '{}/model_epoch{}.pt'.format(params.checkpoint_dir,
                                                   params.id)
params.translate_file = 'data/valid.bpe.zh'
params.src_dico_file = 'data/dict.bpe.zh'
params.tgt_dico_file = 'data/dict.bpe.en'
params.out_file = '{}/predict_{}.en'.format(params.checkpoint_dir, params.id)
if __name__ == '__main__':
    data = load_data(params, name='test')
    encoder, decoder, _ = build_mt_model(params)
    encoder.eval()
    decoder.eval()
    # get_iterator returns a callable; calling it yields the batch iterator
    iterator = data.get_iterator(shuffle=False, group_by_size=False)()
    total = 0
    # The output file is opened in a `with` block so the handle is always
    # closed, even if translation fails part-way through.
    with open(params.out_file, 'w', encoding='utf-8') as file, \
            torch.no_grad():
        for (sen1, len1) in iterator:
            # sort the batch by decreasing length and reorder the sentences to
            # match (assumes sen1 is indexed (position, batch) -- TODO confirm)
            len1, bak_order = len1.sort(descending=True)
            sen1 = sen1[:, bak_order]
            sen1 = sen1.cuda()
            encoded = encoder(sen1, len1)
            sent2, len2, _ = decoder.generate(encoded)
            total += len2.size(0)
            logger.info('Translating %i sentences.' % total)
示例#26
0
def main(params):
    """Dump word and/or sentence embeddings of a model to TSV files.

    The model is built from ``params`` and the loaded dictionary. Depending
    on the flags, the function writes:

    * ``params.gen_word_emb``: one line per dictionary entry
      (``word<TAB>v1<TAB>v2...``) to
      ``./dumped/<exp_name>/<exp_id>/embeddings.tsv``;
    * ``params.gen_sent_emb``: one file per first language of
      ``params.mlm_steps`` (``sent_embs-<lang1>.tsv``), one line per
      sentence (``sentence<TAB>v1<TAB>v2...``).

    Args:
        params: experiment configuration; the attributes read here are
            ``encoder_only``, ``gen_word_emb``, ``gen_sent_emb``,
            ``exp_name``, ``exp_id`` and ``mlm_steps``.
    """
    init_distributed_mode(params)

    # load data
    data = load_data(params)
    dico = data['dico']

    # build model and pull the embedding matrix to host memory as numpy
    if params.encoder_only:
        model = build_model(params, dico)
        emb_weights = model.embeddings.weight.data.cpu().numpy()
    else:
        # only the encoder's embedding table is exported
        encoder, _decoder = build_model(params, dico)
        emb_weights = encoder.embeddings.weight.data.cpu().numpy()

    if params.gen_word_emb:
        with open(f'./dumped/{params.exp_name}/{params.exp_id}/embeddings.tsv',
                  'w',
                  encoding='utf-8') as out:
            # Index-based access on purpose: only len() and [] are known to
            # be supported by the dictionary object.
            for i in range(len(dico)):
                word = dico[i]
                emb = '\t'.join(str(v) for v in emb_weights[i])
                out.write(f"{word}\t{emb}\n")

    if params.gen_sent_emb:
        evaluator = SingleEvaluator(None, data, params)

        sents = {}
        sent_embs = {}
        # First compute every language's embeddings, then write them, so the
        # files are only created once all generation has succeeded.
        with torch.no_grad():
            data_set = 'test'
            for lang1, lang2 in params.mlm_steps:
                # lang2 is expected to be None here (monolingual steps)
                _sents, _sent_embs = evaluator.generate_sent_emb(
                    data_set, lang1, lang2)
                sents[lang1] = _sents
                sent_embs[lang1] = _sent_embs

        for lang1, _ in params.mlm_steps:
            # Context manager: the file is closed even if a write fails.
            with open(
                    f'./dumped/{params.exp_name}/{params.exp_id}/sent_embs-{lang1}.tsv',
                    'w',
                    encoding='utf-8') as out:
                for sent, emb in zip(sents[lang1], sent_embs[lang1]):
                    emb = '\t'.join(str(v) for v in emb)
                    out.write(f"{sent.strip()}\t{emb}\n")
示例#27
0
def main(params):
    """Run the MT experiment described by ``params``: evaluate or train.

    Steps, in order: validate the parameters, initialise the experiment
    logger, load the data, build the encoder / decoder / discriminator /
    language model, create the trainer (reloading any checkpoint) and the
    evaluator. With ``params.eval_only`` set, a single evaluation is run and
    the process exits. Otherwise the language model is optionally
    pre-trained for ``params.lm_before`` iterations, and the main loop runs
    from ``trainer.epoch`` up to ``params.max_epoch``.

    Args:
        params: experiment configuration. ``params.epoch_size`` is replaced
            by ``params.n_para`` when it is ``-1``, and
            ``params.started_otf_batch_gen`` is set once on-the-fly batch
            generation has been started.

    Raises:
        AssertionError: if ``params.exp_name`` is empty or the resolved
            ``params.epoch_size`` is not positive.
        SystemExit: after evaluation when ``params.eval_only`` is set.
    """
    # Local import keeps this block self-contained. sys.exit() is used
    # instead of the site-injected exit(), which is meant for the interactive
    # shell and is not defined when the interpreter runs without `site`.
    import sys

    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # initialize experiment / load data / build model
    logger = initialize_exp(params)
    data = load_data(params)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)

    # evaluation mode
    if params.eval_only:
        evaluator.run_all_evals(0)
        sys.exit()

    # language model pretraining
    if params.lm_before > 0:
        logger.info("Pretraining language model for %i iterations ..." %
                    params.lm_before)
        trainer.n_sentences = 0
        for _ in range(params.lm_before):
            for lang in params.langs:
                trainer.lm_step(lang)
            trainer.iter()

    # define epoch size (-1 means "use params.n_para")
    if params.epoch_size == -1:
        params.epoch_size = params.n_para
    assert params.epoch_size > 0

    # start training
    for _ in range(trainer.epoch, params.max_epoch):

        logger.info(
            "====================== Starting epoch %i ... ======================"
            % trainer.epoch)

        trainer.n_sentences = 0

        # one pass of this loop == one training iteration; the epoch ends
        # once the trainer's sentence counter reaches params.epoch_size
        while trainer.n_sentences < params.epoch_size:

            # discriminator training
            for _ in range(params.n_dis):
                trainer.discriminator_step()

            # language model training
            if params.lambda_lm > 0:
                for _ in range(params.lm_after):
                    for lang in params.langs:
                        trainer.lm_step(lang)

            # MT training (parallel data)
            if params.lambda_xe_para > 0:
                for lang1, lang2 in params.para_directions:
                    trainer.enc_dec_step(lang1, lang2, params.lambda_xe_para)

            # MT training (back-parallel data)
            if params.lambda_xe_back > 0:
                for lang1, lang2 in params.back_directions:
                    trainer.enc_dec_step(lang1,
                                         lang2,
                                         params.lambda_xe_back,
                                         back=True)

            # autoencoder training (monolingual data)
            if params.lambda_xe_mono > 0:
                for lang in params.mono_directions:
                    trainer.enc_dec_step(lang, lang, params.lambda_xe_mono)

            # AE - MT training (on the fly back-translation)
            if params.lambda_xe_otfd > 0 or params.lambda_xe_otfa > 0:

                # start on-the-fly batch generations (first time only)
                # NOTE(review): `otf_iterator` is bound only inside this
                # branch; if params.started_otf_batch_gen were already
                # truthy on entry, the next() call below would fail with an
                # unbound name - confirm callers never pre-set it.
                if not getattr(params, 'started_otf_batch_gen', False):
                    otf_iterator = trainer.otf_bt_gen_async()
                    params.started_otf_batch_gen = True

                # update model parameters on subprocesses
                if trainer.n_iter % params.otf_sync_params_every == 0:
                    trainer.otf_sync_params()

                # get training batch from CPU; the wait is added to the
                # trainer's generation-time counter
                before_gen = time.time()
                batches = next(otf_iterator)
                trainer.gen_time += time.time() - before_gen

                # training
                for batch in batches:
                    lang1, lang2, lang3 = batch['lang1'], batch[
                        'lang2'], batch['lang3']
                    # 2-lang back-translation - autoencoding
                    if lang1 != lang2 == lang3:
                        trainer.otf_bt(batch, params.lambda_xe_otfa,
                                       params.otf_backprop_temperature)
                    # 2-lang back-translation - parallel data
                    elif lang1 == lang3 != lang2:
                        trainer.otf_bt(batch, params.lambda_xe_otfd,
                                       params.otf_backprop_temperature)
                    # 3-lang back-translation - parallel data
                    elif lang1 != lang2 and lang2 != lang3 and lang1 != lang3:
                        trainer.otf_bt(batch, params.lambda_xe_otfd,
                                       params.otf_backprop_temperature)
                    # any other combination (lang1 == lang2) is skipped

            trainer.iter()

        # end of epoch
        logger.info(
            "====================== End of epoch %i ======================" %
            trainer.epoch)

        # evaluate discriminator / perplexity / BLEU
        scores = evaluator.run_all_evals(trainer.epoch)

        # print / JSON log
        for k, v in scores.items():
            logger.info('%s -> %.6f' % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))

        # save best / save periodic / end epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
        trainer.test_sharing()
示例#28
0
def main(params):
    """Train (or only evaluate) a model and log its scores as scalars.

    Sets up distributed mode, the experiment logger and the signal handler,
    optionally seeds the random number generators, loads the data and builds
    either a single model (``params.encoder_only``) or an encoder/decoder
    pair with the matching trainer and evaluator. With ``params.eval_only``
    the scores are logged and the process exits; otherwise
    ``params.max_epoch`` epochs are run. After each epoch every score is sent
    to the summary writer, keyed by the global iteration count.

    Args:
        params: experiment configuration. ``params.iter_seed`` is rewritten
            to ``None`` when it equals ``-1``.

    Raises:
        SystemExit: after evaluation when ``params.eval_only`` is set.
    """
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    if params.other_seed > -1:
        # deterministic: seed torch (CPU and CUDA), numpy and `random`
        torch.manual_seed(params.other_seed)
        torch.cuda.manual_seed(params.other_seed)
        np.random.seed(params.other_seed)
        random.seed(params.other_seed)

    if params.iter_seed == -1:
        # non-deterministic
        params.iter_seed = None

    # load data
    data = load_data(params)
    writer = SummaryWriter(params.dump_path + "/" + params.exp_name + "_log")

    # Everything after the writer is created runs under try/finally so the
    # writer is closed on every exit path: normal completion, the eval-only
    # sys.exit() below, or an exception during training.
    try:
        # build model, then trainer (reloads potential checkpoints) and
        # evaluator
        if params.encoder_only:
            model = build_model(params, data['dico'])
            trainer = SingleTrainer(model, data, params)
            evaluator = SingleEvaluator(trainer, data, params)
        else:
            encoder, decoder = build_model(params, data['dico'])
            trainer = EncDecTrainer(encoder, decoder, data, params)
            evaluator = EncDecEvaluator(trainer, data, params)

        # evaluation
        if params.eval_only:
            scores = evaluator.run_all_evals(trainer)
            for k, v in scores.items():
                logger.info("%s -> %.6f" % (k, v))
            logger.info("__log__:%s" % json.dumps(scores))
            sys.exit()

        # set sampling probabilities for training
        set_sampling_probs(data, params)
        # global iteration counter: passed to the MLM / VLM steps and used
        # as the x-axis value for the logged scalars
        _iter = 0

        # dump initial weights
        if params.save_initial:
            trainer.save_checkpoint('initial', include_optimizers=False)

        # language model training
        for _ in range(params.max_epoch):

            logger.info("============ Starting epoch %i ... ============" %
                        trainer.epoch)

            trainer.n_sentences = 0

            while trainer.n_sentences < trainer.epoch_size:
                # MLM steps (also includes TLM if lang2 is not None)
                for lang1, lang2 in shuf_order(params.mlm_steps, params):
                    if params.only_vlm:
                        # with visual features
                        trainer.vlm_step(lang1, lang2, params.lambda_mlm,
                                         _iter)
                    else:
                        trainer.mlm_step(lang1, lang2, params.lambda_mlm,
                                         _iter)

                # parallel classification steps
                for lang1, lang2 in shuf_order(params.pc_steps, params):
                    trainer.pc_step(lang1, lang2, params.lambda_pc)

                # denoising auto-encoder steps
                for lang in shuf_order(params.ae_steps):
                    trainer.mt_step(lang, lang, params.lambda_ae)

                # back-translation steps
                for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                    trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

                # machine translation steps
                for lang1, lang2 in shuf_order(params.mt_steps, params):
                    trainer.mt_step(lang1, lang2, params.lambda_mt)

                # mmt steps, weighted by lambda_mt like the MT steps above
                for lang1, lang2 in shuf_order(params.mmt_steps, params):
                    trainer.mmt_step(lang1, lang2, params.lambda_mt)

                trainer.iter()
                _iter += 1

            logger.info("============ End of epoch %i ============" %
                        trainer.epoch)

            # evaluate perplexity
            scores = evaluator.run_all_evals(trainer)

            # print / JSON log
            for k, v in scores.items():
                writer.add_scalar(k, v, _iter)
                logger.info("%s -> %.6f" % (k, v))
            if params.is_master:
                logger.info("__log__:%s" % json.dumps(scores))

            # end of epoch
            trainer.save_best_model(scores)
            trainer.save_periodic()
            trainer.end_epoch(scores)
    finally:
        # flush pending events and release the writer's file handle
        writer.close()
示例#29
0
def main(params):
    """Train (or only evaluate) a language / translation model.

    Sets up distributed mode, the experiment logger and the signal handler,
    loads the data and builds either a single model
    (``params.encoder_only``) or an encoder/decoder pair with the matching
    trainer and evaluator. With ``params.eval_only`` the scores are logged
    and the process exits. Otherwise ``params.max_epoch`` epochs are run;
    each iteration performs the CLM, MLM, denoising auto-encoder, MT and
    back-translation steps configured in ``params``, and each epoch ends
    with an evaluation and checkpointing.

    Args:
        params: experiment configuration object.

    Raises:
        SystemExit: after evaluation when ``params.eval_only`` is set.
    """
    # Local import keeps this block self-contained. sys.exit() is used
    # instead of the site-injected exit(), which is meant for the interactive
    # shell and is not defined when the interpreter runs without `site`.
    import sys

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # build model, then trainer (reloads potential checkpoints) and evaluator
    if params.encoder_only:
        model = build_model(params, data['dico'])
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        encoder, decoder = build_model(params, data['dico'])
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        sys.exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0

        # one pass of this loop == one training iteration
        while trainer.n_sentences < trainer.epoch_size:

            # CLM steps (causal language model)
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                trainer.clm_step(lang1, lang2, params.lambda_clm)

            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # denoising auto-encoder steps
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3,
                                params.lambda_bt, params.bt_sample_temperature)

            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log (the JSON line is emitted by the master only)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch; the best-model save is skipped when no validation
        # metric is configured
        if params.validation_metrics != '':
            trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
示例#30
0
# %autoreload 2

# %%
import numpy as np
from plotly.offline import iplot
import plotly.io as pio

from src.data.loader import load_data
from src.utility import Utility
import diofant
from pprint import pprint as print
import cufflinks as cf
cf.go_offline()

# %%
# Load the dataset through the project's loader (src.data.loader.load_data).
# presumably returns a pandas DataFrame, given the set_index / fillna calls
# in the next cell - confirm against the loader.
data = load_data()

# %%
# Display the four public-hospital count columns, indexed by governorate,
# with missing values shown as 0. The result is this cell's display value
# only; `data` itself is not reassigned.
data.set_index("governorate")[[
    "Number of institutes, centers and specialized hospitals in public sector",
    "Number of public district hospitals",
    "Number of regional public hospitals",
    "The number of public hospitals",
]].fillna(0)

# %% [markdown]
# ## Indicators

# %% [markdown]
# 1. Access to professionals
#     * Number of doctors / 1000 inhabitants