Example #1
def main(argv):
    config = Config()
    config.load_user_config()
    config.log.info("finish loading user config")

    train_file = config.args["train_file"]
    dev_file = config.args["dev_file"]
    old_glove_file = config.args["glove_file"]
    new_glove_file = config.args["glove_file"] + ".subset"

    # TODO(demi): switch "overwrite" to False
    train_data_raw, dev_data_raw, i2w, w2i, i2c, c2i, new_glove_file, glove_dim, vocab_size, char_vocab_size\
         = squad_read_data(config, train_file, dev_file, old_glove_file, new_glove_file, overwrite=True)
    config.log.info("finish reading squad data in raw formats")

    config.update_batch([("glove_file", new_glove_file),
                   ("glove_dim", glove_dim),
                   ("vocab_size", vocab_size),
                   ("char_vocab_size", char_vocab_size)])


    config.log.warning("reminder: now we only support train/fake mode")
    assert config.args["mode"] in ["train", "fake"], "mode (%s) not found" % config.args["mode"]

    train_id_conversion, train_data = make_dataset(config, train_data_raw, w2i, c2i)
    dev_id_conversion, dev_data = make_dataset(config, dev_data_raw, w2i, c2i)
    config.log.info("finish making datasets: reformatting raw data")

    train_data = QnADataset(train_data, config)
    dev_data = QnADataset(dev_data, config)
    config.log.info("finish generating datasets")

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=1, shuffle=True, **config.kwargs)
    dev_loader = torch.utils.data.DataLoader(dev_data, batch_size=1, **config.kwargs)
    config.log.info("finish generating data loader")


    model = BiDAF(config, i2w)
    config.log.info("finish creating model")
    if config.args["use_cuda"]:
        model.cuda()

    # log config and model
    config.log.info(config.format_string())
    config.log.info("model:{}".format(model))

    if config.args['optimizer'] == "Adam":
        optimizer = optim.Adam(model.get_train_parameters(), lr=config.args['lr'], weight_decay=config.args['weight_decay'])
    if config.args['optimizer'] == "Adamax":
        optimizer = optim.Adamax(model.get_train_parameters(), lr=config.args['lr'], weight_decay=config.args['weight_decay'])
    if config.args['optimizer'] == "SGD":
        optimizer = torch.optim.SGD(model.get_train_parameters(), lr=config.args['lr'], momentum=0.9, weight_decay=config.args['weight_decay'])
    if config.args['optimizer'] == "Adadelta":
        optimizer = torch.optim.Adadelta(model.get_train_parameters(), lr=config.args["lr"])
    #if config.args['optimizer'] == "Adagrad":



    config.log.info("model = %s" % model)
    config.log.info("config = %s" % config.format_string())

    trainer = Trainer(config)
    evaluator = Evaluator(config)

    """ save model checkpoint """
    def save_checkpoint(epoch):
        checkpoint = {"model_state_dict": model.state_dict(),
                      "config_args" : config.args}
        if config.args["optimizer"] != "YF":  # YF can't save state dict right now
            checkpoint["optimizer_state_dict"] = optimizer.state_dict()
        checkpoint_file = "{}/{}-EPOCH{}".format(config.args["model_dir"], config.args["model_name"], epoch)
        torch.save(checkpoint, checkpoint_file)
        config.log.info("saving checkpoint: {}".format(checkpoint_file))


    for epoch in range(1, config.args["max_epoch"] + 1):
        config.log.info("training: epoch %d" % epoch)
        # QS(demi): do i need to return model & optimizer?
        model, optimizer, train_avg_loss, train_answer_dict = trainer.run(model, train_id_conversion[0], train_loader, optimizer, mode="train")
        model, optimizer, dev_avg_loss, dev_answer_dict = trainer.run(model, dev_id_conversion[0], dev_loader, optimizer, mode="dev")

        # loss is a float tensor with size 1
        config.log.info("[EPOCH %d] LOSS = (train)%.5lf | (dev)%.5lf" % (epoch, train_avg_loss[0], dev_avg_loss[0]))

        answer_filename = "{}/{}-EPOCH{}".format(config.args["model_dir"], config.args["model_name"], epoch)
        config.log.info("[EVAUATION] TRAIN EVAL")
        evaluator.eval("official", train_file, train_answer_dict, "{}/answer.train".format(config.args["model_dir"], answer_filename))
        config.log.info("[EVAUATION] DEV EVAL")
        evaluator.eval("official", dev_file, dev_answer_dict, "{}/answer.dev".format(config.args["model_dir"], answer_filename))

        save_checkpoint(epoch)
        
        # NOTE: the accumulators below (exact_match, total, p1_corr, p2_corr,
        # batch_size, i, p1_acc, p2_acc) are not defined in this function;
        # this fragment belongs to a separate per-batch evaluation loop.
        exact_match += torch.sum(p1_corr * p2_corr).data[0]
        total += batch_size
        if i % 10 == 0:
            print('current acc: {:.3f}%'.format(100 * exact_match / total))

    print('======== Test result ========')
    print('p1 acc: {:.3f}%, p2 acc: {:.3f}%, EM: {:.3f}%'.format(100. * p1_acc / total, 100. * p2_acc / total, 100. * exact_match / total))
# }}}

Example #2

# create model
model = BiDAF(args)

if torch.cuda.is_available():
    print('use cuda')
    model.cuda()


#resume
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))
if os.path.isfile(args.resume):
    print("=> loading checkpoint '{}'".format(args.resume))
    checkpoint = torch.load(args.resume)
    args.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
else:
    print("=> no checkpoint found at '{}'".format(args.resume))

ema = EMA(0.999)
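
The EMA helper used on the last line is not defined in this snippet. As a hedged illustration only, a minimal parameter exponential-moving-average class matching the EMA(0.999) call might look like the sketch below; the register and __call__ method names are assumptions, not taken from the example.

# Minimal EMA sketch (assumption: EMA(mu) keeps shadow copies of parameters
# and blends new values in with decay mu; method names are illustrative).
class EMA:
    def __init__(self, mu):
        self.mu = mu        # decay rate, e.g. 0.999
        self.shadow = {}    # parameter name -> running average tensor

    def register(self, name, val):
        # store an initial copy of a parameter
        self.shadow[name] = val.clone()

    def __call__(self, name, x):
        # new_average = (1 - mu) * x + mu * old_average
        new_average = (1.0 - self.mu) * x + self.mu * self.shadow[name]
        self.shadow[name] = new_average.clone()
        return new_average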
Example #3
def main(NMT_config):

    ### Load RL (global) configurations ###
    config = parse_args()

    ### Load trained QA model ###
    QA_checkpoint = torch.load(config.data_dir + config.QA_best_model)
    QA_config = QA_checkpoint['config']

    QA_mod = BiDAF(QA_config)
    if QA_config.use_gpu:
        QA_mod.cuda()
    QA_mod.load_state_dict(QA_checkpoint['state_dict'])

    ### Load SQuAD dataset ###
    data_filter = get_squad_data_filter(QA_config)

    train_data = read_data(QA_config,
                           'train',
                           QA_config.load,
                           data_filter=data_filter)
    dev_data = read_data(QA_config, 'dev', True, data_filter=data_filter)

    update_config(QA_config, [train_data, dev_data])

    print("Total vocabulary for training is %s" % QA_config.word_vocab_size)

    # from all
    word2vec_dict = train_data.shared[
        'lower_word2vec'] if QA_config.lower_word else train_data.shared[
            'word2vec']
    # from filter-out set
    word2idx_dict = train_data.shared['word2idx']

    # filter-out set idx-vector
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    print("{}/{} unique words have corresponding glove vectors.".format(
        len(idx2vec_dict), len(word2idx_dict)))

    # <null> and <unk> have no corresponding vectors, so initialize them randomly.
    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(QA_config.word_emb_size), np.eye(QA_config.word_emb_size))
        for idx in range(QA_config.word_vocab_size)
    ])

    config.emb_mat = emb_mat
    config.new_emb_mat = train_data.shared['new_emb_mat']

    num_steps = int(
        math.ceil(train_data.num_examples /
                  (QA_config.batch_size *
                   QA_config.num_gpus))) * QA_config.num_epochs

    # offset for question mark
    NMT_config.max_length = QA_config.ques_size_th - 1
    NMT_config.batch_size = QA_config.batch_size

    ### Construct translator ###
    translator = make_translator(NMT_config, report_score=True)

    ### Construct optimizer ###
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 translator.model.parameters()),
                          lr=config.lr)

    ### Start RL training ###
    count = 0
    QA_mod.eval()
    F1_eval = F1Evaluator(QA_config, QA_mod)
    #eval_model(QA_mod, train_data, dev_data, QA_config, NMT_config, config, translator)

    for i in range(config.n_episodes):
        for batches in tqdm(train_data.get_multi_batches(
                QA_config.batch_size,
                QA_config.num_gpus,
                num_steps=num_steps,
                shuffle=True,
                cluster=QA_config.cluster),
                            total=num_steps):

            #for n, p in translator.model.named_parameters():
            #    print(n)
            #    print(p)
            #print(p.requires_grad)

            start = datetime.now()
            to_input(batches[0][1].data['q'], config.RL_path + config.RL_file)

            # obtain rewrite and log_prob
            q, scores, log_prob = translator.translate(NMT_config.src_dir,
                                                       NMT_config.src,
                                                       NMT_config.tgt,
                                                       NMT_config.batch_size,
                                                       NMT_config.attn_debug)

            q, cq = ref_query(q)
            batches[0][1].data['q'] = q
            batches[0][1].data['cq'] = cq

            log_prob = torch.stack(log_prob).squeeze(-1)
            #print(log_prob)

            translator.model.zero_grad()

            QA_mod(batches)

            e = F1_eval.get_evaluation(batches, False, NMT_config, config,
                                       translator)
            reward = Variable(torch.FloatTensor(e.f1s), requires_grad=False)
            #print(reward)

            ## Initial loss
            loss = create_loss(log_prob, reward)

            loss.backward()
            optimizer.step()
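
The create_loss helper is not defined in this example. Since it combines the translator's log-probabilities with F1-based rewards from the QA model for RL training, a plausible REINFORCE-style sketch is shown below; the shapes of log_prob and reward are assumptions.

# Hedged sketch of a REINFORCE-style create_loss: minimize -E[reward * log_prob],
# i.e. the reward-weighted negative log-likelihood of the sampled rewrites.
# Assumed shapes: log_prob is (seq_len, batch) or (batch,); reward is (batch,).
def create_loss(log_prob, reward):
    if log_prob.dim() > 1:
        # sum token-level log-probabilities into one score per rewritten question
        log_prob = log_prob.sum(dim=0)
    # negative sign turns gradient ascent on expected reward into a loss
    return -(reward * log_prob).mean()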