# Validation data loader.
# NOTE(review): shuffle=True on a validation set is unusual — evaluation
# order should not matter; confirm it is intended.
valid_dataloader = DataLoader(valid_dataset,
                              shuffle=True,
                              batch_size=hyper_params["batch_size"],
                              num_workers=4)

print("Length of training data loader is:", len(train_dataloader))
print("Length of valid data loader is:", len(valid_dataloader))

# load the model
model = BiDAF(word_vectors=word_embedding_matrix,
              char_vectors=char_embedding_matrix,
              hidden_size=hyper_params["hidden_size"],
              drop_prob=hyper_params["drop_prob"])

# Deserialize the pretrained checkpoint once and reuse it below — the
# original code called torch.load() on the same "model.pkl" file twice.
if hyper_params["pretrained"]:
    checkpoint = torch.load(os.path.join(experiment_path, "model.pkl"))
    model.load_state_dict(checkpoint["state_dict"])
model.to(device)

# define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adadelta(model.parameters(), hyper_params["learning_rate"], weight_decay=1e-4)

# best loss so far (restored from the checkpoint when resuming)
if hyper_params["pretrained"]:
    best_valid_loss = checkpoint["best_valid_loss"]
    epoch_checkpoint = torch.load(os.path.join(experiment_path, "model_last_checkpoint.pkl"))["epoch"]
    print("Best validation loss obtained after {} epochs is: {}".format(epoch_checkpoint, best_valid_loss))
else:
    best_valid_loss = 100
    epoch_checkpoint = 0
# Build the BiDAF model and move it to the GPU when one is available.
model = BiDAF(args)

if torch.cuda.is_available():
    print('use cuda')
    model.cuda()


# Optimizer over the trainable parameters only; then optionally resume
# model + optimizer state from a checkpoint file.
optimizer = torch.optim.Adam(
    (p for p in model.parameters() if p.requires_grad))
if not os.path.isfile(args.resume):
    print("=> no checkpoint found at '{}'".format(args.resume))
else:
    print("=> loading checkpoint '{}'".format(args.resume))
    checkpoint = torch.load(args.resume)
    args.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

# Register every trainable parameter with an exponential moving average
# tracker (decay 0.999).
ema = EMA(0.999)
for param_name, param in model.named_parameters():
    if param.requires_grad:
        ema.register(param_name, param.data)

# Dump the trainable parameter names and shapes for inspection.
print('parameters-----')
for param_name, param in model.named_parameters():
    if param.requires_grad:
        print(param_name, param.data.size())
# --- Example no. 3 (scraper artifact; commented out so the file parses) ---
def eval(context, question):
    """Answer ``question`` from ``context`` with a trained BiDAF model.

    Loads the preprocessing artifacts (vocabulary and embedding matrices),
    tokenizes both inputs, encodes them into fixed-size index arrays,
    restores the trained model weights and returns the predicted answer
    span as a string of space-joined context tokens.

    Note: the name shadows the built-in ``eval``; it is kept for backward
    compatibility with existing callers.

    Args:
        context: raw context paragraph (string).
        question: raw question (string).

    Returns:
        str: the predicted answer span.
    """
    # Load the vocabulary / embedding artifacts produced by preprocessing.
    with open(os.path.join(config.data_dir, "train", "word2idx.pkl"), "rb") as wi, \
         open(os.path.join(config.data_dir, "train", "char2idx.pkl"), "rb") as ci, \
         open(os.path.join(config.data_dir, "train", "word_embeddings.pkl"), "rb") as wb, \
         open(os.path.join(config.data_dir, "train", "char_embeddings.pkl"), "rb") as cb:
        word2idx = pickle.load(wi)
        char2idx = pickle.load(ci)
        word_embedding_matrix = pickle.load(wb)
        char_embedding_matrix = pickle.load(cb)

    # transform them into Tensors
    word_embedding_matrix = torch.from_numpy(
        np.array(word_embedding_matrix)).type(torch.float32)
    char_embedding_matrix = torch.from_numpy(
        np.array(char_embedding_matrix)).type(torch.float32)

    context = clean_text(context)
    context = [w for w in word_tokenize(context) if w]

    question = clean_text(question)
    question = [w for w in word_tokenize(question) if w]

    # Warn about inputs exceeding the model's fixed sizes. The original code
    # only warned and then crashed with an IndexError while filling the
    # fixed-size arrays below; over-long inputs are now truncated instead.
    # max(..., default=0) also avoids a ValueError on empty token lists.
    if len(context) > config.max_len_context:
        print("The context is too long. Maximum accepted length is",
              config.max_len_context, "words.")
    if max((len(w) for w in context), default=0) > config.max_len_word:
        print("Some words in the context are longer than", config.max_len_word,
              "characters.")
    if len(question) > config.max_len_question:
        print("The question is too long. Maximum accepted length is",
              config.max_len_question, "words.")
    if max((len(w) for w in question), default=0) > config.max_len_word:
        print("Some words in the question are longer than",
              config.max_len_word, "characters.")
    if len(question) < 3:
        print(
            "The question is too short. It needs to be at least a three words question."
        )

    context_idx = np.zeros([config.max_len_context], dtype=np.int32)
    question_idx = np.zeros([config.max_len_question], dtype=np.int32)
    context_char_idx = np.zeros([config.max_len_context, config.max_len_word],
                                dtype=np.int32)
    question_char_idx = np.zeros(
        [config.max_len_question, config.max_len_word], dtype=np.int32)

    # Replace 0 values with word and char IDs; index 1 is the OOV id.
    # Slicing enforces the model's maximum lengths so the fixed-size arrays
    # can never be indexed out of bounds.
    for j, word in enumerate(context[:config.max_len_context]):
        context_idx[j] = word2idx.get(word, 1)
        for k, char in enumerate(word[:config.max_len_word]):
            context_char_idx[j, k] = char2idx.get(char, 1)

    for j, word in enumerate(question[:config.max_len_question]):
        question_idx[j] = word2idx.get(word, 1)
        for k, char in enumerate(word[:config.max_len_word]):
            question_char_idx[j, k] = char2idx.get(char, 1)

    model = BiDAF(word_vectors=word_embedding_matrix,
                  char_vectors=char_embedding_matrix,
                  hidden_size=config.hidden_size,
                  drop_prob=config.drop_prob)
    # Restore trained weights; map tensors to CPU when CUDA is unavailable.
    # Narrowed from a bare `except:` (which also swallowed SystemExit /
    # KeyboardInterrupt) and removed a dead `pass` before the print.
    try:
        checkpoint_path = os.path.join(config.squad_models, "model_final.pkl")
        if config.cuda:
            state = torch.load(checkpoint_path)
        else:
            state = torch.load(
                checkpoint_path,
                map_location=lambda storage, loc: storage)
        model.load_state_dict(state["state_dict"])
        print("Model weights successfully loaded.")
    except Exception:
        print(
            "Model weights not found, initialized model with random weights.")
    model.to(device)
    model.eval()
    with torch.no_grad():
        # Add the batch dimension and move everything to the target device.
        context_idx = torch.tensor(context_idx, dtype=torch.int64).unsqueeze(0).to(device)
        context_char_idx = torch.tensor(context_char_idx, dtype=torch.int64).unsqueeze(0).to(device)
        question_idx = torch.tensor(question_idx, dtype=torch.int64).unsqueeze(0).to(device)
        question_char_idx = torch.tensor(question_char_idx, dtype=torch.int64).unsqueeze(0).to(device)

        pred1, pred2 = model(context_idx, context_char_idx, question_idx,
                             question_char_idx)
        # discretize picks the most probable (start, end) pair, max span 15.
        starts, ends = discretize(pred1.exp(), pred2.exp(), 15, False)
        prediction = " ".join(context[starts.item():ends.item() + 1])

    return prediction
# --- Example no. 4 (scraper artifact; commented out so the file parses) ---
# Test-set data loader.
# NOTE(review): shuffle=True on a test set is unusual — confirm the
# evaluation metrics do not depend on example order.
test_dataloader = DataLoader(test_dataset,
                             shuffle=True,
                             batch_size=hyper_params["batch_size"],
                             num_workers=4)

print("Length of test data loader is:", len(test_dataloader))

# load the model
model = BiDAF(word_vectors=word_embedding_matrix,
              char_vectors=char_embedding_matrix,
              hidden_size=hyper_params["hidden_size"],
              drop_prob=hyper_params["drop_prob"])
# Restore trained weights; map tensors to CPU when CUDA is unavailable.
# Narrowed from a bare `except:`, which also swallowed SystemExit and
# KeyboardInterrupt.
try:
    checkpoint_path = os.path.join(config.squad_models, "model_final.pkl")
    if config.cuda:
        state = torch.load(checkpoint_path)
    else:
        state = torch.load(checkpoint_path,
                           map_location=lambda storage, loc: storage)
    model.load_state_dict(state["state_dict"])
    print("Model weights successfully loaded.")
except Exception:
    print("Model weights not found, initialized model with random weights.")
model.to(device)

# define loss criterion
criterion = nn.CrossEntropyLoss()

model.eval()
# --- Example no. 5 (scraper artifact; commented out so the file parses) ---
def main(NMT_config):
    """RL fine-tuning loop: train an NMT question rewriter against a trained
    QA (BiDAF) model, using the QA model's F1 score as the reward.

    Only the translator's parameters are optimized; the QA model is put in
    eval mode and used to score rewritten questions.

    Args:
        NMT_config: translator (NMT) configuration; its ``max_length`` and
            ``batch_size`` fields are overwritten below from the QA config.

    NOTE(review): the visible portion ends inside the training loop at
    ``optimizer.step()``; the function may continue beyond this view.
    """

    ### Load RL (global) configurations ###
    config = parse_args()

    ### Load trained QA model ###
    QA_checkpoint = torch.load(config.data_dir + config.QA_best_model)
    QA_config = QA_checkpoint['config']

    QA_mod = BiDAF(QA_config)
    if QA_config.use_gpu:
        QA_mod.cuda()
    QA_mod.load_state_dict(QA_checkpoint['state_dict'])

    ### Load SQuAD dataset ###
    data_filter = get_squad_data_filter(QA_config)

    train_data = read_data(QA_config,
                           'train',
                           QA_config.load,
                           data_filter=data_filter)
    dev_data = read_data(QA_config, 'dev', True, data_filter=data_filter)

    update_config(QA_config, [train_data, dev_data])

    print("Total vocabulary for training is %s" % QA_config.word_vocab_size)

    # from all
    word2vec_dict = train_data.shared[
        'lower_word2vec'] if QA_config.lower_word else train_data.shared[
            'word2vec']
    # from filter-out set
    word2idx_dict = train_data.shared['word2idx']

    # filter-out set idx-vector
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    print("{}/{} unique words have corresponding glove vectors.".format(
        len(idx2vec_dict), len(word2idx_dict)))

    # <null> and <unk> do not have corresponding vector so random.
    # Words without a GloVe vector get a random draw from a standard
    # multivariate normal of dimension word_emb_size.
    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(QA_config.word_emb_size), np.eye(QA_config.word_emb_size))
        for idx in range(QA_config.word_vocab_size)
    ])

    config.emb_mat = emb_mat
    config.new_emb_mat = train_data.shared['new_emb_mat']

    # Total optimizer steps: batches per epoch (across GPUs) times epochs.
    num_steps = int(
        math.ceil(train_data.num_examples /
                  (QA_config.batch_size *
                   QA_config.num_gpus))) * QA_config.num_epochs

    # offset for question mark
    NMT_config.max_length = QA_config.ques_size_th - 1
    NMT_config.batch_size = QA_config.batch_size

    ### Construct translator ###
    translator = make_translator(NMT_config, report_score=True)

    ### Construct optimizer ###
    # SGD over the translator's trainable parameters only — the QA model
    # stays fixed.
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 translator.model.parameters()),
                          lr=config.lr)

    ### Start RL training ###
    count = 0
    QA_mod.eval()
    F1_eval = F1Evaluator(QA_config, QA_mod)
    #eval_model(QA_mod, train_data, dev_data, QA_config, NMT_config, config, translator)

    for i in range(config.n_episodes):
        for batches in tqdm(train_data.get_multi_batches(
                QA_config.batch_size,
                QA_config.num_gpus,
                num_steps=num_steps,
                shuffle=True,
                cluster=QA_config.cluster),
                            total=num_steps):

            #for n, p in translator.model.named_parameters():
            #    print(n)
            #    print(p)
            #print(p.requires_grad)

            start = datetime.now()
            # Dump the batch's questions to the file the translator reads from.
            to_input(batches[0][1].data['q'], config.RL_path + config.RL_file)

            # obtain rewrite and log_prob
            q, scores, log_prob = translator.translate(NMT_config.src_dir,
                                                       NMT_config.src,
                                                       NMT_config.tgt,
                                                       NMT_config.batch_size,
                                                       NMT_config.attn_debug)

            # Replace the batch's questions with the rewritten ones
            # (word- and char-level) before scoring with the QA model.
            q, cq = ref_query(q)
            batches[0][1].data['q'] = q
            batches[0][1].data['cq'] = cq

            log_prob = torch.stack(log_prob).squeeze(-1)
            #print(log_prob)

            translator.model.zero_grad()

            QA_mod(batches)

            # Reward is the QA model's per-example F1 on the rewritten
            # questions; no gradient flows through the reward.
            e = F1_eval.get_evaluation(batches, False, NMT_config, config,
                                       translator)
            reward = Variable(torch.FloatTensor(e.f1s), requires_grad=False)
            #print(reward)

            ## Initial loss
            # Policy-gradient style loss from log-probs and rewards.
            loss = create_loss(log_prob, reward)

            loss.backward()
            optimizer.step()