Example #1
    setattr(options, 'prediction_file',
            os.path.join(root_dir, 'prediction.json'))
    setattr(options, 'model_time', strftime('%H:%M:%S', gmtime()))
    logger.info('data loading complete!')

    options.old_model = best_model_file_name
    options.old_ema = best_ema

    answer_append_sentences = joblib.load(
        'sampled_perturb_answer_sentences.pkl')
    question_append_sentences = joblib.load(
        'sampled_perturb_question_sentences.pkl')

    model = BiDAF(options, data.WORD.vocab.vectors).to(device)
    if options.old_model is not None:
        model.load_state_dict(
            torch.load(options.old_model,
                       map_location="cuda:{}".format(options.gpu)))
    if options.old_ema is not None:
        ema = torch.load(options.old_ema, map_location=device)
    else:
        ema = EMA(options.exp_decay_rate)
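        # register a shadow copy of every trainable parameter; the EMA
        # weights are typically swapped in at evaluation time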
        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.register(name, param.data)

    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda")

Example #2

import argparse
import os
import time
from time import gmtime, strftime

import torch
import torch.nn as nn

# project-specific modules; the exact import paths are assumptions
from model import BiDAF
from data import SQuAD


def predict():
    parser = argparse.ArgumentParser()
    parser.add_argument('--char-dim', default=8, type=int)
    parser.add_argument('--char-channel-width', default=5, type=int)
    parser.add_argument('--char-channel-size', default=100, type=int)
    parser.add_argument('--context-threshold', default=400, type=int)
    parser.add_argument('--dev-batch-size', default=100, type=int)
    parser.add_argument('--test-batch-size', default=100, type=int)
    parser.add_argument('--dev-file', default='dev-v1.1.json')
    parser.add_argument('--test-file', default='test1.json')
    parser.add_argument('--dropout', default=0.2, type=float)
    parser.add_argument('--epoch', default=12, type=int)
    parser.add_argument('--exp-decay-rate', default=0.999, type=float)
    parser.add_argument('--gpu', default=0, type=int)
    parser.add_argument('--hidden-size', default=200, type=int)
    parser.add_argument('--learning-rate', default=0.5, type=float)
    parser.add_argument('--print-freq', default=250, type=int)
    parser.add_argument('--train-batch-size', default=60, type=int)
    parser.add_argument('--train-file', default='train-v1.1.json')
    parser.add_argument('--word-dim', default=300, type=int)
    args = parser.parse_args()

    print('loading SQuAD data...')
    current_dir = os.getcwd()
    current_dir = os.path.join(current_dir, 'BiDAF')
    path = os.path.join(current_dir, 'testing_files')
    data = SQuAD(args, path)
    setattr(args, 'char_vocab_size', len(data.CHAR.vocab))
    setattr(args, 'word_vocab_size', len(data.WORD.vocab))
    setattr(args, 'dataset_file', f'testing_files/{args.dev_file}')
    setattr(args, 'prediction_file', f'output/prediction{time.time()}.out')
    setattr(args, 'model_time', strftime('%H:%M:%S', gmtime()))
    print('data loading complete!')

    device = torch.device(
        f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    model = BiDAF(args, data.WORD.vocab.vectors).to(device)

    # load the trained parameters into the model
    model_path = os.path.join(current_dir, 'saved_models')
    model.load_state_dict(
        torch.load(os.path.join(model_path, 'BiDAF_tl_new.pt'),
                   map_location=device))
    model.eval()  # switch to evaluation mode (disables dropout)

    answers = dict()  # answers to be saved in dictionary format
    with torch.set_grad_enabled(False):
        for batch in iter(data.test_iter):
            p1, p2 = model(batch)

            batch_size, c_len = p1.size()
            ls = nn.LogSoftmax(dim=1)
            mask = (torch.ones(c_len, c_len) *
                    float('-inf')).to(device).tril(-1).unsqueeze(0).expand(
                        batch_size, -1, -1)
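            # score[b, s, e] = log p_start(s) + log p_end(e); the strictly
            # lower-triangular -inf mask forbids spans with end < start, so
            # the two max() calls below pick the best valid (start, end) pair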
            score = (ls(p1).unsqueeze(2) + ls(p2).unsqueeze(1)) + mask
            score, s_idx = score.max(dim=1)
            score, e_idx = score.max(dim=1)
            s_idx = torch.gather(s_idx, 1, e_idx.view(-1, 1)).squeeze()
            for i in range(batch_size):
                qid = batch.id[i]
                answer = batch.c_word[0][i][s_idx[i]:e_idx[i] + 1]
                answer = ' '.join(
                    [data.WORD.vocab.itos[idx] for idx in answer])
                answers[qid] = answer

    print(answers)
    return answers
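
A minimal usage sketch (hypothetical driver code, not part of the example above): predict() returns a dict mapping SQuAD question ids to answer strings, which can be serialized for the official evaluate-v1.1.py script. The output path here is illustrative.

import json

if __name__ == '__main__':
    answers = predict()
    with open('prediction.json', 'w') as f:
        json.dump(answers, f)
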
Example #3

import copy
import time
from time import gmtime, strftime

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter  # or: from tensorboardX import SummaryWriter
from tqdm import tqdm

# project-specific modules; the exact import paths are assumptions
from model import BiDAF, EMA
from data import SQuAD
from evaluate import test

def train(args, data):
    model = BiDAF(args, data.WORD.vocab.vectors)
    if args.load_model != "":
        model.load_state_dict(torch.load(args.load_model))
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    ema = EMA(args.exp_decay_rate)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)
    # sanity check: report any parameters that are not leaf tensors
    for name, param in model.named_parameters():
        if not param.is_leaf:
            print(name, param)

    writer = SummaryWriter(log_dir='runs/' + args.model_name)
    best_model = None

    for iterator, dev_iter, dev_file_name, index, print_freq, lr in zip(
            data.train_iter, data.dev_iter, args.dev_files,
            range(len(data.train)), args.print_freq, args.learning_rate):
        optimizer = optim.Adadelta(model.parameters(), lr=lr)
        criterion = nn.CrossEntropyLoss()
        model.train()
        loss, last_epoch = 0, 0
        max_dev_exact, max_dev_f1 = -1, -1
        print(f"Training with {dev_file_name}")
        print()
        for i, batch in tqdm(enumerate(iterator), total=len(iterator) * args.epoch[index], ncols=100):
            present_epoch = int(iterator.epoch)
            eva = False
            if present_epoch == args.epoch[index]:
                break
            if present_epoch > last_epoch:
                print('epoch:', present_epoch + 1)
                eva = True
            last_epoch = present_epoch

            p1, p2 = model(batch)

            optimizer.zero_grad()
            batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
            loss += batch_loss.item()
            batch_loss.backward()
            optimizer.step()

            # refresh the exponential moving average after each optimizer step
            for name, param in model.named_parameters():
                if param.requires_grad:
                    ema.update(name, param.data)

            torch.cuda.empty_cache()
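            # periodically evaluate on the dev set and keep the best-F1 model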
            if (i + 1) % print_freq == 0 or eva:
                dev_loss, dev_exact, dev_f1 = test(model, ema, args, data, dev_iter, dev_file_name)
                c = (i + 1) // print_freq

                writer.add_scalar('loss/train', loss, c)
                writer.add_scalar('loss/dev', dev_loss, c)
                writer.add_scalar('exact_match/dev', dev_exact, c)
                writer.add_scalar('f1/dev', dev_f1, c)
                print()
                print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f}'
                      f' / dev EM: {dev_exact:.3f} / dev F1: {dev_f1:.3f}')

                if dev_f1 > max_dev_f1:
                    max_dev_f1 = dev_f1
                    max_dev_exact = dev_exact
                    best_model = copy.deepcopy(model)

                loss = 0
                model.train()

    writer.close()
    print(f'max dev EM: {max_dev_exact:.3f} / max dev F1: {max_dev_f1:.3f}')
    print("testing with test batch on best model")
    test_loss, test_exact, test_f1 = test(best_model, ema, args, data, list(data.test_iter)[-1], args.test_files[-1])

    print(f'test loss: {test_loss:.3f}'
          f' / test EM: {test_exact:.3f} / test F1: {test_f1:.3f}')
    return best_model
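
A hedged invocation sketch for train(): the attribute names mirror what the function reads, the list-valued fields line up with the zip over multiple training sets, and every value is illustrative. `data` is assumed to be the SQuAD wrapper loaded as in the other examples.

from types import SimpleNamespace

args = SimpleNamespace(
    load_model='', gpu=0, exp_decay_rate=0.999, model_name='bidaf',
    dev_files=['dev-v1.1.json'], test_files=['test1.json'],
    print_freq=[250], learning_rate=[0.5], epoch=[12])
best_model = train(args, data)
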
path = r'testing_files'
print('loading SQuAD data...')
data = SQuAD(args, path)
setattr(args, 'char_vocab_size', len(data.CHAR.vocab))
setattr(args, 'word_vocab_size', len(data.WORD.vocab))
setattr(args, 'dataset_file', f'testing_files/{args.dev_file}')
setattr(args, 'prediction_file', f'output/prediction{time.time()}.out')
setattr(args, 'model_time', strftime('%H:%M:%S', gmtime()))
print('data loading complete!')

device = torch.device(
    f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
model = BiDAF(args, data.WORD.vocab.vectors).to(device)

# load the trained parameters into the model
model.load_state_dict(torch.load(r'saved_models/BiDAF_08:03:17.pt',
                                 map_location=device))
model.eval()  # switch to evaluation mode (disables dropout)

answers = dict()  # answers to be saved in dictionary format
with torch.set_grad_enabled(False):
    for batch in iter(data.dev_iter):
        p1, p2 = model(batch)

        # (batch, c_len, c_len)
        batch_size, c_len = p1.size()
        ls = nn.LogSoftmax(dim=1)
        mask = (torch.ones(c_len, c_len) *
                float('-inf')).to(device).tril(-1).unsqueeze(0).expand(
                    batch_size, -1, -1)