예제 #1
0
def main_targeted():
    """Generate "targeted" RAWR (reduced-input) examples for one data fold
    and pickle them.

    Reads --model (checkpoint path) and --fold ('train' or 'dev') from the
    command line, runs the model to get its argmax start/end answer spans,
    and uses those spans as the targets for get_targeted_rawr, collecting
    the reduced inputs and the removed-token records per example.
    """
    from args import args
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', required=True)
    parser.add_argument('--fold', required=True)
    _args = parser.parse_args()
    # Command-line values override the shared global config object.
    args.load_model_dir = _args.model
    args.fold = _args.fold
    out_dir = prepare_output_dir(args, 'results')
    print('Generating [{}] targeted rawr data from [{}].'.format(args.fold, args.load_model_dir))
    pkl_dir = os.path.join(out_dir, '{}.targeted.pkl'.format(args.fold))
    print('Saving to {}'.format(pkl_dir))

    # Seed all RNGs for reproducibility.
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        
    train, dev, dev_y, embedding, opt = load_data(args)
    data = {'train': train, 'dev': dev}

    # Restore the trained reader from the checkpoint's state dict.
    state_dict = torch.load(args.load_model_dir)['state_dict']
    model = DocReaderModel(vars(opt), embedding, state_dict)
    model.cuda()

    # NOTE(review): train examples are truncated to their first 8 fields
    # before batching, dev examples are not -- presumably the extra train
    # fields are not accepted by BatchGen; confirm against its signature.
    batches = {}
    batches['train'] = BatchGen(
            [x[:8] for x in train], batch_size=30,
            pos_size=args.pos_size, ner_size=args.ner_size,
            gpu=args.cuda, evaluation=True)
    batches['dev'] = BatchGen(dev, batch_size=30,
            pos_size=args.pos_size, ner_size=args.ner_size,
            gpu=args.cuda, evaluation=True)

    all_reduced = []   # all_reduced[k]: list of reduced variants of example k
    all_removed = []   # all_removed[k]: removed-token record for example k
    example_idx = 0    # dataset index of the first example in current batch
    for batch_i, batch in enumerate(tqdm(batches[args.fold])):
        # if batch_i > 10:
        #     break
        n_examples = batch[1].shape[0]
        # Use the model's own argmax start/end positions as the targets to
        # preserve while reducing the input.
        answers, _, score_s, score_e, _, _ = model.predict(batch, get_all=True)
        target_s = Variable(torch.max(score_s, 1)[1]).cuda()
        target_e = Variable(torch.max(score_e, 1)[1]).cuda()
        reduced, removed = get_targeted_rawr(
                model, batch, answers, target_s, target_e, max_beam_size=5)
        for i in range(n_examples):
            idx = example_idx + i
            assert batch[8][i] == data[args.fold][idx][7] # check if the spans match
            all_reduced.append([])
            for j, e in enumerate(reduced[i]):
                # Copy the original example, then substitute field 5 with
                # the reduced input produced by get_targeted_rawr.
                x = list(data[args.fold][idx])
                x[5] = e 
                all_reduced[-1].append(x)
            all_removed.append(removed[i])
        example_idx += n_examples

    with open(pkl_dir, 'wb') as f:
        ckp = {'reduced': all_reduced, 'removed': all_removed}
        pickle.dump(ckp, f)
예제 #2
0
파일: rawr.py 프로젝트: ihsgnef/rawr_snli
def main():
    """Generate RAWR-reduced SNLI data for one fold and pickle the result
    together with the input/output vocabularies."""
    from args import args

    # Command-line overrides for the fold and the model checkpoint path.
    cli = argparse.ArgumentParser()
    cli.add_argument('--fold', required=True)
    cli.add_argument('--model', required=True)
    parsed = cli.parse_args()
    args.fold = parsed.fold
    args.load_model_dir = parsed.model

    out_dir = prepare_output_dir(args, 'results')
    print('Generating [{}] rawr data from [{}].'.format(
        args.fold, args.load_model_dir))
    print(out_dir)

    file_dir = os.path.join(out_dir, '{}.pkl'.format(args.fold))
    print('Saving to {}'.format(file_dir))

    # Build vocabularies: tokens over all splits, labels from train only.
    input_field = data.Field(lower=args.lower)
    output_field = data.Field(sequential=False)
    splits = datasets.SNLI.splits(
        input_field, output_field, root=args.data_root)
    train, dev, test = splits
    input_field.build_vocab(train, dev, test)
    output_field.build_vocab(train)
    input_field.vocab.vectors = torch.load(args.vector_cache)

    train_iter, dev_iter, test_iter = data.BucketIterator.splits(
        splits, batch_size=300, device=args.gpu)

    # Expose vocabulary sizes and cell count through the shared config.
    config = args
    config.n_embd = len(input_field.vocab)
    config.d_out = len(output_field.vocab)
    config.n_cells = config.n_layers * (2 if config.birnn else 1)

    model = torch.load(
        args.load_model_dir,
        map_location=lambda storage, location: storage.cuda(args.gpu))

    fold_iters = {'train': train_iter, 'dev': dev_iter}
    reduced = process(model, fold_iters[args.fold])
    checkpoint = {
        'data': reduced,
        'input_vocab': input_field.vocab.itos,
        'output_vocab': output_field.vocab.itos,
    }

    # Average number of tokens in the first reduced hypothesis per example.
    print(sum(len(x[0]['reduced_hypothesis']) for x in reduced) / len(reduced))

    with open(file_dir, 'wb') as f:
        pickle.dump(checkpoint, f)
예제 #3
0
def main():
    """Fine-tune a DrQA reader with interleaved entropy regularization.

    Loads regular SQuAD-style data plus two pickles of reduced ("rawr")
    examples, then alternates regular MLE updates with entropy-maximization
    updates on the reduced examples: after ``start_ent`` regular updates,
    every ``n_reg_per_ent`` regular batches trigger ``n_ent_per_reg``
    entropy batches. Evaluates each epoch and keeps the best model by
    regular dev F1, plus a per-epoch checkpoint.
    """
    from args import args
    # NOTE(review): input/output locations are hard-coded rather than read
    # from the command line -- adjust before reuse.
    args.load_model_dir = '/scratch0/shifeng/rawr/drqa/original.pt'
    args.ent_train_dir = 'results/20180217T172242.135276/train.pkl'
    args.ent_dev_dir = 'pkls/original.rawr.dev.pkl'
    args.other_train_dir = 'results/targeted_train_all.pkl'
    out_dir = prepare_output_dir(args, '/scratch0/shifeng/rawr/drqa/')

    # Log DEBUG to a file in the run directory and INFO to stdout.
    log = logging.getLogger(__name__)
    log.setLevel(logging.DEBUG)
    fh = logging.FileHandler(os.path.join(out_dir, 'output.log'))
    fh.setLevel(logging.DEBUG)
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter(fmt='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    log.addHandler(fh)
    log.addHandler(ch)
    log.info('===== {} ====='.format(out_dir))

    # Snapshot the configuration used for this run.
    with open(os.path.join(out_dir, 'args.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # Seed all RNGs for reproducibility.
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    log.info('loading regular data from {}'.format(args.data_file))
    train_reg, dev_reg, dev_y, embedding, opt = load_data(args)
    log.info('{} regular training examples'.format(len(train_reg)))
    log.info('{} regular dev examples'.format(len(dev_reg)))

    ''' load data for regularization '''
    log.info('loading entropy training data from {}'.format(args.ent_train_dir))
    with open(args.ent_train_dir, 'rb') as f:
        train_ent = pickle.load(f)
        if isinstance(train_ent, dict) and 'reduced' in train_ent:
            train_ent = train_ent['reduced']
        if isinstance(train_ent[0][0], list):
            # One list of reduced variants per example -> flatten.
            train_ent = list(itertools.chain(*train_ent))

    log.info('loading targeted training data from {}'.format(args.other_train_dir))
    with open(args.other_train_dir, 'rb') as f:
        other_train_ent = pickle.load(f)
        # BUG FIX: previously tested `'reduced' in train_ent` (the wrong
        # variable), so a dict-wrapped targeted pickle was never unwrapped.
        if isinstance(other_train_ent, dict) and 'reduced' in other_train_ent:
            other_train_ent = other_train_ent['reduced']
        if isinstance(other_train_ent[0][0], list):
            other_train_ent = list(itertools.chain(*other_train_ent))
    train_ent += other_train_ent

    if args.filter_long > 0:
        # Drop reduced examples whose field 5 is too long.
        train_ent = [x for x in train_ent if len(x[5]) < args.filter_long]

    # BUG FIX: the log line referenced args.ent_train_dir while the file
    # actually opened is args.ent_dev_dir.
    log.info('loading entropy dev data from {}'.format(args.ent_dev_dir))
    with open(args.ent_dev_dir, 'rb') as f:
        dev_ent = pickle.load(f)['reduced']
        if isinstance(dev_ent[0], list):
            # Keep only the first reduced variant of each dev example.
            dev_ent = [x[0] for x in dev_ent]
    log.info('{} entropy training examples'.format(len(train_ent)))
    log.info('{} entropy dev examples'.format(len(dev_ent)))

    log.info('loading model from {}'.format(args.load_model_dir))
    checkpoint = torch.load(args.load_model_dir)
    state_dict = checkpoint['state_dict']
    model = DocReaderModel(vars(opt), embedding, state_dict)
    model.cuda()

    ''' initial evaluation '''
    dev_reg_batches = BatchGen(
            dev_reg, batch_size=args.batch_size,
            pos_size=args.pos_size, ner_size=args.ner_size,
            evaluation=True, gpu=args.cuda)
    dev_ent_batches = BatchGen(
            dev_ent, batch_size=args.batch_size,
            pos_size=args.pos_size, ner_size=args.ner_size,
            evaluation=True, gpu=args.cuda)
    predictions = []
    for batch in dev_reg_batches:
        predictions.extend(model.predict(batch))
    em, f1 = score(predictions, dev_y)
    ents, predictions_r = [], []
    for batch in dev_ent_batches:
        p, _, ss, se, _, _ = model.predict(batch, get_all=True)
        ss = ss.cpu().numpy()
        se = se.cpu().numpy()
        # Per-batch sum of entropies of the start/end score distributions.
        ents.append(scipy.stats.entropy(ss.T).sum() + \
                    scipy.stats.entropy(se.T).sum())
        predictions_r.extend(p)
    ent = sum(ents) / len(ents)
    em_r, f1_r = score(predictions_r, dev_y)
    log.info("[dev EM: {:.5f} F1: {:.5f} Ent: {:.5f}]".format(em, f1, ent))
    log.info("[dev EMR: {:.5f} F1R: {:.5f}]".format(em_r, f1_r))
    best_f1_score = f1

    ''' interleaved training '''
    train_ent_batches = iter(BatchGen(
            train_ent, batch_size=args.batch_size,
            pos_size=args.pos_size, ner_size=args.ner_size, gpu=args.cuda))
    n_reg = 0  # total regular updates so far
    n_ent = 0  # entropy updates since the entropy data last wrapped around
    for epoch in range(args.epochs):
        log.warning('Epoch {}'.format(epoch))
        train_reg_batches = BatchGen(
                train_reg, batch_size=args.batch_size,
                pos_size=args.pos_size, ner_size=args.ner_size, gpu=args.cuda)

        for i_reg, reg_batch in enumerate(train_reg_batches):
            model.update(reg_batch)
            n_reg += 1
            # After the warm-up, interleave entropy updates with regular ones.
            if n_reg > args.start_ent:
                if i_reg % args.n_reg_per_ent == 0:
                    for j in range(args.n_ent_per_reg):
                        try:
                            model.update_entropy(next(train_ent_batches),
                                    gamma=args.gamma)
                            n_ent += 1
                        except StopIteration:
                            # Entropy data exhausted: reshuffle and restart.
                            n_ent = 0
                            train_ent_batches = iter(BatchGen(
                                train_ent, batch_size=args.batch_size,
                                pos_size=args.pos_size, ner_size=args.ner_size,
                                gpu=args.cuda))

            if n_reg % args.n_report == 0:
                log.info('epoch [{:2}] batch [{}, {}] loss[{:.5f}] entropy[{:.5f}]'.format(
                    epoch, i_reg, n_ent, model.train_loss.avg,
                    -model.entropy_loss.avg / args.gamma))

        ''' end-of-epoch evaluation '''
        dev_reg_batches = BatchGen(
                dev_reg, batch_size=args.batch_size,
                pos_size=args.pos_size, ner_size=args.ner_size,
                evaluation=True, gpu=args.cuda)
        dev_ent_batches = BatchGen(
                dev_ent, batch_size=args.batch_size,
                pos_size=args.pos_size, ner_size=args.ner_size,
                evaluation=True, gpu=args.cuda)

        ''' regular evaluation '''
        predictions = []
        for batch in dev_reg_batches:
            predictions.extend(model.predict(batch))
        em, f1 = score(predictions, dev_y)

        ''' entropy evaluation '''
        ents, predictions_r = [], []
        for batch in dev_ent_batches:
            p, _, ss, se, _, _ = model.predict(batch, get_all=True)
            ss = ss.cpu().numpy()
            se = se.cpu().numpy()
            ents.append(scipy.stats.entropy(ss.T).sum() + \
                        scipy.stats.entropy(se.T).sum())
            predictions_r.extend(p)
        ent = sum(ents) / len(ents)
        em_r, f1_r = score(predictions_r, dev_y)

        log.info("dev EM: {:.5f} F1: {:.5f} Ent: {:.5f}".format(em, f1, ent))
        log.info("[dev EMR: {:.5f} F1R: {:.5f}]".format(em_r, f1_r))

        ''' save best model '''
        if f1 > best_f1_score:
            best_f1_score = f1
            model_file = os.path.join(out_dir, 'best_model.pt')
            model.save(model_file, epoch)
            log.info('[save best model F1: {:.5f}]'.format(best_f1_score))

        ''' save models '''
        model_file = os.path.join(
                out_dir, 'checkpoint_epoch_{}.pt'.format(epoch))
        model.save(model_file, epoch)
        log.info("[save model {}]".format(model_file))
예제 #4
0
def main():
    """Fine-tune a BIMPM SNLI model with interleaved entropy maximization.

    Alternates ``tune_conf.n_ent`` entropy-maximization updates on reduced
    ("rawr") examples with ``tune_conf.n_mle`` cross-entropy updates on
    regular examples, evaluating and checkpointing at epoch boundaries.
    Stops after epoch 40.
    """
    from args import conf, tune_conf
    parser = argparse.ArgumentParser()
    parser.add_argument('--baseline', default='results/baseline.pt')
    parser.add_argument(
        '--ent-train',
        default='/scratch0/shifeng/rawr/new_snli/rawr.train.pkl')
    parser.add_argument('--ent-dev',
                        default='/scratch0/shifeng/rawr/new_snli/rawr.dev.pkl')
    args = parser.parse_args()

    # NOTE(review): `args` is the argparse namespace above and has no
    # `root_dir` attribute -- this likely needs conf.root_dir; confirm.
    out_dir = prepare_output_dir(args, args.root_dir)
    # Log DEBUG to a file in the run directory and INFO to stdout.
    log = logging.getLogger(__name__)
    log.setLevel(logging.DEBUG)
    fh = logging.FileHandler(os.path.join(out_dir, 'output.log'))
    fh.setLevel(logging.DEBUG)
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter(fmt='%(asctime)s %(message)s',
                                  datefmt='%m/%d/%Y %I:%M:%S')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    log.addHandler(fh)
    log.addHandler(ch)
    log.info('===== {} ====='.format(out_dir))
    ''' load regular data '''
    log.info('loading regular training data')
    data = SNLI(conf)
    conf.char_vocab_size = len(data.char_vocab)
    conf.word_vocab_size = len(data.TEXT.vocab)
    conf.class_size = len(data.LABEL.vocab)
    conf.max_word_len = data.max_word_len

    # BUG FIX: the --ent-dev/--ent-train command-line arguments were parsed
    # but then ignored in favor of tune_conf values; honor the parsed args.
    log.info('loading entropy dev data {}'.format(args.ent_dev))
    with open(args.ent_dev, 'rb') as f:
        ent_dev = pickle.load(f)
    if isinstance(ent_dev[0], list):
        # One list of reduced variants per example -> flatten.
        ent_dev = list(itertools.chain(*ent_dev))
    log.info('{} entropy dev examples'.format(len(ent_dev)))
    ent_dev = [[
        x['data']['premise'], x['data']['hypothesis'], x['data']['label']
    ] for x in ent_dev]

    log.info('loading entropy training data {}'.format(args.ent_train))
    with open(args.ent_train, 'rb') as f:
        ent_train = pickle.load(f)
    if isinstance(ent_train[0], list):
        ent_train = list(itertools.chain(*ent_train))
    log.info('{} entropy training examples'.format(len(ent_train)))
    ent_train = [[
        x['data']['premise'], x['data']['hypothesis'], x['data']['label']
    ] for x in ent_train]

    train_ent_batches = batchify(ent_train, tune_conf.batch_size)
    log.info('{} entropy training batches'.format(len(train_ent_batches)))

    log.info('loading model from {}'.format(args.baseline))
    model = BIMPM(conf, data)
    model.load_state_dict(torch.load(args.baseline))
    # model.word_emb.weight.requires_grad = True
    model.cuda(conf.gpu)

    # Both optimizers share the trainable parameter list; one applies the
    # MLE objective, the other the (negated) entropy objective.
    parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
    optimizer = optim.Adam(parameters, lr=tune_conf.lr)
    ent_optimizer = optim.Adam(parameters, lr=tune_conf.ent_lr)
    criterion = nn.CrossEntropyLoss()

    init_loss, init_acc = evaluate(model, data.dev_iter)
    log.info("initial loss {:.4f} accuracy {:.4f}".format(init_loss, init_acc))
    best_acc = init_acc

    dev_ent_batches = batchify(ent_dev, tune_conf.batch_size)
    init_ent, init_ent_acc = evaluate_ent(model, dev_ent_batches)
    log.info("initial entropy {:.4f} ent_acc {:.4f}".format(
        init_ent, init_ent_acc))

    epoch = 0
    i_ent, i_mle = 0, 0  # number of examples seen in the current pass
    train_loss, train_ent = 0, 0
    train_mle_iter = iter(data.train_iter)
    train_ent_iter = iter(train_ent_batches)
    while True:
        model.train()
        # Entropy-maximization phase on reduced examples.
        for i in range(tune_conf.n_ent):
            try:
                prem, hypo, label = next(train_ent_iter)
            except StopIteration:
                # Entropy data exhausted: reshuffle and restart the pass.
                random.shuffle(train_ent_batches)
                train_ent_iter = iter(train_ent_batches)
                i_ent = 0
                train_ent = 0
                break
            output = forward(model, prem, hypo, conf.max_sent_len)
            output = F.softmax(output, 1)
            ent = entropy(output).sum()
            train_ent += ent.data.cpu().numpy()[0]
            # Maximize entropy == minimize its negative, scaled by gamma.
            loss = -tune_conf.gamma * ent
            ent_optimizer.zero_grad()
            loss.backward()
            ent_optimizer.step()
            i_ent += prem.shape[0]

        end_of_epoch = False
        # Regular MLE phase.
        for i in range(tune_conf.n_mle):
            if i_mle >= len(data.train_iter):
                epoch += 1
                end_of_epoch = True
                data.train_iter.init_epoch()
                train_mle_iter = iter(data.train_iter)
                i_mle = 0
                train_loss = 0
                break
            batch = next(train_mle_iter)
            output = forward(model, batch.premise, batch.hypothesis,
                             conf.max_sent_len)
            loss = criterion(output, batch.label)
            train_loss += loss.data.cpu().numpy()[0]
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            i_mle += batch.premise.shape[0]

        # NOTE(review): i_mle advances by batch size, so these modulo
        # checks only fire when the counts happen to align exactly.
        if i_mle % 1000 == 0:
            _loss = train_loss / i_mle if i_mle != 0 else 0
            _ent = train_ent / i_ent if i_ent != 0 else 0
            log.info(
                'epoch [{:2}] [{} / {}] loss[{:.5f}] entropy[{:.5f}]'.format(
                    epoch, i_mle, len(data.train_iter), _loss, _ent))

        if end_of_epoch or i_mle % 1e5 == 0:
            dev_loss, dev_acc = evaluate(model, data.dev_iter)
            dev_ent, dev_ent_acc = evaluate_ent(model, dev_ent_batches)
            log.info("dev acc: {:.4f} ent: {:.4f} ent_acc: {:.4f}".format(
                dev_acc, dev_ent, dev_ent_acc))
            model_path = os.path.join(out_dir,
                                      'checkpoint_epoch_{}.pt'.format(epoch))
            torch.save(model.state_dict(), model_path)
            if dev_acc > best_acc:
                best_acc = dev_acc
                model_path = os.path.join(out_dir, 'best_model.pt')
                torch.save(model.state_dict(), model_path)
                log.info("best model saved {}".format(dev_acc))

        if epoch > 40:
            break
예제 #5
0
def main():
    """Generate "targeted" RAWR training data using answers from *other*
    questions on the same passage as adversarial targets.

    Records the trained model's predicted answers/spans on the training
    set, pairs each question with another question on the same passage
    whose answer differs, then reduces the first question's input so the
    model predicts the other question's answer span.
    """
    from args import args
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', required=True)
    _args = parser.parse_args()
    args.load_model_dir = _args.model

    out_dir = prepare_output_dir(args, 'results')
    print('Generating targeted rawr data from [{}].'.format(
        args.load_model_dir))
    print('Saving to {}'.format(out_dir))

    # Seed all RNGs for reproducibility.
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    train, dev, dev_y, embedding, opt = load_data(args)

    state_dict = torch.load(args.load_model_dir)['state_dict']
    model = DocReaderModel(vars(opt), embedding, state_dict)
    model.cuda()

    # Predicted answers and argmax start/end positions for every original
    # training question.
    regular_train = BatchGen([x[:8] for x in train],
                             batch_size=30,
                             pos_size=args.pos_size,
                             ner_size=args.ner_size,
                             gpu=args.cuda,
                             evaluation=True)

    all_answers, all_target_s, all_target_e = [], [], []
    for i, batch in enumerate(tqdm(regular_train)):
        answers, _, score_s, score_e, _, _ = model.predict(batch, get_all=True)
        target_s = np.argmax(score_s, 1).tolist()
        target_e = np.argmax(score_e, 1).tolist()
        all_answers += answers
        all_target_s += target_s
        all_target_e += target_e

    # BUG FIX: all_target_s was zipped twice, so every "end" target was
    # actually a start position.
    all_train = zip(train, all_answers, all_target_s, all_target_e)

    # Group questions by passage: x[0][0][:-2] strips what is presumably a
    # per-question suffix from the id -- confirm the id format.
    groups = defaultdict(list)
    for x in all_train:
        groups[x[0][0][:-2]].append(x)

    # For each ordered pair (i, j) of same-passage questions with different
    # answers, pair question i with question j's answer and target spans.
    train_with_other = []
    other_answers = []
    other_target_s = []
    other_target_e = []
    for group in groups.values():
        examples, answer, target_s, target_e = list(map(list, zip(*group)))
        for i in range(len(examples)):
            for j in range(len(examples)):
                if answer[i] != answer[j]:
                    train_with_other.append(examples[i])
                    other_answers.append(answer[j])
                    other_target_s.append(target_s[j])
                    other_target_e.append(target_e[j])

    other_train = BatchGen([x[:8] for x in train_with_other],
                           batch_size=30,
                           pos_size=args.pos_size,
                           ner_size=args.ner_size,
                           gpu=args.cuda,
                           evaluation=True)

    targeted_train = []
    start = 0  # dataset index of the first example in the current batch
    for i, batch in enumerate(tqdm(other_train)):
        batch_size = batch[5].shape[0]
        end = start + batch_size

        # NOTE(review): resumes at batch 7500 -- manual sharding of a long
        # job; parameterize or remove when rerunning from scratch.
        if i < 7500:
            start = end
            continue

        ts = Variable(torch.LongTensor(other_target_s[start:end])).cuda()
        te = Variable(torch.LongTensor(other_target_e[start:end])).cuda()
        ans = other_answers[start:end]
        reduced, _ = get_targeted_rawr(model, batch, ans, ts, te)
        for j in range(batch_size):
            if len(reduced[j]) == 0:
                continue
            for r in reduced[j]:
                # BUG FIX: copy the example before mutating it. The original
                # mutated and re-appended the same list object, so every
                # variant of an example ended up holding its last reduction.
                x = list(train_with_other[start + j])
                x[5] = r
                targeted_train.append(x)
        start = end

    print(len(targeted_train))

    out_path = os.path.join(out_dir, 'targeted_other.train.pkl')
    with open(out_path, 'wb') as f:
        pickle.dump(targeted_train, f)
예제 #6
0
def main():
    """Launch asynchronous A3C training on an ALE (Atari) ROM.

    Builds a shared model whose parameters live in shared memory, then
    forks `processes` workers; each worker runs its own environment and a
    local model synced to the shared parameters via `train_loop`.
    """

    # Prevent numpy from using multiple threads (each worker process
    # should stay single-threaded).
    os.environ['OMP_NUM_THREADS'] = '1'

    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('processes', type=int)
    parser.add_argument('rom', type=str)
    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--outdir', type=str, default=None)

    # SDL window for recording/displaying the emulator screen.
    parser.add_argument('--use-sdl', action='store_true')

    # t_max horizon handed to A3C -- presumably steps per async update;
    # confirm against the A3C implementation.
    parser.add_argument('--t-max', type=int, default=5)

    # Entropy-bonus coefficient passed to A3C as `beta`.
    parser.add_argument('--beta', type=float, default=1e-2)
    parser.add_argument('--profile', action='store_true')

    parser.add_argument('--steps', type=int, default=8 * 10**7)
    #parser.add_argument('--lr', type=float, default=7e-4)
    parser.add_argument('--lr', type=float, default=7e-3)
    parser.add_argument('--eval-frequency', type=int, default=10**6)
    parser.add_argument('--eval-n-runs', type=int, default=10)
    parser.add_argument('--weight-decay', type=float, default=0.0)
    parser.add_argument('--use-lstm', action='store_true')
    parser.set_defaults(use_sdl=False)
    parser.set_defaults(use_lstm=False)
    args = parser.parse_args()

    # Seed only when explicitly requested.
    if args.seed is not None:
        random.seed(args.seed)
        np.random.seed(args.seed)

    args.outdir = util.prepare_output_dir(args, args.outdir)

    print('Output files are saved in {}'.format(args.outdir))

    n_actions = ale.ALE(args.rom).number_of_actions

    def model_opt():
        # Build a fresh model; called once for the shared copy and once
        # per worker for the local copies.
        model = Model(n_actions)
        model.learning_rate = args.lr
        #opt = rmsprop_async.RMSpropAsync(lr=7e-4, eps=1e-1, alpha=0.99)

        return model

    # The shared (global) network.
    model = model_opt()

    # Parameters are exposed as shared arrays so all worker processes
    # update the same values.
    shared_params = util.share_params_as_shared_arrays(model)

    # Cross-process bookkeeping: best score seen and a global step counter.
    max_score = mp.Value('f', np.finfo(np.float32).min)
    counter = mp.Value('l', 0)
    start_time = time.time()

    # Write the header line of the score log before any worker appends.
    with open(os.path.join(args.outdir, 'scores.txt'), 'a+') as f:
        column_names = ('steps', 'elapsed', 'mean', 'median', 'stdev')
        f.write('\t'.join(column_names) + '\n')

    def run_func(process_idx):
        # Per-worker environment and local model, synced to shared params.
        env = ale.ALE(args.rom, use_sdl=args.use_sdl)
        model = model_opt()
        util.set_shared_params(model, shared_params)

        # Each worker gets its own agent wrapping the local model.
        agent = A3C(model,
                    args.t_max,
                    beta=args.beta,
                    process_idx=process_idx,
                    phi=util.dqn_phi)

        # Blocks until this worker's training loop finishes.
        train_loop(process_idx, counter, max_score, args, agent, env,
                   start_time)

    util.run_async(args.processes, run_func)
예제 #7
0
파일: rawr.py 프로젝트: ihsgnef/pathologies
def main():
    """Generate RAWR-reduced SNLI examples for one fold, reducing either the
    hypothesis (default) or the premise (--pnoth), and pickle the results in
    shards of 1000 batches.

    For every example, records the model's original prediction plus one
    entry per reduced sentence with its new prediction, score, and the
    indices removed by the reduction.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--fold', required=True)
    parser.add_argument('--baseline', default='results/baseline.pt')
    parser.add_argument('--pnoth',
                        default=False,
                        action='store_true',
                        help='reduce premise instead of hypothesis')
    parser.add_argument('--truth',
                        default=False,
                        action='store_true',
                        help='use label instead of prediction as target')
    args = parser.parse_args()

    data = SNLI(conf)
    conf.char_vocab_size = len(data.char_vocab)
    conf.word_vocab_size = len(data.TEXT.vocab)
    conf.class_size = len(data.LABEL.vocab)
    conf.max_word_len = data.max_word_len
    q_vocab = data.TEXT.vocab.itos   # token id -> token string
    a_vocab = data.LABEL.vocab.itos  # label id -> label string

    out_dir = prepare_output_dir(conf, 'results', 'rawr')
    print('Generating [{}] rawr data from [{}].'.format(
        args.fold, args.baseline))
    print(out_dir)

    model = BIMPM(conf, data)
    model.load_state_dict(torch.load(args.baseline))
    # Gradients w.r.t. word embeddings are needed by get_rawr.
    model.word_emb.weight.requires_grad = True
    model.to(conf.device)

    datasets = {'train': data.train_iter, 'dev': data.dev_iter}

    if args.pnoth:
        fname = 'rawr.{}.premise.pkl'.format(args.fold)
    else:
        fname = 'rawr.{}.hypothesis.pkl'.format(args.fold)

    checkpoint = []
    for batch_i, batch in enumerate(tqdm(datasets[args.fold])):
        # BUG FIX: was `>`, which processed one extra (duplicate) batch
        # beyond a full epoch; the train iterator repeats forever, so stop
        # after exactly len(iterator) batches.
        if batch_i >= len(datasets[args.fold]):
            break
        # NOTE(review): assumes batch tensors are batch-first -- confirm
        # the iterator configuration.
        batch_size = batch.hypothesis.shape[0]
        model.eval()
        # Original (unreduced) predictions and confidences.
        output = F.softmax(model(batch.premise, batch.hypothesis), 1)
        original_scores, original_predictions = torch.max(output, 1)
        original_scores = original_scores.detach().cpu().numpy()
        original_predictions = original_predictions.detach().cpu().numpy()
        batch_cpu = Batch(batch.premise.data.cpu(),
                          batch.hypothesis.data.cpu(), batch.label.data.cpu())

        reduced, removed_indices = get_rawr(
            model,
            batch,
            max_beam_size=rawr_conf.max_beam_size,
            conf_threshold=rawr_conf.conf_threshold,
            p_not_h=args.pnoth)
        for i in range(batch_size):
            og = {
                'premise': batch_cpu.premise[i],
                'hypothesis': batch_cpu.hypothesis[i],
                'premise_readable': to_text(batch_cpu.premise[i], q_vocab),
                'hypothesis_readable': to_text(batch_cpu.hypothesis[i],
                                               q_vocab),
                'prediction': original_predictions[i],
                'prediction_readable': a_vocab[original_predictions[i]],
                'score': original_scores[i],
                'label': batch_cpu.label[i],
                'label_readable': a_vocab[batch_cpu.label[i]],
            }
            checkpoint.append({'original': og, 'reduced': []})
            # s1 is the sentence kept fixed; each s2 is a reduced version
            # of the other sentence.
            s1 = batch.hypothesis[i] if args.pnoth else batch.premise[i]
            s1 = s1.to(conf.device)
            for j, s2 in enumerate(reduced[i]):
                s2 = torch.LongTensor(s2).to(conf.device)
                model.eval()
                # Re-predict on the (fixed, reduced) sentence pair in the
                # correct premise/hypothesis order.
                if args.pnoth:
                    output = model(s2.unsqueeze(0), s1.unsqueeze(0))
                else:
                    output = model(s1.unsqueeze(0), s2.unsqueeze(0))
                output = F.softmax(output, 1)
                pred_scores, pred = torch.max(output, 1)
                pred = pred.detach().cpu().numpy()[0]
                pred_scores = pred_scores.detach().cpu().numpy()[0]
                if args.pnoth:
                    hypo, prem = s1.cpu(), s2.cpu()
                else:
                    prem, hypo = s1.cpu(), s2.cpu()
                checkpoint[-1]['reduced'].append({
                    'premise': prem,
                    'hypothesis': hypo,
                    'premise_readable': to_text(prem, q_vocab),
                    'hypothesis_readable': to_text(hypo, q_vocab),
                    'prediction': pred,
                    'prediction_readable': a_vocab[pred],
                    'score': pred_scores,
                    'label': batch_cpu.label[i],
                    'label_readable': a_vocab[batch_cpu.label[i]],
                    'removed_indices': removed_indices[i][j],
                    'which_reduced':
                        'premise' if args.pnoth else 'hypothesis',
                })
        # Shard output every 1000 batches to bound memory.
        if batch_i % 1000 == 0 and batch_i > 0:
            out_path = os.path.join(out_dir, '{}.{}'.format(fname, batch_i))
            with open(out_path, 'wb') as f:
                pickle.dump(checkpoint, f)
            checkpoint = []

    # Flush the final partial shard.
    if len(checkpoint) > 0:
        out_path = os.path.join(out_dir, '{}.{}'.format(fname, batch_i))
        with open(out_path, 'wb') as f:
            pickle.dump(checkpoint, f)
예제 #8
0
def main():
    """Train a BIMPM model on RAWR-reduced SNLI examples.

    Training targets are either the baseline model's predictions (default)
    or gold labels (--truth). Validation uses either the reduced dev set or
    the original dev iterator (--ogdev). --full replaces the reduced
    training batches with a fraction of the original training data.
    """
    from args import conf

    parser = argparse.ArgumentParser()
    parser.add_argument('--train', default='results/rawr.train.hypothesis.pkl')
    parser.add_argument('--dev', default='results/rawr.dev.hypothesis.pkl')
    parser.add_argument('--truth',
                        default=False,
                        action='store_true',
                        help='use label instead of prediction as target')
    parser.add_argument('--ogdev',
                        default=False,
                        action='store_true',
                        help='use original dev set instead of reduced')
    parser.add_argument('--full',
                        default=0,
                        type=float,
                        help='amount of full examples to include')
    args = parser.parse_args()

    conf.train_data = args.train
    conf.dev_data = args.dev

    print('loading regular data...')
    regular_data = SNLI(conf)
    conf.char_vocab_size = len(regular_data.char_vocab)
    conf.word_vocab_size = len(regular_data.TEXT.vocab)
    conf.class_size = len(regular_data.LABEL.vocab)
    conf.max_word_len = regular_data.max_word_len
    conf.out_dir = prepare_output_dir(conf, 'results', 'reduced')

    print('loading reduced data from [{}]'.format(conf.train_data))
    with open(conf.train_data, 'rb') as f:
        train = pickle.load(f)
    print('loading reduced data from [{}]'.format(conf.dev_data))
    with open(conf.dev_data, 'rb') as f:
        dev = pickle.load(f)

    # Train on every reduced variant; --truth switches the target from the
    # baseline's prediction to the gold label.
    train_label = 'label' if args.truth else 'prediction'
    train = [(x['premise'], x['hypothesis'], ex['original'][train_label])
             for ex in train for x in ex['reduced']]
    # Dev keeps only the first reduced variant of each example.
    dev = [(x['premise'], x['hypothesis'], x['label']) for ex in dev
           for x in ex['reduced'][:1]]

    train_batches = batchify(train, conf.batch_size)

    if args.full > 0:
        # Replace reduced batches with a fraction of the original training
        # iterator (the commented alternative appended instead).
        n_examples = int(len(regular_data.train_iter) * args.full)
        print('use {} ({}) full training data'.format(
            n_examples * conf.batch_size, args.full))
        full_batches = []
        for j, x in enumerate(regular_data.train_iter):
            if j > n_examples:
                break
            full_batches.append((x.premise, x.hypothesis, x.label))
        # train_batches += full_batches
        train_batches = full_batches

    print(len(train_batches))

    if args.ogdev:
        dev_batches = list(regular_data.dev_iter)
        dev_batches = [(x.premise, x.hypothesis, x.label) for x in dev_batches]
    else:
        # BUG FIX: previously re-batchified `train`, so the reduced dev set
        # built above was never used and validation ran on training data.
        dev_batches = batchify(dev, conf.batch_size)

    model = BIMPM(conf, regular_data)
    if conf.gpu > -1:
        model.cuda(conf.gpu)

    print('begin training')
    best_model = train_reduced(model, train_batches, dev_batches, conf)

    torch.save(best_model.state_dict(), os.path.join(conf.out_dir, 'best.pt'))
    print('training finished!')
예제 #9
0
파일: rawr.py 프로젝트: ihsgnef/pathologies
def main():
    """Generate reduced-input ("rawr") examples for one fold of SQuAD data.

    Loads a trained DocReaderModel checkpoint, runs beam-search input
    reduction (``get_rawr``) over every batch of the chosen fold, re-scores
    each reduced question with the same model, and pickles a list of
    ``{'original': ..., 'reduced': ...}`` records to the output directory.

    Command-line arguments:
        --model  path to the trained checkpoint (default: results/baseline.pt)
        --fold   which split to reduce: 'train' or 'dev' (required)

    Side effects: reads data/meta.msgpack and the checkpoint, writes
    ``<out_dir>/<fold>.pkl``.
    """
    from args import conf, rawr_conf

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', default='results/baseline.pt')
    parser.add_argument('--fold', required=True)
    args = parser.parse_args()
    out_dir = prepare_output_dir(conf, 'results', 'rawr')

    pkl_dir = os.path.join(out_dir, '{}.pkl'.format(args.fold))
    print('Generating [{}] rawr data from [{}].'.format(args.fold, args.model))
    print('Saving to {}'.format(pkl_dir))

    # Seed all RNGs so the reduction paths are reproducible.
    random.seed(conf.seed)
    torch.manual_seed(conf.seed)
    if conf.cuda:
        torch.cuda.manual_seed(conf.seed)

    with open('data/meta.msgpack', 'rb') as f:
        vocab = msgpack.load(f, encoding='utf-8')['vocab']

    train, dev, dev_y, embedding, opt = load_data(conf)
    data = {'train': train, 'dev': dev}

    state_dict = torch.load(args.model)['state_dict']
    model = DocReaderModel(vars(opt), embedding, state_dict)
    model.cuda()

    batches = {
        'train': BatchGen([x[:8] for x in train],
                          batch_size=30,
                          pos_size=conf.pos_size,
                          ner_size=conf.ner_size,
                          gpu=conf.cuda,
                          evaluation=True),
        'dev': BatchGen(dev,
                        batch_size=30,
                        pos_size=conf.pos_size,
                        ner_size=conf.ner_size,
                        gpu=conf.cuda,
                        evaluation=True),
    }

    checkpoint = []
    example_idx = 0  # global index of the first example in the current batch
    for batch_i, batch in enumerate(tqdm(batches[args.fold])):
        n_examples = batch[1].shape[0]

        # Original (unreduced) predictions and span scores for the batch.
        r = model.predict(batch, get_all=True)
        target = r[0]
        original_score_s = r[2].cpu().numpy()
        original_score_e = r[3].cpu().numpy()
        original_index_s = r[4]
        original_index_e = r[5]

        reduced, removed = get_rawr(
            model, batch, max_beam_size=rawr_conf.max_beam_size)

        for i in range(n_examples):
            beam_size = len(reduced[i])

            # Build a batch that repeats example i once per reduced question
            # (slots 5/6 are question tokens and question mask).
            rq = torch.LongTensor(reduced[i])
            mask = torch.ByteTensor(np.zeros_like(rq))
            test_batch = [batch_repeat(x[i], beam_size) for x in batch[:5]]
            test_batch += [rq, mask]
            test_batch += [batch_repeat(x[i], beam_size) for x in batch[7:]]

            output = model.predict(test_batch, get_all=True)
            preds = output[0]
            reduced_score_s = output[2].cpu().numpy()
            reduced_score_e = output[3].cpu().numpy()
            reduced_index_s = output[4]
            reduced_index_e = output[5]

            idx = example_idx + i
            # Sanity check: batch order matches the raw data order, and
            # data[fold][idx][7] holds the character spans for example idx.
            assert batch[8][i] == data[args.fold][idx][7]

            if args.fold == 'train':
                # BUG FIX: the original code mixed the batch-local index `i`
                # with the global index `idx` (data['train'][idx][i][7],
                # data['train'][i][-2], train[i][6]), so the gold label was
                # recovered from the wrong training example for every batch
                # after the first. Use `idx` consistently, matching the
                # assertion above. NOTE(review): confirm the tuple layout
                # (spans at [7], answer token offsets at [-2]/[-1], context
                # text at [6]) against the upstream data pipeline.
                indices = data['train'][idx][7]
                start, end = data['train'][idx][-2], data['train'][idx][-1]
                start, end = indices[start][0], indices[end][1]
                label = [train[idx][6][start:end]]
            else:
                label = dev_y[idx]

            og = {
                'batch': data[args.fold][idx],
                'label': label,
                'score_e': original_score_e[i],
                'score_s': original_score_s[i],
                'index_e': original_index_e[i],
                'index_s': original_index_s[i],
                'prediction': target[i],
                'context_readable': batch[7][i],
                'question_readable': ' '.join(
                    vocab[x] for x in batch[5][i] if x != 0),
            }

            rd = []
            for j, e in enumerate(reduced[i]):
                # Copy the raw example and substitute the reduced question
                # token ids (slot 5).
                x = list(data[args.fold][idx])
                x[5] = e
                rd.append({
                    'batch': x,
                    'label': label,
                    'removed_indices': removed[i][j],
                    'context_readable': batch[7][i],
                    'question_readable': ' '.join(
                        vocab[x] for x in reduced[i][j] if x != 0),
                    'score_e': reduced_score_e[j],
                    'score_s': reduced_score_s[j],
                    'index_e': reduced_index_e[j],
                    'index_s': reduced_index_s[j],
                    'prediction': preds[j],
                })

            checkpoint.append({'original': og, 'reduced': rd})

        example_idx += n_examples

    with open(pkl_dir, 'wb') as f:
        pickle.dump(checkpoint, f)