Example #1
def main():
    args = parse_args()
    dataset = args.dataset
    if dataset == 'cpv1':
        dictionary = Dictionary.load_from_file('data/dictionary_v1.pkl')
    elif dataset == 'cpv2' or dataset == 'v2':
        dictionary = Dictionary.load_from_file('data/dictionary.pkl')

    print("Building train dataset...")
    train_dset = VQAFeatureDataset('train',
                                   dictionary,
                                   dataset=dataset,
                                   cache_image_features=args.cache_features)
    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val',
                                  dictionary,
                                  dataset=dataset,
                                  cache_image_features=args.cache_features)

    label2answer = eval_dset.label2ans

    bias_p = get_bias(train_dset, eval_dset)
    bias_color = bias_p['what color is']

    bias_color_top5 = bias_color.argsort()[::-1][0:5]

    bias_color_p = []
    bias_color_word = []

    for i in bias_color_top5:
        bias_color_p.append(bias_color[i])
        bias_color_word.append(label2answer[i])

    print(bias_color_p)
    print(bias_color_word)
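get_bias is called above but not shown in this excerpt. Below is a hypothetical sketch of what it presumably computes, reconstructed from the inline bias code in Examples #12 and #21 further down: the per-question-type average of the soft answer scores over the training entries, attached to every entry as ex["bias"] and returned as a dict keyed by question type.

from collections import Counter, defaultdict

import numpy as np


def get_bias(train_dset, eval_dset):
    # question_type -> answer label -> accumulated soft score
    question_type_to_probs = defaultdict(Counter)
    question_type_to_count = Counter()
    for ex in train_dset.entries:
        ans = ex["answer"]
        q_type = ans["question_type"]
        question_type_to_count[q_type] += 1
        if ans["labels"] is not None:
            for label, score in zip(ans["labels"], ans["scores"]):
                question_type_to_probs[q_type][label] += score

    # normalize by the number of questions of each type
    answer_voc_size = train_dset.num_ans_candidates
    question_type_to_prob_array = {}
    for q_type, count in question_type_to_count.items():
        prob_array = np.zeros(answer_voc_size, np.float32)
        for label, total_score in question_type_to_probs[q_type].items():
            prob_array[label] += total_score
        prob_array /= count
        question_type_to_prob_array[q_type] = prob_array

    # attach the prior to every entry so the debiasing losses can use it
    for ds in [train_dset, eval_dset]:
        for ex in ds.entries:
            ex["bias"] = question_type_to_prob_array[ex["answer"]["question_type"]]
    return question_type_to_prob_array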
Example #2
def main():
    args = parse_args()
    dataset = args.dataset

    with open('util/qid2type_%s.json' % args.dataset, 'r') as f:
        qid2type = json.load(f)

    if dataset == 'cpv1':
        dictionary = Dictionary.load_from_file('data/dictionary_v1.pkl')
    elif dataset == 'cpv2' or dataset == 'v2':
        dictionary = Dictionary.load_from_file('data/dictionary.pkl')

    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val',
                                  dictionary,
                                  dataset=dataset,
                                  cache_image_features=args.cache_features)

    # Build the model using the original constructor
    constructor = 'build_%s' % args.model
    model = getattr(CCB_model, constructor)(eval_dset, args.num_hid).cuda()
    #model = getattr(base_model, constructor)(eval_dset, args.num_hid).cuda()

    if args.debias == "bias_product":
        model.debias_loss_fn = BiasProduct()
    elif args.debias == "none":
        model.debias_loss_fn = Plain()
    elif args.debias == "reweight":
        model.debias_loss_fn = ReweightByInvBias()
    elif args.debias == "learned_mixin":
        model.debias_loss_fn = LearnedMixin(args.entropy_penalty)
    elif args.debias == "CCB_loss":
        model.debias_loss_fn = CCB_loss(args.entropy_penalty)
    else:
        raise RuntimeError(args.debias)

    model_state = torch.load(args.model_state)
    model.load_state_dict(model_state)

    model = model.cuda()
    batch_size = args.batch_size

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    # The original version uses multiple workers, but that just seems slower on my setup
    eval_loader = DataLoader(eval_dset,
                             batch_size,
                             shuffle=False,
                             num_workers=5)

    print("Starting eval...")

    evaluate(model, eval_loader, qid2type)
Example #3
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    net = Question_Classifier(args.bert_mode,
                              args.bert_pretrain,
                              num_classes=3)
    net.load_state_dict(
        torch.load(args.load_path, map_location=lambda storage, loc: storage))

    torch.cuda.set_device(device=0)
    net.cuda()

    dictionary = Dictionary.load_from_file(args.dictionary_path)
    valset = Question_Dataset('val',
                              dictionary,
                              args.data_root,
                              question_len=12)
    testset = Question_Dataset('test',
                               dictionary,
                               args.data_root,
                               question_len=12)

    valloader = DataLoader(valset,
                           batch_size=args.batch_size,
                           shuffle=False,
                           num_workers=2)
    testloader = DataLoader(testset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=2)

    net.eval()
    val_acc = 0.0
    test_acc = 0.0

    with torch.no_grad():
        for ii, sample_batched in enumerate(valloader):
            question, label = sample_batched['question'], sample_batched[
                'label']
            question, label = question.cuda(), label.cuda()

            out = net.forward(question)
            tmp_acc = utils.cal_acc(out, label)
            val_acc += (tmp_acc * question.shape[0])
        val_acc /= len(valset)

        for ii, sample_batched in enumerate(testloader):
            question, label = sample_batched['question'], sample_batched[
                'label']
            question, label = question.cuda(), label.cuda()

            out = net.forward(question)
            tmp_acc = utils.cal_acc(out, label)
            test_acc += (tmp_acc * question.shape[0])
        test_acc /= len(testset)

        print('valset || questions: %d acc: %.4f' % (len(valset), val_acc))
        print('testset || questions: %d acc: %.4f' % (len(testset), test_acc))
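utils.cal_acc is not included in these excerpts. A minimal sketch, assuming it returns the mean accuracy of the argmax predictions against integer class labels (consistent with re-weighting the per-batch value by question.shape[0] above):

import torch


def cal_acc(logits, labels):
    # fraction of samples whose argmax prediction matches the target class
    preds = torch.argmax(logits, dim=1)
    return (preds == labels).float().mean().item()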
Example #4
def main():
    parser = argparse.ArgumentParser(
        "Save a model's predictions for the VQA-CP test set")
    parser.add_argument("model", help="Directory of the model")
    parser.add_argument("output_file", help="File to write json output to")
    args = parser.parse_args()

    path = args.model

    print("Loading data...")
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', dictionary, cp=True)
    eval_dset = VQAFeatureDataset('val', dictionary, cp=True)

    eval_loader = DataLoader(eval_dset, 256, shuffle=False, num_workers=0)

    constructor = 'build_%s' % 'baseline0_newatt'
    model = getattr(base_model, constructor)(train_dset, 1024).cuda()

    print("Loading state dict for %s..." % path)

    state_dict = torch.load(join(path, "model.pth"))
    if all(k.startswith("module.") for k in state_dict):
        filtered = {}
        for k in state_dict:
            filtered[k[len("module."):]] = state_dict[k]
        state_dict = filtered

    for k in list(state_dict):
        if k.startswith("debias_loss_fn"):
            del state_dict[k]

    model.load_state_dict(state_dict)

    model.cuda()
    model.eval()
    print("Done")

    predictions = []
    for v, q, a, b in tqdm(eval_loader,
                           ncols=100,
                           total=len(eval_loader),
                           desc="eval"):
        v = Variable(v, volatile=True).cuda()
        q = Variable(q, volatile=True).cuda()
        factor = model(v, None, q, None, None, True)[0]
        prediction = torch.max(factor, 1)[1].data.cpu().numpy()
        for p in prediction:
            predictions.append(train_dset.label2ans[p])

    out = []
    for p, e in zip(predictions, eval_dset.entries):
        out.append(dict(answer=p, question_id=e["question_id"]))
    with open(join(path, args.output_file), "w") as f:
        json.dump(out, f)
Example #5
def evalFromImages(args):
    # Fetch data.
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    print "Fetching eval data"
    imageLoader = imageModel.ImageLoader("data/val2014img", "val")
    eval_dset = VQAFeatureDataset('valSample',
                                  args.evalset_name,
                                  dictionary,
                                  imageLoader=imageLoader)

    # Fetch model.
    model = imageModel.getCombinedModel(args, eval_dset)
    model = nn.DataParallel(model).cuda()

    # Evaluate
    eval_loader = DataLoader(eval_dset, args.batch_size, shuffle=True)
    print "Evaluating..."
    model.train(False)
    eval_score, bound = train.evaluate(model, eval_loader)
    print "eval score: %.2f (%.2f)" % (100 * eval_score, 100 * bound)
Example #6
def evalNormal(args):
    # Fetch data.
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    print "Fetching eval data"
    eval_dset = VQAFeatureDataset('val', args.evalset_name, dictionary)

    # Fetch model.
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(eval_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')
    model = nn.DataParallel(model).cuda()
    if args.load_path:
        load_path = os.path.join(args.load_path, 'model.pth')
        print "Loading model from {}".format(load_path)
        model.load_state_dict(torch.load(load_path))

    # Evaluate
    eval_loader = DataLoader(eval_dset, args.batch_size, shuffle=True)
    print "Evaluating..."
    model.train(False)
    eval_score, bound = train.evaluate(model, eval_loader)
    print "eval score: %.2f (%.2f)" % (100 * eval_score, 100 * bound)
Example #7
def trainNormal(args):
    # Fetch data.
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    print "Fetching train data"
    train_dset = VQAFeatureDataset('train', 'train', dictionary)
    print "Fetching eval data"
    eval_dset = VQAFeatureDataset('valSample', args.evalset_name, dictionary)

    # Fetch model.
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')
    model = nn.DataParallel(model).cuda()
    if args.load_path:
        load_path = os.path.join(args.load_path, 'model.pth')
        print "Loading model from {}".format(load_path)
        model.load_state_dict(torch.load(load_path))

    # Train.
    train_loader = DataLoader(train_dset, args.batch_size, shuffle=True)
    eval_loader = DataLoader(eval_dset, args.batch_size, shuffle=True)
    train.train(model, train_loader, eval_loader, args.epochs, args.output)
Example #8
def create_glove_embedding_init(idx2word, glove_file):
    word2emb = {}
    with open(glove_file, 'r', encoding='utf-8') as f:
        entries = f.readlines()
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for entry in entries:
        vals = entry.split(' ')
        word = vals[0]
        vals = list(map(float, vals[1:]))
        word2emb[word] = np.array(vals)
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    d = create_dictionary(config.data_path)
    d.dump_to_file('./data/dictionary.pkl')

    d = Dictionary.load_from_file('./data/dictionary.pkl')
    emb_dim = 300
    #glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    glove_file = os.path.join(config.data_glove_path,
                              os.listdir(config.data_glove_path)[2])

    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save('data/glove6b_init_%dd.npy' % emb_dim, weights)
Example #9
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    print(
        'Evaluate a given model optimized by training split using validation split.'
    )
    args = parse_args()

    torch.backends.cudnn.benchmark = True

    if args.task == 'vqa':
        from train import evaluate
        dict_path = 'data/dictionary.pkl'
        dictionary = Dictionary.load_from_file(dict_path)
        eval_dset = VQAFeatureDataset('val', dictionary, adaptive=True)

    elif args.task == 'flickr':
        from train_flickr import evaluate
        dict_path = 'data/flickr30k/dictionary.pkl'
        dictionary = Dictionary.load_from_file(dict_path)
        eval_dset = Flickr30kFeatureDataset('test', dictionary)
        args.op = ''
        args.gamma = 1

    n_device = torch.cuda.device_count()
    batch_size = args.batch_size * n_device

    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(eval_dset, args.num_hid, args.op,
Example #10
def create_glove_embedding_init(idx2word, glove_file):
    word2emb = {}
    with open(glove_file, 'r') as f:
        entries = f.readlines()
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for entry in entries:
        vals = entry.split(' ')
        word = vals[0]
        # vals = map(float, vals[1:])
        vals = [float(i) for i in vals[1:]]
        word2emb[word] = np.array(vals)
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    d = create_dictionary('vqa_data')
    d.dump_to_file('vqa_data/dictionary.pkl')

    d = Dictionary.load_from_file('vqa_data/dictionary.pkl')
    emb_dim = 300
    glove_file = 'vqa_data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save('vqa_data/glove6b_init_%dd.npy' % emb_dim, weights)
Example #11
    else:
        args.combine_with_dataroot = None
    if args.combine_with_splits is not None:
        args.combine_with_splits = args.combine_with_splits.split(",")
    args.emb_dim = 300

    args.out_dictionary_json_file = args.dataroot + '/bottom-up-attention/combined_and_individual.json'

    return args


if __name__ == '__main__':
    args = parse_args()
    dataroot = args.dataroot
    if args.old_dictionary_file is not None:
        old_dictionary = Dictionary.load_from_file(args.old_dictionary_file)
    else:
        old_dictionary = None

    d = create_dictionary(dataroot,
                          args.dataset,
                          old_dictionary=old_dictionary,
                          args=args)

    with open(os.path.join(dataroot, 'bottom-up-attention',
                           'dictionary.json')) as f:
        combined_dict = json.load(f)
        combined_word_to_ix = combined_dict['word_to_ix']

    idx2word = {}
    combined_ix_to_curr_ix = {}
Example #12
def main():
    args = parse_args()

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    cp = not args.nocp

    print("Building train dataset...")
    train_dset = VQAFeatureDataset('train',
                                   dictionary,
                                   cp=cp,
                                   cache_image_features=args.cache_features)
    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val',
                                  dictionary,
                                  cp=cp,
                                  cache_image_features=args.cache_features)

    answer_voc_size = train_dset.num_ans_candidates

    # Compute the bias:
    # The bias here is just the expected score for each answer/question type

    # question_type -> answer -> total score
    question_type_to_probs = defaultdict(Counter)
    # question_type -> num_occurrences
    print("# question_type -> num_occurrences")
    question_type_to_count = Counter()
    for ex in train_dset.entries:
        ans = ex["answer"]
        q_type = ans["question_type"]
        question_type_to_count[q_type] += 1
        if ans["labels"] is not None:
            for label, score in zip(ans["labels"], ans["scores"]):
                question_type_to_probs[q_type][label] += score

    question_type_to_prob_array = {}
    for q_type, count in question_type_to_count.items():
        prob_array = np.zeros(answer_voc_size, np.float32)
        for label, total_score in question_type_to_probs[q_type].items():
            prob_array[label] += total_score
        prob_array /= count
        question_type_to_prob_array[q_type] = prob_array
    print(" ... DONE")
    # Now add a `bias` field to each example
    for ds in [train_dset, eval_dset]:
        for ex in ds.entries:
            q_type = ex["answer"]["question_type"]
            ex["bias"] = question_type_to_prob_array[q_type]

    # Build the model using the original constructor
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')

    print("BUILT MODEL.")
    # Add the loss_fn based on our arguments
    if args.mode == "bias_product":
        model.debias_loss_fn = BiasProduct()
    elif args.mode == "none":
        model.debias_loss_fn = Plain()
    elif args.mode == "reweight":
        model.debias_loss_fn = ReweightByInvBias()
    elif args.mode == "learned_mixin":
        model.debias_loss_fn = LearnedMixin(args.entropy_penalty)
    else:
        raise RuntimeError(args.mode)

    # Record the bias function we are using
    utils.create_dir(args.output)
    with open(args.output + "/debias_objective.json", "w") as f:
        js = model.debias_loss_fn.to_json()
        json.dump(js, f, indent=2)

    model = model.cuda()
    print(sum(p.numel() for p in model.parameters()))
    '''
Example #13
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()

    utils.create_dir(args.output)
    logger = utils.Logger(os.path.join(args.output, 'log.txt'))
    logger.write(args.__repr__())

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', dictionary, adaptive=True)
    val_dset = VQAFeatureDataset('val', dictionary, adaptive=True)

    batch_size = args.batch_size

    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid, args.op,
                                             args.gamma).cuda()

    tfidf = None
    weights = None

    if args.tfidf:
        dict = Dictionary.load_from_file('data/dictionary.pkl')
        tfidf, weights = tfidf_from_questions(['train', 'val', 'test2015'],
Example #14
def create_glove_embedding_init(idx2word, glove_file):
    word2emb = {}
    with open(glove_file, 'r') as f:
        entries = f.readlines()
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for entry in entries:
        vals = entry.split(' ')
        word = vals[0]
        vals = list(map(float, vals[1:]))
        word2emb[word] = np.array(vals)
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    d = create_dictionary('data')
    d.dump_to_file('data/dictionary_imsitu_final.pkl')

    d = Dictionary.load_from_file('data/dictionary_imsitu_final.pkl')
    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save('data/glove6b_init_imsitu_final_%dd.npy' % emb_dim, weights)
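create_dictionary is called in several of these __main__ blocks but never shown. A hypothetical sketch, assuming it follows the usual preprocessing pattern of adding every question word to a Dictionary; the question file names below are placeholders, not the actual files used by these projects.

import json
import os


def create_dictionary(dataroot):
    dictionary = Dictionary()
    # placeholder file names; the real scripts read whichever question files the project uses
    question_files = [
        'v2_OpenEnded_mscoco_train2014_questions.json',
        'v2_OpenEnded_mscoco_val2014_questions.json',
    ]
    for fname in question_files:
        questions = json.load(open(os.path.join(dataroot, fname)))['questions']
        for q in questions:
            dictionary.tokenize(q['question'], True)  # add new words to the dictionary
    return dictionary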
Example #15
File: eval.py  Project: ych133/VQA_ReGAT
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    if not torch.cuda.is_available():
        raise ValueError("CUDA is not available," +
                         "this code currently only support GPU.")

    n_device = torch.cuda.device_count()
    print("Found %d GPU cards for eval" % (n_device))
    device = torch.device("cuda")

    dictionary = Dictionary.load_from_file(
                 os.path.join(args.data_folder, 'glove/dictionary.pkl'))

    hps_file = f'{args.output_folder}/hps.json'
    model_hps = Struct(json.load(open(hps_file)))
    batch_size = model_hps.batch_size*n_device

    print("Evaluating on %s dataset with model trained on %s dataset" %
          (args.dataset, model_hps.dataset))
    if args.dataset == "vqa_cp":
        coco_train_features = Image_Feature_Loader(
                            'train', model_hps.relation_type,
                            adaptive=model_hps.adaptive,
                            dataroot=model_hps.data_folder)
        coco_val_features = Image_Feature_Loader(
                            'val', model_hps.relation_type,
                            adaptive=model_hps.adaptive,
Example #16
File: main.py  Project: FengSuSky/CCB-VQA
def main():
    args = parse_args()
    dataset = args.dataset
    args.output = os.path.join('logs', args.output)
    if not os.path.isdir(args.output):
        utils.create_dir(args.output)
    else:
        if click.confirm('Exp directory already exists in {}. Erase?'.format(
                args.output), default=False):
            os.system('rm -r ' + args.output)
            utils.create_dir(args.output)

        else:
            os._exit(1)

    if dataset == 'cpv1':
        dictionary = Dictionary.load_from_file('data/dictionary_v1.pkl')
    elif dataset == 'cpv2' or dataset == 'v2':
        dictionary = Dictionary.load_from_file('data/dictionary.pkl')

    print("Building train dataset...")
    train_dset = VQAFeatureDataset('train',
                                   dictionary,
                                   dataset=dataset,
                                   cache_image_features=args.cache_features)

    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val',
                                  dictionary,
                                  dataset=dataset,
                                  cache_image_features=args.cache_features)

    get_bias(train_dset, eval_dset)

    # Build the model using the original constructor
    constructor = 'build_%s' % args.model
    model = getattr(CCB_model,
                    constructor)(train_dset,
                                 args.num_hid).cuda()  #or base_model
    if dataset == 'cpv1':
        model.w_emb.init_embedding('data/glove6b_init_300d_v1.npy')
    elif dataset == 'cpv2' or dataset == 'v2':
        model.w_emb.init_embedding('data/glove6b_init_300d.npy')

    # Add the loss_fn based on our arguments
    if args.debias == "bias_product":
        model.debias_loss_fn = BiasProduct()
    elif args.debias == "none":
        model.debias_loss_fn = Plain()
    elif args.debias == "reweight":
        model.debias_loss_fn = ReweightByInvBias()
    elif args.debias == "learned_mixin":
        model.debias_loss_fn = LearnedMixin(args.entropy_penalty)
    elif args.debias == 'focal':
        model.debias_loss_fn = Focal()
    elif args.debias == 'CCB_loss':
        model.debias_loss_fn = CCB_loss(args.entropy_penalty)
    else:
        raise RuntimeError(args.debias)

    with open('util/qid2type_%s.json' % args.dataset, 'r') as f:
        qid2type = json.load(f)
    model = model.cuda()
    batch_size = args.batch_size

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    train_loader = DataLoader(train_dset,
                              batch_size,
                              shuffle=True,
                              num_workers=5)
    eval_loader = DataLoader(eval_dset,
                             batch_size,
                             shuffle=False,
                             num_workers=5)

    print("Starting training...")
    train(model, train_loader, eval_loader, args, qid2type)
Example #17
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    net = Question_Classifier(args.bert_mode,
                              args.bert_pretrain,
                              num_classes=3)

    save_dir_root = os.path.join(os.path.dirname(os.path.abspath(__file__)))
    if args.resume_epoch != 0:
        runs = sorted(
            glob.glob(
                os.path.join(save_dir_root, 'run', args.train_fold, 'run_*')))
        run_id = int(runs[-1].split('_')[-1]) if runs else 0
    else:
        runs = sorted(
            glob.glob(
                os.path.join(save_dir_root, 'run', args.train_fold, 'run_*')))
        run_id = int(runs[-1].split('_')[-1]) + 1 if runs else 0

    if args.run_id >= 0:
        run_id = args.run_id

    save_dir = os.path.join(save_dir_root, 'run', args.train_fold,
                            'run_' + str(run_id))
    log_dir = os.path.join(
        save_dir,
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' +
        socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    logger = open(os.path.join(save_dir, 'log.txt'), 'w')
    logger.write(
        'optim: SGD \nlr=%.4f\nweight_decay=%.4f\nmomentum=%.4f\nupdate_lr_every=%d\nseed=%d\n'
        % (args.lr, args.weight_decay, args.momentum, args.update_lr_every,
           args.seed))

    if not os.path.exists(os.path.join(save_dir, 'models')):
        os.makedirs(os.path.join(save_dir, 'models'))

    if args.resume_epoch == 0:
        print('Training from scratch...')
    else:
        net_resume_path = os.path.join(
            save_dir, 'models',
            'mcnet_epoch-' + str(args.resume_epoch - 1) + '.pth')
        print('Initializing weights from: {}, epoch: {}...'.format(
            save_dir, args.resume_epoch - 1))
        net.load_state_dict(
            torch.load(net_resume_path,
                       map_location=lambda storage, loc: storage))

    torch.cuda.set_device(device=0)
    net.cuda()

    net_optim = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    dictionary = Dictionary.load_from_file(args.dictionary_path)
    trainset0 = Question_Dataset('train0',
                                 dictionary,
                                 args.data_root,
                                 question_len=12)
    trainset1 = Question_Dataset('train1',
                                 dictionary,
                                 args.data_root,
                                 question_len=12)
    trainset2 = Question_Dataset('train2',
                                 dictionary,
                                 args.data_root,
                                 question_len=12)
    valset = Question_Dataset('val',
                              dictionary,
                              args.data_root,
                              question_len=12)
    testset = Question_Dataset('test',
                               dictionary,
                               args.data_root,
                               question_len=12)

    trainloader0 = DataLoader(trainset0,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=2)
    trainloader1 = DataLoader(trainset1,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=2)
    trainloader2 = DataLoader(trainset2,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=2)
    valloader = DataLoader(valset,
                           batch_size=args.batch_size,
                           shuffle=False,
                           num_workers=2)
    testloader = DataLoader(testset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=2)

    num_iter_tr = len(trainloader0)
    nitrs = args.resume_epoch * num_iter_tr
    nsamples = args.batch_size * nitrs
    print('each_epoch_num_iter: %d' % (num_iter_tr))

    global_step = 0

    epoch_losses = []
    recent_losses = []
    start_t = time.time()
    print('Training Network')

    for epoch in range(args.resume_epoch, args.nepochs):

        net.train()

        epoch_losses = []

        for ii, (sample_batched0, sample_batched1,
                 sample_batched2) in enumerate(
                     zip(trainloader0, trainloader1, trainloader2)):
            question0, label0 = sample_batched0['question'], sample_batched0[
                'label']
            question0, label0 = question0.cuda(), label0.cuda()
            question1, label1 = sample_batched1['question'], sample_batched1[
                'label']
            question1, label1 = question1.cuda(), label1.cuda()
            question2, label2 = sample_batched2['question'], sample_batched2[
                'label']
            question2, label2 = question2.cuda(), label2.cuda()

            global_step += args.batch_size

            out0 = net.forward(question0)
            out1 = net.forward(question1)
            out2 = net.forward(question2)

            loss0 = utils.CELoss(logit=out0, target=label0, reduction='mean')
            loss1 = utils.CELoss(logit=out1, target=label1, reduction='mean')
            loss2 = utils.CELoss(logit=out2, target=label2, reduction='mean')
            loss = (loss0 + loss1 + loss2) / 3

            trainloss = loss.item()
            epoch_losses.append(trainloss)
            if len(recent_losses) < args.log_every:
                recent_losses.append(trainloss)
            else:
                recent_losses[nitrs % len(recent_losses)] = trainloss
            net_optim.zero_grad()
            loss.backward()
            net_optim.step()

            nitrs += 1
            nsamples += args.batch_size

            if nitrs % args.log_every == 0:
                meanloss = sum(recent_losses) / len(recent_losses)

                print('epoch: %d ii: %d trainloss: %.2f timecost:%.2f secs' %
                      (epoch, ii, meanloss, time.time() - start_t))
                writer.add_scalar('data/trainloss', meanloss, nsamples)

        # validation
        net.eval()
        val_acc = 0.0
        test_acc = 0.0

        for ii, sample_batched in enumerate(valloader):
            question, label = sample_batched['question'], sample_batched[
                'label']
            question, label = question.cuda(), label.cuda()

            out = net.forward(question)
            tmp_acc = utils.cal_acc(out, label)
            val_acc += (tmp_acc * question.shape[0])
        val_acc /= len(valset)

        for ii, sample_batched in enumerate(testloader):
            question, label = sample_batched['question'], sample_batched[
                'label']
            question, label = question.cuda(), label.cuda()

            out = net.forward(question)
            tmp_acc = utils.cal_acc(out, label)
            test_acc += (tmp_acc * question.shape[0])
        test_acc /= len(testset)

        print('Validation:')
        print('epoch: %d, val_questions: %d val_acc: %.4f' %
              (epoch, len(valset), val_acc))
        print('epoch: %d, test_questions: %d test_acc: %.4f' %
              (epoch, len(testset), test_acc))
        writer.add_scalar('data/valid_acc', val_acc, nsamples)

        if epoch % args.save_every == args.save_every - 1:
            net_save_path = os.path.join(
                save_dir, 'models',
                'question_classifier_epoch-' + str(epoch) + '.pth')
            torch.save(net.state_dict(), net_save_path)
            print("Save net at {}\n".format(net_save_path))

        if epoch % args.update_lr_every == args.update_lr_every - 1:
            lr_ = utils.lr_poly(args.lr, epoch, args.nepochs, 0.9)
            print('(poly lr policy) learning rate: ', lr_)
            net_optim = optim.SGD(net.parameters(),
                                  lr=lr_,
                                  momentum=args.momentum,
                                  weight_decay=args.weight_decay)
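utils.lr_poly and utils.CELoss are used above but not shown. Minimal sketches consistent with their call sites, assuming a standard polynomial decay schedule and a thin cross-entropy wrapper:

import torch.nn.functional as F


def lr_poly(base_lr, epoch, max_epoch, power=0.9):
    # polynomial ("poly") decay, matching the call lr_poly(args.lr, epoch, args.nepochs, 0.9)
    return base_lr * (1 - float(epoch) / max_epoch) ** power


def CELoss(logit, target, reduction='mean'):
    # plain cross-entropy over class logits, matching CELoss(logit=..., target=..., reduction='mean')
    return F.cross_entropy(logit, target, reduction=reduction)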
Example #18
def main_worker(gpu, args):
    args.gpu = gpu

    if args.multiGPUs and args.gpu != 0:

        def print_pass(*args):
            pass

        builtins.print = print_pass

    if args.gpu is not None:
        print('Use GPU: {} for training'.format(args.gpu))

    if args.multiGPUs:
        args.rank = gpu
        setup(args.rank, args.world_size)

        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            args.workers = int(
                (args.workers + args.world_size - 1) / args.world_size)

    # prepare data
    if args.task == 'pvqa':
        dict_path = 'data/pvqa/pvqa_dictionary.pkl'
        dictionary = Dictionary.load_from_file(dict_path)
        test_dset = PVQAFeatureDataset(args.data_split,
                                       dictionary,
                                       adaptive=False)
        w_emb_path = 'data/pvqa/glove_pvqa_300d.npy'
    else:
        raise Exception('%s not implemented yet' % args.task)

    if args.task == 'pvqa':
        test_loader = DataLoader(test_dset,
                                 args.batch_size,
                                 shuffle=False,
                                 num_workers=args.workers,
                                 pin_memory=True)

    # prepare model

    model = BanModel(ntoken=test_dset.dictionary.ntoken,
                     num_ans_candidates=test_dset.num_ans_candidates,
                     num_hid=args.num_hid,
                     v_dim=test_dset.v_dim,
                     op=args.op,
                     gamma=args.gamma,
                     qa_bl=args.qa_bl)

    tfidf = None
    weights = None
    model.w_emb.init_embedding(w_emb_path, tfidf, weights)

    if args.multiGPUs:
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            args.workers = int(
                (args.workers + args.world_size - 1) / args.world_size)
            model = DDP(model, device_ids=[args.gpu])
        else:
            model.cuda()
            model = DDP(model)
    else:
        torch.cuda.set_device(args.gpu)
        model.cuda(args.gpu)

    # load snapshot
    if args.input is not None:
        print('#8')
        print('loading %s' % args.input)
        if args.gpu is None:
            model_data = torch.load(args.input)
        else:
            loc = 'cuda:{}'.format(args.gpu)
            model_data = torch.load(args.input, map_location=loc)
        model_data_sd = model_data.get('model_state', model_data)

        model.load_state_dict(model_data_sd)

    res = evaluate(test_loader, model, args)
    eval_score = res['eval_score']
    preds = res['preds']
    anss = res['anss']
    b_scores = []
    b_scores_1 = []
    b_scores_2 = []
    b_scores_3 = []
    assert len(preds) == len(
        anss), 'len(preds)=%d, len(anss)=%d' % (len(preds), len(anss))
    for i in range(len(preds)):
        pred_ans = test_dset.label2ans[preds[i]]
        gt_ans = test_dset.entries[i]['ans_sent']
        b_score = sentence_bleu(references=[str(gt_ans).lower().split()],
                                hypothesis=str(pred_ans).lower().split())
        b_score_1 = sentence_bleu(references=[str(gt_ans).lower().split()],
                                  hypothesis=str(pred_ans).lower().split(),
                                  weights=(1, 0, 0, 0))
        b_score_2 = sentence_bleu(references=[str(gt_ans).lower().split()],
                                  hypothesis=str(pred_ans).lower().split(),
                                  weights=(0, 1, 0, 0))
        b_score_3 = sentence_bleu(references=[str(gt_ans).lower().split()],
                                  hypothesis=str(pred_ans).lower().split(),
                                  weights=(0, 0, 1, 0))
        b_scores.append(b_score)
        b_scores_1.append(b_score_1)
        b_scores_2.append(b_score_2)
        b_scores_3.append(b_score_3)

    b_score_m = np.mean(b_scores)
    b_score_m_1 = np.mean(b_scores_1)
    b_score_m_2 = np.mean(b_scores_2)
    b_score_m_3 = np.mean(b_scores_3)
    b_score_info = 'bleu score=%.4f\n' % b_score_m
    b_score_info_1 = 'bleu1 score=%.4f\n' % b_score_m_1
    b_score_info_2 = 'bleu2 score=%.4f\n' % b_score_m_2
    b_score_info_3 = 'bleu3 score=%.4f' % b_score_m_3
    print(b_score_info)
    print(b_score_info_1)
    print(b_score_info_2)
    print(b_score_info_3)
    with open(os.path.join(args.output, 'type_result.txt'), 'a') as f:
        f.write(b_score_info)
        f.write(b_score_info_1)
        f.write(b_score_info_2)
        f.write(b_score_info_3)
Example #19
    parser.add_argument('--num_hid', type=int, default=1024)
    parser.add_argument('--model', type=str, default='baseline0_newatt')
    parser.add_argument('--output', type=str, default='saved_models/exp0')
    parser.add_argument('--batch_size', type=int, default=512)
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', dictionary)
    eval_dset = VQAFeatureDataset('val', dictionary)
    batch_size = args.batch_size

    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')

    model = nn.DataParallel(model).cuda()

    train_loader = DataLoader(train_dset, batch_size, shuffle=True, num_workers=1)
    eval_loader = DataLoader(eval_dset, batch_size, shuffle=True, num_workers=1)
    train(model, train_loader, eval_loader, args.epochs, args.output)
Example #20
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    #args = parse_args()

    #torch.manual_seed(args.seed)
    torch.manual_seed(1111)

    #torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed(1111)

    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file(
        os.path.join(saved_data_path, 'dictionary.pkl'))
    train_dset = VQAFeatureDataset_Relation(name='train',
                                            dictionary=dictionary)
    #eval_dset = VQAFeatureDataset3('val', dictionary)
    #test_dset = VQAFeatureDataset('test', dictionary)

    #batch_size = args.batch_size
    batch_size = 512

    #constructor = 'build_%s' % args.model

    #constructor = 'build_%s' % 'baseline0_newatt'
    constructor = 'build_%s' % 'baseline0_both_guided_newatt'

    #model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    model = getattr(final_base_model, constructor)(train_dset, 1024).cuda()
Example #21
def main():
    args = parse_args()

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    cp = not args.nocp

    print("Building train dataset...")
    train_dset = VQAFeatureDataset('train', dictionary, cp=cp,
                                   cache_image_features=args.cache_features)
    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val', dictionary, cp=cp,
                                  cache_image_features=args.cache_features)

    answer_voc_size = train_dset.num_ans_candidates

    # Compute the bias:
    # The bias here is just the expected score for each answer/question type

    # question_type -> answer -> total score
    question_type_to_probs = defaultdict(Counter)
    # question_type -> num_occurrences
    question_type_to_count = Counter()
    for ex in train_dset.entries:
        ans = ex["answer"]
        q_type = ans["question_type"]
        question_type_to_count[q_type] += 1
        if ans["labels"] is not None:
            for label, score in zip(ans["labels"], ans["scores"]):
                question_type_to_probs[q_type][label] += score

    question_type_to_prob_array = {}
    for q_type, count in question_type_to_count.items():
        prob_array = np.zeros(answer_voc_size, np.float32)
        for label, total_score in question_type_to_probs[q_type].items():
            prob_array[label] += total_score
        prob_array /= count
        question_type_to_prob_array[q_type] = prob_array

    # Now add a `bias` field to each example
    for ds in [train_dset, eval_dset]:
        for ex in ds.entries:
            q_type = ex["answer"]["question_type"]
            ex["bias"] = question_type_to_prob_array[q_type]

    # Build the model using the original constructor
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')

    # Add the loss_fn based on our arguments
    if args.mode == "bias_product":
        model.debias_loss_fn = BiasProduct()
    elif args.mode == "none":
        model.debias_loss_fn = Plain()
    elif args.mode == "reweight":
        model.debias_loss_fn = ReweightByInvBias()
    elif args.mode == "learned_mixin":
        model.debias_loss_fn = LearnedMixin(args.entropy_penalty)
    else:
        raise RuntimeError(args.mode)

    # Record the bias function we are using
    utils.create_dir(args.output)
    with open(args.output + "/debias_objective.json", "w") as f:
        js = model.debias_loss_fn.to_json()
        json.dump(js, f, indent=2)

    model = model.cuda()
    batch_size = args.batch_size

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    # The original version uses multiple workers, but that just seems slower on my setup
    train_loader = DataLoader(train_dset, batch_size, shuffle=True, num_workers=0)
    eval_loader = DataLoader(eval_dset, batch_size, shuffle=False, num_workers=0)

    print("Starting training...")
    train(model, train_loader, eval_loader, args.epochs, args.output, args.eval_each_epoch)
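The debiasing objectives (Plain, BiasProduct, ReweightByInvBias, LearnedMixin) are imported from elsewhere and not reproduced in these snippets. Purely as an illustration, here is a sketch of the simplest one, Plain, assuming the shared (hidden, logits, bias, labels) forward signature and the to_json hook used when recording the objective above; the real implementations may differ.

import torch.nn as nn
import torch.nn.functional as F


class Plain(nn.Module):
    def forward(self, hidden, logits, bias, labels):
        # hidden and bias are ignored; plain binary cross-entropy on the soft answer scores
        loss = F.binary_cross_entropy_with_logits(logits, labels)
        # some repos scale by the number of answer candidates; drop if not needed
        return loss * labels.size(1)

    def to_json(self):
        return dict(debias_fn="Plain")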
Example #22
#         utils.create_dir(args.output)
#         if 0 <= args.epoch:
#             model_label += '_epoch%d' % args.epoch

#         with open(args.output+'/%s_%s.json' \
#             % (args.split, model_label), 'w') as f:
#             json.dump(results, f)

#     process(args, model, eval_loader)

if __name__ == '__main__':
    args = parse_args()

    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file('ban-vqa-demo/data/dictionary.pkl')
    ans2label_path = os.path.join('ban-vqa-demo/data/cache',
                                  'trainval_ans2label.pkl')
    label2ans_path = os.path.join('ban-vqa-demo/data/cache',
                                  'trainval_label2ans.pkl')
    ans2label = pkl.load(open(ans2label_path, 'rb'))
    label2ans = pkl.load(open(label2ans_path, 'rb'))
    num_ans_candidates = len(ans2label)

    eval_dset = VQAFeatureDataset_Custom(dictionary,
                                         len(ans2label),
                                         adaptive=True)
    print(ans2label)
    n_device = torch.cuda.device_count()
    batch_size = args.batch_size * n_device
Example #23
    results = []
    for i in range(logits.size(0)):
        result = {}
        result['types'] = types[i]
        result['question_id'] = qIds[i]
        result['answer'] = get_answer(logits[i], dataloader)
        results.append(result)
    return results


if __name__ == '__main__':
    args = parse_args()

    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file('data/cocodictionary.pkl')
    eval_dset = COCOFeatureDataset(args.split, dictionary, adaptive=False)

    n_device = torch.cuda.device_count()
    batch_size = args.batch_size * n_device

    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(eval_dset, args.num_hid).cuda()
    eval_loader = DataLoader(eval_dset,
                             batch_size,
                             shuffle=False,
                             num_workers=1,
                             collate_fn=utils.trim_collate)

    def process(args, model, eval_loader):
        model_path = args.input + '/model%s.pth' % \
Example #24
def save_results(results, savedir):
    path_rslt = os.path.join(savedir, 'results.json')
    with open(path_rslt, 'w') as handle:
        json.dump(results, handle)


if __name__ == '__main__':
    args = parse_args()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    #q_dict = Dictionary.load_from_file('data/question_dictionary.pkl')
    #c_dict = Dictionary.load_from_file('data/caption_dictionary.pkl')
    q_dict = Dictionary.load_from_file('data/VQAE/question_dictionary.pkl')
    c_dict = Dictionary.load_from_file('data/VQAE/explain_dictionary.pkl')

    #train_dset = VQAFeatureDataset('train', q_dict, c_dict, args.att_thr)
    #eval_dset = VQAFeatureDataset('val', q_dict, c_dict, args.att_thr)
    train_dset = VQAEDataset('train', q_dict, c_dict, 'cache/VQAE2')
    eval_dset = VQAEDataset('val', q_dict, c_dict, 'cache/VQAE2')
    #train_dset = VQAEVQA2Dataset('train', q_dict, c_dict, 'cache')
    #eval_dset = VQAEVQA2Dataset('val', q_dict, c_dict, 'cache')
    batch_size = args.batch_size

    constructor = 'build_%s' % args.model
    model = utils.factory(constructor, train_dset, args.num_hid, args.att_dim,
                          args.decode_dim).cuda()

    model_path = os.path.join(args.output, 'model.pth')
Example #25
                total_number += 1
            else:
                print('Hahahahahahahahahahaha')

    score = score / len(dataloader.dataset)
    V_loss /= len(dataloader.dataset)
    score_yesno /= total_yesno
    score_other /= total_other
    score_number /= total_number

    return score, score_yesno, score_other, score_number


if __name__ == '__main__':
    opt = opts.parse_opt()
    dictionary = Dictionary.load_from_file(f'{opts.data_dir}/dictionary.pkl')
    opt.ntokens = dictionary.ntoken
    model = Model_explain2(opt)

    model = model.cuda()
    model = nn.DataParallel(model).cuda()
    # model = model.cuda()

    eval_dset = GraphQAIMGDataset('v2cp_test', dictionary, opt)
    eval_loader = DataLoader(eval_dset, opt.batch_size, shuffle=False, num_workers=0)

    states_ = torch.load('saved_models/%s/model-best.pth'%opt.load_model_states)
    states = model.state_dict()
    for k in states_.keys():
        if k in states:
            states[k] = states_[k]
Example #26
        json.dump(results, handle)


if __name__ == '__main__':
    args = parse_args()
    args.output = args.output + '_' + str(args.temperature)
    share_qe_dict = False
    vocab_source = 'VQAE'  #### 'VQAE' or 'VQAv2'

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    if share_qe_dict:
        qe_dict = Dictionary.load_from_file(
            os.path.join('data', vocab_source,
                         'question_explain_dictionary.pkl'))
    else:
        q_dict = Dictionary.load_from_file(
            os.path.join('data', vocab_source, 'question_dictionary.pkl'))
        c_dict = Dictionary.load_from_file(
            os.path.join('data', vocab_source, 'explain_dictionary.pkl'))

    #train_dset = VQAFeatureDataset('train', q_dict, c_dict, 'cache/VQAE2',args.att_thr)
    #eval_dset = VQAFeatureDataset('val', q_dict, c_dict, 'cache/VQAE2',args.att_thr)
    train_dset = VQAEDataset('train', q_dict, c_dict, 'cache/VQAE2')
    eval_dset = VQAEDataset('val', q_dict, c_dict, 'cache/VQAE2')
    #train_dset = VQAEVQA2Dataset('train', q_dict, c_dict, 'cache')
    #eval_dset = VQAEVQA2Dataset('val', q_dict, c_dict, 'cache')
    batch_size = args.batch_size
Example #27
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    dictionary = Dictionary.load_from_file(args.dictionary_path)
    feature_dict = {}

    ques_net = Question_Classifier(args.bert_mode,
                                   args.bert_pretrain,
                                   num_classes=args.ques_num_classes)
    img_net = Network(backbone_type=args.backbone_type,
                      num_classes=args.img_num_classes)
    cls_net = models.resnet34(pretrained=False, num_classes=2)
    cls_net = cls_net.cuda()
    ques_net = ques_net.cuda()
    img_net = img_net.cuda()

    cls_net.load_state_dict(torch.load(args.cls2_model_path))
    ques_net.load_state_dict(
        torch.load(args.ques_model_path,
                   map_location=lambda storage, loc: storage))
    img_net.load_model(args.img_model_path)

    eval_dset = VQAFeatureDataset(args.split,
                                  dictionary,
                                  args.data_root,
                                  question_len=12,
                                  clip=True)
    eval_loader = DataLoader(eval_dset,
                             args.batch_size,
                             shuffle=False,
                             num_workers=2)

    cls_net.eval()
    ques_net.eval()
    img_net.eval()

    gt_list = []
    with torch.no_grad():
        for v, q, a, ans_type, q_types, image_name in tqdm(iter(eval_loader)):
            v, q, a = v.cuda(), q.cuda(), a.cuda()
            v = v.reshape(v.shape[0], 3, 224, 224)
            q_prob = ques_net(q)  # ques_num_classes
            q_prob = q_prob[
                0]  # [0: closed-ended-normal, 1: closed-ended-abnormal 2: open-ended]
            q_type = torch.argmax(q_prob)

            v_prob, feature = img_net(v)  # 1 x img_num_classes

            if q_type == 0 or q_type == 1:
                continue
            else:
                feature = feature.cpu().numpy().tolist()
                temp_list = []
                for i in feature:
                    temp_list.append(round(i, 4))
                gt = torch.argmax(a[0]).item()
                if gt not in gt_list:
                    gt_list.append(gt)
                    feature_dict[gt] = [temp_list]
                elif gt in gt_list:
                    feature_dict[gt].append(temp_list)
        json.dump(feature_dict, open('feature_dict.json', 'w'))
Example #28
def create_glove_embedding_init(idx2word, glove_file):
    word2emb = {}
    with open(glove_file, 'r') as f:
        entries = f.readlines()
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for entry in entries:
        vals = entry.split(' ')
        word = vals[0]
        vals = list(map(float, vals[1:]))
        word2emb[word] = np.array(vals)
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    args = parse_args()
    dataroot = 'data' if args.task == 'vqa' else 'data/flickr30k'

    dictionary_path = os.path.join(dataroot, 'dictionary.pkl')

    d = create_dictionary(dataroot, args.task)
    d.dump_to_file(dictionary_path)

    d = Dictionary.load_from_file(dictionary_path)
    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save(os.path.join(dataroot, 'glove6b_init_%dd.npy' % emb_dim), weights)
Example #29
def main_worker(gpu, args):
    args.gpu = gpu

    if args.multiGPUs and args.gpu != 0:

        def print_pass(*args):
            pass

        builtins.print = print_pass

    if args.gpu is not None:
        print('Use GPU: {} for training'.format(args.gpu))

    if args.multiGPUs:
        args.rank = gpu
        setup(args.rank, args.world_size)

        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            args.workers = int(
                (args.workers + args.world_size - 1) / args.world_size)

    # prepare data
    if args.task == 'pvqa':
        dict_path = 'data/pvqa/pvqa_dictionary.pkl'
        dictionary = Dictionary.load_from_file(dict_path)
        train_dset = PVQAFeatureDataset(args.train, dictionary, adaptive=False)
        val_dset = PVQAFeatureDataset(args.val, dictionary, adaptive=False)
        w_emb_path = 'data/pvqa/glove_pvqa_300d.npy'
    else:
        raise Exception('%s not implemented yet' % args.task)

    if args.multiGPUs:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dset)
    else:
        train_sampler = None

    if args.task == 'pvqa':
        train_loader = DataLoader(train_dset,
                                  args.batch_size,
                                  shuffle=False,
                                  num_workers=args.workers,
                                  pin_memory=True,
                                  sampler=train_sampler)
        eval_loader = DataLoader(val_dset,
                                 args.batch_size,
                                 shuffle=False,
                                 num_workers=args.workers,
                                 pin_memory=True)

    # prepare model

    model = BanModel(ntoken=train_dset.dictionary.ntoken,
                     num_ans_candidates=train_dset.num_ans_candidates,
                     num_hid=args.num_hid,
                     v_dim=train_dset.v_dim,
                     op=args.op,
                     gamma=args.gamma)

    tfidf = None
    weights = None
    model.w_emb.init_embedding(w_emb_path, tfidf, weights)

    if args.multiGPUs:
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            args.workers = int(
                (args.workers + args.world_size - 1) / args.world_size)
            model = DDP(model, device_ids=[args.gpu])
        else:
            model.cuda()
            model = DDP(model)
    else:
        torch.cuda.set_device(args.gpu)
        model.cuda(args.gpu)

    # load snapshot
    if args.input is not None:
        print('#8')
        print('loading %s' % args.input)
        if args.gpu is None:
            model_data = torch.load(args.input)
        else:
            loc = 'cuda:{}'.format(args.gpu)
            model_data = torch.load(args.input, map_location=loc)
        model_data_sd = model_data.get('model_state', model_data)

        for name, param in model.named_parameters():
            if name in model_data_sd:
                param.data = model_data_sd[name]

        # optimizer = torch.optim.Adamax(filter(lambda p: p.requires_grad, model.parameters()))
        # optimizer.load_state_dict(model_data.get('optimizer_state', model_data))
        args.start_epoch = model_data['epoch'] + 1

    optimizer = torch.optim.Adamax(
        filter(lambda p: p.requires_grad, model.parameters()))

    best_eval_score = 0
    for epoch in range(args.start_epoch, args.epochs):
        if args.multiGPUs:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, eval_loader, train_dset, model, optimizer, epoch,
              args)

        eval_score = evaluate(eval_loader, model, args)

        with open(os.path.join(args.output, 'log.log'), 'a') as f:
            f.write(str(datetime.datetime.now()))
            f.write('epoch=%d' % epoch)
            f.write('eval_score=%.4f' % eval_score)

        print('eval_score=', eval_score)
        print('best eval_score = ', best_eval_score)

        if not args.multiGPUs or (args.multiGPUs and args.gpu == 0):
            if eval_score > best_eval_score:
                model_path = os.path.join(args.output, 'model_best.pth')
                utils.save_model(model_path, model, epoch, optimizer)
                best_eval_score = eval_score
Example #30
def main():

    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)
    #parser.add_argument("--command", choices = ["train", "eval", "resume", 'predict'], required = True)
    parser.add_argument('--resume_training',
                        action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model',
                        type=str,
                        default='',
                        help='The model we resume')
    parser.add_argument('--pretrained_buatt_model',
                        type=str,
                        default='',
                        help='pretrained verb module')
    parser.add_argument('--train_role',
                        action='store_true',
                        help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument(
        '--use_pretrained_buatt',
        action='store_true',
        help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument(
        '--finetune_cnn',
        action='store_true',
        help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir',
                        type=str,
                        default='./trained_models',
                        help='Location to output the model')
    parser.add_argument('--evaluate',
                        action='store_true',
                        help='Only run evaluation on the dev set')
    parser.add_argument('--test',
                        action='store_true',
                        help='Only run evaluation on the test set')
    parser.add_argument('--dataset_folder',
                        type=str,
                        default='./imSitu',
                        help='Location of annotations')
    parser.add_argument('--imgset_dir',
                        type=str,
                        default='./resized_256',
                        help='Location of original images')
    parser.add_argument('--frcnn_feat_dir',
                        type=str,
                        help='Location of output from detectron')
    parser.add_argument('--train_file',
                        default="train_new_2000_all.json",
                        type=str,
                        help='train file name')
    parser.add_argument('--dev_file',
                        default="dev_new_2000_all.json",
                        type=str,
                        help='dev file name')
    parser.add_argument('--test_file',
                        default="test_new_2000_all.json",
                        type=str,
                        help='test file name')
    parser.add_argument('--model_saving_name',
                        type=str,
                        help='save name of the output model')

    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--num_hid', type=int, default=1024)
    parser.add_argument('--model',
                        type=str,
                        default='baseline0grid_imsitu_agent')
    parser.add_argument('--output', type=str, default='saved_models/exp0')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--num_iter', type=int, default=1)
    parser.add_argument('--seed', type=int, default=1111, help='random seed')

    #todo: train role module separately with gt verbs

    args = parser.parse_args()

    clip_norm = 0.25
    n_epoch = args.epochs
    batch_size = args.batch_size
    n_worker = 3

    #dataset_folder = 'imSitu'
    #imgset_folder = 'resized_256'
    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir

    print('model spec: top-down attention with role question')

    train_set = json.load(open(dataset_folder + '/' + args.train_file))
    imsitu_roleq = json.load(open("data/imsitu_questions_prev.json"))

    dict_path = 'data/dictionary_imsitu_roleall.pkl'
    dictionary = Dictionary.load_from_file(dict_path)
    w_emb_path = 'data/glove6b_init_imsitu_roleall_300d.npy'
    encoder = imsitu_encoder(train_set, imsitu_roleq, dictionary)

    train_set = imsitu_loader_roleq_buatt_place(imgset_folder, train_set,
                                                encoder, dictionary, 'train',
                                                encoder.train_transform)

    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_set, args.num_hid,
                                             len(encoder.place_label_list),
                                             encoder)

    model.w_emb.init_embedding(w_emb_path)

    #print('MODEL :', model)

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=n_worker)

    dev_set = json.load(open(dataset_folder + '/' + args.dev_file))
    dev_set = imsitu_loader_roleq_buatt_place(imgset_folder, dev_set, encoder,
                                              dictionary, 'val',
                                              encoder.dev_transform)
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=n_worker)

    test_set = json.load(open(dataset_folder + '/' + args.test_file))
    test_set = imsitu_loader_roleq_buatt_place(imgset_folder, test_set,
                                               encoder, dictionary, 'test',
                                               encoder.dev_transform)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=n_worker)

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    torch.manual_seed(1234)
    if args.gpuid >= 0:
        #print('GPU enabled')
        model.cuda()
        torch.cuda.manual_seed(1234)
        torch.backends.cudnn.deterministic = True

    if args.use_pretrained_buatt:
        print('Use pretrained from: {}'.format(args.pretrained_buatt_model))
        if len(args.pretrained_buatt_model) == 0:
            raise Exception('[pretrained buatt module] not specified')
        #model_data = torch.load(args.pretrained_ban_model, map_location='cpu')
        #model.load_state_dict(model_data.get('model_state', model_data))

        utils_imsitu.load_net_ban(args.pretrained_buatt_model, [model],
                                  ['module'], ['w_emb', 'classifier'])
        model_name = 'pre_trained_buatt'
    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if len(args.resume_model) == 0:
            raise Exception('[pretrained module] not specified')
        utils_imsitu.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        print('Training from scratch.')
        model_name = 'train_full'

    utils_imsitu.set_trainable(model, True)
    #utils_imsitu.set_trainable(model.classifier, True)
    #utils_imsitu.set_trainable(model.w_emb, True)
    #utils_imsitu.set_trainable(model.q_emb, True)
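    # Per-module learning rates: the freshly initialised classifier and word
    # embedding use the base lr (1e-3), the question encoder gets 5e-4, and the
    # pretrained attention / fusion nets are updated more gently at 5e-5.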
    optimizer = torch.optim.Adamax([
        {
            'params': model.classifier.parameters()
        },
        {
            'params': model.w_emb.parameters()
        },
        {
            'params': model.q_emb.parameters(),
            'lr': 5e-4
        },
        {
            'params': model.v_att.parameters(),
            'lr': 5e-5
        },
        {
            'params': model.q_net.parameters(),
            'lr': 5e-5
        },
        {
            'params': model.v_net.parameters(),
            'lr': 5e-5
        },
    ],
                                   lr=1e-3)

    #utils_imsitu.set_trainable(model, True)
    #optimizer = torch.optim.Adamax(model.parameters(), lr=1e-3)

    #optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step, gamma=lr_gamma)
    #gradient clipping, grad check
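    # ExponentialLR multiplies every parameter group's lr by gamma (0.9) on each
    # scheduler.step(), so the per-group ratios set above are preserved.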
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    if args.evaluate:
        top1, top5, val_loss = eval(model,
                                    dev_loader,
                                    encoder,
                                    args.gpuid,
                                    write_to_file=True)

        top1_avg = top1.get_average_results_nouns()
        top5_avg = top5.get_average_results_nouns()

        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + top5_avg["verb"] + \
                    top5_avg["value"] + top5_avg["value-all"] + top5_avg["value*"] + top5_avg["value-all*"]
        avg_score /= 8

        print('Dev average :{:.2f} {} {}'.format(
            avg_score * 100, utils_imsitu.format_dict(top1_avg, '{:.2f}',
                                                      '1-'),
            utils_imsitu.format_dict(top5_avg, '{:.2f}', '5-')))

        #write results to csv file
        role_dict = top1.role_dict
        fail_val_all = top1.value_all_dict
        pass_val_dict = top1.vall_all_correct

        with open('role_pred_data.json', 'w') as fp:
            json.dump(role_dict, fp, indent=4)

        with open('fail_val_all.json', 'w') as fp:
            json.dump(fail_val_all, fp, indent=4)

        with open('pass_val_all.json', 'w') as fp:
            json.dump(pass_val_dict, fp, indent=4)

        print('Writing predictions to file completed!')

    elif args.test:
        top1, top5, val_loss = eval(model,
                                    test_loader,
                                    encoder,
                                    args.gpuid,
                                    write_to_file=True)

        top1_avg = top1.get_average_results_nouns()
        top5_avg = top5.get_average_results_nouns()

        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + top5_avg["verb"] + \
                    top5_avg["value"] + top5_avg["value-all"] + top5_avg["value*"] + top5_avg["value-all*"]
        avg_score /= 8

        print('Test average :{:.2f} {} {}'.format(
            avg_score * 100, utils_imsitu.format_dict(top1_avg, '{:.2f}',
                                                      '1-'),
            utils_imsitu.format_dict(top5_avg, '{:.2f}', '5-')))

    else:

        print('Model training started!')
        train(model, train_loader, dev_loader, None, optimizer, scheduler,
              n_epoch, args.output_dir, encoder, args.gpuid, clip_norm, None,
              model_name, args.model_saving_name, args)
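To make the effect of the grouped optimiser plus ExponentialLR concrete, here is a small self-contained sketch; the toy modules merely stand in for the sub-networks above and are purely illustrative.

import torch
import torch.nn as nn

# Hypothetical stand-ins for the model sub-modules used above.
head = nn.Linear(8, 4)       # like the classifier: base learning rate
backbone = nn.Linear(8, 8)   # like the pretrained attention nets: smaller lr

optimizer = torch.optim.Adamax([
    {'params': head.parameters()},                  # inherits lr=1e-3
    {'params': backbone.parameters(), 'lr': 5e-5},  # overrides the base lr
], lr=1e-3)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

for epoch in range(3):
    optimizer.step()    # a real loop would backprop a loss first
    scheduler.step()
    print([group['lr'] for group in optimizer.param_groups])
# Each step multiplies both rates by 0.9, preserving the head/backbone ratio.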
def create_glove_embedding_init(idx2word, glove_file):
    word2emb = {}
    with open(glove_file, 'r') as f:
        entries = f.readlines()
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for entry in entries:
        vals = entry.split(' ')
        word = vals[0]
        #vals = map(float, vals[1:])
        valv = [float(v) for v in vals[1:]]
        word2emb[word] = np.array(valv)
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    d = create_dictionary('data')
    d.dump_to_file('data/caption_dictionary.pkl')

    d = Dictionary.load_from_file('data/caption_dictionary.pkl')
    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save('data/glove6b_caption_init_%dd.npy' % emb_dim, weights)
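The saved .npy can then be used to initialise a word-embedding layer, which is presumably what init_embedding does in the examples above. A minimal sketch, assuming the file written by the block above (a vocabulary-sized matrix of 300-d GloVe vectors):

import numpy as np
import torch
import torch.nn as nn

weights = np.load('data/glove6b_caption_init_300d.npy')   # shape (ntoken, emb_dim)
ntoken, emb_dim = weights.shape
emb = nn.Embedding(ntoken, emb_dim)
emb.weight.data.copy_(torch.from_numpy(weights))
# Words absent from GloVe keep the zero rows left by create_glove_embedding_init.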