Example No. 1
def validate(model, data, device, detail=False):
    count, correct = 0, 0
    model.eval()
    details = { cat:[0,0] for cat in {'count', 'compare number', 'exist', 'query', 'compare attribute'}}
    print('validate...')
    for batch in tqdm(data, total=len(data)):
        answers, questions, *batch_input = [todevice(x, device) for x in batch]
        logits = model(*batch_input)
        predicts = logits.max(1)[1]
        correct += torch.eq(predicts, answers).long().sum().item()
        count += answers.size(0)
        if detail:
            programs = batch_input[0]
            for i in range(len(answers)):
                for j in range(len(programs[i])):
                    program = data.vocab['program_idx_to_token'][programs[i][j].item()]
                    if program in ['<NULL>', '<START>', '<END>', '<UNK>', 'unique']:
                        continue
                    cat = map_program_to_cat[program]
                    details[cat][0] += int(predicts[i].item()==answers[i].item())
                    details[cat][1] += 1
                    break
    acc = correct / count
    if detail:
        details = { k:(v[0]/v[1]) for k,v in details.items() }
        return acc, details
    return acc
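
A minimal usage sketch for the detail flag (assuming model, val_loader, and device are built as in the train() examples below); it prints the overall accuracy and the per-category breakdown returned by this function:

# Hypothetical usage of validate() with per-category accuracies.
acc, details = validate(model, val_loader, device, detail=True)
print('overall accuracy: %.4f' % acc)
for cat, cat_acc in details.items():
    print('%s: %.4f' % (cat, cat_acc))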
Example No. 2
def train(args):
    logging.info("Create train_loader and val_loader.........")
    train_loader_kwargs = {
        'question_pt': args.train_question_pt,
        'scene_pt': args.train_scene_pt,
        'vocab_json': args.vocab_json,
        'batch_size': args.batch_size,
        'ratio': args.ratio,
        'shuffle': True
    }
    val_loader_kwargs = {
        'question_pt': args.val_question_pt,
        'scene_pt': args.val_scene_pt,
        'vocab_json': args.vocab_json,
        'batch_size': args.batch_size,
        'shuffle': False
    }
    
    train_loader = ClevrDataLoader(**train_loader_kwargs)
    val_loader = ClevrDataLoader(**val_loader_kwargs)

    logging.info("Create model.........")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model_kwargs = { k:v for k,v in vars(args).items() if k in {
        'dim_v', 'dim_pre_v', 'num_edge_cat', 'num_class', 'num_attribute',
        } }
    model_kwargs_tosave = copy.deepcopy(model_kwargs) 
    model_kwargs['vocab'] = train_loader.vocab
    model = XNMNet(**model_kwargs).to(device)
    logging.info(model)

    optimizer = optim.Adam(model.parameters(), args.lr, weight_decay=args.l2reg)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=[int(1/args.ratio)], gamma=0.1)
    criterion = nn.CrossEntropyLoss().to(device)
    logging.info("Start training........")
    tic = time.time()
    iter_count = 0
    for epoch in range(args.num_epoch):
        for i, batch in enumerate(train_loader.generator()):
            iter_count += 1
            progress = epoch + i / len(train_loader)
            answers, questions, *batch_input = \
                    [todevice(x, device) for x in batch]

            logits, others = model(*batch_input)
            loss = criterion(logits, answers)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i+1) % (len(train_loader) // 10) == 0:
                logging.info("Progress %.3f  loss = %.3f" % (progress, loss.item()))
        scheduler.step()
        if (epoch+1) % 1 == 0:
            valid_acc = validate(model, val_loader, device)
            logging.info('\n ~~~~~~ Valid Accuracy: %.4f ~~~~~~~\n' % valid_acc)

            save_checkpoint(epoch, model, optimizer, model_kwargs_tosave, os.path.join(args.save_dir, 'model.pt')) 
            logging.info(' >>>>>> save to %s <<<<<<' % (args.save_dir))
Example No. 3
def validate(model, data, device, detail=False):
    count, correct = 0, 0
    model.eval()
    details = {
        cat: [0, 0]
        for cat in
        {'count', 'compare number', 'exist', 'query', 'compare attribute'}
    }
    print('validate...')
    for batch in tqdm(data.generator(), total=len(data)):
        answers, questions, *batch_input = [todevice(x, device) for x in batch]
        logits, others = model(*batch_input)
        predicts = logits.max(1)[1]
        """
        There are some counting questions in CLEVR whose answer is a large number (such as 8 and 9). 
        However, as the training instances of such questions are very few, 
        the predictions of our softmax-based classifier can't reach a 100% accuracy for counting questions (we can only reach up to 99.99%). 
        Thanks to our attention mechanism over scene graphs, we can predict the answers of counting questions by directly summing up the node attention, 
        instead of feeding hidden features into a classifier. This alternative strategy gives a 100% counting accuracy.
        """
        # correct += torch.eq(predicts, answers).long().sum().item()
        count_outputs = others['count_outputs']
        for i in range(len(count_outputs)):
            if count_outputs[i] is None:
                correct += int(predicts[i].item() == answers[i].item())
            else:
                p = int(round(count_outputs[i].item()))
                a = int(data.vocab['answer_idx_to_token'][answers[i].item()])
                correct += int(p == a)
        count += answers.size(0)
        if detail:
            programs = batch_input[0]
            for i in range(len(answers)):
                for j in range(len(programs[i])):
                    program = data.vocab['program_idx_to_token'][programs[i][j].item()]
                    if program in ['<NULL>', '<START>', '<END>', '<UNK>', 'unique']:
                        continue
                    cat = map_program_to_cat[program]
                    if program == 'count':
                        p = int(round(count_outputs[i].item()))
                        a = int(data.vocab['answer_idx_to_token'][answers[i].item()])
                    else:
                        p = predicts[i].item()
                        a = answers[i].item()
                    details[cat][0] += int(p == a)
                    details[cat][1] += 1
                    break
    acc = correct / count
    if detail:
        details = {k: (v[0] / v[1]) for k, v in details.items()}
        return acc, details
    return acc
Example No. 4
def test(model, data, device):
    model.eval()
    results = []
    for batch in tqdm(data, total=len(data)):
        coco_ids, answers, *batch_input = [todevice(x, device) for x in batch]
        logits, others = model(*batch_input)
        predicts = torch.max(logits, dim=1)[1]
        for predict in predicts:
            results.append(data.vocab['answer_idx_to_token'][predict.item()])
    return results
Example No. 5
def validate(model, data, device):
    model.eval()
    print('validate...')
    total_acc, count = 0, 0
    for batch in tqdm(data, total=len(data)):
        coco_ids, answers, *batch_input = [todevice(x, device) for x in batch]
        logits, others = model(*batch_input)
        acc = batch_accuracy(logits, answers)
        total_acc += acc.sum().item()
        count += answers.size(0)
    acc = total_acc / count
    return acc
Example No. 6
def validate(model, data, device, withLossFlag=False, func=None):
    print('validate...')
    model.eval()
    total_acc, count = 0, 0
    total_loss = 0
    for batch in tqdm(data, total=len(data)):
        coco_ids, answers, *batch_input = [todevice(x, device) for x in batch]
        logits, others = model(*batch_input)
        if withLossFlag:
            nll = -func.log_softmax(logits, dim=1)
            loss = (nll * answers / 10).sum(dim=1).mean()
            total_loss += loss.item()
        acc = batch_accuracy(logits, answers)
        total_acc += acc.sum().item()
        count += answers.size(0)
    acc = total_acc / count
    if withLossFlag:
        return acc, total_loss / len(data)
    return acc
Example No. 7
def test_with_david_generated_program(model, data, device, pretrained_dir):
    program_generator = load_program_generator(os.path.join(pretrained_dir, 'program_generator.pt')).to(device)
    david_vocab = json.load(open(os.path.join(pretrained_dir, 'david_vocab.json')))
    david_vocab['program_idx_to_token'] = invert_dict(david_vocab['program_token_to_idx'])
    results = [] 
    model.eval()
    for batch in tqdm(data, total=len(data)):
        _, questions, gt_programs, gt_program_inputs, features, edge_vectors = [todevice(x, device) for x in batch]
        programs, program_inputs = [], []
        # generate program using david model for each question
        for i in range(questions.size(0)):
            question_str = []
            for j in range(questions.size(1)):
                word = data.vocab['question_idx_to_token'][questions[i,j].item()]
                if word == '<START>': continue
                if word == '<END>': break
                question_str.append(word)
            question_str = ' '.join(question_str) # question string
            david_program = generate_single_program(question_str, program_generator, david_vocab, device)
            david_program = [david_vocab['program_idx_to_token'][i.item()] for i in david_program.squeeze()]
            # convert david program to ours. return two index lists
            program, program_input = convert_david_program_to_mine(david_program, data.vocab)
            programs.append(program)
            program_inputs.append(program_input)
        # padding
        max_len = max(len(p) for p in programs)
        for i in range(len(programs)):
            while len(programs[i]) < max_len:
                programs[i].append(data.vocab['program_token_to_idx']['<NULL>'])
                program_inputs[i].append(data.vocab['question_token_to_idx']['<NULL>'])
        # to tensor
        programs = torch.LongTensor(programs).to(device)
        program_inputs = torch.LongTensor(program_inputs).to(device)

        logits = model(programs, program_inputs, features, edge_vectors)
        predicts = logits.max(1)[1]
        for predict in predicts:  # note: the loader must not shuffle, so results stay aligned with question order
            results.append(data.vocab['answer_idx_to_token'][predict.item()])
    return results
Example No. 8
def train():
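    # Assumption: train_loader_kwargs, val_loader_kwargs, model_kwargs, lr, and num_epoch
    # are module-level configuration defined outside this snippet.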
    train_loader = VQADataLoader(**train_loader_kwargs)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model_kwargs.update({'vocab': train_loader.vocab, 'device': device})
    val_loader = VQADataLoader(**val_loader_kwargs)
    model = XNMNet(**model_kwargs).to(device)
    train_loader.glove_matrix = torch.FloatTensor(train_loader.glove_matrix).to(device)
    with torch.no_grad():
        model.token_embedding.weight.set_(train_loader.glove_matrix)
    ################################################################
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(parameters, lr, weight_decay=0)
    for epoch in range(num_epoch):
        model.train()
        i = 0
        for batch in tqdm(train_loader, total=len(train_loader)):
            progress = epoch + i / len(train_loader)
            coco_ids, answers, *batch_input = [todevice(x, device) for x in batch]
            logits, others = model(*batch_input)
            ##################### loss #####################
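            # Assumption: 'answers' holds per-candidate soft scores (occurrence counts out of
            # the 10 VQA annotators), so (nll * answers / 10) is a soft-target cross-entropy.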
            nll = -nn.functional.log_softmax(logits, dim=1)
            loss = (nll * answers / 10).sum(dim=1).mean()
            #################################################
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_value_(parameters, clip_value=0.5)
            optimizer.step()
            if (i + 1) % (len(train_loader) // 50) == 0:
                logging.info("Progress %.3f  ce_loss = %.3f" % (progress, loss.item()))
            i += 1
        train_acc, train_loss = validate(model, train_loader, device, withLossFlag=True, func=nn.functional)
        logging.info('\n ~~~~~~ Epoch: %.4f ~~~~~~~\n' % epoch)
        logging.info('\n ~~~~~~ Train Accuracy: %.4f ~~~~~~~\n' % train_acc)
        logging.info('\n ~~~~~~ Train Loss: %.4f ~~~~~~~\n' % train_loss)
        valid_acc, valid_loss = validate(model, val_loader, device, withLossFlag=True, func=nn.functional)
        logging.info('\n ~~~~~~ Valid Accuracy: %.4f ~~~~~~~\n' % valid_acc)
        logging.info('\n ~~~~~~ Valid Loss: %.4f ~~~~~~~\n' % valid_loss)
Example No. 9
def validate_with_david_generated_program(model, data, device, pretrained_dir):
    program_generator = load_program_generator(
        os.path.join(pretrained_dir, 'program_generator.pt')).to(device)
    david_vocab = json.load(
        open(os.path.join(pretrained_dir, 'david_vocab.json')))
    david_vocab['program_idx_to_token'] = invert_dict(
        david_vocab['program_token_to_idx'])
    details = {
        cat: [0, 0]
        for cat in
        {'count', 'compare number', 'exist', 'query', 'compare attribute'}
    }

    count, correct = 0, 0
    model.eval()
    print('validate...')
    for batch in tqdm(data.generator(), total=len(data)):
        answers, questions, gt_programs, gt_program_inputs, *batch_input = [
            todevice(x, device) for x in batch
        ]
        programs, program_inputs = [], []
        # generate program using david model for each question
        for i in range(questions.size(0)):
            question_str = []
            for j in range(questions.size(1)):
                word = data.vocab['question_idx_to_token'][questions[i, j].item()]
                if word == '<START>': continue
                if word == '<END>': break
                question_str.append(word)
            question_str = ' '.join(question_str)  # question string
            david_program = generate_single_program(question_str,
                                                    program_generator,
                                                    david_vocab, device)
            david_program = [
                david_vocab['program_idx_to_token'][i.item()]
                for i in david_program.squeeze()
            ]
            # convert david program to ours. return two index lists
            program, program_input = convert_david_program_to_mine(
                david_program, data.vocab)
            programs.append(program)
            program_inputs.append(program_input)
        # padding
        max_len = max(len(p) for p in programs)
        for i in range(len(programs)):
            while len(programs[i]) < max_len:
                programs[i].append(data.vocab['program_token_to_idx']['<NULL>'])
                program_inputs[i].append(data.vocab['question_token_to_idx']['<NULL>'])
        # to tensor
        programs = torch.LongTensor(programs).to(device)
        program_inputs = torch.LongTensor(program_inputs).to(device)

        logits, others = model(programs, program_inputs, *batch_input)
        predicts = logits.max(1)[1]
        correct += torch.eq(predicts, answers).long().sum().item()
        count += answers.size(0)
        # details
        for i in range(len(answers)):
            for j in range(len(gt_programs[i])):
                program = data.vocab['program_idx_to_token'][gt_programs[i][j].item()]
                if program in ['<NULL>', '<START>', '<END>', '<UNK>', 'unique']:
                    continue
                cat = map_program_to_cat[program]
                details[cat][0] += int(predicts[i].item() == answers[i].item())
                details[cat][1] += 1
                break
    acc = correct / count
    details = {k: (v[0] / v[1]) for k, v in details.items()}
    return acc, details
Example No. 10
def train(args):
    logging.info("Create train_loader and val_loader.........")
    train_loader_kwargs = {
        'question_pt': args.train_question_pt,
        'vocab_json': args.vocab_json,
        'feature_h5': args.feature_h5,
        'batch_size': args.batch_size,
        'spatial': args.spatial,
        'num_workers': 2,
        'shuffle': True
    }
    train_loader = VQADataLoader(**train_loader_kwargs)
    if args.val:
        val_loader_kwargs = {
            'question_pt': args.val_question_pt,
            'vocab_json': args.vocab_json,
            'feature_h5': args.feature_h5,
            'batch_size': args.batch_size,
            'spatial': args.spatial,
            'num_workers': 2,
            'shuffle': False
        }
        val_loader = VQADataLoader(**val_loader_kwargs)

    logging.info("Create model.........")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model_kwargs = {
        'vocab': train_loader.vocab,
        'dim_v': args.dim_v,
        'dim_word': args.dim_word,
        'dim_hidden': args.dim_hidden,
        'dim_vision': args.dim_vision,
        'dim_edge': args.dim_edge,
        'cls_fc_dim': args.cls_fc_dim,
        'dropout_prob': args.dropout,
        'T_ctrl': args.T_ctrl,
        'glimpses': args.glimpses,
        'stack_len': args.stack_len,
        'device': device,
        'spatial': args.spatial,
        'use_gumbel': args.module_prob_use_gumbel == 1,
        'use_validity': args.module_prob_use_validity == 1,
    }
    model_kwargs_tosave = {
        k: v
        for k, v in model_kwargs.items() if k != 'vocab'
    }
    model = XNMNet(**model_kwargs).to(device)
    logging.info(model)
    logging.info('load glove vectors')
    train_loader.glove_matrix = torch.FloatTensor(
        train_loader.glove_matrix).to(device)
    model.token_embedding.weight.data.set_(train_loader.glove_matrix)
    ################################################################

    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(parameters, args.lr, weight_decay=0)

    start_epoch = 0
    if args.restore:
        print("Restore checkpoint and optimizer...")
        ckpt = os.path.join(args.save_dir, 'model.pt')
        ckpt = torch.load(ckpt, map_location={'cuda:0': 'cpu'})
        start_epoch = ckpt['epoch'] + 1
        model.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer,
                                                 0.5**(1 / args.lr_halflife))

    logging.info("Start training........")
    for epoch in range(start_epoch, args.num_epoch):
        model.train()
        for i, batch in enumerate(train_loader):
            progress = epoch + i / len(train_loader)
            coco_ids, answers, *batch_input = [
                todevice(x, device) for x in batch
            ]
            logits, others = model(*batch_input)
            ##################### loss #####################
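            # Same soft-target loss as in Example No. 8: answers / 10 are assumed soft VQA answer scores.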
            nll = -nn.functional.log_softmax(logits, dim=1)
            loss = (nll * answers / 10).sum(dim=1).mean()
            #################################################
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_value_(parameters, clip_value=0.5)
            optimizer.step()
            scheduler.step()  # step the per-iteration LR decay after optimizer.step(), as required by PyTorch >= 1.1
            if (i + 1) % (len(train_loader) // 50) == 0:
                logging.info("Progress %.3f  ce_loss = %.3f" %
                             (progress, loss.item()))
        save_checkpoint(epoch, model, optimizer, model_kwargs_tosave,
                        os.path.join(args.save_dir, 'model.pt'))
        logging.info(' >>>>>> save to %s <<<<<<' % (args.save_dir))
        if args.val:
            valid_acc = validate(model, val_loader, device)
            logging.info('\n ~~~~~~ Valid Accuracy: %.4f ~~~~~~~\n' %
                         valid_acc)