def do_test(loadPath):
    """Evaluate a saved team checkpoint and dump its conversations to JSON.

    Loads the pickled checkpoint at ``loadPath``, rebuilds the dataset and the
    team from the stored ``params``, runs one forward pass over the complete
    'train' split, prints the accuracies, and writes the reformatted
    conversations to a JSON file whose name is derived from ``loadPath``.

    Args:
        loadPath: Path to the pickled checkpoint. It must contain ``'final'``
            or ``'inter'``; that marker is replaced by ``'chatlog-<dtype>'``
            and ``'pickle'`` by ``'json'`` to build the output path.

    Returns:
        A pair ``(res1, res2)``: ``res1`` is ``(accuracy, firstAcc,
        secondAcc, atleastAcc)`` (percentages) and ``res2`` is the six values
        returned by ``all_metrics``.

    Raises:
        ValueError: If ``loadPath`` contains neither ``'final'`` nor
            ``'inter'``, so no output path can be derived.
    """
    # Resolve the marker before doing any work: previously a path without
    # either marker only failed (with UnboundLocalError) after the whole
    # evaluation had already run.
    if 'final' in loadPath:
        marker = 'final'
    elif 'inter' in loadPath:
        marker = 'inter'
    else:
        raise ValueError(
            "loadPath must contain 'final' or 'inter' to derive the chatlog "
            "path, got: %s" % loadPath)

    print('Loading model from: %s' % loadPath)
    # Pickle streams are binary; text mode breaks pickle.load on Python 3.
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(loadPath, 'rb') as fileId:
        loaded = pickle.load(fileId)

    #------------------------------------------------------------------------
    # build dataset, load agents
    #------------------------------------------------------------------------
    params = loaded['params']
    data = Dataloader(params)

    team = Team(params)
    team.loadModel(loaded)
    team.evaluate()
    #------------------------------------------------------------------------
    # test agents
    #------------------------------------------------------------------------
    dtypes = ['train']
    for dtype in dtypes:
        # evaluate on the train dataset, using greedy policy
        images, tasks, labels = data.getCompleteData(dtype)
        # forward pass
        preds, _, talk, talk_list = team.forward(
            Variable(images), Variable(tasks), True)

        # vocabulary sizes handed to all_metrics are fixed here rather than
        # read from params
        options = dict()
        options['qOutVocab'] = 3
        options['aOutVocab'] = 4
        m1, m2, ic1, ic2, h1, h2 = all_metrics(team, preds, talk_list, options)

        # per-sample correctness for first, second and both attributes
        firstMatch = preds[0].data == labels[:, 0].long()
        secondMatch = preds[1].data == labels[:, 1].long()
        matches = firstMatch & secondMatch
        atleastOne = firstMatch | secondMatch

        # compute accuracy (in percent)
        firstAcc = 100 * torch.mean(firstMatch.float())
        secondAcc = 100 * torch.mean(secondMatch.float())
        atleastAcc = 100 * torch.mean(atleastOne.float())
        accuracy = 100 * torch.mean(matches.float())
        print('\nOverall accuracy [%s]: %.2f (f: %.2f s: %.2f, atleast: %.2f)'\
                        % (dtype, accuracy, firstAcc, secondAcc, atleastAcc))

        # pretty print
        talk = data.reformatTalk(talk, preds, images, tasks, labels)
        savePath = loadPath.replace(marker, 'chatlog-' + dtype)
        savePath = savePath.replace('pickle', 'json')
        print('Saving conversations: %s' % savePath)
        with open(savePath, 'w') as fileId:
            json.dump(talk, fileId)
        saveResultPage(savePath)

        res1 = accuracy, firstAcc, secondAcc, atleastAcc
        res2 = m1, m2, ic1, ic2, h1, h2
        # Returns after the first (and currently only) entry of dtypes.
        return res1, res2
Exemplo n.º 2
0
                                                        params['negFraction'])

    # forward pass
    team.forward(Variable(batchImg), Variable(batchTask))
    # backward pass
    batchReward = team.backward(optimizer, batchLabels, epoch)

    # take a step by optimizer
    optimizer.step()
    #--------------------------------------------------------------------------
    # switch to evaluate
    team.evaluate()

    for dtype in ['train', 'test']:
        # get the entire batch
        img, task, labels = data.getCompleteData(dtype)
        # evaluate on the train dataset, using greedy policy
        guess, _, _ = team.forward(Variable(img), Variable(task))
        # compute accuracy for color, shape, and both
        firstMatch = guess[0].data == labels[:, 0].long()
        secondMatch = guess[1].data == labels[:, 1].long()
        matches[dtype] = firstMatch & secondMatch
        accuracy[dtype] = 100*torch.sum(matches[dtype])\
                                    /float(matches[dtype].size(0))
    # switch to train
    team.train()

    # break if train accuracy reaches 100%
    if accuracy['train'] == 100: break

    # save for every 5k epochs
            probs_c = F.softmax(logits_c, dim=0).data.numpy()
            cic = calc_cic(p_a_given_do_c[ag], probs_c, env.n_comm, env.n_acts)
            cics[ag].append(cic)

    return cics


if __name__ == '__main__':
    # Script entry point: load the checkpoint named by the first command-line
    # argument and run all_metrics on the train and test splits.
    loadPath = sys.argv[1]
    print('Loading model from: %s' % loadPath)
    # Pickle streams are binary; text mode breaks pickle.load on Python 3.
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(loadPath, 'rb') as fileId:
        loaded = pickle.load(fileId)

    params = loaded['params']
    data = Dataloader(params)

    team = Team(params)
    team.loadModel(loaded)
    team.evaluate()
    team.setOverhear(False)
    team.setOverhearTask(False)

    for dtype in ['train', 'test']:
        print('%s metrics' % dtype)
        images, tasks, labels = data.getCompleteData(dtype)
        # forward pass; the same images/tasks are passed for both input slots
        preds1, preds2, _, _, talk1, talk2 = team.forward(
            Variable(images), Variable(tasks),
            Variable(images), Variable(tasks), True)

        all_metrics(team, preds1, preds2, talk1, talk2, params)
Exemplo n.º 4
0
def runMAMLtrain(runName='single'):
    """Meta-train the team with a per-task inner adaptation loop.

    In every episode ``params['num_tasks_per_episode']`` training tasks are
    sampled. For each task a deep copy of the team is updated for
    ``params['inner_steps'] - 1`` optimizer steps on one batch; a further
    backward pass on a newly sampled batch is then run on the copy and the
    resulting ``.grad`` values are subtracted (scaled by
    ``params['learningRate']``) from the original team's parameters.

    Every ``params['validation_frequency']`` episodes the team is evaluated on
    the train/valid/test splits of both seen and unseen tasks. When the mean
    of the two validation accuracies matches or beats the stored best, the
    model is saved and the results are persisted with ``store_results``.
    Training stops early once the seen-task train accuracy reaches 100.

    Args:
        runName: Identifier appended to the save directory and passed to
            ``load_best_results`` / ``store_results``.

    Returns:
        The best results mapping (keys such as ``"valid_seen_domains"`` and
        ``"valid_unseen_domains"``).
    """

    #------------------------------------------------------------------------
    # setup experiment and dataset
    #------------------------------------------------------------------------
    data = Dataloader(options)
    # result is unused in this function; call kept as in the original
    numInst = data.getInstCount()

    params = data.params
    # append options from options to params
    for key, value in options.items():
        params[key] = value

    ### checking and creating the folders and results files
    #------------------------------------------------------------------------
    # build agents, and setup optimizer
    #------------------------------------------------------------------------
    team = Team(params)
    team.train()

    #------------------------------------------------------------------------
    # train agents
    #------------------------------------------------------------------------
    # begin training

    ### split tasks into seen (train) and unseen (test), stored on data
    task_list = list(range(data.numPairTasks))

    num_train_tasks = 11
    num_test_tasks = 1
    train_tasks = task_list[:num_train_tasks]
    test_tasks = task_list[num_train_tasks:]
    data.seenTaskList = torch.LongTensor(train_tasks)
    data.unseenTaskList = torch.LongTensor(test_tasks)

    savePath = 'models/' + MODELNAME + '/' + "Remember:" + str(
        params['remember']) + "_AoutVocab=" + str(
            params['aOutVocab']) + "_QoutVocab=" + str(
                params['qOutVocab']) + "/" + "run" + str(runName)
    makeDirs(savePath)
    best_results = load_best_results(MODELNAME, runName, params)

    matches = {}
    accuracy = {}
    matches_unseen = {}
    accuracy_unseen = {}

    # The original team is never trained through autograd: its parameters are
    # updated manually below, so gradient tracking is switched off.
    for param in team.aBot.parameters():
        param.requires_grad = False
    for param in team.qBot.parameters():
        param.requires_grad = False

    for episode in range(params['num_episodes']):

        # never accumulated (see the commented-out line below); printed as 0
        totalReward = 0
        sampled_tasks = sample(train_tasks, params['num_tasks_per_episode'])

        stored_abot_params = []
        stored_qbot_params = []

        for task in sampled_tasks:
            ## create copy of team for inner update, and inner optimizers
            ### all tasks should be the same in an iteration with maml
            batch_task_list = torch.LongTensor(
                [task for _ in range(params['batchSize'])])
            copied_team = deepcopy(team)
            for param in copied_team.aBot.parameters():
                param.requires_grad = True
            for param in copied_team.qBot.parameters():
                param.requires_grad = True

            optimizer_inner = optim.Adam([
                {'params': copied_team.aBot.parameters(),
                 'lr': params['learningRate_inner']},
                {'params': copied_team.qBot.parameters(),
                 'lr': params['learningRate_inner']},
            ])

            # get double attribute tasks
            if 'train' not in matches:
                batchImg, batchTask, batchLabels = data.getBatch(
                    params['batchSize'], tasks=batch_task_list)
            else:
                batchImg, batchTask, batchLabels = data.getBatchSpecial(
                    params['batchSize'], matches['train'],
                    params['negFraction'], tasks=batch_task_list)

            for inner_step in range(params['inner_steps'] - 1):

                # forward pass
                copied_team.forward(Variable(batchImg), Variable(batchTask))
                # backward pass
                batchReward = copied_team.backward(optimizer_inner,
                                                   batchLabels, episode)

                # take a step by optimizer
                optimizer_inner.step()
                optimizer_inner.zero_grad()

            ## last inner step grads will be transferred to the main model update
            ## sampling query set
            if 'train' not in matches:
                batchImg, batchTask, batchLabels = data.getBatch(
                    params['batchSize'], tasks=batch_task_list)
            else:
                batchImg, batchTask, batchLabels = data.getBatchSpecial(
                    params['batchSize'], matches['train'],
                    params['negFraction'], tasks=batch_task_list)

            # forward pass
            copied_team.forward(Variable(batchImg), Variable(batchTask))
            #totalReward += copied_team.totalReward
            # backward pass (no optimizer step: only the .grad values are
            # needed -- presumably populated by backward; confirm in Team)
            batchReward = copied_team.backward(optimizer_inner, batchLabels,
                                               episode)

            ## storing inner gradients (parameter iterators of the copy)
            stored_abot_params.append(copied_team.aBot.parameters())
            stored_qbot_params.append(copied_team.qBot.parameters())

        ## get the stored gradients and update the original model
        ABotParamList = list(team.aBot.parameters())
        for stored_abot_param_list in stored_abot_params:
            for paramInd, param in enumerate(stored_abot_param_list):
                ABotParamList[paramInd] -= params['learningRate'] * param.grad

        QBotParamList = list(team.qBot.parameters())
        for stored_qbot_param_list in stored_qbot_params:
            for paramInd, param in enumerate(stored_qbot_param_list):
                QBotParamList[paramInd] -= params['learningRate'] * param.grad

        ## reducing lr every 1000 episodes
        # Parentheses are required: `episode + 1 % 1000` parses as
        # `episode + (1 % 1000)`, which is never 0, so the decay never ran.
        if (episode + 1) % 1000 == 0:
            params['learningRate'] /= 5
            params['learningRate_inner'] /= 5

        ### checking after certain episodes
        if episode % params['validation_frequency'] == 0:
            team.evaluate()
            best_avg_valid_acc = 0.5 * best_results[
                "valid_seen_domains"] + 0.5 * best_results[
                    "valid_unseen_domains"]

            for dtype in ['train', 'valid', 'test']:
                # get the entire batch
                img, task, labels = data.getCompleteData(dtype)
                # evaluate on the complete split, using greedy policy
                guess, _, _ = team.forward(Variable(img), Variable(task))
                # compute accuracy for color, shape, and both

                firstMatch = guess[0].data == labels[:, 0].long()
                secondMatch = guess[1].data == labels[:, 1].long()
                matches[dtype] = firstMatch & secondMatch
                accuracy[dtype] = 100*torch.sum(matches[dtype])\
                                            /float(matches[dtype].size(0))

            ### check acc on unseen domains
            for dtype in ['train', 'valid', 'test']:
                # get the entire batch
                img, task, labels = data.getCompleteData(dtype, 'unseen')
                # evaluate on the complete split, using greedy policy
                guess, _, _ = team.forward(Variable(img), Variable(task))
                # compute accuracy for color, shape, and both

                firstMatch = guess[0].data == labels[:, 0].long()
                secondMatch = guess[1].data == labels[:, 1].long()
                matches_unseen[dtype] = firstMatch & secondMatch
                accuracy_unseen[dtype] = 100*torch.sum(matches_unseen[dtype])\
                                            /float(matches_unseen[dtype].size(0))

            timestamp = strftime("%a, %d %b %Y %X", gmtime())
            avg_valid_acc = 0.5 * accuracy['valid'].item(
            ) + 0.5 * accuracy_unseen['valid'].item()
            print('[%s][Episode: %.2f][Query set total reward: %.4f][SEEN TASK--Tr acc: %.2f Valid acc: %.2f Test acc: %.2f][UNSEEN TASK--Tr acc: %.2f V acc: %.2f Tst acc: %.2f]' % \
                            (timestamp, episode, totalReward,\
                            accuracy['train'], accuracy['valid'], accuracy['test'], accuracy_unseen['train'], accuracy_unseen['valid'], accuracy_unseen['test']))

            # save model and res if validation accuracy is the best
            if avg_valid_acc >= best_avg_valid_acc:
                saveModel(savePath, team, optimizer_inner, params)
                best_results = {
                    "train_seen_domains": accuracy['train'].item(),
                    "valid_seen_domains": accuracy['valid'].item(),
                    "test_seen_domains": accuracy['test'].item(),
                    "train_unseen_domains": accuracy_unseen['train'].item(),
                    "valid_unseen_domains": accuracy_unseen['valid'].item(),
                    "test_unseen_domains": accuracy_unseen['test'].item()
                }
                store_results(best_results, MODELNAME, runName, params)
                best_avg_valid_acc = 0.5 * best_results[
                    "valid_seen_domains"] + 0.5 * best_results[
                        "valid_unseen_domains"]
            # break if train accuracy reaches 100%
            if accuracy['train'] == 100: break
            # switch to train

            team.train()

    ### save final model
    ##saveModel(savePath, team, optimizer_inner, params)
    print("run finished, returning best results")
    return best_results
Exemplo n.º 5
0
def runOriginalModelTrain(runName='single'):
    """Train the team with a single Adam optimizer and track the best model.

    Builds the dataset and team from the module-level ``options``, trains for
    ``params['numEpochs']`` epochs' worth of iterations, and every
    ``params['validation_frequency']`` iterations evaluates on the
    train/valid/test splits of both seen and unseen tasks. Whenever the mean
    of the two validation accuracies matches or beats the stored best, the
    model is saved and the results are persisted via ``store_results``.
    Training stops early once seen-task train accuracy reaches 100.

    Args:
        runName: Identifier appended to the save directory and passed to
            ``load_best_results`` / ``store_results``.

    Returns:
        The best results mapping (keys such as ``"valid_seen_domains"``).
    """
    splits = ('train', 'valid', 'test')

    #------------------------------------------------------------------------
    # dataset and seen/unseen task split
    #------------------------------------------------------------------------
    data = Dataloader(options)
    instCounts = data.getInstCount()

    # first 11 tasks are trained on; whatever remains is held out as unseen
    num_train_tasks = 11
    all_tasks = list(range(data.numPairTasks))
    data.seenTaskList = torch.LongTensor(all_tasks[:num_train_tasks])
    data.unseenTaskList = torch.LongTensor(all_tasks[num_train_tasks:])

    # options override/extend the dataset's own params
    params = data.params
    for name, setting in options.items():
        params[name] = setting

    #------------------------------------------------------------------------
    # agents and optimizer
    #------------------------------------------------------------------------
    team = Team(params)
    team.train()
    optimizer = optim.Adam([
        {'params': team.aBot.parameters(), 'lr': params['learningRate']},
        {'params': team.qBot.parameters(), 'lr': params['learningRate']},
    ])

    # at least one iteration per epoch, even for tiny datasets
    itersPerEpoch = max(
        1, int(np.ceil(instCounts['train'] / params['batchSize'])))

    configDir = "Remember:" + str(params['remember']) \
        + "_AoutVocab=" + str(params['aOutVocab']) \
        + "_QoutVocab=" + str(params['qOutVocab'])
    savePath = '/'.join(['models', MODELNAME, configDir, "run" + str(runName)])
    makeDirs(savePath)
    best_results = load_best_results(MODELNAME, runName, params)

    def scoreSplit(split, *domain):
        """Return (both-attributes-correct mask, accuracy in percent)."""
        img, task, labels = data.getCompleteData(split, *domain)
        guess, _, _ = team.forward(Variable(img), Variable(task))
        firstHit = guess[0].data == labels[:, 0].long()
        secondHit = guess[1].data == labels[:, 1].long()
        bothHit = firstHit & secondHit
        return bothHit, 100 * torch.sum(bothHit) / float(bothHit.size(0))

    matches, accuracy = {}, {}
    matches_unseen, accuracy_unseen = {}, {}

    #------------------------------------------------------------------------
    # training loop
    #------------------------------------------------------------------------
    for step in range(params['numEpochs'] * itersPerEpoch):
        epoch = float(step) / itersPerEpoch

        # once a train evaluation exists, its match mask is handed to
        # getBatchSpecial together with negFraction; before that, plain batch
        if 'train' in matches:
            batchImg, batchTask, batchLabels = data.getBatchSpecial(
                params['batchSize'], matches['train'], params['negFraction'])
        else:
            batchImg, batchTask, batchLabels = data.getBatch(
                params['batchSize'])

        # forward / backward / optimizer step
        team.train()
        team.forward(Variable(batchImg), Variable(batchTask))
        batchReward = team.backward(optimizer, batchLabels, epoch)
        optimizer.step()
        optimizer.zero_grad()

        # everything below only runs on validation iterations
        if step % params['validation_frequency'] != 0:
            continue

        team.evaluate()
        best_avg_valid_acc = 0.5 * best_results["valid_seen_domains"] \
            + 0.5 * best_results["valid_unseen_domains"]

        # seen tasks first, then unseen tasks
        for split in splits:
            matches[split], accuracy[split] = scoreSplit(split)
        for split in splits:
            matches_unseen[split], accuracy_unseen[split] = scoreSplit(
                split, 'unseen')

        avg_valid_acc = 0.5 * accuracy['valid'].item() \
            + 0.5 * accuracy_unseen['valid'].item()

        # checkpoint when validation is at least as good as the best so far
        if avg_valid_acc >= best_avg_valid_acc:
            saveModel(savePath, team, optimizer, params)
            best_results = {
                "train_seen_domains": accuracy['train'].item(),
                "valid_seen_domains": accuracy['valid'].item(),
                "test_seen_domains": accuracy['test'].item(),
                "train_unseen_domains": accuracy_unseen['train'].item(),
                "valid_unseen_domains": accuracy_unseen['valid'].item(),
                "test_unseen_domains": accuracy_unseen['test'].item(),
            }
            store_results(best_results, MODELNAME, runName, params)
            best_avg_valid_acc = 0.5 * best_results["valid_seen_domains"] \
                + 0.5 * best_results["valid_unseen_domains"]

        # stop as soon as the seen-task train split is solved
        if accuracy['train'] == 100:
            break

        # progress line only on validation iterations that are multiples of 100
        if step % 100 == 0:
            stamp = strftime("%a, %d %b %Y %X", gmtime())
            print('[%s][Iter: %d][Ep: %.2f][R: %.4f][SEEN TASKS--Train: %.2f Valid: %.2f Test: %.2f][UNSEEN TASKS--Train: %.2f V: %.2f Tst: %.2f]' % (
                stamp, step, epoch, team.totalReward,
                accuracy['train'], accuracy['valid'], accuracy['test'],
                accuracy_unseen['train'], accuracy_unseen['valid'],
                accuracy_unseen['test']))

    ##saveModel(savePath, team, optimizer, params)
    print("run finished, returning best results")
    return best_results