Example #1
def evaluation():
    test_data = aggregated_loader(args.img_dir,
                                  args.sal_dir,
                                  args.que_file,
                                  args.word2idx,
                                  'test',
                                  args.split_info,
                                  args.que_len,
                                  args.temporal_step,
                                  args.historical_step,
                                  transform,
                                  test_mode=True)
    testloader = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch, shuffle=False,
        num_workers=1)  # batch size should be fixed to 1 for evaluation
    model = SWM_agg(embed_size=args.embedding_size, vocab=train_data.word2idx)
    model.load_state_dict(torch.load(os.path.join(args.checkpoint,
                                                  'model_best.pth')),
                          strict=True)
    model = model.cuda()

    model.eval()
    eval_score = dict()
    for metric in eval_metrics:
        eval_score[metric] = []

    for i, (img, que, sal, fix) in enumerate(testloader):
        # iterate through different frames of the same video sequence
        img, que = img.cuda(), que.cuda()
        pred = model(img,que)

        if len(pred) > 1:
            pred = pred.data.cpu().numpy().squeeze()
            sal = sal.numpy().squeeze()
            fix = fix.numpy().squeeze()
        else:
            pred = pred.data.cpu().numpy()
            sal = sal.numpy()
            fix = fix.numpy()
        for j in range(len(pred)):
            for k in range(len(pred[j])):
                cur_pred = pred[j,k] # evaluate on the current frame
                cur_pred = cv2.resize(cur_pred,(256,128))
                if cur_pred.max()>0:
                    cur_pred /= cur_pred.max()

                cur_sal = sal[j,k]
                cur_fix = fix[j,k]
                cur_pred = distortion_corr(cur_pred)
                cur_sal = distortion_corr(cur_sal)
                cur_fix = distortion_corr(cur_fix)

                if args.center_bias:
                    cur_pred = add_center_bias(cur_pred)
                eval_score['cc'].append(cal_cc_score(cur_pred,cur_sal))
                eval_score['sim'].append(cal_sim_score(cur_pred,cur_sal))
                eval_score['kld'].append(cal_kld_score(cur_pred,cur_sal))
                eval_score['nss'].append(cal_nss_score(cur_pred,cur_fix))
                eval_score['sauc'].append(cal_sauc_score(cur_pred,cur_fix,test_data.shuf_map))

    print('Evaluation scores for aggregated attention')
    for metric in eval_score.keys():
        print('%s: %f' % (metric.upper(), np.mean(eval_score[metric])))
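The snippet above relies on several module-level names defined elsewhere in the script (eval_metrics, args, transform, train_data). A minimal sketch of how they might be set up, assuming an argparse command line and torchvision preprocessing; the option list and the transform are illustrative, not the repository's exact configuration:

import argparse
import torchvision.transforms as transforms

# Metrics accumulated by evaluation(); matches the keys used above.
eval_metrics = ['cc', 'sim', 'kld', 'nss', 'sauc']

# Hypothetical subset of the command-line options referenced above.
parser = argparse.ArgumentParser()
parser.add_argument('--img_dir', type=str, required=True)
parser.add_argument('--sal_dir', type=str, required=True)
parser.add_argument('--que_file', type=str, required=True)
parser.add_argument('--checkpoint', type=str, default='checkpoints')
parser.add_argument('--embedding_size', type=int, default=300)
parser.add_argument('--batch', type=int, default=1)
parser.add_argument('--center_bias', action='store_true')
args = parser.parse_args()

# Generic image preprocessing; the exact size and normalization are assumptions.
transform = transforms.Compose([
    transforms.Resize((240, 480)),
    transforms.ToTensor(),
])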
Example #2
    def validation(iteration):
        # initialize evaluation score
        model.eval()
        eval_score = dict()

        for metric in eval_metrics:
            eval_score[metric] = []

        for i, (img, que, sal, fix) in enumerate(testloader):
            # iterate through different frames of the same video sequence
            img, que = img.cuda(), que.cuda()
            pred = model(img,que)

            if len(pred) > 1:
                pred = pred.data.cpu().numpy().squeeze()
                sal = sal.numpy().squeeze()
                fix = fix.numpy().squeeze()
            else:
                pred = pred.data.cpu().numpy()
                sal = sal.numpy()
                fix = fix.numpy()
            for j in range(len(pred)):
                for k in range(len(pred[j])):
                    cur_pred = pred[j,k] # evaluate on the current frame
                    cur_pred = cv2.resize(cur_pred,(256,128))
                    if cur_pred.max()>0:
                        cur_pred /= cur_pred.max()

                    cur_sal = sal[j,k]
                    cur_fix = fix[j,k]
                    cur_pred = distortion_corr(cur_pred)
                    cur_sal = distortion_corr(cur_sal)
                    cur_fix = distortion_corr(cur_fix)

                    if args.center_bias:
                        cur_pred = add_center_bias(cur_pred)
                    eval_score['cc'].append(cal_cc_score(cur_pred,cur_sal))
                    eval_score['sim'].append(cal_sim_score(cur_pred,cur_sal))
                    eval_score['kld'].append(cal_kld_score(cur_pred,cur_sal))
                    eval_score['nss'].append(cal_nss_score(cur_pred,cur_fix))
                    eval_score['sauc'].append(cal_sauc_score(cur_pred,cur_fix,test_data.shuf_map))


        with tf_summary_writer.as_default():
            for metric in eval_score.keys():
                tf.summary.scalar(metric.upper(),np.mean(eval_score[metric]),step=iteration)

        return np.mean(eval_score['cc'])
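validation() closes over model, testloader and tf_summary_writer, so it is defined inside the training routine. A rough sketch of how it could be driven, assuming a TensorFlow 2 summary writer for logging and a simple keep-the-best-CC checkpoint rule; train_one_step, args.log_dir, args.max_iter and args.val_interval are hypothetical placeholders:

import os
import torch
import tensorflow as tf

tf_summary_writer = tf.summary.create_file_writer(args.log_dir)

best_cc = float('-inf')
for iteration in range(1, args.max_iter + 1):
    train_one_step()  # placeholder for the actual training update
    if iteration % args.val_interval == 0:
        cur_cc = validation(iteration)  # logs all metrics, returns mean CC
        model.train()  # validation() left the model in eval mode
        if cur_cc > best_cc:
            best_cc = cur_cc
            torch.save(model.state_dict(),
                       os.path.join(args.checkpoint, 'model_best.pth'))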
Example #3
def evaluation():
    test_data = conditional_loader(args.img_dir,
                                   args.sal_dir,
                                   args.que_file,
                                   args.word2idx,
                                   'test',
                                   args.split_info,
                                   args.que_len,
                                   args.temporal_step,
                                   args.historical_step,
                                   transform,
                                   test_mode=True)
    testloader = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch, shuffle=False,
        num_workers=1)  # batch size should be fixed to 1 for evaluation

    model = SWM(embed_size=args.embedding_size, vocab=train_data.word2idx)
    model = model.cuda()
    model.load_state_dict(
        torch.load(os.path.join(args.checkpoint, 'model_best.pth')))

    model.eval()
    eval_score = dict()
    label_pool = ['correct', 'incorrect']

    for cond_label in label_pool:
        eval_score[cond_label] = dict()
        for metric in eval_metrics:
            eval_score[cond_label][metric] = []

    for i, (img, que, cur_sal_pos, cur_fix_pos, cur_sal_neg,
            cur_fix_neg) in enumerate(testloader):
        # iterate through different frames of the same video sequence
        img, que = img.cuda(), que.cuda()
        pred = model(img, que)

        sal, fix = [], []
        if len(pred) > 1:
            pred = pred.data.cpu().numpy().squeeze()
            sal.append(cur_sal_pos.numpy().squeeze())
            sal.append(cur_sal_neg.numpy().squeeze())
            fix.append(cur_fix_pos.numpy().squeeze())
            fix.append(cur_fix_neg.numpy().squeeze())
        else:
            pred = pred.data.cpu().numpy()
            sal.append(cur_sal_pos.numpy())
            sal.append(cur_sal_neg.numpy())
            fix.append(cur_fix_pos.numpy())
            fix.append(cur_fix_neg.numpy())

        for j in range(len(pred)):
            for k in range(len(pred[j])):
                for cond_idx, cond_label in enumerate(label_pool):
                    cur_pred = pred[
                        j, k, cond_idx]  # only evaluate on the current frame
                    cur_pred = cv2.resize(cur_pred, (256, 128))
                    cur_sal = sal[cond_idx][j, k]
                    cur_fix = fix[cond_idx][j, k]

                    cur_pred = distortion_corr(cur_pred)
                    cur_sal = distortion_corr(cur_sal)
                    cur_fix = distortion_corr(cur_fix)

                    if args.center_bias:
                        cur_pred = add_center_bias(cur_pred)
                    eval_score[cond_label]['cc'].append(
                        cal_cc_score(cur_pred, cur_sal))
                    eval_score[cond_label]['sim'].append(
                        cal_sim_score(cur_pred, cur_sal))
                    eval_score[cond_label]['kld'].append(
                        cal_kld_score(cur_pred, cur_sal))
                    eval_score[cond_label]['nss'].append(
                        cal_nss_score(cur_pred, cur_fix))
                    eval_score[cond_label]['sauc'].append(
                        cal_sauc_score(cur_pred, cur_fix,
                                       test_data.shuf_map[cond_idx]))

    for cond_label in eval_score.keys():
        print('Evaluation scores for %s attention' % cond_label)
        for metric in eval_score[cond_label].keys():
            print('%s: %f' %
                  (metric.upper(), np.mean(eval_score[cond_label][metric])))
        print('\n')
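distortion_corr and add_center_bias come from the project's utilities and are not shown in this example. Purely as an illustration of the idea (not the repository's implementation), a center-bias prior is often just an elementwise weighting of the prediction with a Gaussian placed at the image centre:

import numpy as np

def add_center_bias_sketch(pred, sigma_ratio=6.0):
    # Hypothetical stand-in: weight the prediction map with a Gaussian center prior.
    h, w = pred.shape
    ys, xs = np.mgrid[0:h, 0:w]
    gauss = np.exp(-(((ys - h / 2.0) ** 2) / (2 * (h / sigma_ratio) ** 2) +
                     ((xs - w / 2.0) ** 2) / (2 * (w / sigma_ratio) ** 2)))
    biased = pred * gauss
    return biased / biased.max() if biased.max() > 0 else biased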
Example #4
    def validation(iteration):
        # initialize evaluation score
        model.eval()
        eval_score = dict()
        label_pool = ['correct', 'incorrect']

        for cond_label in label_pool:
            eval_score[cond_label] = dict()
            for metric in eval_metrics:
                eval_score[cond_label][metric] = []

        for i, (img, que, cur_sal_pos, cur_fix_pos, cur_sal_neg,
                cur_fix_neg) in enumerate(testloader):
            # iterate through different frames of the same video sequence
            img, que = img.cuda(), que.cuda()
            pred = model(img, que)

            sal, fix = [], []
            if len(pred) > 1:
                pred = pred.data.cpu().numpy().squeeze()
                sal.append(cur_sal_pos.numpy().squeeze())
                sal.append(cur_sal_neg.numpy().squeeze())
                fix.append(cur_fix_pos.numpy().squeeze())
                fix.append(cur_fix_neg.numpy().squeeze())
            else:
                pred = pred.data.cpu().numpy()
                sal.append(cur_sal_pos.numpy())
                sal.append(cur_sal_neg.numpy())
                fix.append(cur_fix_pos.numpy())
                fix.append(cur_fix_neg.numpy())

            for j in range(len(pred)):
                for k in range(len(pred[j])):
                    for cond_idx, cond_label in enumerate(label_pool):
                        cur_pred = pred[
                            j, k,
                            cond_idx]  # only evaluate on the current frame
                        cur_pred = cv2.resize(cur_pred, (256, 128))
                        cur_sal = sal[cond_idx][j, k]
                        cur_fix = fix[cond_idx][j, k]

                        cur_pred = distortion_corr(cur_pred)
                        cur_sal = distortion_corr(cur_sal)
                        cur_fix = distortion_corr(cur_fix)

                        if args.center_bias:
                            cur_pred = add_center_bias(cur_pred)
                        eval_score[cond_label]['cc'].append(
                            cal_cc_score(cur_pred, cur_sal))
                        eval_score[cond_label]['sim'].append(
                            cal_sim_score(cur_pred, cur_sal))
                        eval_score[cond_label]['kld'].append(
                            cal_kld_score(cur_pred, cur_sal))
                        eval_score[cond_label]['nss'].append(
                            cal_nss_score(cur_pred, cur_fix))
                        eval_score[cond_label]['sauc'].append(
                            cal_sauc_score(cur_pred, cur_fix,
                                           test_data.shuf_map[cond_idx]))

        with tf_summary_writer.as_default():
            for cond_label in eval_score.keys():
                for metric in eval_score[cond_label].keys():
                    tf.summary.scalar(cond_label + '_' + metric.upper(),
                                      np.mean(eval_score[cond_label][metric]),
                                      step=iteration)

        return np.mean(eval_score['correct']['cc'])
Example #5
def main():
    img_rows, img_cols = 300, 400
    #initializing salicon data
    sal_anno = '/home/eric/Desktop/experiment/salicon/salicon-api/annotations/fixations_train2014.json'
    salicon = SALICON(sal_anno)

    #loading VQA data
    vqa_dict = np.load('valid_data_train.npy')
    question_bank = np.load('question_type.npy')
    answer_bank = np.load('answer_type.npy')
    vqa_dir = '/media/eric/New Volume/VQA/VQA_HAT/vqahat_train'

    #defining data structure
    metrics = ['cc', 'sim', 'kld', 'emd', 'spearmanr']
    que_score = dict()
    ans_score = dict()
    overall_score = dict()

    for question in question_bank:
        que_score[question] = init_metrics(metrics)
    for answer in answer_bank:
        ans_score[answer] = init_metrics(metrics)
    overall_score = init_metrics(metrics)

    #main loop for comparing different attention maps
    nan_count_q = dict()
    nan_count_a = dict()
    nan_corr_q = dict()
    nan_corr_a = dict()
    nan_count = 0
    nan_corr = 0
    for i in question_bank:
        nan_count_q[i] = 0
        nan_corr_q[i] = 0
    for i in answer_bank:
        nan_count_a[i] = 0
        nan_corr_a[i] = 0

    for cur_data in vqa_dict:
        question_id = cur_data['question_id']
        question_type = cur_data['question_type']
        answer_type = cur_data['answer_type']
        img_id = cur_data['img_id']

        #load vqa attention map
        vqa_img = os.path.join(vqa_dir, str(question_id) + '_1.png')
        que_att_map = cv2.imread(vqa_img)
        que_att_map = que_att_map[:, :, 0]
        que_att_map = cv2.resize(que_att_map, (img_cols, img_rows),
                                 interpolation=cv2.INTER_LINEAR)
        que_att_map = que_att_map.astype('float32')
        que_att_map /= 255

        #load free-viewing attention map
        annIds = salicon.getAnnIds(img_id)
        anns = salicon.loadAnns(annIds)
        fv_att_map = salicon.showAnns(anns)
        fv_att_map = cv2.resize(fv_att_map, (img_cols, img_rows),
                                interpolation=cv2.INTER_LINEAR)

        #computing scores for different metrics
        cc = cal_cc_score(fv_att_map, que_att_map)
        sim = cal_sim_score(fv_att_map, que_att_map)
        kld = cal_kld_score(fv_att_map, que_att_map)
        emd = cal_emd_score(fv_att_map, que_att_map)
        rank_corr, _ = spearmanr(fv_att_map.reshape(-1),
                                 que_att_map.reshape(-1))

        #storing data in a naive way
        if np.isnan(cc):
            cc = 0
            nan_count_q[question_type] += 1
            nan_count_a[answer_type] += 1
            nan_count += 1
        if np.isnan(rank_corr):
            rank_corr = 0
            nan_corr_q[question_type] += 1
            nan_corr_a[answer_type] += 1
            nan_corr += 1

        que_score[question_type]['cc'] += cc
        que_score[question_type]['sim'] += sim
        que_score[question_type]['spearmanr'] += rank_corr
        que_score[question_type]['kld'] += kld
        que_score[question_type]['emd'] += emd
        que_score[question_type]['count'] += 1

        ans_score[answer_type]['cc'] += cc
        ans_score[answer_type]['sim'] += sim
        ans_score[answer_type]['spearmanr'] += rank_corr
        ans_score[answer_type]['kld'] += kld
        ans_score[answer_type]['emd'] += emd
        ans_score[answer_type]['count'] += 1

        overall_score['cc'] += cc
        overall_score['sim'] += sim
        overall_score['spearmanr'] += rank_corr
        overall_score['kld'] += kld
        overall_score['emd'] += emd
        overall_score['count'] += 1

    #computing average score
    for q_type in question_bank:
        for cur_metric in metrics:
            if cur_metric == 'cc':
                que_score[q_type][cur_metric] /= que_score[q_type][
                    'count'] - nan_count_q[q_type]
            elif cur_metric == 'spearmanr':
                que_score[q_type][cur_metric] /= que_score[q_type][
                    'count'] - nan_corr_q[q_type]
            else:
                que_score[q_type][cur_metric] /= que_score[q_type]['count']

    for a_type in answer_bank:
        for cur_metric in metrics:
            if cur_metric == 'cc':
                ans_score[a_type][cur_metric] /= ans_score[a_type][
                    'count'] - nan_count_a[a_type]
            elif cur_metric == 'spearmanr':
                ans_score[a_type][cur_metric] /= ans_score[a_type][
                    'count'] - nan_corr_a[a_type]
            else:
                ans_score[a_type][cur_metric] /= ans_score[a_type]['count']

    for cur_metric in metrics:
        if cur_metric == 'cc':
            overall_score[cur_metric] /= overall_score['count'] - nan_count
        elif cur_metric == 'spearmanr':
            overall_score[cur_metric] /= overall_score['count'] - nan_corr
        else:
            overall_score[cur_metric] /= overall_score['count']

    np.save('question_score', que_score)
    np.save('answer_score', ans_score)
    np.save('overall_score', overall_score)
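init_metrics() is not shown in this example. Given how the scores are accumulated and then divided by 'count' above, it presumably returns a zero-initialized accumulator per metric plus a sample counter; a minimal sketch under that assumption:

def init_metrics(metrics):
    # Zero-initialized accumulator for each metric, plus a sample counter.
    score = {metric: 0.0 for metric in metrics}
    score['count'] = 0
    return score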
Example #6
def multi_question():
    img_rows, img_cols = 300, 400
    vqa_dir = '/media/eric/New Volume/VQA/VQA_HAT/vqahat_train'
    IQ_pair = np.load('multi_question.npy')
    metrics = ['cc', 'sim', 'kld', 'emd', 'spearmanr']
    inter_score = dict()
    score = init_metrics(metrics)

    #main loop for comparing different attention maps
    nan_cc = 0
    nan_corr = 0
    for cur in IQ_pair.item():
        sal_map = []
        for q_id in IQ_pair.item()[cur]:
            I_dir = os.path.join(vqa_dir, str(q_id) + '_1.png')
            I = cv2.imread(I_dir)
            I = cv2.resize(I, (img_cols, img_rows),
                           interpolation=cv2.INTER_LINEAR)
            I = I[:, :, 0]
            I = I.astype('float32')
            sal_map.append(I)
        tmp_pair = [(0, 1), (0, 2), (1, 2)] if len(sal_map) == 3 else [(0, 1)]
        if len(sal_map) == 1:
            continue
        tmp_cc = 0
        tmp_kld = 0
        tmp_sim = 0
        tmp_corr = 0
        nan_corr_ = 0
        nan_cc_ = 0
        for pair in tmp_pair:
            cc = cal_cc_score(sal_map[pair[0]], sal_map[pair[1]])
            tmp_kld += cal_kld_score(sal_map[pair[0]], sal_map[pair[1]])
            tmp_sim += cal_sim_score(sal_map[pair[0]], sal_map[pair[1]])
            corr, _ = spearmanr(sal_map[pair[0]].reshape(-1),
                                sal_map[pair[1]].reshape(-1))

            if np.isnan(cc):
                nan_cc_ += 1
            else:
                tmp_cc += cc
            if np.isnan(corr):
                nan_corr_ += 1
            else:
                tmp_corr += corr
        score['count'] += 1
        score['kld'] += tmp_kld / len(sal_map)
        score['sim'] += tmp_sim / len(sal_map)
        if len(sal_map) - nan_cc_ > 0:
            score['cc'] += tmp_cc / (len(sal_map) - nan_cc_)
        else:
            nan_cc += 1
        if len(sal_map) - nan_corr_ > 0:
            score['spearmanr'] += tmp_corr / (len(sal_map) - nan_corr_)
        else:
            nan_corr += 1

    for metric in metrics:
        if metric == 'cc':
            score[metric] /= score['count'] - nan_cc
        elif metric == 'spearmanr':
            score[metric] /= score['count'] - nan_corr
        else:
            score[metric] /= score['count']

    np.save('multi_question_score', score)
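The scores are stored as a pickled Python dict inside a .npy file, so reading them back requires allow_pickle=True (needed in NumPy >= 1.16.3) followed by .item():

import numpy as np

# Load the aggregated multi-question scores saved above and print them.
score = np.load('multi_question_score.npy', allow_pickle=True).item()
for metric in ['cc', 'sim', 'kld', 'spearmanr']:
    print('%s: %f' % (metric.upper(), score[metric]))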