Example No. 1
def detect(re_detect=True, save_path='my_test_relation_prediction.json',
           top_tree=10, overlap=0.3, iou_thr=0.3):
    dataset = VidVRD(anno_rpath=anno_rpath,
                     video_rpath=video_rpath,
                     splits=splits)
    with open(os.path.join(get_model_path(), 'baseline_setting.json'), 'r') as fin:
        param = json.load(fin)

    if re_detect:
        short_term_relations = model.predict(dataset, param)
        with open(short_term_predication_path, 'w') as stp_out_f:
            json.dump(short_term_relations, stp_out_f)
        print("Successfully saved short-term prediction to: " + short_term_predication_path)

    else:
        with open(short_term_predication_path, 'r') as stp_in_f:
            short_term_relations = json.load(stp_in_f)

    # print('greedy relational association ...')
    print('origin mht association...')
    video_relations = dict()
    for vid in tqdm(short_term_relations.keys()):
        # res = association.greedy_relational_association(short_term_relations[vid], param['seg_topk'])
        res = origin_mht_relational_association(short_term_relations[vid], param['seg_topk'],
                                                top_tree=top_tree, overlap=overlap, iou_thr=iou_thr)
        res = sorted(res, key=lambda r: r['score'], reverse=True)[:param['video_topk']]
        video_relations[vid] = res
    # save detection result
    with open(os.path.join(get_model_path(), save_path), 'w') as fout:
        output = {
            'version': 'VERSION 1.0',
            'results': video_relations
        }
        json.dump(output, fout)
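
A minimal sketch of how this detect() might be driven, assuming the module-level names it references (anno_rpath, video_rpath, splits, short_term_predication_path); the values below are illustrative, mirroring paths used in the later examples:

if __name__ == '__main__':
    # illustrative values; adjust to the actual dataset layout
    anno_rpath = 'baseline/vidvrd-dataset'
    video_rpath = 'baseline/vidvrd-dataset/videos'
    splits = ['train', 'test']
    short_term_predication_path = 'baseline/vidvrd-dataset/vidvrd-baseline-output/short-term-predication.json'
    detect(re_detect=True)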
Example No. 2
def load_relation_feature():
    """
    Test loading precomputed relation features
    """
    dataset = VidVRD('../vidvrd-dataset', '../vidvrd-dataset/videos',
                     ['train', 'test'])
    extractor = feature.FeatureExtractor(dataset, prefetch_count=0)

    video_indices = dataset.get_index(split='train')
    for vid in video_indices:
        durations = set(
            rel_inst['duration']
            for rel_inst in dataset.get_relation_insts(vid, no_traj=True))
        for duration in durations:
            segs = segment_video(*duration)
            for fstart, fend in segs:
                extractor.extract_feature(dataset,
                                          vid,
                                          fstart,
                                          fend,
                                          verbose=True)

    video_indices = dataset.get_index(split='test')
    for vid in video_indices:
        anno = dataset.get_anno(vid)
        segs = segment_video(0, anno['frame_count'])
        for fstart, fend in segs:
            extractor.extract_feature(dataset, vid, fstart, fend, verbose=True)
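
segment_video is not defined in this snippet; the loops above only rely on it slicing a frame range into fixed-length overlapping windows. A minimal sketch of that behaviour (the 30-frame length and 15-frame stride are assumptions, not confirmed by this code):

def segment_video(fstart, fend, seg_len=30, stride=15):
    # return (start, end) windows covering [fstart, fend)
    return [(s, s + seg_len) for s in range(fstart, fend - seg_len + 1, stride)]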
Example No. 3
def train():
    dataset = VidVRD('../vidvrd-dataset', '../vidvrd-dataset/videos',
                     ['train', 'test'])

    param = dict()
    param['model_name'] = 'baseline'
    param['rng_seed'] = 1701
    param['max_sampling_in_batch'] = 32
    param['batch_size'] = 64
    param['learning_rate'] = 0.001
    param['weight_decay'] = 0.0
    param['max_iter'] = 5000
    param['display_freq'] = 1
    param['save_freq'] = 5000
    param['epsilon'] = 1e-8
    param['pair_topk'] = 20
    param['seg_topk'] = 200
    print(param)

    model.train(dataset, param)
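
The dict above mirrors the baseline_setting.json that the detection examples read back via get_model_path(). If model.train does not persist it itself, a hedged sketch of saving it alongside the trained model (get_model_path is the same helper used in the detection examples):

import json
import os

with open(os.path.join(get_model_path(), 'baseline_setting.json'), 'w') as fout:
    json.dump(param, fout)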
Example No. 4
def train():
    dataset = VidVRD(anno_rpath=anno_rpath,
                     video_rpath=video_rpath,
                     splits=splits)
    param = dict()
    param['model_name'] = 'baseline'
    param['rng_seed'] = 1701
    param['max_sampling_in_batch'] = 32
    param['batch_size'] = 64
    param['learning_rate'] = 0.001
    param['weight_decay'] = 0.0
    param['max_iter'] = 5000
    param['display_freq'] = 1
    param['save_freq'] = 5000
    param['epsilon'] = 1e-8
    param['pair_topk'] = 20
    param['seg_topk'] = 200
    print(param)

    model.train(dataset, param)
Example No. 5
def load_object_trajectory_proposal():
    """
    Test loading precomputed object trajectory proposals
    """
    dataset = VidVRD('../vidvrd-dataset', '../vidvrd-dataset/videos',
                     ['train', 'test'])

    video_indices = dataset.get_index(split='train')
    for vid in video_indices:
        durations = set(
            rel_inst['duration']
            for rel_inst in dataset.get_relation_insts(vid, no_traj=True))
        for duration in durations:
            segs = segment_video(*duration)
            for fstart, fend in segs:
                trajectory.object_trajectory_proposal(dataset,
                                                      vid,
                                                      fstart,
                                                      fend,
                                                      gt=False,
                                                      verbose=True)
                trajectory.object_trajectory_proposal(dataset,
                                                      vid,
                                                      fstart,
                                                      fend,
                                                      gt=True,
                                                      verbose=True)

    video_indices = dataset.get_index(split='test')
    for vid in video_indices:
        anno = dataset.get_anno(vid)
        segs = segment_video(0, anno['frame_count'])
        for fstart, fend in segs:
            trajectory.object_trajectory_proposal(dataset,
                                                  vid,
                                                  fstart,
                                                  fend,
                                                  gt=False,
                                                  verbose=True)
            trajectory.object_trajectory_proposal(dataset,
                                                  vid,
                                                  fstart,
                                                  fend,
                                                  gt=True,
                                                  verbose=True)
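
The return value of object_trajectory_proposal is discarded here; each segment is visited twice purely for the side effect of materialising cached proposals, once from detections (gt=False) and once from ground-truth annotations (gt=True). An equivalent, more compact form:

for gt in (False, True):
    trajectory.object_trajectory_proposal(dataset, vid, fstart, fend, gt=gt, verbose=True)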
Example No. 6
def detect():
    dataset = VidVRD('../vidvrd-dataset', '../vidvrd-dataset/videos',
                     ['train', 'test'])
    with open(os.path.join(get_model_path(), 'baseline_setting.json'),
              'r') as fin:
        param = json.load(fin)
    short_term_relations = model.predict(dataset, param)
    # group short term relations by video
    video_st_relations = defaultdict(list)
    for index, st_rel in short_term_relations.items():
        vid = index[0]
        video_st_relations[vid].append((index, st_rel))
    # video-level visual relation detection by relational association
    print('greedy relational association ...')
    video_relations = dict()
    for vid in tqdm(video_st_relations.keys()):
        video_relations[vid] = association.greedy_relational_association(
            dataset, video_st_relations[vid], max_traj_num_in_clip=100)
    # save detection result
    with open(
            os.path.join(get_model_path(),
                         'baseline_relation_prediction.json'), 'w') as fout:
        output = {'version': 'VERSION 1.0', 'results': video_relations}
        json.dump(output, fout)
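
For reference, the saved file uses the same two-key layout as Example No. 1. A hedged sketch of one results entry as a Python literal (the video ID appears in later examples, but the triplet, score, and boxes are purely illustrative):

output = {
    'version': 'VERSION 1.0',
    'results': {
        'ILSVRC2015_train_00066007': [
            {'triplet': ['dog', 'chase', 'person'],   # [subject, predicate, object]
             'score': 0.9,
             'duration': [0, 30],                     # [start_frame, end_frame)
             'sub_traj': [[10.0, 20.0, 50.0, 60.0]],  # one bbox per frame (assumed layout)
             'obj_traj': [[30.0, 40.0, 70.0, 80.0]]},
        ]
    }
}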
Example No. 7
def eval_short_term_relation():
    """
    Evaluate short-term relation prediction
    """
    anno_rpath = 'baseline/vidvrd-dataset'
    video_rpath = 'baseline/vidvrd-dataset/videos'
    splits = ['train', 'test']
    st_prediction = 'baseline/vidvrd-dataset/vidvrd-baseline-output/short-term-predication.json'
    test_st_pred = '/home/daivd/Downloads/pad_result_24000_test_predicate_-1_pair_nms_0.4_rpn_nms_0.7_0.255_union.json'

    res_path = test_st_pred

    dataset = VidVRD(anno_rpath=anno_rpath,
                     video_rpath=video_rpath,
                     splits=splits)

    with open(os.path.join(get_model_path(), 'baseline_setting.json'), 'r') as fin:
        param = json.load(fin)

    if os.path.exists(res_path):
        with open(res_path, 'r') as fin:
            short_term_relations = json.load(fin)
    else:
        short_term_relations = model.predict(dataset, param)
        with open(res_path, 'w') as fout:
            json.dump(short_term_relations, fout)

    short_term_gt = dict()
    short_term_pred = dict()
    video_indices = dataset.get_index(split='test')
    for vid in video_indices:
        anno = dataset.get_anno(vid)
        segs = segment_video(0, anno['frame_count'])
        video_gts = dataset.get_relation_insts(vid)

        if 'results' in short_term_relations.keys():
            video_preds = short_term_relations['results'][vid]
        else:
            video_preds = short_term_relations[vid]

        for fstart, fend in segs:
            vsig = get_segment_signature(vid, fstart, fend)

            segment_gts = []
            for r in video_gts:
                s = max(r['duration'][0], fstart)
                e = min(r['duration'][1], fend)
                if s < e:
                    sub_traj = r['sub_traj'][s - r['duration'][0]: e - r['duration'][0]]
                    obj_traj = r['obj_traj'][s - r['duration'][0]: e - r['duration'][0]]
                    segment_gts.append({
                        "triplet": r['triplet'],
                        "subject_tid": r['subject_tid'],
                        "object_tid": r['object_tid'],
                        "duration": [s, e],
                        "sub_traj": sub_traj,
                        "obj_traj": obj_traj
                    })
            short_term_gt[vsig] = segment_gts

            segment_preds = []
            for r in video_preds:
                if fstart <= r['duration'][0] and r['duration'][1] <= fend:
                    s = max(r['duration'][0], fstart)
                    e = min(r['duration'][1], fend)
                    sub_traj = r['sub_traj'][s - r['duration'][0]: e - r['duration'][0]]
                    obj_traj = r['obj_traj'][s - r['duration'][0]: e - r['duration'][0]]
                    segment_preds.append({
                        "triplet": r['triplet'],
                        "score": r['score'],
                        "duration": [s, e],
                        "sub_traj": sub_traj,
                        "obj_traj": obj_traj
                    })
            short_term_pred[vsig] = segment_preds

    for each_vsig in short_term_gt.keys():
        if each_vsig not in short_term_pred.keys():
            short_term_pred[each_vsig] = []

    mean_ap, rec_at_n, mprec_at_n = eval_visual_relation(short_term_gt, short_term_pred)

    print('detection mean AP (used in challenge): {}'.format(mean_ap))
    print('detection recall@50: {}'.format(rec_at_n[50]))
    print('detection recall@100: {}'.format(rec_at_n[100]))
    print('tagging precision@1: {}'.format(mprec_at_n[1]))
    print('tagging precision@5: {}'.format(mprec_at_n[5]))
    print('tagging precision@10: {}'.format(mprec_at_n[10]))
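
To make the trajectory clipping above concrete, a small worked example with hypothetical values: an instance spanning frames [20, 70] that overlaps a segment [30, 60] keeps duration [30, 60], and its per-frame trajectory is sliced relative to the instance start:

r = {'duration': [20, 70], 'sub_traj': [[0.0, 0.0, 10.0, 10.0]] * 50}
fstart, fend = 30, 60
s = max(r['duration'][0], fstart)  # 30
e = min(r['duration'][1], fend)    # 60
clip = r['sub_traj'][s - r['duration'][0]: e - r['duration'][0]]  # elements 10..39
assert len(clip) == e - s == 30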
Example No. 8
            if len(zs_gt_relations) > 0:
                groundtruth[vid] = zs_gt_relations
                zs_prediction[vid] = []
                for r in prediction[vid]:
                    if tuple(r['triplet']) in zeroshot_triplets:
                        zs_prediction[vid].append(r)
        mean_ap, rec_at_n, mprec_at_n = eval_visual_relation(
            groundtruth, zs_prediction)
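
The zeroshot_triplets set used above is not defined in this fragment; in zero-shot evaluation it is conventionally the set of test triplets never observed during training, which is why Example No. 9 loads the train split alongside the evaluation split. A hedged sketch using only dataset calls that appear elsewhere in these examples:

def get_zeroshot_triplets(dataset):
    # triplets present in 'test' but absent from 'train' (assumed definition)
    def triplets(split):
        return set(tuple(r['triplet'])
                   for vid in dataset.get_index(split=split)
                   for r in dataset.get_relation_insts(vid, no_traj=True))
    return triplets('test') - triplets('train')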


if __name__ == '__main__':
    anno_rpath = 'vidvrd-dataset'
    video_rpath = ''
    splits = ['test']
    dataset = VidVRD(anno_rpath=anno_rpath,
                     video_rpath=video_rpath,
                     splits=splits)

    top_tree = 20
    overlap = 0.3
    iou_thr = 0.8

    test_vid = 'ILSVRC2015_train_00066007'
    config = '{}_{}_{}'.format(top_tree, overlap, iou_thr)

    prediction_out = 'test_out_{}.json'.format(config)

    if os.path.exists(prediction_out):
        print('Loading prediction from {}'.format(prediction_out))
        with open(prediction_out, 'r') as fin:
            result = json.load(fin)
Example No. 9
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset',
                        type=str,
                        help='the dataset name for evaluation')
    parser.add_argument('split',
                        type=str,
                        help='the split name for evaluation')
    parser.add_argument('task',
                        choices=['object', 'action', 'relation'],
                        help='which task to evaluate')
    parser.add_argument('prediction',
                        type=str,
                        help='Corresponding prediction JSON file')
    args = parser.parse_args()

    if args.dataset == 'vidvrd':
        if args.task == 'relation':
            # load train set for zero-shot evaluation
            dataset = VidVRD('../vidvrd-dataset', '../vidvrd-dataset/videos',
                             ['train', args.split])
        else:
            dataset = VidVRD('../vidvrd-dataset', '../vidvrd-dataset/videos',
                             [args.split])
    elif args.dataset == 'vidor':
        if args.task == 'relation':
            # load train set for zero-shot evaluation
            dataset = VidOR('../vidor-dataset/annotation',
                            '../vidor-dataset/video', ['training', args.split],
                            low_memory=True)
        else:
            dataset = VidOR('../vidor-dataset/annotation',
                            '../vidor-dataset/video', [args.split],
                            low_memory=True)
    else:
        raise Exception('Unknown dataset {}'.format(args.dataset))
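
Given the four positional arguments above, a hedged example invocation (the script name evaluate.py is an assumption):

python evaluate.py vidvrd test relation baseline_relation_prediction.json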
Example No. 10
    anno_rpath = 'baseline/vidvrd-dataset'
    video_rpath = 'baseline/vidvrd-dataset/videos'
    splits = ['train', 'test']
    st_prediction = 'baseline/vidvrd-dataset/vidvrd-baseline-output/short-term-predication.json'

    top_tree = 20
    overlap = 0.2
    iou_thr = 0.2
    test_result_name = 'mht_test_relation_prediction_v4_{}_{}_{}.json'.format(top_tree, overlap, iou_thr)

    prediction = os.path.join('baseline/vidvrd-dataset/vidvrd-baseline-output/models', test_result_name)

    short = False

    dataset = VidVRD(anno_rpath=anno_rpath,
                     video_rpath=video_rpath,
                     splits=splits)

    if short:
        print('Loading prediction from {}'.format(st_prediction))
        with open(st_prediction, 'r') as fin:
            pred = json.load(fin)

        print('Number of videos in prediction: {}'.format(len(pred)))

        # choose the evaluation split here; the dataset was loaded with ['train', 'test']
        evaluate_relation(dataset, 'test', pred, segment=short)

    if not short:
        print('Loading prediction from {}'.format(prediction))
Example No. 11
#
# print(len(gt_segs_keys), len(gt_segs2_keys))
#
# for id, segs in gt_segs2.items():
#     print(id)
#     print(len(segs))
#     print(gt_segs[id])
#     break

anno_rpath = 'baseline/vidvrd-dataset'
video_rpath = 'baseline/vidvrd-dataset/videos'
splits = ['train', 'test']
prediction = 'baseline/vidvrd-dataset/vidvrd-baseline-output/models/baseline_relation_prediction.json'
st_prediction = 'baseline/vidvrd-dataset/vidvrd-baseline-output/short-term-predication.json'

dataset = VidVRD(anno_rpath=anno_rpath, video_rpath=video_rpath, splits=splits)

video_indices = dataset.get_index(split='test')

with open(st_prediction, 'r') as st_pre_f:
    pred_segs = json.load(st_pre_f)

short_term_gt = dict()
short_term_pred = dict()

for vid in video_indices:
    gt = dataset.get_relation_insts(vid)
    pred = pred_segs[vid]
    gt_segs = separate_vid_2_seg(gt)

    for each_gt_seg in gt_segs: