Example #1
File: visualize.py  Project: paulgay/VGfM
def viz_net2(net_name, weight_name, imdb, dump_file, viz_mode='viz_cls'):
    sess = tf.Session()
    # set up testing mode
    rois = tf.placeholder(dtype=tf.float32, shape=[None, 5], name='rois')
    rel_rois = tf.placeholder(dtype=tf.float32, shape=[None, 5], name='rel_rois')
    ims = tf.placeholder(dtype=tf.float32,
                         shape=[None, None, None, 3],
                         name='ims')
    relations = tf.placeholder(dtype=tf.int32,
                               shape=[None, 2],
                               name='relations')
    inputs = {
        'rois': rois,
        'rel_rois': rel_rois,
        'ims': ims,
        'relations': relations,
        'num_roi': tf.placeholder(dtype=tf.int32, shape=[]),
        'num_rel': tf.placeholder(dtype=tf.int32, shape=[]),
        'num_classes': imdb.num_classes,
        'num_predicates': imdb.num_predicates,
        'rel_mask_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
        'rel_segment_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
        'rel_pair_mask_inds': tf.placeholder(dtype=tf.int32, shape=[None, 2]),
        'rel_pair_segment_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
        'quadric_rois': tf.placeholder(dtype=tf.float32, shape=[None, 28]),
        'rels_feat2d': tf.placeholder(dtype=tf.float32, shape=[None, 400]),
        'rels_feat3d': tf.placeholder(dtype=tf.float32, shape=[None, 400]),
        'n_iter': cfg.TEST.INFERENCE_ITER,
        'labels': tf.placeholder(dtype=tf.int32, shape=[None])
    }

    net = get_network(net_name)(inputs)
    net.setup()
    print('Loading model weights from {:s}'.format(weight_name))
    saver = tf.train.Saver()
    saver.restore(sess, weight_name)
    roidb = imdb.roidb
    if cfg.TEST.USE_RPN_DB:
        imdb.add_rpn_rois(roidb, make_copy=False)
    prepare_roidb(roidb)

    num_images = len(imdb.image_index)

    if net.iterable:
        inference_iter = net.n_iter - 1
    else:
        inference_iter = 0
    print('=======================VIZ INFERENCE Iteration = {}'.format(inference_iter))
    print('=======================VIZ MODES = {}'.format(viz_mode))
    #gts = np.zeros((0,5))
    sg_entries = []
    for im_i in range(0, min(3000, num_images)):  # capped at 3000 images
        print('processing image {:d}/{:d}'.format(im_i, num_images))
        im = imdb.im_getter(im_i)
        #from scipy.misc import imread, imsave
        first_box_idx = imdb.im_to_last_box[im_i]  # note: unused below; eval_net uses im_to_first_box
        bbox_reg = True
        if viz_mode == 'viz_cls':
            # use ground truth bounding boxes
            bbox_reg = False
            box_proposals = gt_rois(roidb[im_i])
        elif viz_mode == 'viz_det':
            # use RPN-proposed object locations
            box_proposals, roi_scores = non_gt_rois(roidb[im_i])
            roi_scores = np.expand_dims(roi_scores, axis=1)
            nms_keep = cpu_nms(
                np.hstack((box_proposals, roi_scores)).astype(np.float32),
                cfg.TEST.PROPOSAL_NMS)
            nms_keep = np.array(nms_keep)
            num_proposal = min(cfg.TEST.NUM_PROPOSALS, nms_keep.shape[0])
            keep = nms_keep[:num_proposal]
            box_proposals = box_proposals[keep, :]
        else:
            raise NotImplementedError(
                'Incorrect visualization mode. Choose between [cls] and [det]')
        if box_proposals.size == 0 or box_proposals.shape[0] < 2:
            print('skipping image {:d}'.format(im_i))
            continue
        quadric_rois = np.hstack(
            [
                np.zeros((box_proposals.shape[0], 1)),
                roidb[im_i]['quadric_rois']
            ]
        )  # prepend a zero image-index column: training prepends the image number, and at test time there is a single image, so the index is 0
        #rels_feat2d = roidb[im_i]['rel_geo_2d']
        #rels_feat3d = roidb[im_i]['rel_geo_3d']
        relations = roidb[im_i]['gt_relations'][:, :2]
        out_dict = im_detect(sess, net, inputs, im, box_proposals, bbox_reg,
                             [inference_iter], quadric_rois, relations,
                             roidb[im_i]['gt_classes'][:quadric_rois.shape[0]])
        sg_entry = out_dict[inference_iter]
        # ground predicted graphs to ground truth annotations
        gt_to_pred = ground_predictions(sg_entry, roidb[im_i], 0.5)
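        # gt_to_pred maps ground-truth objects to predicted boxes (IoU threshold 0.5)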
        roidb2 = dict(roidb[im_i])
        roidb2['gt_to_pred_object'] = gt_to_pred
        del roidb2['image']
        #import pdb; pdb.set_trace()
        sg_entries.append((sg_entry, roidb2))
    print('saving results in file {:s}, number of entries: {:d}'.format(
        dump_file, len(sg_entries)))
    with open(dump_file, 'wb') as dump_f:
        pickle.dump(sg_entries, dump_f)
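
The dump written above can be read back the way eval_net does in Example #3; a minimal sketch, assuming the binary pickle produced by viz_net2:

import pickle

with open(dump_file, 'rb') as f:
    sg_entries = pickle.load(f)
for sg_entry, roidb_entry in sg_entries:
    print(roidb_entry['gt_to_pred_object'])  # grounding computed above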
Example #2
def test_net(net_name, weight_name, imdb, mode, max_per_image=100):
    sess = tf.Session()

    # set up testing mode
    rois = tf.placeholder(dtype=tf.float32, shape=[None, 5], name='rois')
    rel_rois = tf.placeholder(dtype=tf.float32, shape=[None, 5], name='rel_rois')
    ims = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3], name='ims')
    relations = tf.placeholder(dtype=tf.int32, shape=[None, 2], name='relations')
    inputs = {'rois': rois,
              'rel_rois': rel_rois,
              'ims': ims,
              'relations': relations,
              'num_roi': tf.placeholder(dtype=tf.int32, shape=[]),
              'num_rel': tf.placeholder(dtype=tf.int32, shape=[]),
              'num_classes': imdb.num_classes,
              'num_predicates': imdb.num_predicates,
              'rel_mask_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
              'rel_segment_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
              'rel_pair_mask_inds': tf.placeholder(dtype=tf.int32, shape=[None, 2]),
              'rel_pair_segment_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
              'n_iter': cfg.TEST.INFERENCE_ITER}


    net = get_network(net_name)(inputs)
    net.setup()
    print('Loading model weights from {:s}'.format(weight_name))
    saver = tf.train.Saver()
    saver.restore(sess, weight_name)

    roidb = imdb.roidb
    if cfg.TEST.USE_RPN_DB:
        imdb.add_rpn_rois(roidb, make_copy=False)
    prepare_roidb(roidb)

    num_images = len(imdb.image_index)

    # timers
    _t = {'im_detect' : Timer(), 'evaluate' : Timer()}

    if mode == 'all':
        eval_modes = ['pred_cls', 'sg_cls', 'sg_det']
    else:
        eval_modes = [mode]
    multi_iter = [net.n_iter - 1] if net.iterable else [0]
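    # evaluate only the final message-passing iteration of iterable networks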
    print('Graph Inference Iteration = {}'.format(multi_iter))
    print('EVAL MODES = {}'.format(eval_modes))

    # initialize evaluator for each task
    evaluators = {}
    for m in eval_modes:
        evaluators[m] = {}
        for it in multi_iter:
            evaluators[m][it] = SceneGraphEvaluator(imdb, mode=m)

    for im_i in xrange(num_images):

        im = imdb.im_getter(im_i)

        for mode in eval_modes:
            bbox_reg = True
            if mode == 'pred_cls' or mode == 'sg_cls':
                # use ground truth object locations
                bbox_reg = False
                box_proposals = gt_rois(roidb[im_i])
            else:
                # use RPN-proposed object locations
                box_proposals, roi_scores = non_gt_rois(roidb[im_i])
                roi_scores = np.expand_dims(roi_scores, axis=1)
                nms_keep = cpu_nms(np.hstack((box_proposals, roi_scores)).astype(np.float32),
                            cfg.TEST.PROPOSAL_NMS)
                nms_keep = np.array(nms_keep)
                num_proposal = min(cfg.TEST.NUM_PROPOSALS, nms_keep.shape[0])
                keep = nms_keep[:num_proposal]
                box_proposals = box_proposals[keep, :]


            if box_proposals.size == 0 or box_proposals.shape[0] < 2:
                # continue if no graph
                continue

            _t['im_detect'].tic()
            out_dict = im_detect(sess, net, inputs, im, box_proposals,
                                 bbox_reg, multi_iter)
            _t['im_detect'].toc()
            _t['evaluate'].tic()
            for iter_n in multi_iter:
                sg_entry = out_dict[iter_n]
                evaluators[mode][iter_n].evaluate_scene_graph_entry(sg_entry, im_i, iou_thresh=0.5)
            _t['evaluate'].toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
            im_i + 1, num_images, _t['im_detect'].average_time,
            _t['evaluate'].average_time))

    # print out evaluation results
    for mode in eval_modes:
        for iter_n in multi_iter:
            evaluators[mode][iter_n].print_stats()
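
The RPN proposal-selection block (non_gt_rois, cpu_nms, top-k) is repeated verbatim in every function on this page; a small helper could factor it out. A minimal sketch, assuming the same module-level non_gt_rois, cpu_nms, and cfg used above (the name select_proposals is hypothetical):

import numpy as np

def select_proposals(roidb_entry):
    # score-augmented boxes -> NMS -> keep the top cfg.TEST.NUM_PROPOSALS
    box_proposals, roi_scores = non_gt_rois(roidb_entry)
    roi_scores = np.expand_dims(roi_scores, axis=1)
    dets = np.hstack((box_proposals, roi_scores)).astype(np.float32)
    nms_keep = np.array(cpu_nms(dets, cfg.TEST.PROPOSAL_NMS))
    keep = nms_keep[:min(cfg.TEST.NUM_PROPOSALS, nms_keep.shape[0])]
    return box_proposals[keep, :]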
Example #3
def eval_net(score_file, imdb, mode, write_rel_f, max_per_image=100):
    dic = json.load(open("/ssd_disk/gay/scenegraph/scannet-SGG-dicts.json"))
    preds_ok = np.zeros((len(dic['predicate_to_idx']) + 1, 1))
    preds_nok = np.zeros((len(dic['predicate_to_idx']) + 1, 1))
    sg_entries = pickle.load(open(score_file, 'rb'))
    # set up testing mode
    roidb = imdb.roidb
    if cfg.TEST.USE_RPN_DB:
        imdb.add_rpn_rois(roidb, make_copy=False)
    prepare_roidb(roidb)

    num_images = len(imdb.image_index)

    # timers
    _t = {'im_detect': Timer(), 'evaluate': Timer()}

    if mode == 'all':
        eval_modes = ['pred_cls', 'sg_cls', 'sg_det']
    else:
        eval_modes = [mode]
    multi_iter = [1]  # FIXME: maybe try 2
    print('Graph Inference Iteration = {}'.format(multi_iter))
    print('EVAL MODES = {}'.format(eval_modes))

    # initialize evaluator for each task
    evaluators = {}
    for m in eval_modes:
        evaluators[m] = {}
        for it in multi_iter:
            evaluators[m][it] = SceneGraphEvaluator(imdb, mode=m)
    sg_i = -1
    preds = np.zeros((0, 1))
    f = open(write_rel_f, 'w')
    """
    #pickle.dump(roidb,open('gt.pc','wb'))
    my_roidb = []
    for i in range(0,num_images):
      gt_en = {}
      gt_en['gt_classes'] = roidb[i]['gt_classes']
      gt_en['boxes'] = roidb[i]['boxes']
      gt_en['gt_relations'] = roidb[i]['gt_relations']
      my_roidb.append(gt_en)
    pickle.dump(my_roidb,open('gt.pc','wb'))
    import sys; sys.exit()
    """
    for im_i in xrange(num_images):
        im_i_roidb_full = imdb._image_index[im_i]
        seq_name = imdb.im2seq[im_i_roidb_full]
        im_path = imdb.impaths[imdb.roidb_idx_to_imdbidx[im_i_roidb_full]]
        fname = im_path.split('/')[-1].replace('.color.jpg', '')
        first_box_idx = imdb.im_to_first_box[im_i]
        im = imdb.im_getter(im_i)
        sg_i += 1
        sg_entry = sg_entries[sg_i]
        scores_rel = sg_entry['relations']
        gt_rel = roidb[im_i]['gt_relations']
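        # prepend the image index so each row becomes (im_i, sub, obj, predicate)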
        gt_rela = np.hstack([np.ones((gt_rel.shape[0], 1)) * im_i, gt_rel])
        pred_rel_vec = np.zeros((gt_rel.shape[0], 1))
        for i in range(gt_rel.shape[0]):
            o1 = int(gt_rela[i, 1])
            o2 = int(gt_rela[i, 2])
            predicate = int(gt_rela[i, 3])
            o1_oid = imdb.roidb_to_scannet_oid[first_box_idx + o1][0]  # to check later
            o2_oid = imdb.roidb_to_scannet_oid[first_box_idx + o2][0]
            # a prediction is correct if the score for (o1, o2) beats the
            # score for the reversed pair (o2, o1) under the same predicate
            if scores_rel[o1, o2, predicate] > scores_rel[o2, o1, predicate]:
                pred_rel_vec[i] = 1
                preds_ok[predicate] += 1
                f.write(' '.join(
                    (seq_name, fname, str(o1_oid), str(o2_oid),
                     imdb.info['idx_to_predicate'][str(predicate)],
                     '1\n')))
            else:
                pred_rel_vec[i] = 0
                preds_nok[predicate] += 1
                f.write(' '.join(
                    (seq_name, fname, str(o1_oid), str(o2_oid),
                     imdb.info['idx_to_predicate'][str(predicate)],
                     '0\n')))
        preds = np.vstack([preds, pred_rel_vec])
        for mode in eval_modes:
            bbox_reg = True
            if mode == 'pred_cls' or mode == 'sg_cls':
                # use ground truth object locations
                bbox_reg = False
                box_proposals = gt_rois(roidb[im_i])
            else:
                # use RPN-proposed object locations
                box_proposals, roi_scores = non_gt_rois(roidb[im_i])
                roi_scores = np.expand_dims(roi_scores, axis=1)
                nms_keep = cpu_nms(
                    np.hstack((box_proposals, roi_scores)).astype(np.float32),
                    cfg.TEST.PROPOSAL_NMS)
                nms_keep = np.array(nms_keep)
                num_proposal = min(cfg.TEST.NUM_PROPOSALS, nms_keep.shape[0])
                keep = nms_keep[:num_proposal]
                box_proposals = box_proposals[keep, :]
            if box_proposals.size == 0 or box_proposals.shape[0] < 2:
                # continue if no graph
                continue
            _t['im_detect'].tic()
            quadric_rois = np.vstack(
                [roidb[im_i]['quadric_rois'], roidb[im_i]['quadric_rois']]
            )  # the boxes are duplicated upstream, so the quadric ROIs are duplicated to match; the reason is unclear (possibly to evaluate different aspects)
            quadric_rois = np.hstack(
                [np.zeros((quadric_rois.shape[0], 1)), quadric_rois]
            )  # prepend a zero image-index column, since training prepends the image number
            #out_dict = im_detect(sess, net, inputs, im, box_proposals, bbox_reg, multi_iter, quadric_rois)

            _t['im_detect'].toc()
            _t['evaluate'].tic()
            for iter_n in multi_iter:
                evaluators[mode][iter_n].evaluate_scene_graph_entry(
                    sg_entry, im_i, iou_thresh=0.5)
            _t['evaluate'].toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
            im_i + 1, num_images, _t['im_detect'].average_time,
            _t['evaluate'].average_time))

    # print out evaluation results
    for mode in eval_modes:
        for iter_n in multi_iter:
            evaluators[mode][iter_n].print_stats()
    print('direct accuracy of the predicates: {}'.format(np.mean(preds)))
    print(preds_ok)
    print(preds_nok)
    # NOTE: predicates never seen in the ground truth yield 0/0 (nan) here
    print(preds_ok / (preds_ok + preds_nok))
    f.close()
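
The per-relation loop above scores a ground-truth triple as correct when the predicted score for (o1, o2) beats the reversed pair (o2, o1). A minimal vectorized sketch of the same check (the name directional_accuracy is hypothetical; gt_rel rows are assumed to be (sub, obj, predicate) as above):

import numpy as np

def directional_accuracy(scores_rel, gt_rel):
    s = gt_rel[:, 0].astype(int)
    o = gt_rel[:, 1].astype(int)
    p = gt_rel[:, 2].astype(int)
    # True where the forward direction outscores the reverse direction
    return (scores_rel[s, o, p] > scores_rel[o, s, p]).mean()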
Example #4
def viz_net(net_name, weight_name, imdb, viz_mode='viz_cls'):
    sess = tf.Session()

    # set up testing mode
    rois = tf.placeholder(dtype=tf.float32, shape=[None, 5], name='rois')
    rel_rois = tf.placeholder(dtype=tf.float32, shape=[None, 5], name='rel_rois')
    ims = tf.placeholder(dtype=tf.float32,
                         shape=[None, None, None, 3],
                         name='ims')
    relations = tf.placeholder(dtype=tf.int32,
                               shape=[None, 2],
                               name='relations')

    inputs = {
        'rois': rois,
        'rel_rois': rel_rois,
        'ims': ims,
        'relations': relations,
        'num_roi': tf.placeholder(dtype=tf.int32, shape=[]),
        'num_rel': tf.placeholder(dtype=tf.int32, shape=[]),
        'num_classes': imdb.num_classes,
        'num_predicates': imdb.num_predicates,
        'rel_mask_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
        'rel_segment_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
        'rel_pair_mask_inds': tf.placeholder(dtype=tf.int32, shape=[None, 2]),
        'rel_pair_segment_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
        'n_iter': cfg.TEST.INFERENCE_ITER
    }

    net = get_network(net_name)(inputs)
    net.setup()
    print('Loading model weights from {:s}'.format(weight_name))
    saver = tf.train.Saver()
    saver.restore(sess, weight_name)

    roidb = imdb.roidb
    if cfg.TEST.USE_RPN_DB:
        imdb.add_rpn_rois(roidb, make_copy=False)
    prepare_roidb(roidb)

    num_images = len(imdb.image_index)

    if net.iterable:
        inference_iter = net.n_iter - 1
    else:
        inference_iter = 0
    print('=======================VIZ INFERENCE Iteration = {}'.format(inference_iter))
    print('=======================VIZ MODES = {}'.format(viz_mode))

    for im_i in xrange(num_images):
        im = imdb.im_getter(im_i)

        bbox_reg = True
        if viz_mode == 'viz_cls':
            # use ground truth bounding boxes
            bbox_reg = False
            box_proposals = gt_rois(roidb[im_i])
        elif viz_mode == 'viz_det':
            # use RPN-proposed object locations
            box_proposals, roi_scores = non_gt_rois(roidb[im_i])
            roi_scores = np.expand_dims(roi_scores, axis=1)
            nms_keep = cpu_nms(
                np.hstack((box_proposals, roi_scores)).astype(np.float32),
                cfg.TEST.PROPOSAL_NMS)
            nms_keep = np.array(nms_keep)
            num_proposal = min(cfg.TEST.NUM_PROPOSALS, nms_keep.shape[0])
            keep = nms_keep[:num_proposal]
            box_proposals = box_proposals[keep, :]
        else:
            raise NotImplementedError(
                'Incorrect visualization mode. Choose between [cls] and [det]')

        if box_proposals.size == 0 or box_proposals.shape[0] < 2:
            continue

        out_dict = im_detect(sess, net, inputs, im, box_proposals, bbox_reg,
                             [inference_iter])
        sg_entry = out_dict[inference_iter]

        # ground predicted graphs to ground truth annotations
        gt_to_pred = ground_predictions(sg_entry, roidb[im_i], 0.5)
        draw_graph_pred(im, sg_entry['boxes'], sg_entry['scores'],
                        sg_entry['relations'], gt_to_pred, roidb[im_i])
Example #5
def viz_net(net_name, weight_name, imdb, viz_mode='viz_cls'):
    sess = tf.Session()

    # set up testing mode
    rois = tf.placeholder(dtype=tf.float32, shape=[None, 5], name='rois')
    rel_rois = tf.placeholder(dtype=tf.float32, shape=[None, 5], name='rel_rois')
    ims = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3], name='ims')
    relations = tf.placeholder(dtype=tf.int32, shape=[None, 2], name='relations')

    Xmat = sio.loadmat('tensor_prior.mat')
    X_r = Xmat['X_r']
    # matlab's X_r is in (pred, sub, obj). swap to make it (sub, obj, pred)
    X_r = np.swapaxes(X_r, 0, 2)
    X_r = np.swapaxes(X_r, 0, 1)
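    # equivalently, in a single call: X_r = np.transpose(Xmat['X_r'], (1, 2, 0))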

    inputs = {'rois': rois,
              'rel_rois': rel_rois,
              'ims': ims,
              'relations': relations,
              'num_roi': tf.placeholder(dtype=tf.int32, shape=[]),
              'num_rel': tf.placeholder(dtype=tf.int32, shape=[]),
              'num_classes': imdb.num_classes,
              'num_predicates': imdb.num_predicates,
              'rel_mask_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
              'rel_segment_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
              'rel_pair_mask_inds': tf.placeholder(dtype=tf.int32, shape=[None, 2]),
              'rel_pair_segment_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
              'n_iter': cfg.TEST.INFERENCE_ITER,
              'Xr': X_r,
              'sigma': 0.5,
              'mask': tf.placeholder(dtype=tf.float32, shape=[None, 51])
              }



    net = get_network(net_name)(inputs)
    net.setup()
    print('Loading model weights from {:s}'.format(weight_name))
    saver = tf.train.Saver()
    saver.restore(sess, weight_name)

    roidb = imdb.roidb
    if cfg.TEST.USE_RPN_DB:
        imdb.add_rpn_rois(roidb, make_copy=False)
    prepare_roidb(roidb)

    num_images = len(imdb.image_index)

    if net.iterable:
        inference_iter = net.n_iter - 1
    else:
        inference_iter = 0
    # print('=======================VIZ INFERENCE Iteration %s =======================' %(net.n_iter))
    # print('=======================VIZ MODES = %s =======================' %(viz_mode)),

    # use_gt is set when visualizing ground-truth graphs (viz_gt mode)
    use_gt = False
    # derive the output directory from the checkpoint path up front, so it is
    # defined even if every image is skipped
    spl = os.path.split(weight_name)
    fn, folder = spl[1].split('.')[0], os.path.split(spl[0])[1]
    save_dir = os.path.join('viz_output/', folder, fn)
    for im_i in xrange(num_images):
        im = imdb.im_getter(im_i)

        bbox_reg = True
        if viz_mode == 'viz_cls':
            # use ground truth bounding boxes
            bbox_reg = False
            box_proposals = gt_rois(roidb[im_i])
        elif viz_mode == 'viz_det':
            # use RPN-proposed object locations
            box_proposals, roi_scores = non_gt_rois(roidb[im_i])
            roi_scores = np.expand_dims(roi_scores, axis=1)
            nms_keep = cpu_nms(np.hstack((box_proposals, roi_scores)).astype(np.float32),
                        cfg.TEST.PROPOSAL_NMS)
            nms_keep = np.array(nms_keep)
            num_proposal = min(cfg.TEST.NUM_PROPOSALS, nms_keep.shape[0])
            keep = nms_keep[:num_proposal]
            box_proposals = box_proposals[keep, :]
        elif viz_mode == 'viz_gt':
            bbox_reg = False
            box_proposals = gt_rois(roidb[im_i])
            use_gt = True
        else:
            raise NotImplementedError('Incorrect visualization mode. Choose among [cls], [det], [gt]')

        if box_proposals.size == 0 or box_proposals.shape[0] < 2:
            continue

        out_dict = im_detect(sess, net, inputs, im, box_proposals,
                             bbox_reg, [inference_iter])
        sg_entry = out_dict[inference_iter]

        # ground predicted graphs to ground truth annotations
        gt_to_pred = ground_predictions(sg_entry, roidb[im_i], 0.5)

        draw_graph_pred(im, sg_entry['boxes'], sg_entry['scores'], sg_entry['relations'],
                        gt_to_pred, roidb[im_i], im_i, save_dir, use_gt=use_gt)

    print('saved images and scene graphs to {:s}'.format(save_dir))
Example #6
def extract_net(net_name, weight_name, imdb, mode, max_per_image=100):
    sess = tf.Session()

    # set up testing mode
    rois = tf.placeholder(dtype=tf.float32, shape=[None, 5], name='rois')
    rel_rois = tf.placeholder(dtype=tf.float32,
                              shape=[None, 5],
                              name='rel_rois')
    ims = tf.placeholder(dtype=tf.float32,
                         shape=[None, None, None, 3],
                         name='ims')
    relations = tf.placeholder(dtype=tf.int32,
                               shape=[None, 2],
                               name='relations')
    inputs = {
        'rois': rois,
        'rel_rois': rel_rois,
        'ims': ims,
        'relations': relations,
        'num_roi': tf.placeholder(dtype=tf.int32, shape=[]),
        'num_rel': tf.placeholder(dtype=tf.int32, shape=[]),
        'num_classes': 151,  # pre-trained nums
        'num_predicates': 51,  # pre-trained nums
        'rel_mask_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
        'rel_segment_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
        'rel_pair_mask_inds': tf.placeholder(dtype=tf.int32, shape=[None, 2]),
        'rel_pair_segment_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
        'n_iter': cfg.TEST.INFERENCE_ITER
    }

    net = get_network(net_name)(inputs)
    net.setup()
    print('Loading model weights from {:s}'.format(weight_name))
    saver = tf.train.Saver()
    saver.restore(sess, weight_name)

    roidb = imdb.roidb
    if cfg.TEST.USE_RPN_DB:
        imdb.add_rpn_rois(roidb, make_copy=False)
    prepare_roidb(roidb)

    num_images = len(imdb.image_index)
    num_rois = imdb.all_boxes.shape[0]
    # timers
    _t = {'im_detect': Timer(), 'evaluate': Timer()}

    if mode == 'all':
        eval_modes = ['pred_cls', 'sg_cls', 'sg_det']
    else:
        eval_modes = [mode]
    multi_iter = [net.n_iter - 1] if net.iterable else [0]
    print('Graph Inference Iteration = {}'.format(multi_iter))
    print('EVAL MODES = {}'.format(eval_modes))

    # output HDF5 file for per-object (vertex) features
    vert_features_file = h5py.File(
        os.path.join(cfg.VG_DIR, 'avenue_vert_feature.h5'), 'w')

    vert_feature = vert_features_file.create_dataset('vert_cls',
                                                     (num_rois, 512),
                                                     dtype=np.float32)
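    # one 512-d feature row per ROI; filled image by image in the loop below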

    edge_counter = 0
    # print('-------')
    # print(num_images)
    for im_i in xrange(num_images):
        im = imdb.im_getter(im_i)

        for mode in eval_modes:
            bbox_reg = True
            if mode == 'pred_cls' or mode == 'sg_cls' or mode == 'extract_fea':
                # use ground truth object locations
                bbox_reg = False
                box_proposals = gt_rois(roidb[im_i])
            else:
                # use RPN-proposed object locations
                box_proposals, roi_scores = non_gt_rois(roidb[im_i])
                roi_scores = np.expand_dims(roi_scores, axis=1)
                nms_keep = cpu_nms(
                    np.hstack((box_proposals, roi_scores)).astype(np.float32),
                    cfg.TEST.PROPOSAL_NMS)
                nms_keep = np.array(nms_keep)
                num_proposal = min(cfg.TEST.NUM_PROPOSALS, nms_keep.shape[0])
                keep = nms_keep[:num_proposal]
                box_proposals = box_proposals[keep, :]

            if box_proposals.size == 0 or box_proposals.shape[0] < 2:
                # continue if no graph
                continue

            _t['im_detect'].tic()
            out_dict = im_detect(sess, net, inputs, im, box_proposals,
                                 bbox_reg, multi_iter)

            _t['im_detect'].toc()
            _t['evaluate'].tic()
            for iter_n in multi_iter:
                sg_entry = out_dict[iter_n]
                im_to_first_box, im_to_last_box = imdb.get_im_to_box_idx(im_i)
                print(im_i, im_to_first_box, im_to_last_box,
                      sg_entry['vert'].shape, sg_entry['scores'].shape)
                record_vert_features = sg_entry['vert']
                n_box = im_to_last_box + 1 - im_to_first_box
                vert_feature[im_to_first_box:im_to_last_box + 1, :] = \
                    record_vert_features[:n_box, :]

            _t['evaluate'].toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
            im_i + 1, num_images, _t['im_detect'].average_time,
            _t['evaluate'].average_time))
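
extract_net streams per-object features into the HDF5 dataset created above. Reading them back uses the same h5py API; a minimal sketch, assuming the path and cfg from the function above:

import os
import h5py

f = h5py.File(os.path.join(cfg.VG_DIR, 'avenue_vert_feature.h5'), 'r')
vert_cls = f['vert_cls'][:]  # (num_rois, 512) float32, one row per ROI
f.close()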
Example #7
def viz_net(net_name, weight_name, imdb, viz_mode='viz_cls'):
    sess = tf.Session()

    # set up testing mode
    rois = tf.placeholder(dtype=tf.float32, shape=[None, 5], name='rois')
    rel_rois = tf.placeholder(dtype=tf.float32, shape=[None, 5], name='rel_rois')
    ims = tf.placeholder(dtype=tf.float32,
                         shape=[None, None, None, 3],
                         name='ims')
    relations = tf.placeholder(dtype=tf.int32,
                               shape=[None, 2],
                               name='relations')

    inputs = {
        'rois': rois,
        'rel_rois': rel_rois,
        'ims': ims,
        'relations': relations,
        'num_roi': tf.placeholder(dtype=tf.int32, shape=[]),
        'num_rel': tf.placeholder(dtype=tf.int32, shape=[]),
        'num_classes': imdb.num_classes,
        'num_predicates': imdb.num_predicates,
        'rel_mask_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
        'rel_segment_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
        'rel_pair_mask_inds': tf.placeholder(dtype=tf.int32, shape=[None, 2]),
        'rel_pair_segment_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
        'quadric_rois': tf.placeholder(dtype=tf.float32, shape=[None, 25]),
        'n_iter': cfg.TEST.INFERENCE_ITER
    }

    net = get_network(net_name)(inputs)
    net.setup()
    print('Loading model weights from {:s}'.format(weight_name))
    saver = tf.train.Saver()
    saver.restore(sess, weight_name)
    roidb = imdb.roidb
    if cfg.TEST.USE_RPN_DB:
        imdb.add_rpn_rois(roidb, make_copy=False)
    prepare_roidb(roidb)

    num_images = len(imdb.image_index)

    if net.iterable:
        inference_iter = net.n_iter - 1
    else:
        inference_iter = 0
    print('=======================VIZ INFERENCE Iteration = {}'.format(inference_iter))
    print('=======================VIZ MODES = {}'.format(viz_mode))
    rec = 0
    for im_i in range(num_images):
        print('processing image {:d}/{:d}'.format(im_i, num_images))
        im = imdb.im_getter(im_i)

        bbox_reg = True
        if viz_mode == 'viz_cls':
            # use ground truth bounding boxes
            bbox_reg = False
            box_proposals = gt_rois(roidb[im_i])
        elif viz_mode == 'viz_det':
            # use RPN-proposed object locations
            box_proposals, roi_scores = non_gt_rois(roidb[im_i])
            roi_scores = np.expand_dims(roi_scores, axis=1)
            nms_keep = cpu_nms(
                np.hstack((box_proposals, roi_scores)).astype(np.float32),
                cfg.TEST.PROPOSAL_NMS)
            nms_keep = np.array(nms_keep)
            num_proposal = min(cfg.TEST.NUM_PROPOSALS, nms_keep.shape[0])
            keep = nms_keep[:num_proposal]
            box_proposals = box_proposals[keep, :]
        else:
            raise NotImplementedError(
                'Incorrect visualization mode. Choose between [cls] and [det]')
        if box_proposals.size == 0 or box_proposals.shape[0] < 2:
            print('skipping image {:d}'.format(im_i))
            continue
        #import pdb; pdb.set_trace()
        quadric_rois = np.vstack(
            [roidb[im_i]['quadric_rois'], roidb[im_i]['quadric_rois']]
        )  # the boxes are duplicated upstream, so the quadric ROIs are duplicated to match; the reason is unclear (possibly to evaluate different aspects)
        quadric_rois = np.hstack(
            [np.zeros((quadric_rois.shape[0], 1)), quadric_rois]
        )  # prepend a zero image-index column, since training prepends the image number
        out_dict = im_detect(sess, net, inputs, im, box_proposals, bbox_reg,
                             [inference_iter], quadric_rois)
        sg_entry = out_dict[inference_iter]
        # ground predicted graphs to ground truth annotations
        gt_to_pred = ground_predictions(sg_entry, roidb[im_i], 0.5)
        gt_rel = roidb[im_i]['gt_relations']
        pred_rel = np.argmax(sg_entry['relations'], 2)
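        # most likely predicate for every ordered (subject, object) pair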
        # recall over the first two ground-truth relations (0.5 credit each);
        # assumes every image has at least two annotated relations
        for k in range(2):
            s, o, p = roidb[im_i]['gt_relations'][k][:3]
            if pred_rel[s, o] == p:
                rec += 0.5
        #my_draw_graph_pred(im, sg_entry['boxes'], sg_entry['scores'], sg_entry['relations'], gt_to_pred, roidb[im_i], im_i)
    print('recall: {}'.format(rec / num_images))