#query = 'bike on the red house'

    # NOTE(review): this is the interior of a larger function/script —
    # `query`, `retriever`, `captioner`, and `vocab_dict` are defined
    # outside this view.
    print("query =", query)
    print("Find best candidate..!")
    # Score the query against proposal boxes from 8 pre-split sub-images,
    # then stack all scored boxes into one array tagged by sub-image index.
    for i in range(8):
        im_file = './splited_image/test' + str(i) + '.jpg'
        edgebox_file = './proposal_box/selective_box' + str(
            i) + '.txt'  # pre-extracted EdgeBox proposals
        im = skimage.io.imread(im_file)
        imsize = np.array([im.shape[1], im.shape[0]])  # [width, height]
        # One 4-number row per proposal box; reshape guards against the
        # single-box case where loadtxt returns a 1-D array.
        # (assumes the usual corner-coordinate convention — TODO confirm)
        candidate_boxes = np.loadtxt(edgebox_file).astype(int)
        candidate_boxes = np.reshape(candidate_boxes, (-1, 4))
        # Compute features
        region_feature = retriever.compute_descriptors_edgebox(
            captioner, im, candidate_boxes)
        spatial_feature = retriever.compute_spatial_feat(
            candidate_boxes, imsize)
        # Local descriptor = per-region CNN feature concatenated with its
        # spatial feature.
        descriptors = np.concatenate((region_feature, spatial_feature), axis=1)
        # Whole-image fc7 feature used as scene context.
        context_feature = captioner.compute_descriptors([im],
                                                        output_name='fc7')

        # Compute scores of each candidate region
        scores = retriever.score_descriptors_context(descriptors, query,
                                                     context_feature,
                                                     captioner, vocab_dict)
        #candidate_boxes = (i, candidate_boxes)
        # Prepend the sub-image index as an extra leading column so boxes
        # from different sub-images remain distinguishable after stacking.
        candidate_boxes = np.insert(candidate_boxes, 0, i, axis=1)
        if (i == 0):
            sum_candidate_box = candidate_boxes
        else:
            #sum_candidate_box=np.concatenate(sum_candidate_box,candidate_boxes,axis=1)
            sum_candidate_box = np.vstack((sum_candidate_box, candidate_boxes))
# 示例#2 (Example #2) — stray "0" below the separator was a scraping artifact
sample_im = num_im

# For every test image: compute local CNN + spatial features for its
# proposal boxes and cache them to disk; periodically report recall.
for n_im in range(sample_im):
    print('testing image %d / %d' % (n_im, num_im))
    imname = imlist[n_im]
    # gt annotations and proposals for this image
    imcrop_names = imcrop_dict[imname]
    candidate_boxes = candidate_boxes_dict[imname]

    im = skimage.io.imread(image_dir + imname + '.jpg')
    imsize = np.array([im.shape[1], im.shape[0]])  # [width, height]

    # Local descriptors: per-region fc7 feature plus 8-D spatial feature.
    descriptors = retriever.compute_descriptors_edgebox(
        captioner, im, candidate_boxes, 'fc7')  # (100,4096)
    spatial_feats = retriever.compute_spatial_feat(
        candidate_boxes, imsize)  # (100,8)
    # Cache both feature arrays for later training/evaluation runs.
    np.savez('./data/ReferIt/referit_proposal_feature/' + imname,
             spatial_feat=spatial_feats, local_feature=descriptors)

    # print intermediate results during testing
    if (n_im + 1) % 1000 == 0:
        print('Recall on first %d test images' % (n_im + 1))
        for k in (0, 9):  # recall @ 1 and @ 10
            print('\trecall @ %d = %f' %
                  (k + 1, topK_correct_num[k] / total_num))

print('Final recall on the whole test set')
for k in (0, 9):  # recall @ 1 and @ 10
    print('\trecall @ %d = %f' % (k + 1, topK_correct_num[k] / total_num))
################################################################################
# 示例#3 (Example #3) — stray "0" below the separator was a scraping artifact
# Build the training set: one (crop name, description, bbox spatial feature,
# image name, context feature, local feature) tuple per query description.
imset = set(util.io.load_str_list(trn_imlist_file))
vocab_dict = retriever.build_vocab_dict_from_file(vocab_file)
query_dict = util.io.load_json(query_file)
imsize_dict = util.io.load_json(imsize_dict_file)
imcrop_bbox_dict = util.io.load_json(imcrop_bbox_dict_file)

train_pairs = []
# .items() instead of Python-2-only .iteritems(); the file already uses
# the print() function, and .items() behaves correctly on both versions.
for imcrop_name, des in query_dict.items():
    # The image name is everything before the first underscore.
    imname = imcrop_name.split('_', 1)[0]
    if imname not in imset:
        continue  # crop does not belong to a training image
    imsize = np.array(imsize_dict[imname])
    bbox = np.array(imcrop_bbox_dict[imcrop_name])
    # spatial info: 8-D spatial feature of the ground-truth box
    bbox_feat = retriever.compute_spatial_feat(bbox, imsize)
    context_feature = np.load(cached_context_features_dir + imname + '_fc7.npy')
    # BUGFIX: the original assigned `local_feaure` (typo) but the tuple below
    # reads `local_feature`, which raised NameError on the first iteration.
    # NOTE(review): directory variable is spelled `cache_local_features_dir`
    # here but `cached_local_features_dir` in the near-duplicate example
    # later in this file — confirm which name the surrounding code defines.
    local_feature = np.load(cache_local_features_dir + imcrop_name + '.png_fc7.npy')
    train_pairs += [(imcrop_name, d, bbox_feat, imname, context_feature, local_feature) for d in des]

# random shuffle training pairs (fixed seed -> reproducible order)
np.random.seed(3)
perm_idx = np.random.permutation(np.arange(len(train_pairs)))
train_pairs = [train_pairs[n] for n in perm_idx]

# Truncate so the pair list divides evenly into N_batch-sized batches.
num_train_pairs = len(train_pairs)
num_train_pairs = num_train_pairs - num_train_pairs % N_batch
train_pairs = train_pairs[:num_train_pairs]
num_batch = int(num_train_pairs // N_batch)

imcrop_list = []
K = 100  # evaluate recall at 1, 2, ..., K
topK_correct_num = np.zeros(K, dtype=np.float32)  # correct hits per rank k
total_num = 0  # number of (crop, sentence) queries evaluated
# Evaluate grounding recall over every test image.
for n_im in range(num_im):
    print('testing image %d / %d' % (n_im, num_im))
    imname = imlist[n_im]
    imcrop_names = imcrop_dict[imname]  # annotated crops of this image
    candidate_boxes = candidate_boxes_dict[imname]  # proposal boxes

    im = skimage.io.imread(image_dir + imname + '.jpg')
    imsize = np.array([im.shape[1], im.shape[0]])  # [width, height]

    # Compute local descriptors (local image feature + spatial feature)
    descriptors = retriever.compute_descriptors_edgebox(captioner, im,
                                                        candidate_boxes)
    spatial_feats = retriever.compute_spatial_feat(candidate_boxes, imsize)
    descriptors = np.concatenate((descriptors, spatial_feats), axis=1)

    num_imcrop = len(imcrop_names)
    num_proposal = candidate_boxes.shape[0]
    for n_imcrop in range(num_imcrop):
        imcrop_name = imcrop_names[n_imcrop]
        if imcrop_name not in query_dict:
            continue  # no query sentences annotated for this crop
        gt_bbox = np.array(imcrop_bbox_dict[imcrop_name])
        # IoU of every proposal against the ground-truth box.
        IoUs = retriever.compute_iou(candidate_boxes, gt_bbox)
        for n_sentence in range(len(query_dict[imcrop_name])):
            sentence = query_dict[imcrop_name][n_sentence]
            # Scores for each candidate region
            if use_context:
                # NOTE(review): this call is TRUNCATED — the rest of the
                # argument list (and the remainder of this example) was lost
                # when the snippets were concatenated; restore it from the
                # original source before running this block.
                scores = retriever.score_descriptors_context(descriptors, sentence,
# Near-duplicate of the earlier training-pair builder, but this variant
# skips crops whose cached CNN features are missing on disk.
imset = set(util.io.load_str_list(trn_imlist_file))
vocab_dict = retriever.build_vocab_dict_from_file(vocab_file)
query_dict = util.io.load_json(query_file)
imsize_dict = util.io.load_json(imsize_dict_file)
imcrop_bbox_dict = util.io.load_json(imcrop_bbox_dict_file)

train_pairs = []
# .items() instead of Python-2-only .iteritems(); the file already uses
# the print() function, and .items() behaves correctly on both versions.
for imcrop_name, des in query_dict.items():
    # The image name is everything before the first underscore.
    imname = imcrop_name.split('_', 1)[0]
    if imname not in imset:
        continue  # crop does not belong to a training image
    imsize = np.array(imsize_dict[imname])
    bbox = np.array(imcrop_bbox_dict[imcrop_name])
    # spatial info: 8-D spatial feature of the ground-truth box
    bbox_feat = retriever.compute_spatial_feat(bbox, imsize)
    # Guard clauses (flattened from the original nested ifs): only build
    # pairs when both cached feature files exist.
    context_path = cached_context_features_dir + imname + '_fc7.npy'
    local_path = cached_local_features_dir + imcrop_name + '.png_fc7.npy'
    if not (os.path.isfile(context_path) and os.path.isfile(local_path)):
        continue
    context_feature = np.load(context_path)
    local_feature = np.load(local_path)
    train_pairs += [(imcrop_name, d, bbox_feat, imname, context_feature,
                     local_feature) for d in des]

# random shuffle training pairs (fixed seed -> reproducible order)
np.random.seed(3)
perm_idx = np.random.permutation(np.arange(len(train_pairs)))
train_pairs = [train_pairs[n] for n in perm_idx]

# Truncate so the pair list divides evenly into N_batch-sized batches.
num_train_pairs = len(train_pairs)
num_train_pairs = num_train_pairs - num_train_pairs % N_batch
train_pairs = train_pairs[:num_train_pairs]
num_batch = int(num_train_pairs // N_batch)