# NOTE(review): this is a fragment of a larger routine.  Names such as
# `imname`, `im`, `imcrop_val`, `sess`, `scores` and the accumulators are
# defined outside the visible code; indentation is relative to the fragment.
for imcrop_name, _, description in flat_query_dict[imname]:
    # Ground-truth mask for this query, plus its boolean label map.
    mask = load_gt_mask(mask_dir + imcrop_name + '.mat').astype(np.float32)
    labels = (mask > 0)
    processed_labels = im_processing.resize_and_pad(mask, input_H, input_W) > 0

    # Encode the description into column 0 of the text batch and score it.
    text_seq_val[:, 0] = text_processing.preprocess_sentence(
        description, vocab_dict, T)
    scores_val = np.squeeze(
        sess.run(scores,
                 feed_dict={
                     text_seq_batch: text_seq_val,
                     imcrop_batch: imcrop_val
                 }))

    # Evaluate the segmentation performance of using bounding box segmentation
    pred_raw = (scores_val >= score_thresh).astype(np.float32)
    predicts = im_processing.resize_and_crop(pred_raw, im.shape[0], im.shape[1])
    I, U = eval_tools.compute_mask_IU(predicts, labels)
    cum_I += I
    cum_U += U
    this_IoU = I/U
    # Count this query as correct for every IoU threshold it reaches.
    for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
        seg_correct[n_eval_iou] += (this_IoU >= eval_seg_iou)
    seg_total += 1

# Print results
# NOTE(review): presumably this summary runs once, after the enclosing
# per-image loop has finished — confirm against the full file.
print('Final results on the whole test set')
result_str = ''
for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
    result_str += 'precision@%s = %f\n' % \
        (str(eval_seg_iou), seg_correct[n_eval_iou]/seg_total)
result_str += 'overall IoU = %f\n' % (cum_I/cum_U)
def test(reader, snapshot_file, visual_feat_dir):
    """Run the restored model over every batch of ``reader`` and report IoU.

    For each batch the prediction is thresholded, resized to the size of the
    ground-truth mask and scored with ``eval_tools.compute_mask_IU``.  The
    resized prediction and its IoU are written to ``./results/<sent_id>.mat``.
    When ``FLAGS.dcrf`` is set, a DenseCRF-refined prediction is scored too.
    The cumulative IoU is printed once all batches have been processed.

    Args:
        reader: batch source exposing ``num_batch`` and ``read_batch``.
        snapshot_file: checkpoint path template; it is formatted with
            ``FLAGS.max_iter`` before the weights are restored.
        visual_feat_dir: path prefix of the per-image ``.npz`` feature files.
    """
    model = Model(mode='test',
                  vocab_size=vocab_size,
                  H=FLAGS.H,
                  W=FLAGS.W,
                  batch_size=FLAGS.batch_size,
                  num_steps=FLAGS.num_steps)

    score_thresh = 1e-9
    eval_seg_iou_list = [.5, .6, .7, .8, .9]
    cum_I = cum_U = cum_I_dcrf = cum_U_dcrf = 0
    seg_total = 0
    seg_correct = [0 for _ in range(len(eval_seg_iou_list))]
    if FLAGS.dcrf:
        seg_correct_dcrf = [0 for _ in range(len(eval_seg_iou_list))]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_loader = tf.train.Saver()
    snapshot_loader.restore(sess, snapshot_file % (FLAGS.max_iter))

    for n_iter in range(reader.num_batch):
        sys.stdout.write('Testing %d/%d\r' % (n_iter + 1, reader.num_batch))
        sys.stdout.flush()

        batch = reader.read_batch(is_log=False)
        text = batch['text_batch']
        im_name = str(batch['im_name_batch'])
        mask = batch['mask_batch'].astype(np.float32)
        sent_id = batch['sent_id']
        visual_feat = np.load(visual_feat_dir + im_name + '.npz')['arr_0']

        _, pred_val, sigm_val = sess.run(
            [model.score, model.pred, model.sigm],
            feed_dict={
                model.words: np.expand_dims(text, axis=0),
                model.visual_feat: visual_feat
            })

        pred_val = np.squeeze(pred_val)
        pred_raw = (pred_val >= score_thresh).astype(np.float32)
        predicts = im_processing.resize_and_crop(pred_raw, mask.shape[0],
                                                 mask.shape[1])
        I, U = eval_tools.compute_mask_IU(predicts, mask)
        # float() keeps this a true division even without
        # `from __future__ import division`.
        iou = float(I) / U
        cum_I += I
        cum_U += U
        for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
            seg_correct[n_eval_iou] += (iou >= eval_seg_iou)

        if FLAGS.dcrf:
            sigm_val = np.squeeze(sigm_val)
            d = densecrf.DenseCRF2D(FLAGS.W, FLAGS.H, 2)
            # Unary energies: channel 0 is -log(1 - p), channel 1 is -log(p).
            # Named separately from the IoU `U` so it is not clobbered.
            energy_pos = np.expand_dims(-np.log(sigm_val), axis=0)
            energy_neg = np.expand_dims(-np.log(1 - sigm_val), axis=0)
            unary = np.concatenate((energy_neg, energy_pos), axis=0)
            unary = unary.reshape((2, -1))
            d.setUnaryEnergy(unary)
            d.addPairwiseGaussian(sxy=3, compat=3)
            # NOTE(review): `im` is not assigned anywhere in this function;
            # unless it is a module-level global this raises NameError.
            # Confirm where the RGB image should come from.
            d.addPairwiseBilateral(sxy=20, srgb=3, rgbim=im, compat=10)
            Q = d.inference(5)
            pred_raw_dcrf = np.argmax(Q, axis=0).reshape(
                (FLAGS.H, FLAGS.W)).astype(np.float32)
            predicts_dcrf = im_processing.resize_and_crop(
                pred_raw_dcrf, mask.shape[0], mask.shape[1])
            # Score the CRF-refined prediction (the raw `predicts` was
            # previously scored here by mistake).
            I_dcrf, U_dcrf = eval_tools.compute_mask_IU(predicts_dcrf, mask)
            iou_dcrf = float(I_dcrf) / U_dcrf
            cum_I_dcrf += I_dcrf
            cum_U_dcrf += U_dcrf
            for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
                seg_correct_dcrf[n_eval_iou] += (iou_dcrf >= eval_seg_iou)

        seg_total += 1
        # The saved IoU belongs to the saved (non-CRF) mask.
        sio.savemat('./results/%d.mat' % sent_id, {
            'mask': predicts.astype(bool),
            'iou': iou
        }, do_compression=True)

    msg = 'cumulative IoU = %f' % (cum_I / cum_U)
    if FLAGS.dcrf:
        msg += '\tcumulative IoU (dcrf) = %f' % (cum_I_dcrf / cum_U_dcrf)
    print(msg)
def test(modelname, iter, dataset, weights, setname, dcrf, mu, tfmodel_folder):
    """Evaluate a CMSA checkpoint on one data split and print the metrics.

    Prints precision at IoU thresholds 0.5-0.9, the overall IoU (cumulative
    intersection over cumulative union) and the mean per-sample IoU, with
    and (optionally) without DenseCRF post-processing.

    Args:
        modelname: used to build the checkpoint file name.
        iter: not used by this function.
        dataset: dataset name; selects the data folder, the checkpoint
            folder and the vocabulary size.
        weights: forwarded to ``CMSA_model``.
        setname: split name; selects the batch folder and file prefix.
        dcrf: when truthy, also evaluate DenseCRF-refined predictions.
        mu: value subtracted from the channel-reversed, resized image.
        tfmodel_folder: NOTE(review): overwritten below with a path derived
            from ``dataset``, so the caller's value is ignored — confirm
            whether that is intended.
    """
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    tfmodel_folder = './' + dataset + '/tfmodel/CMSA'
    pretrained_model = os.path.join(
        tfmodel_folder, dataset + '_' + modelname + '_release' + '.tfmodel')

    score_thresh = 1e-9
    eval_seg_iou_list = [.5, .6, .7, .8, .9]
    cum_I, cum_U = 0, 0
    mean_IoU, mean_dcrf_IoU = 0, 0
    seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    if dcrf:
        cum_I_dcrf, cum_U_dcrf = 0, 0
        seg_correct_dcrf = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    seg_total = 0.
    H, W = 320, 320
    vocab_size = 8803 if dataset == 'referit' else 12112

    model = CMSA_model(H=H, W=W, mode='eval', vocab_size=vocab_size,
                       weights=weights)

    # Load pretrained model
    snapshot_restorer = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_restorer.restore(sess, pretrained_model)
    reader = data_reader.DataReader(data_folder, data_prefix, shuffle=False)

    NN = reader.num_batch
    # One progress tick per 1/50 of the batches.  Clamp to 1 so readers with
    # fewer than 50 batches do not hit a modulo-by-zero.
    tick = max(NN // 50, 1)
    print('test in', dataset, setname)
    for n_iter in range(NN):
        if n_iter % tick == 0:
            n_tick = n_iter // tick
            # Every fifth tick prints its ordinal, the others print a dot.
            if n_tick % 5 == 0:
                sys.stdout.write(str(n_tick // 5))
            else:
                sys.stdout.write('.')
            sys.stdout.flush()

        batch = reader.read_batch(is_log=False)
        text = batch['text_batch']
        im = batch['im_batch']
        mask = batch['mask_batch'].astype(np.float32)

        # uint8 copy is kept for DenseCRF; the float copy is fed to the net.
        proc_im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, H, W))
        proc_im_ = proc_im.astype(np.float32)
        proc_im_ = proc_im_[:, :, ::-1]
        proc_im_ -= mu

        _, up_val, sigm_val = sess.run(
            [model.pred, model.up, model.sigm],
            feed_dict={
                model.words: np.expand_dims(text, axis=0),
                model.im: np.expand_dims(proc_im_, axis=0)
            })

        up_val = np.squeeze(up_val)
        pred_raw = (up_val >= score_thresh).astype(np.float32)
        predicts = im_processing.resize_and_crop(pred_raw, mask.shape[0],
                                                 mask.shape[1])
        if dcrf:
            # Dense CRF post-processing
            sigm_val = np.squeeze(sigm_val)
            d = densecrf.DenseCRF2D(W, H, 2)
            # Unary energies: channel 0 is -log(1 - p), channel 1 is -log(p).
            energy_pos = np.expand_dims(-np.log(sigm_val), axis=0)
            energy_neg = np.expand_dims(-np.log(1 - sigm_val), axis=0)
            unary = np.concatenate((energy_neg, energy_pos), axis=0)
            unary = unary.reshape((2, -1))
            d.setUnaryEnergy(unary)
            d.addPairwiseGaussian(sxy=3, compat=3)
            d.addPairwiseBilateral(sxy=20, srgb=3, rgbim=proc_im, compat=10)
            Q = d.inference(5)
            pred_raw_dcrf = np.argmax(Q, axis=0).reshape(
                (H, W)).astype(np.float32)
            predicts_dcrf = im_processing.resize_and_crop(
                pred_raw_dcrf, mask.shape[0], mask.shape[1])

        I, U = eval_tools.compute_mask_IU(predicts, mask)
        # float() keeps this a true division even without
        # `from __future__ import division`; the same value now feeds both
        # the mean IoU and the threshold comparison.
        iou = float(I) / U
        mean_IoU += iou
        cum_I += I
        cum_U += U
        for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
            seg_correct[n_eval_iou] += (iou >= eval_seg_iou)

        if dcrf:
            I_dcrf, U_dcrf = eval_tools.compute_mask_IU(predicts_dcrf, mask)
            iou_dcrf = float(I_dcrf) / U_dcrf
            mean_dcrf_IoU += iou_dcrf
            cum_I_dcrf += I_dcrf
            cum_U_dcrf += U_dcrf
            for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
                seg_correct_dcrf[n_eval_iou] += (iou_dcrf >= eval_seg_iou)
        seg_total += 1

    if seg_total == 0:
        # Nothing was evaluated; the ratios below would divide by zero.
        print('No batches were evaluated.')
        return

    # Print results
    print('Segmentation evaluation (without DenseCRF):')
    result_str = ''
    for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
        result_str += 'precision@%s = %f\n' % \
            (str(eval_seg_iou), seg_correct[n_eval_iou] / seg_total)
    result_str += 'overall IoU = %f; mean IoU = %f\n' % (
        cum_I / cum_U, mean_IoU / seg_total)
    print(result_str)
    if dcrf:
        print('Segmentation evaluation (with DenseCRF):')
        result_str = ''
        for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
            result_str += 'precision@%s = %f\n' % \
                (str(eval_seg_iou), seg_correct_dcrf[n_eval_iou] / seg_total)
        result_str += 'overall IoU = %f; mean IoU = %f\n' % (
            cum_I_dcrf / cum_U_dcrf, mean_dcrf_IoU / seg_total)
        print(result_str)
def test(iter, dataset, visualize, setname, dcrf, mu, tfmodel_folder,
         pre_emb=False, use_tree=False, neg_num=0.1):
    """Evaluate an LSCM checkpoint on one data split and print the metrics.

    Prints precision at IoU thresholds 0.5-0.9, the overall IoU (cumulative
    intersection over cumulative union) and the mean per-sample IoU, with
    and (optionally) without DenseCRF post-processing.

    Args:
        iter: training iteration; selects the checkpoint file and the
            visualization folder name.
        dataset: dataset name; selects data paths, vocabulary size and the
            embedding name.
        visualize: when truthy, create the visualization folder and call
            ``visualize_seg`` for every sample.
        setname: split name; selects the batch folder and file prefix.
        dcrf: when truthy, also evaluate DenseCRF-refined predictions.
        mu: value subtracted from the channel-reversed, resized image.
        tfmodel_folder: folder that contains the checkpoint.
        pre_emb: when truthy, pass ``emb_name`` to ``LSCM_model``.
        use_tree: when truthy, also feed the graph adjacency and tree height.
        neg_num: when different from 0.1, graph entries below 0.5 are
            replaced by this value.
    """
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    if visualize:
        save_dir = './' + dataset + '/visualization/' + str(iter) + '/'
        if not os.path.isdir(save_dir):
            os.makedirs(save_dir)
    weights = os.path.join(tfmodel_folder,
                           dataset + '_iter_' + str(iter) + '.tfmodel')

    score_thresh = 1e-9
    eval_seg_iou_list = [.5, .6, .7, .8, .9]
    cum_I, cum_U = 0, 0
    mean_IoU, mean_dcrf_IoU = 0, 0
    seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    if dcrf:
        cum_I_dcrf, cum_U_dcrf = 0, 0
        seg_correct_dcrf = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    seg_total = 0.
    H, W = 320, 320
    vocab_size = 8226 if dataset == 'referit' else 21692
    emb_name = 'referit' if dataset == 'referit' else 'Gref'

    if pre_emb:
        # use pretrained embedding
        print("Use pretrained Embeddings.")
        model = LSCM_model(num_steps=30, H=H, W=W, mode='eval',
                           vocab_size=vocab_size, emb_name=emb_name)
    else:
        model = LSCM_model(num_steps=30, H=H, W=W, mode='eval',
                           vocab_size=vocab_size)

    # Load pretrained model
    snapshot_restorer = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_restorer.restore(sess, weights)
    reader = data_reader.DataReader(data_folder, data_prefix, shuffle=False)

    NN = reader.num_batch
    # One progress tick per 1/50 of the batches.  Clamp to 1 so readers with
    # fewer than 50 batches do not hit a modulo-by-zero.
    tick = max(NN // 50, 1)
    for n_iter in range(NN):
        if n_iter % tick == 0:
            n_tick = n_iter // tick
            # Every fifth tick prints its ordinal, the others print a dot.
            if n_tick % 5 == 0:
                sys.stdout.write(str(n_tick // 5))
            else:
                sys.stdout.write('.')
            sys.stdout.flush()

        batch = reader.read_batch(is_log=False)
        text = batch['text_batch']
        im = batch['im_batch']
        mask = batch['mask_batch'].astype(np.float32)
        valid_idx = np.zeros([1], dtype=np.int32)
        graph = batch['graph_batch']
        height = batch['height_batch']
        # Index of the first non-zero token; stays 0 if every token is 0.
        for idx in range(text.shape[0]):
            if text[idx] != 0:
                valid_idx[0] = idx
                break
        if neg_num != 0.1:
            graph[graph < 0.5] = neg_num

        # uint8 copy is kept for DenseCRF; the float copy is fed to the net.
        proc_im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, H, W))
        proc_im_ = proc_im.astype(np.float32)
        proc_im_ = proc_im_[:, :, ::-1]
        proc_im_ -= mu

        # The tree inputs are only fed when requested; the rest is shared.
        feed_dict = {
            model.words: np.expand_dims(text, axis=0),
            model.im: np.expand_dims(proc_im_, axis=0),
            model.valid_idx: np.expand_dims(valid_idx, axis=0)
        }
        if use_tree:
            feed_dict[model.graph_adj] = np.expand_dims(graph, axis=0)
            feed_dict[model.tree_height] = np.expand_dims(height, axis=0)
        _, up_val, sigm_val = sess.run(
            [model.pred, model.up, model.sigm], feed_dict=feed_dict)

        up_val = np.squeeze(up_val)
        pred_raw = (up_val >= score_thresh).astype(np.float32)
        predicts = im_processing.resize_and_crop(pred_raw, mask.shape[0],
                                                 mask.shape[1])
        if dcrf:
            # Dense CRF post-processing
            sigm_val = np.squeeze(sigm_val)
            d = densecrf.DenseCRF2D(W, H, 2)
            # Unary energies: channel 0 is -log(1 - p), channel 1 is -log(p).
            energy_pos = np.expand_dims(-np.log(sigm_val), axis=0)
            energy_neg = np.expand_dims(-np.log(1 - sigm_val), axis=0)
            unary = np.concatenate((energy_neg, energy_pos), axis=0)
            unary = unary.reshape((2, -1))
            d.setUnaryEnergy(unary)
            d.addPairwiseGaussian(sxy=3, compat=3)
            d.addPairwiseBilateral(sxy=20, srgb=3, rgbim=proc_im, compat=10)
            Q = d.inference(5)
            pred_raw_dcrf = np.argmax(Q, axis=0).reshape(
                (H, W)).astype(np.float32)
            predicts_dcrf = im_processing.resize_and_crop(
                pred_raw_dcrf, mask.shape[0], mask.shape[1])

        if visualize:
            sent = batch['sent_batch'][0]
            visualize_seg(im, mask, predicts, sent)
            if dcrf:
                visualize_seg(im, mask, predicts_dcrf, sent)

        I, U = eval_tools.compute_mask_IU(predicts, mask)
        # float() keeps this a true division even without
        # `from __future__ import division`; the same value now feeds both
        # the mean IoU and the threshold comparison.
        iou = float(I) / U
        mean_IoU += iou
        cum_I += I
        cum_U += U
        for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
            seg_correct[n_eval_iou] += (iou >= eval_seg_iou)

        if dcrf:
            I_dcrf, U_dcrf = eval_tools.compute_mask_IU(predicts_dcrf, mask)
            iou_dcrf = float(I_dcrf) / U_dcrf
            mean_dcrf_IoU += iou_dcrf
            cum_I_dcrf += I_dcrf
            cum_U_dcrf += U_dcrf
            for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
                seg_correct_dcrf[n_eval_iou] += (iou_dcrf >= eval_seg_iou)
        seg_total += 1

    if seg_total == 0:
        # Nothing was evaluated; the ratios below would divide by zero.
        print('No batches were evaluated.')
        return

    # Print results
    print('Segmentation evaluation (without DenseCRF):')
    result_str = ''
    for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
        result_str += 'precision@%s = %f\n' % \
            (str(eval_seg_iou), seg_correct[n_eval_iou] / seg_total)
    result_str += 'overall IoU = %f; mean IoU = %f\n' % (
        cum_I / cum_U, mean_IoU / seg_total)
    print(result_str)
    if dcrf:
        print('Segmentation evaluation (with DenseCRF):')
        result_str = ''
        for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
            result_str += 'precision@%s = %f\n' % \
                (str(eval_seg_iou), seg_correct_dcrf[n_eval_iou] / seg_total)
        result_str += 'overall IoU = %f; mean IoU = %f\n' % (
            cum_I_dcrf / cum_U_dcrf, mean_dcrf_IoU / seg_total)
        print(result_str)
def test(modelname, iter, dataset, visualize, weights, setname, dcrf, mu):
    """Evaluate a BRI checkpoint on one data split and print the metrics.

    The cumulative IoU is printed after every batch; precision at IoU
    thresholds 0.5-0.9 and the overall IoU are printed at the end, with and
    (when ``dcrf`` is truthy) without DenseCRF post-processing.

    Args:
        modelname: model identifier; only 'BRI' is accepted.
        iter: training iteration, used in the checkpoint and folder names.
        dataset: dataset name; selects data paths and vocabulary size.
        visualize: when truthy, the visualization folder is created.
        weights: used in the checkpoint name and forwarded to ``BRI_model``.
        setname: split name; selects the batch folder and file prefix.
        dcrf: when truthy, also evaluate DenseCRF-refined predictions.
        mu: value subtracted from the channel-reversed, resized image.

    Raises:
        ValueError: if ``modelname`` is not 'BRI'.
    """
    data_prefix = dataset + '_' + setname
    data_folder = './' + dataset + '/' + setname + '_batch/'
    if visualize:
        save_dir = ''.join(['./', dataset, '/visualization/', modelname, '_',
                            str(iter), '/'])
        if not os.path.isdir(save_dir):
            os.makedirs(save_dir)
    pretrained_model = ''.join([
        './', dataset, '/tfmodel_BRI/', dataset, '_', weights, '_',
        modelname, '_iter_', str(iter), '.tfmodel'
    ])

    score_thresh = 1e-9
    eval_seg_iou_list = [.5, .6, .7, .8, .9]
    num_thresh = len(eval_seg_iou_list)
    cum_I, cum_U = 0, 0
    seg_correct = np.zeros(num_thresh, dtype=np.int32)
    if dcrf:
        cum_I_dcrf, cum_U_dcrf = 0, 0
        seg_correct_dcrf = np.zeros(num_thresh, dtype=np.int32)
    seg_total = 0.
    H, W = 320, 320
    vocab_size = 8803 if dataset == 'referit' else 12112

    # Only one architecture is supported; reject anything else up front.
    if modelname != 'BRI':
        raise ValueError('Unknown model name %s' % (modelname))
    model = BRI_model(H=H, W=W, mode='eval', vocab_size=vocab_size,
                      weights=weights)

    # Load pretrained model
    snapshot_restorer = tf.train.Saver()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    snapshot_restorer.restore(sess, pretrained_model)
    reader = data_reader.DataReader(data_folder, data_prefix, shuffle=False)

    for n_iter in range(reader.num_batch):
        batch = reader.read_batch()
        text = batch['text_batch']
        im = batch['im_batch']
        mask = batch['mask_batch'].astype(np.float32)
        out_h, out_w = mask.shape[0], mask.shape[1]

        # uint8 copy is kept for DenseCRF; the float copy is fed to the net.
        proc_im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, H, W))
        proc_im_ = proc_im.astype(np.float32)
        proc_im_ = proc_im_[:, :, ::-1]
        proc_im_ -= mu

        fetches = [model.pred, model.up, model.sigm]
        scores_val, up_val, sigm_val = sess.run(
            fetches,
            feed_dict={
                model.words: np.expand_dims(text, axis=0),
                model.im: np.expand_dims(proc_im_, axis=0)
            })

        up_val = np.squeeze(up_val)
        pred_raw = (up_val >= score_thresh).astype(np.float32)
        predicts = im_processing.resize_and_crop(pred_raw, out_h, out_w)

        if dcrf:
            # Dense CRF post-processing
            sigm_val = np.squeeze(sigm_val)
            d = Dcrf.DenseCRF2D(W, H, 2)
            U = np.expand_dims(-np.log(sigm_val), axis=0)
            U_ = np.expand_dims(-np.log(1 - sigm_val), axis=0)
            unary = np.concatenate((U_, U), axis=0).reshape((2, -1))
            d.setUnaryEnergy(unary)
            d.addPairwiseGaussian(sxy=3, compat=3)
            d.addPairwiseBilateral(sxy=20, srgb=3, rgbim=proc_im, compat=10)
            Q = d.inference(5)
            pred_raw_dcrf = np.argmax(Q, axis=0).reshape((H, W)).astype(
                np.float32)
            predicts_dcrf = im_processing.resize_and_crop(
                pred_raw_dcrf, out_h, out_w)

        I, U = eval_tools.compute_mask_IU(predicts, mask)
        cum_I += I
        cum_U += U
        msg = 'cumulative IoU = %f' % (cum_I/cum_U)
        iou = I/U
        for k, thresh in enumerate(eval_seg_iou_list):
            seg_correct[k] += (iou >= thresh)

        if dcrf:
            I_dcrf, U_dcrf = eval_tools.compute_mask_IU(predicts_dcrf, mask)
            cum_I_dcrf += I_dcrf
            cum_U_dcrf += U_dcrf
            msg += '\tcumulative IoU (dcrf) = %f' % (cum_I_dcrf/cum_U_dcrf)
            iou_dcrf = I_dcrf/U_dcrf
            for k, thresh in enumerate(eval_seg_iou_list):
                seg_correct_dcrf[k] += (iou_dcrf >= thresh)
        print(msg)
        seg_total += 1

    # Print results
    print('Segmentation evaluation (without DenseCRF):')
    result_str = ''.join(
        'precision@%s = %f\n' % (str(thresh), seg_correct[k]/seg_total)
        for k, thresh in enumerate(eval_seg_iou_list))
    result_str += 'overall IoU = %f\n' % (cum_I/cum_U)
    print(result_str)
    if dcrf:
        print('Segmentation evaluation (with DenseCRF):')
        result_str = ''.join(
            'precision@%s = %f\n' % (str(thresh),
                                     seg_correct_dcrf[k]/seg_total)
            for k, thresh in enumerate(eval_seg_iou_list))
        result_str += 'overall IoU = %f\n' % (cum_I_dcrf/cum_U_dcrf)
        print(result_str)
# NOTE(review): this is a fragment of a larger routine.  Names such as
# `imname`, `im`, `imcrop_val`, `sess`, `scores` and the accumulators are
# defined outside the visible code; indentation is relative to the fragment.
for imcrop_name, _, description in flat_query_dict[imname]:
    # The last four characters of the crop name are dropped before the
    # '.mat' suffix is appended (presumably a file extension — confirm).
    mask = load_gt_mask(mask_dir + imcrop_name[:-4] + '.mat').astype(np.float32)
    labels = (mask > 0)
    processed_labels = im_processing.resize_and_pad(mask, input_H, input_W) > 0

    # Encode the description into column 0 of the text batch and score it.
    text_seq_val[:, 0] = text_processing.preprocess_sentence(
        description, vocab_dict, T)
    scores_val = np.squeeze(
        sess.run(scores,
                 feed_dict={
                     text_seq_batch: text_seq_val,
                     imcrop_batch: imcrop_val
                 }))

    # Evaluate the segmentation performance of using bounding box segmentation
    pred_raw = (scores_val >= score_thresh).astype(np.float32)
    predicts = im_processing.resize_and_crop(pred_raw, im.shape[0], im.shape[1])
    I, U = eval_tools.compute_mask_IU(predicts, labels)
    cum_I += I
    cum_U += U
    this_IoU = I/U
    # Count this query as correct for every IoU threshold it reaches.
    for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
        seg_correct[n_eval_iou] += (this_IoU >= eval_seg_iou)
    seg_total += 1

# Print results
# NOTE(review): presumably this summary runs once, after the enclosing
# per-image loop has finished — confirm against the full file.
print('Final results on the whole test set')
result_str = ''
for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
    result_str += 'precision@%s = %f\n' % \
        (str(eval_seg_iou), seg_correct[n_eval_iou]/seg_total)
result_str += 'overall IoU = %f\n' % (cum_I/cum_U)
def inference():
    """Evaluate the Caffe segmentation model configured by ``test_config``.

    Writes the test prototxt, loads the pretrained network, runs it on every
    (image, description) pair found in the annotation files and prints
    precision at IoU thresholds 0.5-0.9 together with the overall IoU
    (cumulative intersection over cumulative union).
    """
    with open('./seg_model/test.prototxt', 'w') as f:
        f.write(str(seg_model.generate_model('val', test_config.N)))

    caffe.set_device(test_config.gpu_id)
    caffe.set_mode_gpu()

    # Load pretrained model
    net = caffe.Net('./seg_model/test.prototxt',
                    test_config.pretrained_model,
                    caffe.TEST)

    ############################################################################
    # Load annotations and bounding box proposals
    ############################################################################

    # Context managers close each annotation file as soon as it is parsed.
    with open(test_config.query_file) as f:
        query_dict = json.load(f)
    with open(test_config.bbox_file) as f:
        bbox_dict = json.load(f)
    with open(test_config.imcrop_file) as f:
        imcrop_dict = json.load(f)
    # Loaded for parity with the original code; not used further below.
    with open(test_config.imsize_file) as f:
        imsize_dict = json.load(f)
    imlist = list({name.split('_', 1)[0] + '.jpg' for name in query_dict})
    vocab_dict = text_processing.load_vocab_dict_from_file(
        test_config.vocab_file)

    ############################################################################
    # Flatten the annotations
    ############################################################################

    flat_query_dict = {imname: [] for imname in imlist}
    for imname in imlist:
        for imcrop_name in imcrop_dict[imname]:
            gt_bbox = bbox_dict[imcrop_name]
            if imcrop_name not in query_dict:
                continue
            for description in query_dict[imcrop_name]:
                flat_query_dict[imname].append(
                    (imcrop_name, gt_bbox, description))

    ############################################################################
    # Testing
    ############################################################################

    cum_I, cum_U = 0.0, 0.0
    eval_seg_iou_list = [0.5, 0.6, 0.7, 0.8, 0.9]
    seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    seg_total = 0.0

    # Pre-allocate arrays
    imcrop_val = np.zeros((test_config.N, test_config.input_H,
                           test_config.input_W, 3), dtype=np.float32)
    text_seq_val = np.zeros((test_config.T, test_config.N), dtype=np.int32)

    for imname in tqdm(imlist):
        # Extract visual features from all proposals
        im = skimage.io.imread(test_config.image_dir + imname)
        processed_im = skimage.img_as_ubyte(
            im_processing.resize_and_pad(im, test_config.input_H,
                                         test_config.input_W))
        if processed_im.ndim == 2:
            # Replicate a single-channel image across three channels.
            processed_im = np.tile(processed_im[:, :, np.newaxis], (1, 1, 3))

        imcrop_val[...] = (processed_im.astype(np.float32)
                           - seg_model.channel_mean)
        imcrop_val_trans = imcrop_val.transpose((0, 3, 1, 2))

        # Extract spatial features
        spatial_val = processing_tools.generate_spatial_batch(
            test_config.N, test_config.featmap_H, test_config.featmap_W)
        spatial_val = spatial_val.transpose((0, 3, 1, 2))

        for imcrop_name, _, description in flat_query_dict[imname]:
            mask = load_gt_mask(
                test_config.mask_dir + imcrop_name + '.mat'
            ).astype(np.float32)
            labels = (mask > 0)
            processed_labels = im_processing.resize_and_pad(
                mask, test_config.input_H, test_config.input_W)
            processed_labels = processed_labels > 0

            text_seq_val[:, 0] = text_processing.preprocess_sentence(
                description, vocab_dict, test_config.T)
            cont_val = text_processing.create_cont(text_seq_val)

            net.blobs['language'].data[...] = text_seq_val
            net.blobs['cont'].data[...] = cont_val
            net.blobs['image'].data[...] = imcrop_val_trans
            net.blobs['spatial'].data[...] = spatial_val
            net.blobs['label'].data[...] = processed_labels

            net.forward()

            upscores = net.blobs['upscores'].data[...].copy()
            upscores = np.squeeze(upscores)

            # Evaluate the segmentation performance of using bounding box
            # segmentation
            pred_raw = (upscores >= test_config.score_thresh).astype(
                np.float32)
            predicts = im_processing.resize_and_crop(pred_raw, im.shape[0],
                                                     im.shape[1])
            I, U = eval_tools.compute_mask_IU(predicts, labels)
            cum_I += I
            cum_U += U
            this_IoU = I/float(U)
            for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
                seg_correct[n_eval_iou] += (this_IoU >= eval_seg_iou)
            seg_total += 1

    # Print results
    print('Final results on the whole test set')
    result_str = ''
    for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
        result_str += 'precision@%s = %f\n' % \
            (str(eval_seg_iou), seg_correct[n_eval_iou]/seg_total)
    result_str += 'overall IoU = %f\n' % (cum_I/cum_U)
    print(result_str)
def inference(config):
    """Evaluate the Caffe segmentation model described by ``config``.

    Writes the test prototxt, loads the pretrained network, runs it on every
    (image, description) pair found in the annotation files and prints
    precision at IoU thresholds 0.5-0.9 together with the overall IoU
    (cumulative intersection over cumulative union).

    Args:
        config: settings object.  The attributes read here are ``gpu_id``,
            ``pretrained_model``, ``query_file``, ``bbox_file``,
            ``imcrop_file``, ``imsize_file``, ``vocab_file``, ``N``, ``T``,
            ``input_H``, ``input_W``, ``featmap_H``, ``featmap_W``,
            ``image_dir``, ``mask_dir`` and ``score_thresh``; the object is
            also passed to ``seg_model.generate_model``.
    """
    with open('./seg_model/test.prototxt', 'w') as f:
        f.write(str(seg_model.generate_model('val', config)))

    caffe.set_device(config.gpu_id)
    caffe.set_mode_gpu()

    # Load pretrained model
    net = caffe.Net('./seg_model/test.prototxt',
                    config.pretrained_model,
                    caffe.TEST)

    ############################################################################
    # Load annotations and bounding box proposals
    ############################################################################

    # Context managers close each annotation file as soon as it is parsed.
    with open(config.query_file) as f:
        query_dict = json.load(f)
    with open(config.bbox_file) as f:
        bbox_dict = json.load(f)
    with open(config.imcrop_file) as f:
        imcrop_dict = json.load(f)
    # Loaded for parity with the original code; not used further below.
    with open(config.imsize_file) as f:
        imsize_dict = json.load(f)
    imlist = list({name.split('_', 1)[0] + '.jpg' for name in query_dict})
    vocab_dict = text_processing.load_vocab_dict_from_file(config.vocab_file)

    ############################################################################
    # Flatten the annotations
    ############################################################################

    flat_query_dict = {imname: [] for imname in imlist}
    for imname in imlist:
        for imcrop_name in imcrop_dict[imname]:
            gt_bbox = bbox_dict[imcrop_name]
            if imcrop_name not in query_dict:
                continue
            for description in query_dict[imcrop_name]:
                flat_query_dict[imname].append(
                    (imcrop_name, gt_bbox, description))

    ############################################################################
    # Testing
    ############################################################################

    cum_I, cum_U = 0.0, 0.0
    eval_seg_iou_list = [0.5, 0.6, 0.7, 0.8, 0.9]
    seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    seg_total = 0.0

    # Pre-allocate arrays
    imcrop_val = np.zeros((config.N, config.input_H, config.input_W, 3),
                          dtype=np.float32)
    text_seq_val = np.zeros((config.T, config.N), dtype=np.int32)

    for imname in tqdm(imlist):
        # Extract visual features from all proposals
        im = skimage.io.imread(config.image_dir + imname)
        processed_im = skimage.img_as_ubyte(
            im_processing.resize_and_pad(im, config.input_H, config.input_W))
        if processed_im.ndim == 2:
            # Replicate a single-channel image across three channels.
            processed_im = np.tile(processed_im[:, :, np.newaxis], (1, 1, 3))

        imcrop_val[...] = (processed_im.astype(np.float32)
                           - seg_model.channel_mean)
        imcrop_val_trans = imcrop_val.transpose((0, 3, 1, 2))

        # Extract spatial features
        spatial_val = processing_tools.generate_spatial_batch(
            config.N, config.featmap_H, config.featmap_W)
        spatial_val = spatial_val.transpose((0, 3, 1, 2))

        for imcrop_name, _, description in flat_query_dict[imname]:
            mask = load_gt_mask(
                config.mask_dir + imcrop_name + '.mat').astype(np.float32)
            labels = (mask > 0)
            processed_labels = im_processing.resize_and_pad(
                mask, config.input_H, config.input_W)
            processed_labels = processed_labels > 0

            text_seq_val[:, 0] = text_processing.preprocess_sentence(
                description, vocab_dict, config.T)
            cont_val = text_processing.create_cont(text_seq_val)

            net.blobs['language'].data[...] = text_seq_val
            net.blobs['cont'].data[...] = cont_val
            net.blobs['image'].data[...] = imcrop_val_trans
            net.blobs['spatial'].data[...] = spatial_val
            net.blobs['label'].data[...] = processed_labels

            net.forward()

            upscores = net.blobs['upscores'].data[...].copy()
            upscores = np.squeeze(upscores)

            # Evaluate the segmentation performance of using bounding box
            # segmentation
            pred_raw = (upscores >= config.score_thresh).astype(np.float32)
            predicts = im_processing.resize_and_crop(pred_raw, im.shape[0],
                                                     im.shape[1])
            I, U = eval_tools.compute_mask_IU(predicts, labels)
            cum_I += I
            cum_U += U
            this_IoU = I/float(U)
            for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
                seg_correct[n_eval_iou] += (this_IoU >= eval_seg_iou)
            seg_total += 1

    # Print results
    print('Final results on the whole test set')
    result_str = ''
    for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
        result_str += 'precision@%s = %f\n' % \
            (str(eval_seg_iou), seg_correct[n_eval_iou]/seg_total)
    result_str += 'overall IoU = %f\n' % (cum_I/cum_U)
    print(result_str)