# test_model.py

# --------------------------------------------------------
# Written by Saurabh Gupta
# Modified by Ishan Misra
# --------------------------------------------------------

import caffe
import numpy as np
import cv2
import sg_utils as utils
import cap_eval_utils
from IPython.core.debugger import Tracer
import code

def load_model(prototxt_file, model_file, base_image_size, mean, vocab):
  """
  Build the model dictionary used by the other functions in this file.

  A caffe.Net is instantiated in TEST mode from prototxt_file and model_file
  and stored under 'net'. The remaining arguments are stored unchanged:
  base_image_size -> 'base_image_size', mean -> 'means', vocab -> 'vocab'.

  Returns the resulting dict.
  """
  net = caffe.Net(prototxt_file, model_file, caffe.TEST)
  return {
    'net': net,
    'base_image_size': base_image_size,
    'means': mean,
    'vocab': vocab,
  }

def output_words_image(threshold_metric, output_metric, min_words, threshold, vocab, is_functional):
  """
  Pick the words to report for a single image.

  Words are visited in decreasing order of threshold_metric. A word is kept
  if its threshold_metric is >= threshold, OR if the running count of entries
  flagged in is_functional (counted in that same order, including the word
  itself) is still <= min_words.

  threshold_metric, output_metric and is_functional are indexed per word
  with NumPy fancy indexing, so they must be NumPy arrays; vocab['words'] is
  indexed with the same word indices.

  Returns a list of (word, output_metric value, threshold_metric value)
  tuples in decreasing threshold_metric order.
  """
  order = np.argsort(threshold_metric)[::-1]
  above_threshold = threshold_metric[order] >= threshold
  # Running count of flagged words, used to guarantee a minimum number of
  # flagged words even when they fall below the threshold.
  within_quota = np.cumsum(is_functional[order]) < 1 + min_words

  kept = [word_ind
          for word_ind, passes, needed in zip(order, above_threshold, within_quota)
          if passes or needed]

  words = vocab['words']
  return [(words[k], output_metric[k], threshold_metric[k]) for k in kept]

def output_words(imdb, detection_file, eval_file, vocab, \
  threshold_metric_name, output_metric_name, threshold, min_words, output_file = None, \
  functional_words = ['a', 'on', 'of', 'the', 'in', 'with', 'and', 'is', 'to', 'an', 'two', 'at', 'next', 'are']):
  """
  Write out the words selected for every image.

  Detections are loaded from detection_file and the score/precision curves
  from eval_file. For every word column, the detection probabilities
  ('mil_prob') are passed through
  cap_eval_utils.compute_precision_score_mapping together with that word's
  'score' and 'precision' columns from the eval file; the result is stored
  in the detections dict under 'prec'.

  For each image, output_words_image selects words using
  dt[threshold_metric_name] as the thresholding metric and
  dt[output_metric_name] as the reported value. Words listed in
  functional_words do not count towards the min_words quota.

  When output_file is given, a text file is written with a three-line header
  followed by one line per image: '<image id>: word (value), word (value), '.

  Nothing is returned.
  """
  dt = utils.load_variables(detection_file)
  pt = utils.load_variables(eval_file)

  # True for words that are NOT in functional_words; only these count
  # towards the min_words quota inside output_words_image.
  counts_towards_quota = np.array([w not in functional_words for w in vocab['words']])

  prec = np.zeros(dt['mil_prob'].shape)
  n_cols = prec.shape[1]
  for col in range(n_cols):
    prec[:,col] = cap_eval_utils.compute_precision_score_mapping(
      pt['details']['score'][:,col].copy(),
      pt['details']['precision'][:,col].copy(),
      dt['mil_prob'][:,col])
    utils.tic_toc_print(1, 'compute precision score mapping: {:4d} / {:4d}'.format(col, n_cols))
  dt['prec'] = prec

  out_all = []
  for img in range(imdb.num_images):
    selected = output_words_image(
      dt[threshold_metric_name][img,:], dt[output_metric_name][img,:],
      min_words, threshold, vocab, counts_towards_quota)
    out_all.append(selected)
    utils.tic_toc_print(1, 'output words image: {:4d} / {:4d}'.format(img, imdb.num_images))

  if output_file is None:
    return

  with open(output_file, 'wt') as f:
    f.write('detection file %s\n'%(detection_file))
    f.write('eval file %s\n'%(eval_file))
    f.write('threshold %.2f; min_words %d\n'%(threshold, min_words))
    for img, selected in enumerate(out_all):
      f.write('{:d}: '.format(imdb.image_index[img]))
      f.write(''.join('{:s} ({:.2f}), '.format(entry[0], entry[1]) for entry in selected))
      f.write('\n')

def test_model(imdb, model, detection_file = None):
  """
  Run the model over every image of imdb and optionally store the detections.

  imdb must provide num_images, image_index and image_path_at(i). model is a
  dict with 'net', 'base_image_size', 'means' and 'vocab' (as built by
  load_model).

  Each image is read with cv2.imread and passed to test_img; the two returned
  rows are stored in sc[i, :] and mil_prob[i, :]. Images that cannot be read
  are reported and skipped, leaving their rows at zero.

  If detection_file is given, sc and mil_prob are saved to it under the names
  'sc' and 'mil_prob' (overwriting an existing file). Nothing is returned.
  """
  n_words = len(model['vocab']['words'])
  n_images = len(imdb.image_index)
  # np.float64 rather than the np.float alias, which was removed in NumPy 1.24.
  sc = np.zeros((imdb.num_images, n_words), dtype=np.float64)
  mil_prob = np.zeros((imdb.num_images, n_words), dtype=np.float64)

  for i in range(n_images):
    image_path = imdb.image_path_at(i)
    im = cv2.imread(image_path)
    if im is None:
      # cv2.imread signals an unreadable file by returning None.
      print('could not read %s; skipping'%(image_path))
      continue

    sc[i,:], mil_prob[i,:] = test_img(im, model['net'], model['base_image_size'], model['means'])
    utils.tic_toc_print(60, 'test_img : {:6d}/{:6d}'.format(i, n_images))

  if detection_file is not None:
    utils.save_variables(detection_file, [sc, mil_prob],
      ['sc', 'mil_prob'], overwrite = True)


def benchmark_one_word(i, P, R, score, ap, human_rec, human_prec, \
                      prec_at_human_rec, rec_at_human_prec, rec_at_half_prec,\
                      gt_label, mil_prob, num_references):
  """
  Fill in the benchmark results for word column i, in place.

  Column i of P, R and score and entry [0, i] of ap receive the four values
  returned by cap_eval_utils.calc_pr_ovr for this word. human_prec[0, i] and
  human_rec[0, i] receive the pair returned by cap_eval_utils.human_agreement.

  From the P/R columns three operating points are then derived, each written
  only if at least one curve position satisfies the condition:
    prec_at_human_rec[0, i]: P at the first position where R >= human recall
    rec_at_human_prec[0, i]: R at the last position where P >= human precision
    rec_at_half_prec[0, i] : R at the last position where P >= 0.5

  Nothing is returned; all outputs are written into the passed arrays.
  """
  P[:,i], R[:,i], score[:,i], ap[0,i] = cap_eval_utils.calc_pr_ovr(gt_label[:,i], mil_prob[:,i], num_references)
  human_prec[0,i], human_rec[0,i] = cap_eval_utils.human_agreement(gt_label[:,i], num_references)

  precision = P[:,i]
  recall = R[:,i]

  # np.where on a 1-D condition yields indices in ascending order, so the
  # first/last match can be taken directly.
  at_human_rec = np.where(recall >= human_rec[0,i])[0]
  if len(at_human_rec) > 0:
    prec_at_human_rec[0,i] = precision[at_human_rec[0]]

  at_human_prec = np.where(precision >= human_prec[0,i])[0]
  if len(at_human_prec) > 0:
    rec_at_human_prec[0,i] = recall[at_human_prec[-1]]

  at_half_prec = np.where(precision >= 0.5)[0]
  if len(at_half_prec) > 0:
    rec_at_half_prec[0,i] = recall[at_half_prec[-1]]


def benchmark_only_ap(vocab, gt_label, num_references, mil_prob, eval_file = None, noref = False):
  """
  Compute precision/recall/score curves and AP for every word.

  For each word column i, cap_eval_utils.calc_pr_ovr (or calc_pr_ovr_noref
  when noref is True, in which case num_references is not used) is called on
  gt_label[:, i] and mil_prob[:, i]. Its four return values are stored in
  column i of P, R, score and in ap[0, i].

  If eval_file is given, the four arrays are saved to it under the names
  'precision', 'recall', 'ap' and 'score' (overwriting an existing file).

  Returns the tuple (P, R, score, ap); P, R and score have mil_prob's shape
  and ap has shape (1, number of words).
  """
  n_words = len(vocab['words'])
  # np.float64 rather than the np.float alias, which was removed in NumPy 1.24.
  P = np.zeros(mil_prob.shape, dtype=np.float64)
  R = np.zeros(mil_prob.shape, dtype=np.float64)
  score = np.zeros(mil_prob.shape, dtype=np.float64)
  ap = np.zeros((1, n_words), dtype=np.float64)

  for i in range(n_words):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    if noref:
      P[:,i], R[:,i], score[:,i], ap[0,i] = cap_eval_utils.calc_pr_ovr_noref(gt_label[:,i], mil_prob[:,i])
    else:
      P[:,i], R[:,i], score[:,i], ap[0,i] = cap_eval_utils.calc_pr_ovr(gt_label[:,i], mil_prob[:,i], num_references)

  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score}

  if eval_file is not None:
    # Materialise the keys so names and values are index-aligned lists on
    # both Python 2 and Python 3.
    names = list(details.keys())
    values = [details[name] for name in names]
    utils.save_variables(eval_file, values, names, overwrite=True)
  return P, R, score, ap

def benchmark_ap(vocab, gt_label, num_references, mil_prob, eval_file = None):
  """
  Benchmark mil_prob against gt_label for every word, including the
  human-agreement operating points.

  The per-word work is delegated to benchmark_one_word, which fills column i
  of each result array in place. prec_at_human_rec starts out as NaN and the
  other operating-point arrays as zero, so entries that benchmark_one_word
  does not write keep those values.

  If eval_file is given, every array in the result dict is saved to it under
  its key (overwriting an existing file).

  Returns a dict with keys 'precision', 'recall', 'ap', 'score',
  'prec_at_human_rec', 'rec_at_human_prec', 'human_prec', 'human_rec' and
  'rec_at_half_prec'. Curve arrays have mil_prob's shape; the others have
  shape (1, number of words).
  """
  n_words = len(vocab['words'])
  # np.float64 rather than the np.float alias, which was removed in NumPy 1.24.
  P = np.zeros(mil_prob.shape, dtype=np.float64)
  R = np.zeros(mil_prob.shape, dtype=np.float64)
  score = np.zeros(mil_prob.shape, dtype=np.float64)
  ap = np.zeros((1, n_words), dtype=np.float64)

  human_prec = np.zeros((1, n_words), dtype=np.float64)
  human_rec = np.zeros((1, n_words), dtype=np.float64)

  prec_at_human_rec = np.full((1, n_words), np.nan, dtype=np.float64)
  rec_at_human_prec = np.zeros((1, n_words), dtype=np.float64)
  rec_at_half_prec = np.zeros((1, n_words), dtype=np.float64)

  for i in range(n_words):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    # Same computation that used to be duplicated inline here; note the
    # helper takes human_rec before human_prec.
    benchmark_one_word(i, P, R, score, ap, human_rec, human_prec,
                       prec_at_human_rec, rec_at_human_prec, rec_at_half_prec,
                       gt_label, mil_prob, num_references)

  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score,
    'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec,
    'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}

  if eval_file is not None:
    names = list(details.keys())
    values = [details[name] for name in names]
    utils.save_variables(eval_file, values, names, overwrite=True)
  return details


def benchmark_ap_noref(vocab, gt_label, mil_prob, eval_file = None):
  """
  Benchmark mil_prob against gt_label for every word without using
  reference counts.

  For each word column i, cap_eval_utils.calc_pr_ovr_noref is called on
  gt_label[:, i] and mil_prob[:, i]; its four return values are stored in
  column i of P, R, score and in ap[0, i].

  The human-agreement entries are never computed here. They are returned so
  the result has the same keys as benchmark_ap: 'prec_at_human_rec' is all
  NaN, while 'human_prec', 'human_rec', 'rec_at_human_prec' and
  'rec_at_half_prec' are all zero.

  If eval_file is given, every array in the result dict is saved to it under
  its key (overwriting an existing file).

  Returns the result dict.
  """
  n_words = len(vocab['words'])
  # np.float64 rather than the np.float alias, which was removed in NumPy 1.24.
  P = np.zeros(mil_prob.shape, dtype=np.float64)
  R = np.zeros(mil_prob.shape, dtype=np.float64)
  score = np.zeros(mil_prob.shape, dtype=np.float64)
  ap = np.zeros((1, n_words), dtype=np.float64)

  human_prec = np.zeros((1, n_words), dtype=np.float64)
  human_rec = np.zeros((1, n_words), dtype=np.float64)

  prec_at_human_rec = np.full((1, n_words), np.nan, dtype=np.float64)
  rec_at_human_prec = np.zeros((1, n_words), dtype=np.float64)
  rec_at_half_prec = np.zeros((1, n_words), dtype=np.float64)

  for i in range(n_words):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:,i], R[:,i], score[:,i], ap[0,i] = cap_eval_utils.calc_pr_ovr_noref(gt_label[:,i], mil_prob[:,i])

  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score,
    'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec,
    'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}

  if eval_file is not None:
    names = list(details.keys())
    values = [details[name] for name in names]
    utils.save_variables(eval_file, values, names, overwrite=True)
  return details


def benchmark(imdb, vocab, gt_label, num_references, detection_file, eval_file = None):
  """
  Benchmark the detections stored in detection_file and print a summary per
  part-of-speech tag.

  'mil_prob' is loaded from detection_file and every word column is
  benchmarked with benchmark_one_word. A summary line (mean AP, mean
  precision at human recall, mean human precision, all times 100) is printed
  for every distinct tag in vocab['poss'] and finally for all words ('all').
  imdb is accepted for interface compatibility but not used.

  If eval_file is given, the per-word details and the aggregate list are
  saved to it under the names 'details' and 'agg'. If that save raises, the
  two are written to separate '<eval_file>_details.h5' / '<eval_file>_agg.h5'
  files instead ('.pkl' in eval_file is replaced).

  Returns the details dict with keys 'precision', 'recall', 'ap', 'score',
  'prec_at_human_rec', 'rec_at_human_prec', 'human_prec', 'human_rec' and
  'rec_at_half_prec'.
  """
  dt = utils.load_variables(detection_file)
  mil_prob = dt['mil_prob']

  n_words = len(vocab['words'])
  # np.float64 rather than the np.float alias, which was removed in NumPy 1.24.
  P = np.zeros(mil_prob.shape, dtype=np.float64)
  R = np.zeros(mil_prob.shape, dtype=np.float64)
  score = np.zeros(mil_prob.shape, dtype=np.float64)
  ap = np.zeros((1, n_words), dtype=np.float64)

  human_prec = np.zeros((1, n_words), dtype=np.float64)
  human_rec = np.zeros((1, n_words), dtype=np.float64)

  # NaN marks words for which no precision at human recall could be found.
  prec_at_human_rec = np.full((1, n_words), np.nan, dtype=np.float64)
  rec_at_human_prec = np.zeros((1, n_words), dtype=np.float64)
  rec_at_half_prec = np.zeros((1, n_words), dtype=np.float64)

  for i in range(n_words):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    # Note the helper takes human_rec before human_prec.
    benchmark_one_word(i, P, R, score, ap, human_rec, human_prec,
                       prec_at_human_rec, rec_at_human_prec, rec_at_half_prec,
                       gt_label, mil_prob, num_references)

  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score,
    'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec,
    'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}

  # Collect statistics over the POS tags, then over the whole vocabulary.
  agg = []
  for pos in set(vocab['poss']):
    ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
    agg.append(_summarize_word_group(pos, ind, ap, prec_at_human_rec, human_prec))
  agg.append(_summarize_word_group('all', list(range(n_words)), ap, prec_at_human_rec, human_prec))

  if eval_file is not None:
    try:
      utils.save_variables(eval_file, [details, agg],
        ['details', 'agg'], overwrite = True)
    except Exception as err:
      # The previous fallback opened an interactive console (blocking
      # unattended runs) and then called agg.keys() on a list, which always
      # raised AttributeError. Both are replaced below.
      print('Error trying to save to pickle, will try hdf5 hack')
      print(repr(err))

      eval_file_details = eval_file.replace('.pkl','_details.h5')
      detail_names = list(details.keys())
      detail_values = [details[name] for name in detail_names]
      utils.save_variables(eval_file_details, detail_values, detail_names, overwrite=True)

      # agg is a list of per-group dicts; store it column-wise so each field
      # becomes one named variable. NOTE(review): assumes save_variables can
      # store a list of strings ('pos') in the .h5 target - confirm.
      eval_file_agg = eval_file.replace('.pkl','_agg.h5')
      agg_names = ['pos', 'ap', 'prec_at_human_rec', 'human_prec']
      agg_values = [[entry[name] for entry in agg] for name in agg_names]
      utils.save_variables(eval_file_agg, agg_values, agg_names, overwrite=True)

  return details


def _summarize_word_group(pos, ind, ap, prec_at_human_rec, human_prec):
  """
  Print and return the mean statistics (times 100) over the word indices ind.

  Returns a dict with keys 'pos', 'ap', 'prec_at_human_rec' and 'human_prec'.
  """
  mean_ap = 100*np.mean(ap[0, ind])
  mean_prec_at_human_rec = 100*np.mean(prec_at_human_rec[0, ind])
  mean_human_prec = 100*np.mean(human_prec[0, ind])
  print("    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}".format(
    pos, len(ind), mean_ap, mean_prec_at_human_rec, mean_human_prec))
  return {'pos': pos, 'ap': mean_ap,
    'prec_at_human_rec': mean_prec_at_human_rec,
    'human_prec': mean_human_prec}


def prepare_batch(imList, base_image_size, means):
  """
  Load a list of image files into one float32 batch.

  imList is a list of image paths (a single non-list value is wrapped in a
  list). Each image is read with cv2.imread, converted to float32, has means
  subtracted, is resized and zero-padded to base_image_size x base_image_size
  by upsample_image, and is transposed from (H, W, C) to (C, H, W).

  Images that cannot be read are reported and skipped; their slot in the
  batch stays all zeros.

  Returns an array of shape (len(imList), 3, base_image_size,
  base_image_size) and dtype float32.
  """
  if not isinstance(imList, list):
    imList = [imList]

  imBatch = np.zeros((len(imList), 3, base_image_size, base_image_size), dtype=np.float32)
  for ind, imName in enumerate(imList):
    try:
      im = cv2.imread(imName)
    except Exception:
      # Narrowed from a bare except so KeyboardInterrupt/SystemExit still
      # propagate. The double space reproduces the original message.
      print('could not read  %s' % (imName,))
      continue
    if im is None:
      # cv2.imread signals an unreadable file by returning None.
      print('could not read (None)  %s' % (imName,))
      continue

    im_orig = im.astype(np.float32, copy=True)
    im_orig -= means

    padded, _, _ = upsample_image(im_orig, base_image_size)
    imBatch[ind,:,:,:] = np.transpose(padded, axes = (2, 0, 1))
  return imBatch

def test_batch(batchImList, model, fields=['mil', 'mil_max']):
  """
  Run one forward pass over a batch of image files and fetch output blobs.

  The batch is built by prepare_batch from batchImList using the model's
  'base_image_size' and 'means', then fed to model['net'] as the 'data'
  input. For every name in fields, a copy of that blob's data is flattened
  to shape (1, size).

  Returns a 2-tuple of arrays when exactly two fields are requested,
  otherwise a list with one array per field.
  """
  batch = prepare_batch(batchImList, model['base_image_size'], model['means'])
  net = model['net']
  net.forward(data=batch.astype(np.float32, copy=False))

  # Copy each requested blob and flatten it into a single row.
  outputs = []
  for blob_name in fields:
    blob = net.blobs[blob_name].data.copy()
    outputs.append(blob.reshape(1, blob.size))

  if len(fields) != 2:
    return outputs
  return outputs[0], outputs[1]

def test_img(im, net, base_image_size, means):
  """
  Calls Caffe to get output for this image.

  im is an image array in (H, W, C) layout, e.g. as returned by cv2.imread.
  It is preprocessed the same way as in get_fields_from_forward: converted
  to float32, means subtracted, resized and zero-padded to
  base_image_size x base_image_size by upsample_image, transposed to
  (C, H, W) and given a leading batch axis. Previously the raw image was
  forwarded directly and base_image_size / means were ignored.

  Returns (sc, mil_prob): copies of the 'mil_max' and 'mil' blobs, each
  reshaped to (1, size).
  """
  # Mean-subtract on a float32 copy so the caller's image is left untouched.
  im_orig = im.astype(np.float32, copy=True)
  im_orig -= means

  # Resize/pad to the network input size, then HWC -> 1 x C x H x W.
  padded, _, _ = upsample_image(im_orig, base_image_size)
  blob = np.transpose(padded, axes = (2, 0, 1))
  blob = blob[np.newaxis, :, :, :]

  # Pass into Caffe
  net.forward(data=blob.astype(np.float32, copy=False))

  # Get outputs and return them
  mil_prob = net.blobs['mil'].data.copy()
  sc = net.blobs['mil_max'].data.copy()

  # reshape appropriately
  mil_prob = mil_prob.reshape((1, mil_prob.size))
  sc = sc.reshape((1, sc.size))
  return sc, mil_prob

def get_fields_from_forward(im, net, base_image_size, means, fields):
  """
  Forward one image through Caffe and collect the requested blobs.

  im is an image array in (H, W, C) layout. A float32 copy has means
  subtracted, is resized and zero-padded to base_image_size x
  base_image_size by upsample_image, transposed to (C, H, W), given a
  leading batch axis and fed to net as the 'data' input.

  Returns a dict mapping every name in fields to a copy of that blob's data.
  """
  # Work on a float32 copy so the caller's image is not modified.
  centered = im.astype(np.float32, copy=True)
  centered -= means

  padded, _, _ = upsample_image(centered, base_image_size)
  # HWC -> CHW, then add the batch dimension.
  blob = np.transpose(padded, axes = (2, 0, 1))[np.newaxis, :, :, :]

  net.forward(data=blob.astype(np.float32, copy=False))

  return {name: net.blobs[name].data.copy() for name in fields}


def upsample_image(im, sz):
  """
  Resize im so its longer side becomes sz and pad it to a square.

  The image is scaled by sz / max(height, width) in both directions with
  bilinear interpolation (aspect ratio preserved) and copied into the
  top-left corner of a zero-filled (sz, sz, 3) float64 array.

  Returns (I_out, I, SZ): the padded array, the resized image before
  padding, and the resized image's shape.
  """
  h = im.shape[0]
  w = im.shape[1]
  # Builtin float / np.float64 replace the np.float alias, which was removed
  # in NumPy 1.24; the resulting values and dtypes are unchanged.
  scale = float(sz) / float(max(h, w))
  I_out = np.zeros((sz, sz, 3), dtype = np.float64)
  I = cv2.resize(im, None, None, fx = scale, fy = scale, interpolation=cv2.INTER_LINEAR)
  SZ = I.shape
  I_out[0:I.shape[0], 0:I.shape[1],:] = I
  return I_out, I, SZ

def upsample_image_int(im, sz):
  """
  Integer-output variant of upsample_image.

  The image is scaled by sz / max(height, width) in both directions with
  bilinear interpolation and copied into the top-left corner of a
  zero-filled (sz, sz, 3) array of NumPy's default integer dtype; the
  resized values are cast to that dtype on assignment.

  Returns (I_out, I, SZ): the padded integer array, the resized image before
  padding, and the resized image's shape.
  """
  h = im.shape[0]
  w = im.shape[1]
  # Builtin float / int replace the np.float / np.int aliases, which were
  # removed in NumPy 1.24; both aliases were the builtins themselves.
  scale = float(sz) / float(max(h, w))
  I_out = np.zeros((sz, sz, 3), dtype = int)
  I = cv2.resize(im, None, None, fx = scale, fy = scale, interpolation=cv2.INTER_LINEAR)
  SZ = I.shape
  I_out[0:I.shape[0], 0:I.shape[1],:] = I
  return I_out, I, SZ