def __init__(self, checkpoint_path):
     """Load a pickled checkpoint and build the decoder it describes.

     Args:
         checkpoint_path: path to a pickle file holding a dict with the
             keys 'model', 'params' and 'ixtoword'.
     """
     # NOTE(review): unpickling can run arbitrary code; only load checkpoints
     # that come from a trusted source.
     # Use a context manager so the file handle is closed deterministically.
     with open(checkpoint_path, 'rb') as checkpoint_file:
         checkpoint = pickle.load(checkpoint_file)
     self.model = checkpoint['model']
     self.checkpoint_params = checkpoint['params']
     self.decoder = decodeGenerator(self.checkpoint_params)
     self.ixtoword = checkpoint['ixtoword']
     # presumably forwarded as keyword arguments to the decoder's predict
     # call -- confirm against the callers of this class
     self.kwparams = {'beam_size': 5}
示例#2
0
def RNNGenCost(batch, model, params, misc):
    """Compute the softmax cost and the parameter gradients for one batch.

    Args:
        batch: sequence of image/sentence pairs; each pair must provide
            pair["sentence"]["tokens"] (an iterable of words). The batch is
            also handed unchanged to the generator's forward pass.
        model: mapping from parameter name to array; indexed here with the
            names listed in misc["regularize"].
        params: dict of settings; params["regc"] is the L2 regularization
            strength. Also passed to decodeGenerator and to forward().
        misc: dict holding "wordtoix" (word -> index) and, when regc > 0,
            "regularize" (names of the parameters to regularize).

    Returns:
        A dict with three entries:
          "cost":  {"reg_cost", "loss_cost", "total_cost"}, each divided by
                   the batch size,
          "grad":  the gradients returned by the generator's backward pass,
                   plus the regularization term, divided by the batch size,
          "stats": {"ppl2": 2 ** (summed log2 loss / number of targets)}.
    """
    regc = params["regc"]  # regularization strength
    generator = decodeGenerator(params)
    wordtoix = misc["wordtoix"]

    # Forward pass: one score matrix per pair (one row per predicted word)
    # and the caches that the backward pass needs.
    Ys, gen_caches = generator.forward(batch, model, params, misc, predict_mode=False)

    loss_cost = 0.0
    logppl = 0.0
    logppln = 0
    dYs = []
    for i, pair in enumerate(batch):
        # Ground-truth word indices; words missing from the vocabulary are skipped.
        gtix = [wordtoix[w] for w in pair["sentence"]["tokens"] if w in wordtoix]
        gtix.append(0)  # index 0 is the END token, which must be predicted last
        rows = np.arange(len(gtix))

        # Row-wise softmax; subtracting the row maximum keeps exp() in range.
        Y = Ys[i]
        e = np.exp(Y - np.amax(Y, axis=1, keepdims=True))
        P = e / np.sum(e, axis=1, keepdims=True)

        # Probability assigned to each ground-truth word, gathered once.
        # The 1e-20 offset avoids log(0).
        p_gt = 1e-20 + P[rows, gtix]
        loss_cost += -np.sum(np.log(p_gt))
        logppl += -np.sum(np.log2(p_gt))  # log2 loss, used for perplexity
        logppln += len(gtix)

        # Gradient of the softmax loss w.r.t. the scores is P - onehot(target).
        # Each row appears once in `rows`, so the in-place update is safe.
        P[rows, gtix] -= 1
        dYs.append(P)

    # Backprop through the generator.
    grads = generator.backward(dYs, gen_caches)

    # Add the L2 regularization cost and its gradient.
    reg_cost = 0.0
    if regc > 0:
        for p in misc["regularize"]:
            mat = model[p]
            reg_cost += 0.5 * regc * np.sum(mat * mat)
            grads[p] += regc * mat

    # Normalize the cost and the gradients by the batch size.
    batch_size = len(batch)
    reg_cost /= batch_size
    loss_cost /= batch_size
    for k in grads:
        grads[k] /= batch_size

    return {
        "cost": {"reg_cost": reg_cost, "loss_cost": loss_cost, "total_cost": loss_cost + reg_cost},
        "grad": grads,
        "stats": {"ppl2": 2 ** (logppl / logppln)},
    }
示例#3
0
def main(params):
  """Caption every image listed in tasks.txt and write JSON and HTML results.

  Reads 'tasks.txt' (one image name per line) and 'vgg_feats.mat' from
  params['root_path'], predicts a sentence for every feature column using
  params['beam_size'], and writes 'result_struct.json' and 'result.html'
  back into the same folder.

  NOTE(review): checkpoint_params, model and ixtoword are not defined inside
  this function; they have to exist at module level -- confirm.
  """
  # output blob which we will dump to JSON for visualizing the results
  blob = {
    'params': params,
    'checkpoint_params': checkpoint_params,
    'imgblobs': [],
  }

  # load the tasks.txt file
  root_path = params['root_path']
  with open(os.path.join(root_path, 'tasks.txt'), 'r') as task_file:
    img_names = task_file.read().splitlines()

  # load the features for all images; one column per image
  features_path = os.path.join(root_path, 'vgg_feats.mat')
  features_struct = scipy.io.loadmat(features_path)
  features = features_struct['feats']
  _, N = features.shape

  # iterate over all images and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  for n in range(N):
    print('image %d/%d:' % (n, N))

    # encode the image
    img = {'feat': features[:, n], 'local_file_path': img_names[n]}

    # perform the work. heavy lifting happens inside
    kwparams = {'beam_size': params['beam_size']}
    Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)

    # Ys[0] holds the predictions for the single image we passed in,
    # sorted with the highest-scoring one first
    top_prediction = Ys[0][0]
    # ix 0 is the END token, skip that
    candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])
    print('PRED: (%f) %s' % (top_prediction[0], candidate))

    blob['imgblobs'].append({
      'img_path': img['local_file_path'],
      'candidate': {'text': candidate, 'logprob': top_prediction[0]},
    })

  # dump result struct to file
  save_file = os.path.join(root_path, 'result_struct.json')
  print('writing predictions to %s...' % (save_file, ))
  with open(save_file, 'w') as json_file:
    json.dump(blob, json_file)

  # dump output html; collect the pieces and join them once
  html_parts = []
  for img_blob in blob['imgblobs']:
    html_parts.append('<img src="%s" height="400"><br>' % (img_blob['img_path'], ))
    html_parts.append('(%f) %s <br><br>' % (img_blob['candidate']['logprob'], img_blob['candidate']['text']))
  html_file = os.path.join(root_path, 'result.html')
  print('writing html result file to %s...' % (html_file, ))
  with open(html_file, 'w') as html_out:
    html_out.write(''.join(html_parts))
示例#4
0
def RNNGenCost(batch, model, params, misc):
  """Compute the softmax cost and the parameter gradients for one batch.

  Args:
    batch: sequence of image/sentence pairs; pair['sentence'] must be a
      whitespace-separated string of words. The batch is also handed
      unchanged to the generator's forward pass.
    model: mapping from parameter name to array; indexed here with the
      names listed in misc['regularize'].
    params: dict of settings; params['regc'] is the L2 regularization
      strength. Also passed to decodeGenerator and to forward().
    misc: dict holding 'wordtoix' (word -> index) and, when regc > 0,
      'regularize' (names of the parameters to regularize).

  Returns:
    A dict with 'cost' (reg_cost, loss_cost, total_cost, each divided by the
    batch size), 'grad' (gradients from the generator's backward pass plus
    the regularization term, divided by the batch size) and 'stats'
    ('ppl2': 2 ** (summed log2 loss / number of targets)).
  """
  regc = params['regc'] # regularization strength
  generator = decodeGenerator(params)
  wordtoix = misc['wordtoix']

  # forward pass: one score matrix per pair (one row per predicted word)
  # and the caches that the backward pass needs
  Ys, gen_caches = generator.forward(batch, model, params, misc, predict_mode = False)

  loss_cost = 0.0
  logppl = 0.0
  logppln = 0
  dYs = []
  for i, pair in enumerate(batch):
    # ground truth indices; words missing from the vocabulary are skipped
    gtix = [ wordtoix[w] for w in pair['sentence'].split() if w in wordtoix ]
    gtix.append(0) # index 0 is the END token, which must be predicted last
    rows = np.arange(len(gtix))

    # row-wise softmax; subtracting the row maximum keeps exp() in range
    Y = Ys[i]
    e = np.exp(Y - np.amax(Y, axis=1, keepdims=True))
    P = e / np.sum(e, axis=1, keepdims=True)

    # probability of each ground truth word, gathered once;
    # the 1e-20 offset avoids log(0)
    p_gt = 1e-20 + P[rows, gtix]
    loss_cost += - np.sum(np.log(p_gt))
    logppl += - np.sum(np.log2(p_gt)) # log2 loss, used for perplexity
    logppln += len(gtix)

    # gradient of the softmax loss w.r.t. the scores is P - onehot(target);
    # each row appears once in `rows`, so the in-place update is safe
    P[rows, gtix] -= 1
    dYs.append(P)

  # backprop through the generator
  grads = generator.backward(dYs, gen_caches)

  # add L2 regularization cost and gradients
  reg_cost = 0.0
  if regc > 0:
    for p in misc['regularize']:
      mat = model[p]
      reg_cost += 0.5 * regc * np.sum(mat * mat)
      grads[p] += regc * mat

  # normalize the cost and gradient by the batch size
  batch_size = len(batch)
  reg_cost /= batch_size
  loss_cost /= batch_size
  for k in grads:
    grads[k] /= batch_size

  return {
    'cost': {'reg_cost' : reg_cost, 'loss_cost' : loss_cost, 'total_cost' : loss_cost + reg_cost},
    'grad': grads,
    'stats': { 'ppl2' : 2 ** (logppl / logppln)},
  }
示例#5
0
 def __init__(self, cpfile, taskfile):
     """Load a pickled checkpoint and the list of image names.

     Args:
         cpfile: path to a pickle file holding a dict with the keys
             "params", "model", "wordtoix" and "ixtoword"; the stored
             params must themselves contain "dataset".
         taskfile: path to a text file; each of its lines becomes one
             entry of self.img_names.
     """
     super(RNNComponent, self).__init__()
     # NOTE(review): unpickling can run arbitrary code; only load checkpoints
     # that come from a trusted source.
     # Context managers make sure both file handles are closed.
     with open(cpfile, "rb") as checkpoint_file:
         checkpoint = pickle.load(checkpoint_file)
     self.params = checkpoint["params"]
     self.dataset = self.params["dataset"]
     self.model = checkpoint["model"]
     self.misc = {"wordtoix": checkpoint["wordtoix"]}
     self.ixtoword = checkpoint["ixtoword"]
     self.BatchGenerator = decodeGenerator(self.params)
     with open(taskfile, "r") as task_file:
         self.img_names = task_file.read().splitlines()
示例#6
0
 def __init__(self, cpfile, taskfile):
     """Load a pickled checkpoint and the list of image names.

     Args:
         cpfile: path to a pickle file holding a dict with the keys
             "params", "model", "wordtoix" and "ixtoword"; the stored
             params must themselves contain "dataset".
         taskfile: path to a text file; each of its lines becomes one
             entry of self.img_names.
     """
     super(RNNComponent, self).__init__()
     # NOTE(review): unpickling can run arbitrary code; only load checkpoints
     # that come from a trusted source.
     # Context managers make sure both file handles are closed.
     with open(cpfile, "rb") as checkpoint_file:
         checkpoint = pickle.load(checkpoint_file)
     self.params = checkpoint["params"]
     self.dataset = self.params["dataset"]
     self.model = checkpoint["model"]
     self.misc = {"wordtoix": checkpoint["wordtoix"]}
     self.ixtoword = checkpoint["ixtoword"]
     self.BatchGenerator = decodeGenerator(self.params)
     with open(taskfile, "r") as task_file:
         self.img_names = task_file.read().splitlines()
 def predict(self, features):
     """Return the best caption for the first column of `features`.

     Args:
         features: 2-D indexable array; only features[:, 0] is used.

     Returns:
         The words of the top prediction joined by single spaces, with
         every index <= 0 left out.
     """
     # NOTE(review): the generator is built from CHECKPOINT_PATH here, not
     # from a params dict -- confirm that this is what decodeGenerator expects.
     generator = decodeGenerator(CHECKPOINT_PATH)
     batch = [{'image': {'feat': features[:, 0]}}]
     beam_width = self.BEAM_SIZE
     predictions = generator.predict(batch,
                                     self.language_model,
                                     self.checkpoint_params,
                                     beam_size=beam_width)
     # predictions[0] belongs to the first (and only) image in the batch;
     # its entries are sorted with the highest-scoring one on top.
     best = predictions[0][0]
     words = []
     for ix in best[1]:
         if ix > 0:  # ix 0 is the END token, skip that
             words.append(self.ixtoword[ix])
     return ' '.join(words)
示例#8
0
def get_sentences(file_name, feats_path, beam_size=30):
    """Return the top beam-search caption for a single image.

    Args:
        file_name: stored as the image's 'local_file_path'.
        feats_path: path to a .mat file whose 'feats' entry holds the image
            features; only column 0 is used (the file is expected to
            describe one image).
        beam_size: beam width handed to the generator's predict call. The
            default of 30 is the value that used to be hard-coded.

    Returns:
        str: the words of the best prediction joined by single spaces, with
        the END token (index 0) left out.

    NOTE(review): checkpoint_params, model and ixtoword are not defined
    inside this function; they have to exist at module level -- confirm.
    """
    # load the features for the image
    features_struct = scipy.io.loadmat(feats_path)
    features = features_struct['feats']

    generator = decodeGenerator(checkpoint_params)
    img = {'feat': features[:, 0], 'local_file_path': file_name}

    # perform the work. heavy lifting happens inside
    Ys = generator.predict([{'image': img}], model, checkpoint_params, beam_size=beam_size)

    # Ys[0] holds the predictions for the single image we passed in,
    # sorted with the highest-scoring one first
    top_prediction = Ys[0][0]
    # ix 0 is the END token, skip that
    candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])
    return str(candidate)
def main(params):
  """Caption the test split with a saved checkpoint and score the result.

  Reads from params: 'checkpoint_path', 'max_images', 'dump_folder',
  'beam_size', 'result_struct_filename' and 'out_dir'.

  Steps: load the checkpoint, predict one sentence per test image,
  optionally copy each image into the dump folder, write the BLEU input
  files into eval/ and run eval/multi-bleu.perl there, dump the result
  struct and the caption list as JSON, and finally run eval_tools.metrics.
  """
  # local import: the rest of the file may not import shutil
  import shutil

  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  max_images = params['max_images']

  print('loading checkpoint %s' % (checkpoint_path, ))
  # NOTE(review): unpickling can run arbitrary code; only load checkpoints
  # that come from a trusted source.
  with open(checkpoint_path, 'rb') as checkpoint_file:
    checkpoint = pickle.load(checkpoint_file)
  checkpoint_params = checkpoint['params']
  dataset = checkpoint_params['dataset']
  model = checkpoint['model']
  dump_folder = params['dump_folder']

  if dump_folder:
    print('creating dump folder ' + dump_folder)
    # os.makedirs instead of a shell 'mkdir -p' string, so paths containing
    # spaces or shell metacharacters are handled correctly
    try:
      os.makedirs(dump_folder)
    except OSError as err:
      # an already existing folder is fine; anything else is only reported,
      # like the shell command did
      if not os.path.isdir(dump_folder):
        print('WARNING: could not create dump folder %s: %s' % (dump_folder, err))

  # fetch the data provider
  dp = getDataProvider(dataset)

  ixtoword = checkpoint['ixtoword']

  # output blob which we will dump to JSON for visualizing the results
  blob = {
    'params': params,
    'checkpoint_params': checkpoint_params,
    'imgblobs': [],
  }

  # iterate over all images in test set and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  all_references = []
  all_candidates = []
  captions_res = []
  for n, img in enumerate(dp.iterImages(split = 'test', max_images = max_images), 1):
    print('image %d/%d:' % (n, max_images))
    # one space-joined string per human-provided reference sentence
    references = [' '.join(x['tokens']) for x in img['sentences']]
    kwparams = { 'beam_size' : params['beam_size'] }
    Ys = BatchGenerator.predict([{'image':img}], model, checkpoint_params, **kwparams)

    img_blob = {} # we will build this up
    img_blob['img_path'] = img['local_file_path']
    img_blob['imgid'] = img['imgid']
    img_blob['id'] = img['id']

    if dump_folder:
      # copy source file to some folder. This makes it easier to distribute results
      # into a webpage, because all images that were predicted on are in a single folder
      source_file = img['local_file_path']
      target_file = os.path.join(dump_folder, os.path.basename(source_file))
      try:
        shutil.copy(source_file, target_file)
      except EnvironmentError as err:
        # a failed copy must not abort the evaluation run
        print('WARNING: could not copy %s to %s: %s' % (source_file, target_file, err))

    # encode the human-provided references; only the first one is printed
    if references:
      print('GT: ' + references[0])
    img_blob['references'] = [{'text': gtsent} for gtsent in references]

    # now evaluate and encode the top prediction
    # Ys[0] holds the predictions for the single image we passed in,
    # sorted with the highest-scoring one first
    top_prediction = Ys[0][0]
    # ix 0 is the END token, skip that
    candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])
    print('PRED: (%f) %s' % (top_prediction[0], candidate))

    # save for later eval
    all_references.append(references)
    all_candidates.append(candidate)
    captions_res.append({'image_id':img_blob['id'],'caption':candidate})
    img_blob['candidate'] = {'text': candidate, 'logprob': top_prediction[0]}
    blob['imgblobs'].append(img_blob)

  # use perl script to eval BLEU score for fair comparison to other research work
  # first write intermediate files
  print('writing intermediate files into eval/')
  with open('eval/output', 'w') as out_file:
    out_file.write('\n'.join(all_candidates))
  # writes exactly five reference files, so every image needs at least
  # five reference sentences (x[q] raises IndexError otherwise)
  for q in range(5):
    with open('eval/reference%d' % q, 'w') as ref_file:
      ref_file.write('\n'.join([x[q] for x in all_references]))
  # invoke the perl script to get BLEU scores
  print('invoking eval/multi-bleu.perl script...')
  owd = os.getcwd()
  os.chdir('eval')
  try:
    os.system('./multi-bleu.perl reference < output')
  finally:
    # always return to the original working directory
    os.chdir(owd)

  # NOTE: the test-split perplexity evaluation (eval_split) is disabled in
  # this variant, so no 'gtppl' entry is added to the blob.

  # dump result struct to file
  print('saving result struct to %s' % (params['result_struct_filename'], ))
  with open(params['result_struct_filename'], 'w') as struct_file:
    json.dump(blob, struct_file)

  # the second '_'-separated piece of the checkpoint path names the algorithm
  alg_name = params['checkpoint_path'].split('_')[1]
  res_file_name = params['out_dir']+'/captions_val_'+alg_name+'_results.json'
  with open(res_file_name, 'w') as res_file:
    json.dump(captions_res, res_file)

  from eval_tools import metrics
  metrics.run(dataset,alg_name,params['out_dir'])
示例#10
0
def main(params):
    """Caption the test split with a saved checkpoint and score the result.

    Reads from params: 'checkpoint_path', 'max_images', 'beam_size' and
    'result_struct_filename'.

    Steps: load the checkpoint, predict one sentence per test image, write
    the BLEU input files into eval/ and run eval/multi-bleu.perl there,
    evaluate the test-split perplexity with eval_split, and dump the
    collected results as JSON.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    max_images = params['max_images']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(review): unpickling can run arbitrary code; only load checkpoints
    # that come from a trusted source.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    dataset = checkpoint_params['dataset']
    model = checkpoint['model']

    # fetch the data provider
    dp = getDataProvider(dataset)

    misc = {'wordtoix': checkpoint['wordtoix']}
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # iterate over all images in test set and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    all_references = []
    all_candidates = []
    test_images = dp.iterImages(split='test', max_images=max_images)
    for n, img in enumerate(test_images, 1):
        print('image %d/%d:' % (n, max_images))
        # one space-joined string per human-provided reference sentence
        references = [' '.join(x['tokens']) for x in img['sentences']]
        kwparams = {'beam_size': params['beam_size']}
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)

        img_blob = {}  # we will build this up
        img_blob['img_path'] = img['local_file_path']
        img_blob['imgid'] = img['imgid']

        # encode the human-provided references
        img_blob['references'] = []
        for gtsent in references:
            print('GT: ' + gtsent)
            img_blob['references'].append({'text': gtsent})

        # now evaluate and encode the top prediction
        # Ys[0] holds the predictions for the single image we passed in,
        # sorted with the highest-scoring one first
        top_prediction = Ys[0][0]
        # ix 0 is the END token, skip that
        candidate = ' '.join(
            [ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        print('PRED: (%f) %s' % (top_prediction[0], candidate))

        # save for later eval
        all_references.append(references)
        all_candidates.append(candidate)

        img_blob['candidate'] = {
            'text': candidate,
            'logprob': top_prediction[0],
        }
        blob['imgblobs'].append(img_blob)

    # use perl script to eval BLEU score for fair comparison to other research work
    # first write intermediate files
    print('writing intermediate files into eval/')
    with open('eval/output', 'w') as out_file:
        out_file.write('\n'.join(all_candidates))
    # writes exactly five reference files, so every image needs at least
    # five reference sentences (x[q] raises IndexError otherwise)
    for q in range(5):
        with open('eval/reference%d' % q, 'w') as ref_file:
            ref_file.write('\n'.join([x[q] for x in all_references]))
    # invoke the perl script to get BLEU scores
    print('invoking eval/multi-bleu.perl script...')
    owd = os.getcwd()
    os.chdir('eval')
    try:
        os.system('./multi-bleu.perl reference < output')
    finally:
        # always return to the original working directory
        os.chdir(owd)

    # now also evaluate test split perplexity
    gtppl = eval_split('test',
                       dp,
                       model,
                       checkpoint_params,
                       misc,
                       eval_max_images=max_images)
    print('perplexity of ground truth words based on dictionary of %d words: %f' % (
        len(ixtoword), gtppl))
    blob['gtppl'] = gtppl

    # dump result struct to file
    print('saving result struct to %s' % (params['result_struct_filename'], ))
    with open(params['result_struct_filename'], 'w') as struct_file:
        json.dump(blob, struct_file)
示例#11
0
def main(params):
    """Caption every image of a pickled feature matrix and dump JSON results.

    Reads from params: 'checkpoint_path', 'task_file' (one image name per
    line), 'feature_file' (pickled feature matrix), 'beam_size' and
    'out_file' (destination of the JSON result struct).

    For every image the up-to-five best predictions are stored, together
    with their log probabilities and the time spent in the predict call.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(review): unpickling can run arbitrary code; only load checkpoints
    # and feature files that come from a trusted source.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    model = checkpoint['model']
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # load the task file
    with open(params['task_file'], 'r') as task_file:
        img_names = task_file.read().splitlines()

    # load the features for all images; pickles are binary data, so the
    # file is opened in 'rb' mode like the checkpoint above
    with open(params['feature_file'], 'rb') as feature_file:
        features = pickle.load(feature_file)
    # transpose so that column n is the feature vector of image n
    features = features.T
    _, N = features.shape

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    for n in range(N):
        print('image %d/%d:' % (n, N))

        # encode the image
        img = {'feat': features[:, n], 'local_file_path': img_names[n]}

        # perform the work. heavy lifting happens inside
        kwparams = {'beam_size': params['beam_size']}
        tic = time.time()
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)
        toc = time.time()

        print('image %d/%d: %f' % (n, N, toc - tic))
        # build up the output
        img_blob = {
            'img_path': img['local_file_path'],
            'rnn_time': (toc - tic),
            'candidate': {'text': [], 'logprob': []},
        }
        # Ys[0] holds the predictions for the single image we passed in;
        # keep at most the first five of them
        top_predictions = Ys[0]
        for i in range(min(5, len(top_predictions))):
            top_prediction = top_predictions[i]
            # ix 0 is the END token, skip that
            candidate = ' '.join(
                [ixtoword[ix] for ix in top_prediction[1] if ix > 0])
            img_blob['candidate']['text'].append(candidate)
            img_blob['candidate']['logprob'].append(top_prediction[0])
        blob['imgblobs'].append(img_blob)

    # dump result struct to file
    save_file = params['out_file']
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as json_file:
        json.dump(blob, json_file)

    # dump output html
    '''
示例#12
0
def main(params):
    batch_size = params['batch_size']
    word_count_threshold = params['word_count_threshold']
    max_epochs = params['max_epochs']
    host = socket.gethostname()  # get computer hostname

    #--------------------------------- Init data provider and load data+features #---------------------------------#
    # fetch the data provider
    dp = getDataProvider(params)

    params['aux_inp_size'] = params['featenc_hidden_size'] * params[
        'n_encgt_sent'] if params['encode_gt_sentences'] else dp.aux_inp_size
    params['featenc_hidden_size'] = params['featenc_hidden_size'] if params[
        'encode_gt_sentences'] else params['aux_inp_size']

    params['image_feat_size'] = dp.img_feat_size
    print 'Image feature size is %d, and aux input size is %d' % (
        params['image_feat_size'], params['aux_inp_size'])

    #--------------------------------- Preprocess sentences and build Vocabulary #---------------------------------#
    misc = {
    }  # stores various misc items that need to be passed around the framework
    # go over all training sentences and find the vocabulary we want to use, i.e. the words that occur
    # at least word_count_threshold number of times
    if params['checkpoint_file_name'] == 'None':
        if params['class_out_factoring'] == 0:
            misc['wordtoix'], misc[
                'ixtoword'], bias_init_vector = preProBuildWordVocab(
                    dp.iterSentences('train'), word_count_threshold)
        else:
            [misc['wordtoix'], misc['classes']
             ], [misc['ixtoword'], misc['clstotree'], misc['ixtoclsinfo']
                 ], [bias_init_vector, bias_init_inter_class
                     ] = preProBuildWordVocab(dp.iterSentences('train'),
                                              word_count_threshold, params)
            params['nClasses'] = bias_init_inter_class.shape[0]
            params['ixtoclsinfo'] = misc['ixtoclsinfo']
    else:
        misc = checkpoint_init['misc']
        params['nClasses'] = checkpoint_init['params']['nClasses']
        if 'ixtoclsinfo' in misc:
            params['ixtoclsinfo'] = misc['ixtoclsinfo']

    params['vocabulary_size'] = len(misc['wordtoix'])
    params['output_size'] = len(misc['ixtoword'])  # these should match though
    print len(misc['wordtoix']), len(misc['ixtoword'])

    #------------------------------ Initialize the solver/generator and build forward path #-----------------------#
    # Initialize the optimizer
    solver = Solver(params['solver'])
    # This initializes the model parameters and does matrix initializations
    lstmGenerator = decodeGenerator(params)
    model, misc['update'], misc['regularize'] = (lstmGenerator.model_th,
                                                 lstmGenerator.update_list,
                                                 lstmGenerator.regularize)

    # force overwrite here. The bias to the softmax is initialized to reflect word frequencies
    # This is a bit of a hack
    if params['checkpoint_file_name'] == 'None':
        model['bd'].set_value(bias_init_vector.astype(config.floatX))
        if params['class_out_factoring'] == 1:
            model['bdCls'].set_value(
                bias_init_inter_class.astype(config.floatX))

    #----------------- If we are using feature encoders -----------------------
    # This mode can now also be used for encoding GT sentences.
    if params['use_encoder_for'] & 1:
        if params['encode_gt_sentences']:
            xI = tensor.zeros((batch_size, params['image_encoding_size']))
            imgFeatEnc_inp = []
        else:
            imgFeatEncoder = RecurrentFeatEncoder(params['image_feat_size'],
                                                  params['word_encoding_size'],
                                                  params,
                                                  mdl_prefix='img_enc_',
                                                  features=dp.features.T)
            mdlLen = len(model.keys())
            model.update(imgFeatEncoder.model_th)
            assert (len(model.keys()) == (mdlLen +
                                          len(imgFeatEncoder.model_th.keys())))
            misc['update'].extend(imgFeatEncoder.update_list)
            misc['regularize'].extend(imgFeatEncoder.regularize)
            (imgenc_use_dropout, imgFeatEnc_inp, xI,
             updatesLSTMImgFeat) = imgFeatEncoder.build_model(model, params)
    else:
        xI = None
        imgFeatEnc_inp = []

    if params['use_encoder_for'] & 2:
        aux_enc_inp = model['Wemb'] if params[
            'encode_gt_sentences'] else dp.aux_inputs.T
        hid_size = params['featenc_hidden_size']
        auxFeatEncoder = RecurrentFeatEncoder(hid_size,
                                              params['image_encoding_size'],
                                              params,
                                              mdl_prefix='aux_enc_',
                                              features=aux_enc_inp)
        mdlLen = len(model.keys())
        model.update(auxFeatEncoder.model_th)
        assert (len(model.keys()) == (mdlLen +
                                      len(auxFeatEncoder.model_th.keys())))
        misc['update'].extend(auxFeatEncoder.update_list)
        misc['regularize'].extend(auxFeatEncoder.regularize)
        (auxenc_use_dropout, auxFeatEnc_inp, xAux,
         updatesLSTMAuxFeat) = auxFeatEncoder.build_model(model, params)

        if params['encode_gt_sentences']:
            # Reshape it size(batch_size, n_gt, hidden_size)
            xAux = xAux.reshape(
                (-1, params['n_encgt_sent'], params['featenc_hidden_size']))
            # Convert it to size (batch_size, n_gt*hidden_size
            xAux = xAux.flatten(2)

    else:
        auxFeatEnc_inp = []
        xAux = None

    #--------------------------------- Initialize the Attention Network #-------------------------------#
    if params['use_attn'] != None:
        attnModel = AttentionNetwork(params['image_feat_size'],
                                     params['hidden_size'],
                                     params,
                                     mdl_prefix='attn_mlp_')
        mdlLen = len(model.keys())
        model.update(attnModel.model_th)
        assert (len(model.keys()) == (mdlLen + len(attnModel.model_th.keys())))
        misc['update'].extend(attnModel.update_list)
        misc['regularize'].extend(attnModel.regularize)
        attn_nw_func = attnModel.build_model
    else:
        attn_nw_func = None

    #--------------------------------- Build the language model graph #---------------------------------#
    # Define the computational graph for relating the input image features and word indices to the
    # log probability cost funtion.
    (use_dropout, inp_list_gen, f_pred_prob, cost, predTh,
     updatesLSTM) = lstmGenerator.build_model(model,
                                              params,
                                              xI,
                                              xAux,
                                              attn_nw=attn_nw_func)

    inp_list = imgFeatEnc_inp + auxFeatEnc_inp + inp_list_gen
    #--------------------------------- Cost function and gradient computations setup #---------------------------------#
    costGrad = cost[0]
    # Add class uncertainity to final cost
    #if params['class_out_factoring'] == 1:
    #  costGrad += cost[2]
    # Add the regularization cost. Since this is specific to trainig and doesn't get included when we
    # evaluate the cost on test or validation data, we leave it here outside the model definition
    if params['regc'] > 0.:
        reg_cost = theano.shared(numpy_floatX(0.), name='reg_c')
        reg_c = tensor.as_tensor_variable(numpy_floatX(params['regc']),
                                          name='reg_c')
        reg_cost = 0.
        for p in misc['regularize']:
            reg_cost += (model[p]**2).sum()
            reg_cost *= 0.5 * reg_c
        costGrad += (reg_cost / params['batch_size'])

    # Compile an evaluation function.. Doesn't include gradients
    # To be used for validation set evaluation
    f_eval = theano.function(inp_list, cost, name='f_eval')

    # Now let's build a gradient computation graph and rmsprop update mechanism
    grads = tensor.grad(costGrad, wrt=model.values())
    lr = tensor.scalar(name='lr', dtype=config.floatX)
    f_grad_shared, f_update, zg, rg, ud = solver.build_solver_model(
        lr, model, grads, inp_list, cost, params)

    print 'model init done.'
    print 'model has keys: ' + ', '.join(model.keys())
    #print 'updating: ' + ', '.join( '%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1]) for k in misc['update'])
    #print 'updating: ' + ', '.join( '%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1]) for k in misc['regularize'])
    #print 'number of learnable parameters total: %d' % (sum(model[k].shape[0] * model[k].shape[1] for k in misc['update']), )

    #-------------------------------- Intialize the prediction path if needed by evaluator ----------------------------#
    evalKwargs = {
        'eval_metric': params['eval_metric'],
        'f_gen': lstmGenerator.predict,
        'beamsize': params['eval_beamsize']
    }
    if params['eval_metric'] != 'perplex':
        lstmGenerator.prepPredictor(None, params, params['eval_beamsize'])
        refToks, scr_info = eval_prep_refs('val', dp, params['eval_metric'])
        evalKwargs['refToks'] = refToks
        evalKwargs['scr_info'] = scr_info
        valMetOp = operator.gt
    else:
        valMetOp = operator.lt

    if params['met_to_track'] != []:
        trackMetargs = {
            'eval_metric': params['met_to_track'],
            'f_gen': lstmGenerator.predict,
            'beamsize': params['eval_beamsize']
        }
        lstmGenerator.prepPredictor(None, params, params['eval_beamsize'])
        refToks, scr_info = eval_prep_refs('val', dp, params['met_to_track'])
        trackMetargs['refToks'] = refToks
        trackMetargs['scr_info'] = scr_info

    #--------------------------------- Iterations and Logging intializations ------------------------------------------#
    # calculate how many iterations we need, One epoch is considered once going through all the sentences and not images
    # Hence in case of coco/flickr this will 5* no of images
    num_sentences_total = dp.getSplitSize('train', ofwhat='sentences')
    num_iters_one_epoch = num_sentences_total / batch_size
    max_iters = max_epochs * num_iters_one_epoch
    eval_period_in_epochs = params['eval_period']
    eval_period_in_iters = max(
        1, int(num_iters_one_epoch * eval_period_in_epochs))
    top_val_sc = -1
    smooth_train_ppl2 = len(
        misc['ixtoword'])  # initially size of dictionary of confusion
    val_sc = len(misc['ixtoword'])
    last_status_write_time = 0  # for writing worker job status reports
    json_worker_status = {}
    #json_worker_status['params'] = params
    json_worker_status['history'] = []
    len_hist = defaultdict(int)

    #Initialize Tracking the perplexity of train and val, with iters.
    train_perplex = []
    val_perplex = []
    trackSc_array = []

    #-------------------------------------- Load previously saved model ------------------------------------------------#
    #- Initialize the model parameters from the checkpoint file if we are resuming training
    if params['checkpoint_file_name'] != 'None':
        zipp(model_init_from, model)
        if params['restore_grads'] == 1:
            zipp(rg_init, rg)
        #Copy trackers from previous checkpoint
        if 'trackers' in checkpoint_init:
            train_perplex = checkpoint_init['trackers']['train_perplex']
            val_perplex = checkpoint_init['trackers']['val_perplex']
            trackSc_array = checkpoint_init['trackers'].get('trackScores', [])
        print(
            """\nContinuing training from previous model\n. Already run for %0.2f epochs with
            validation perplx at %0.3f\n""" %
            (checkpoint_init['epoch'], checkpoint_init['perplexity']))

    #--------------------------------------  MAIN LOOP ----------------------------------------------------------------#
    for it in xrange(max_iters):
        t0 = time.time()
        # Enable using dropout in training
        use_dropout.set_value(float(params['use_dropout']))
        if params['use_encoder_for'] & 1:
            imgenc_use_dropout.set_value(float(params['use_dropout']))
        if params['use_encoder_for'] & 2:
            auxenc_use_dropout.set_value(float(params['use_dropout']))

        epoch = it * 1.0 / num_iters_one_epoch
        #-------------------------------------- Prepare batch-------------------------------------------#
        # fetch a batch of data
        if params['sample_by_len'] == 0:
            batch = [dp.sampleImageSentencePair() for i in xrange(batch_size)]
        else:
            batch, l = dp.getRandBatchByLen(batch_size)
            len_hist[l] += 1

        enc_inp_list = prepare_seq_features(
            batch,
            use_enc_for=params['use_encoder_for'],
            maxlen=params['maxlen'],
            use_shared_mem=params['use_shared_mem_enc'],
            enc_gt_sent=params['encode_gt_sentences'],
            n_enc_sent=params['n_encgt_sent'],
            wordtoix=misc['wordtoix'])

        if params['use_pos_tag'] != 'None':
            gen_inp_list, lenS = prepare_data(
                batch,
                misc['wordtoix'],
                params['maxlen'],
                sentTagMap,
                misc['ixtoword'],
                rev_sents=params['reverse_sentence'],
                use_enc_for=params['use_encoder_for'],
                use_unk_token=params['use_unk_token'])
        else:
            gen_inp_list, lenS = prepare_data(
                batch,
                misc['wordtoix'],
                params['maxlen'],
                rev_sents=params['reverse_sentence'],
                use_enc_for=params['use_encoder_for'],
                use_unk_token=params['use_unk_token'])

        if params['sched_sampling_mode'] != None:
            gen_inp_list.append(epoch)

        real_inp_list = enc_inp_list + gen_inp_list

        #import ipdb; ipdb.set_trace()
        #---------------------------------- Compute cost and apply gradients ---------------------------#
        # evaluate cost, gradient and perform parameter update
        cost = f_grad_shared(*real_inp_list)
        f_update(params['learning_rate'])
        dt = time.time() - t0

        # print training statistics
        train_ppl2 = (2**(cost[1] / lenS))  #step_struct['stats']['ppl2']
        # smooth exponentially decaying moving average
        smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2
        if it == 0:
            smooth_train_ppl2 = train_ppl2  # start out where we start out

        total_cost = cost[0]
        if it == 0: smooth_cost = total_cost  # start out where we start out
        smooth_cost = 0.99 * smooth_cost + 0.01 * total_cost

        #print '%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, reg cost = %f, ppl2 = %.2f (smooth %.2f)' \
        #      % (it, max_iters, dt, epoch, cost['loss_cost'], cost['reg_cost'], \
        #         train_ppl2, smooth_train_ppl2)

        #---------------------------------- Write a report into a json file ---------------------------#
        tnow = time.time()
        if tnow > last_status_write_time + 60 * 1:  # every now and then lets write a report
            print '%d/%d batch done in %.3fs. at epoch %.2f. Cost now is %.3f and pplx is %.3f' \
                    % (it, max_iters, dt, epoch, smooth_cost, smooth_train_ppl2)
            last_status_write_time = tnow
            jstatus = {}
            jstatus['time'] = datetime.datetime.now().isoformat()
            jstatus['iter'] = (it, max_iters)
            jstatus['epoch'] = (epoch, max_epochs)
            jstatus['time_per_batch'] = dt
            jstatus['smooth_train_ppl2'] = smooth_train_ppl2
            jstatus['val_sc'] = val_sc  # just write the last available one
            jstatus['val_metric'] = params[
                'eval_metric']  # just write the last available one
            jstatus['train_ppl2'] = train_ppl2
            #if params['class_out_factoring'] == 1:
            #  jstatus['class_cost'] = float(cost[2])
            json_worker_status['history'].append(jstatus)
            status_file = os.path.join(
                params['worker_status_output_directory'],
                host + '_status.json')
            #import pdb; pdb.set_trace()
            try:
                json.dump(json_worker_status, open(status_file, 'w'))
            except Exception, e:  # todo be more clever here
                print 'tried to write worker status into %s but got error:' % (
                    status_file, )
                print e

        #--------------------------------- VALIDATION ---------------------------#
        #- perform perplexity evaluation on the validation set and save a model checkpoint if it's good
        is_last_iter = (it + 1) == max_iters
        if (((it + 1) % eval_period_in_iters) == 0
                and it < max_iters - 5) or is_last_iter:
            # Disable using dropout in validation
            use_dropout.set_value(0.)
            if params['use_encoder_for'] & 1:
                imgenc_use_dropout.set_value(0.)
            if params['use_encoder_for'] & 2:
                auxenc_use_dropout.set_value(0.)

            # perform the evaluation on VAL set
            val_sc = eval_split_theano('val', dp, model, params, misc, f_eval,
                                       **evalKwargs)
            val_sc = val_sc[0]
            val_perplex.append((it, val_sc))
            train_perplex.append((it, smooth_train_ppl2))

            if params['met_to_track'] != []:
                track_sc = eval_split_theano('val', dp, model, params, misc,
                                             f_eval, **trackMetargs)
                trackSc_array.append((it, {
                    evm: track_sc[i]
                    for i, evm in enumerate(params['met_to_track'])
                }))

            if epoch - params['lr_decay_st_epoch'] >= 0:
                params['learning_rate'] = params['learning_rate'] * params[
                    'lr_decay']
                params['lr_decay_st_epoch'] += 1

            print 'validation %s = %f, lr = %f' % (
                params['eval_metric'], val_sc, params['learning_rate'])
            #if params['sample_by_len'] == 1:
            #  print len_hist

            #----------------------------- SAVE THE MODEL -------------------#
            write_checkpoint_ppl_threshold = params[
                'write_checkpoint_ppl_threshold']
            if valMetOp(val_sc, top_val_sc) or top_val_sc < 0:
                if valMetOp(val_sc, write_checkpoint_ppl_threshold
                            ) or write_checkpoint_ppl_threshold < 0:
                    # if we beat a previous record or if this is the first time
                    # AND we also beat the user-defined threshold or it doesnt exist
                    top_val_sc = val_sc
                    filename = 'model_checkpoint_%s_%s_%s_%s%.2f.p' % (
                        params['dataset'], host, params['fappend'],
                        params['eval_metric'][:3], val_sc)
                    filepath = os.path.join(
                        params['checkpoint_output_directory'], filename)
                    model_npy = unzip(model)
                    rgrads_npy = unzip(rg)
                    checkpoint = {}
                    checkpoint['it'] = it
                    checkpoint['epoch'] = epoch
                    checkpoint['model'] = model_npy
                    checkpoint['rgrads'] = rgrads_npy
                    checkpoint['params'] = params
                    checkpoint['perplexity'] = val_sc
                    checkpoint['misc'] = misc
                    checkpoint['trackers'] = {
                        'train_perplex': train_perplex,
                        'val_perplex': val_perplex,
                        'trackScores': trackSc_array
                    }
                    try:
                        pickle.dump(checkpoint, open(filepath, "wb"))
                        print 'saved checkpoint in %s' % (filepath, )
                    except Exception, e:  # todo be more clever here
                        print 'tried to write checkpoint into %s but got error: ' % (
                            filepath, )
                        print e
示例#13
0
def main(params):
    """Initialise a caption generator and run a single CPU training step.

    The function forces ``params['mode']`` to ``'CPU'``, builds the
    vocabulary from the training sentences of ``params['dataset']``, lets the
    generator class returned by ``decodeGenerator`` initialise the model,
    overwrites ``model['bd']`` with the vocabulary bias vector, and then runs
    the solver loop (currently hard-wired to one iteration), printing the
    cost and perplexity of that step. ``cuda.close()`` is called once the
    loop is done.

    Gradient checking, loss-explosion detection, JSON status reports and
    validation/checkpointing are not performed by this variant.

    Args:
        params: dict of settings. Keys read directly here are
            ``'batch_size'``, ``'dataset'`` and ``'word_count_threshold'``.
            The whole dict is also handed to ``decodeGenerator``, the
            generator's ``init``, ``RNNGenCost`` and (as keyword arguments)
            ``solver.step``. ``params['mode']`` is overwritten in place.
    """
    batch_size = params['batch_size']
    dataset = params['dataset']
    word_count_threshold = params['word_count_threshold']

    # This entry point always runs in CPU mode, whatever the caller passed in.
    params['mode'] = 'CPU'

    # fetch the data provider
    dp = getDataProvider(dataset)

    # stores various misc items that need to be passed around the framework
    misc = {}

    # go over all training sentences and find the vocabulary we want to use,
    # i.e. the words that occur at least word_count_threshold number of times
    vocab = preProBuildWordVocab(dp.iterSentences('train'),
                                 word_count_threshold)
    misc['wordtoix'], misc['ixtoword'], bias_init_vector = vocab

    # delegate the initialization of the model to the Generator class
    BatchGenerator = decodeGenerator(params)
    init_struct = BatchGenerator.init(params, misc)
    model, misc['update'], misc['regularize'] = (init_struct['model'],
                                                 init_struct['update'],
                                                 init_struct['regularize'])

    # force overwrite here. This is a bit of a hack, not happy about it
    bias_row = bias_init_vector.reshape(1, bias_init_vector.size)
    if params['mode'] == 'GPU':
        # Only reachable if one of the calls above changed params['mode'].
        model['bd'] = gp.garray(bias_row)
    else:
        model['bd'] = bias_row

    def describe(keys):
        # "name [rows x cols]" for every listed model entry
        return ', '.join('%s [%dx%d]' %
                         (k, model[k].shape[0], model[k].shape[1])
                         for k in keys)

    print('model init done.')
    print('model has keys: ' + ', '.join(model.keys()))
    print('updating: ' + describe(misc['update']))
    # The original labelled this second list "updating" as well.
    print('regularizing: ' + describe(misc['regularize']))
    print('number of learnable parameters total: %d' % (sum(
        model[k].shape[0] * model[k].shape[1] for k in misc['update']), ))

    # initialize the Solver and the cost function
    solver = Solver()

    def costfun(batch, model):
        # wrap the cost function to abstract some things away from the Solver
        return RNNGenCost(batch, model, params, misc)

    # One epoch is one pass over all training sentences. Clamp to 1 so the
    # epoch computation below cannot divide by zero on a tiny split.
    num_sentences_total = dp.getSplitSize('train', ofwhat='sentences')
    num_iters_one_epoch = max(1, num_sentences_total / batch_size)

    # NOTE(review): the iteration count is hard-wired to a single step; the
    # epoch-based count (max_epochs * num_iters_one_epoch) used by the other
    # training scripts is not applied here -- confirm this is intended.
    max_iters = 1

    # initially size of dictionary of confusion
    smooth_train_ppl2 = len(misc['ixtoword'])
    for it in range(max_iters):
        t0 = time.time()
        # fetch a batch of data
        batch = [dp.sampleImageSentencePair() for _ in range(batch_size)]
        # evaluate cost, gradient and perform parameter update
        step_struct = solver.step(batch, model, costfun, **params)
        cost = step_struct['cost']
        dt = time.time() - t0

        # print training statistics
        train_ppl2 = step_struct['stats']['ppl2']
        if it == 0:
            smooth_train_ppl2 = train_ppl2  # start out where we start out
        else:
            # smooth exponentially decaying moving average
            smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2
        epoch = it * 1.0 / num_iters_one_epoch
        print('%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, reg cost = %f, ppl2 = %.2f (smooth %.2f)'
              % (it, max_iters, dt, epoch, cost['loss_cost'],
                 cost['reg_cost'], train_ppl2, smooth_train_ppl2))

    # Close once after the loop: closing inside the loop body would act on an
    # already-closed handle as soon as more than one iteration is run.
    cuda.close()
def main(params):
    """Caption the test images with a saved checkpoint and evaluate them.

    Loads the pickled checkpoint named by ``params['checkpoint_path']``,
    predicts a caption for every test image served by the
    ``"example_images"`` data provider, prints ground-truth and predicted
    sentences, writes ``eval/output`` and ``eval/reference0..4`` and runs
    ``eval/multi-bleu.perl`` on them, evaluates test perplexity with
    ``eval_split`` and finally dumps everything as JSON to
    ``params['result_struct_filename']``.

    Args:
        params: dict with the keys ``'checkpoint_path'``, ``'max_images'``,
            ``'dump_folder'`` (falsy to disable copying the images),
            ``'beam_size'`` and ``'result_struct_filename'``.
    """
    import shutil  # local import: only needed for the optional image dump

    # load the checkpoint
    checkpoint_path = params["checkpoint_path"]
    max_images = params["max_images"]

    print("loading checkpoint %s" % (checkpoint_path,))
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(checkpoint_path, "rb") as f:
        checkpoint = pickle.load(f)
    checkpoint_params = checkpoint["params"]
    model = checkpoint["model"]
    dump_folder = params["dump_folder"]

    if dump_folder:
        print("creating dump folder " + dump_folder)
        # Best effort, like the former "mkdir -p", but without a shell.
        if not os.path.isdir(dump_folder):
            try:
                os.makedirs(dump_folder)
            except OSError as e:
                print("could not create dump folder %s: %s" % (dump_folder, e))

    ## ANAND - CHANGE TEST PATH
    # The dataset recorded in the checkpoint is ignored on purpose here; the
    # provider is pinned to the bundled example images.
    dp = getDataProvider("example_images")

    misc = {}
    misc["wordtoix"] = checkpoint["wordtoix"]
    ixtoword = checkpoint["ixtoword"]

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        "params": params,
        "checkpoint_params": checkpoint_params,
        "imgblobs": [],
    }

    # iterate over all images in test set and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {"beam_size": params["beam_size"]}
    all_references = []
    all_candidates = []

    images = dp.iterImages(split="test", max_images=max_images)
    for n, img in enumerate(images, 1):
        print("image %d/%d:" % (n, max_images))

        # one space-joined string per human reference sentence
        references = [" ".join(x["tokens"]) for x in img["sentences"]]
        Ys = BatchGenerator.predict([{"image": img}], model, checkpoint_params, **kwparams)

        img_blob = {}  # we will build this up
        img_blob["img_path"] = img["local_file_path"]
        img_blob["imgid"] = img["imgid"]

        if dump_folder:
            # copy source file to some folder. This makes it easier to distribute results
            # into a webpage, because all images that were predicted on are in a single folder
            source_file = img["local_file_path"]
            target_file = os.path.join(dump_folder, os.path.basename(source_file))
            try:
                shutil.copy(source_file, target_file)
            except EnvironmentError as e:
                # keep going: the copy is a convenience, not part of the eval
                print("could not copy %s to %s: %s" % (source_file, target_file, e))

        # encode the human-provided references
        img_blob["references"] = []
        for gtsent in references:
            print("GT: " + gtsent)
            img_blob["references"].append({"text": gtsent})

        # now evaluate and encode the top prediction
        top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
        top_prediction = top_predictions[0]  # these are sorted with highest on top
        # ix 0 is the END token, skip that
        candidate = " ".join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        print("PRED: (%f) %s" % (top_prediction[0], candidate))

        # save for later eval
        all_references.append(references)
        all_candidates.append(candidate)

        # float() keeps the value JSON-serialisable even for numpy scalars
        img_blob["candidate"] = {"text": candidate, "logprob": float(top_prediction[0])}
        blob["imgblobs"].append(img_blob)

    # use perl script to eval BLEU score for fair comparison to other research work
    # first write intermediate files
    print("writing intermediate files into eval/")
    with open("eval/output", "w") as f:
        f.write("\n".join(all_candidates))
    # NOTE: five reference files are always written, so every image is
    # expected to carry at least five reference sentences.
    for q in range(5):
        with open("eval/reference%d" % q, "w") as f:
            f.write("\n".join([x[q] for x in all_references]))

    # invoke the perl script to get BLEU scores
    print("invoking eval/multi-bleu.perl script...")
    owd = os.getcwd()
    os.chdir("eval")
    try:
        os.system("./multi-bleu.perl reference < output")
    finally:
        # always return to the original working directory
        os.chdir(owd)

    # now also evaluate test split perplexity
    gtppl = eval_split("test", dp, model, checkpoint_params, misc, eval_max_images=max_images)
    print("perplexity of ground truth words based on dictionary of %d words: %f" % (len(ixtoword), gtppl))
    blob["gtppl"] = gtppl

    # dump result struct to file
    print("saving result struct to %s" % (params["result_struct_filename"],))
    with open(params["result_struct_filename"], "w") as f:
        json.dump(blob, f)
def main(params):
    """Caption the test images with a Theano predictor and run BLEU scoring.

    Loads the pickled checkpoint named by ``params['checkpoint_path']``,
    forces ``use_theano`` on in the checkpoint parameters, prepares the
    predictor through ``BatchGenerator.prepPredictor``, predicts captions for
    every test image, stores the best caption plus all remaining beam
    candidates per image, writes ``eval/output`` and ``eval/reference0..4``,
    runs ``eval/multi-bleu.perl`` and dumps the results as JSON to
    ``params['result_struct_filename']``. Test-split perplexity is not
    evaluated by this variant.

    Args:
        params: dict with the keys ``'checkpoint_path'``, ``'max_images'``,
            ``'dump_folder'`` (falsy to disable copying the images),
            ``'beam_size'`` and ``'result_struct_filename'``.
    """
    import shutil  # local import: only needed for the optional image dump

    # load the checkpoint
    checkpoint_path = params["checkpoint_path"]
    max_images = params["max_images"]

    print("loading checkpoint %s" % (checkpoint_path,))
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(checkpoint_path, "rb") as f:
        checkpoint = pickle.load(f)
    checkpoint_params = checkpoint["params"]
    model_npy = checkpoint["model"]
    dump_folder = params["dump_folder"]

    # The Theano path is always used, whatever the checkpoint recorded.
    checkpoint_params["use_theano"] = 1

    if "image_feat_size" not in checkpoint_params:
        checkpoint_params["image_feat_size"] = 4096

    if dump_folder:
        print("creating dump folder " + dump_folder)
        # Best effort, like the former "mkdir -p", but without a shell.
        if not os.path.isdir(dump_folder):
            try:
                os.makedirs(dump_folder)
            except OSError as e:
                print("could not create dump folder %s: %s" % (dump_folder, e))

    # fetch the data provider
    dp = getDataProvider(checkpoint_params)

    ixtoword = checkpoint["ixtoword"]

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        "params": params,
        "checkpoint_params": checkpoint_params,
        "imgblobs": [],
    }

    # iterate over all images in test set and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)

    # Compile and init the theano predictor
    BatchGenerator.prepPredictor(model_npy, checkpoint_params, params["beam_size"])
    model = BatchGenerator.model_th
    print(
        "\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n"
        % (checkpoint["epoch"], checkpoint["perplexity"])
    )

    def decode(word_ids):
        # ix 0 is the END token, skip that
        return " ".join([ixtoword[int(ix)] for ix in word_ids if ix > 0])

    kwparams = {"beam_size": params["beam_size"]}
    all_references = []
    all_candidates = []
    images = dp.iterImages(split="test", max_images=max_images)
    for n, img in enumerate(images, 1):
        print("image %d/%d:" % (n, max_images))
        # one space-joined string per human reference sentence
        references = [" ".join(x["tokens"]) for x in img["sentences"]]

        # NOTE(review): this replaces the real image features with uniform
        # random noise before prediction, so the captions below are not
        # conditioned on the image. Looks like a leftover experiment --
        # confirm before trusting the reported scores.
        img["feat"] = np.random.rand(*img["feat"].shape)

        Ys = BatchGenerator.predict([{"image": img}], model, checkpoint_params, **kwparams)

        img_blob = {}  # we will build this up
        img_blob["img_path"] = img["local_file_path"]
        img_blob["imgid"] = img["imgid"]

        if dump_folder:
            # copy source file to some folder. This makes it easier to distribute results
            # into a webpage, because all images that were predicted on are in a single folder
            source_file = img["local_file_path"]
            target_file = os.path.join(dump_folder, os.path.basename(source_file))
            try:
                shutil.copy(source_file, target_file)
            except EnvironmentError as e:
                # keep going: the copy is a convenience, not part of the eval
                print("could not copy %s to %s: %s" % (source_file, target_file, e))

        # encode the human-provided references
        img_blob["references"] = []
        for gtsent in references:
            print("GT: " + gtsent)
            img_blob["references"].append({"text": gtsent})

        # now evaluate and encode the top prediction
        top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
        top_prediction = top_predictions[0]  # these are sorted with highest on top
        candidate = decode(top_prediction[1])
        print("PRED: (%f) %s" % (top_prediction[0], candidate))

        # save for later eval
        all_references.append(references)
        all_candidates.append(candidate)

        img_blob["candidate"] = {"text": candidate, "logprob": float(top_prediction[0])}

        # also keep every remaining beam candidate, best first
        img_blob["candidatelist"] = [
            {"text": decode(prediction[1]), "logprob": float(prediction[0])}
            for prediction in top_predictions[1:]
        ]

        blob["imgblobs"].append(img_blob)

    # use perl script to eval BLEU score for fair comparison to other research work
    # first write intermediate files
    print("writing intermediate files into eval/")
    with open("eval/output", "w") as f:
        f.write("\n".join(all_candidates))
    # NOTE: five reference files are always written, so every image is
    # expected to carry at least five reference sentences.
    for q in range(5):
        with open("eval/reference%d" % q, "w") as f:
            f.write("\n".join([x[q] for x in all_references]))

    # invoke the perl script to get BLEU scores
    print("invoking eval/multi-bleu.perl script...")
    owd = os.getcwd()
    os.chdir("eval")
    try:
        os.system("./multi-bleu.perl reference < output")
    finally:
        # always return to the original working directory
        os.chdir(owd)

    # dump result struct to file
    print("saving result struct to %s" % (params["result_struct_filename"],))
    with open(params["result_struct_filename"], "w") as f:
        json.dump(blob, f)
示例#16
0
  PJT_ROOT = '/works/neuraltalk/'
  MODEL_ROOT = '/storage/models/vgg/'
  PATH_MODEL_DEF_FILE = '%s/vgg_layer16_deploy_feature_relu7.prototxt' % MODEL_ROOT
  PATH_MODEL = '%s/vgg_layer16.caffemodel' % MODEL_ROOT
  WITH_GPU = 0
  path_imgs = []
  
  print "Feature Extraction for %d images starting now"%(len(path_imgs))
  net = caffe_load_model(PATH_MODEL_DEF_FILE, PATH_MODEL, WITH_GPU)

  import pdb; pdb.set_trace()
  params = {}
  params['beam_size'] = 10
  params['checkpoint_path'] = '%s/cv/coco/model_checkpoint_coco_SKP1002596MN001.local_baseline_11.14.p' % PJT_ROOT
  checkpoint = pickle.load(open(params['checkpoint_path'], 'rb'))
  BatchGenerator = decodeGenerator(checkpoint)
  checkpoint_params = checkpoint['params']
  model = checkpoint['model']
  ixtoword = checkpoint['ixtoword']

  import pdb; pdb.set_trace()
  while True:
    path_imgs = []
    path_imgs.append(raw_input("Input image: "))
    start_time = time.time()
    features = caffe_extract_feats(net, path_imgs)
    print "Encoding in %.2f sec."%(time.time()-start_time)
  
    img = {}
    img['feat'] = features[:,0]
    kwparams = { 'beam_size' : params['beam_size'] }
def main(params):
    """Score user-supplied sentences for a list of images with a checkpoint.

    Reads the pickled checkpoint named by ``params['checkpoint_path']`` and
    the comma-separated list file ``params['imgList']``. Each line must hold
    at least ``image_name,sentence``; an optional third field is the integer
    feature index of the image (when absent, the line number is used).
    Features are loaded with ``loadArbitraryFeatures``, the sentences are
    pushed through the generator's ``f_eval_other`` function in batches, and
    one entry per line (image path, raw sentence, log-probability) is written
    to ``result_struct_<fname_append>.json`` under ``params['root_path']``.

    Args:
        params: dict with the keys ``'checkpoint_path'``, ``'root_path'``,
            ``'imgList'`` and ``'fname_append'``; optional
            ``'eval_batch_size'`` (default 100) and ``'aux_inp_file'``.

    Returns:
        None. Returns early, after printing an error, when the list file is
        empty or its first line has fewer than two fields.

    Raises:
        ValueError: if the checkpoint enables ``en_aux_inp`` but
            ``params['aux_inp_file']`` is missing or ``None``.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(checkpoint_path, 'rb') as f:
        checkpoint = pickle.load(f)
    checkpoint_params = checkpoint['params']

    model_npy = checkpoint['model']
    wordtoix = checkpoint['wordtoix']

    # The Theano path is always used, whatever the checkpoint recorded.
    checkpoint_params['use_theano'] = 1

    if 'image_feat_size' not in checkpoint_params:
        checkpoint_params['image_feat_size'] = 4096

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # load the tasks.txt file, splitting every line only once
    root_path = params['root_path']
    with open(params['imgList'], 'r') as f:
        rows = [line.split(',') for line in f.read().splitlines()]

    # The layout is decided by the first line; an empty file counts as
    # malformed instead of raising IndexError.
    n_fields = len(rows[0]) if rows else 0
    if n_fields < 2:
        print('ERROR: List should atleast contain image name and a corresponding sentence')
        return

    img_names = [row[0] for row in rows]
    sentRaw = [row[1] for row in rows]
    if n_fields > 2:
        idxes = [int(row[2]) for row in rows]
    else:
        idxes = list(range(len(rows)))

    use_aux_inp = checkpoint_params.get('en_aux_inp', 0)
    if use_aux_inp and params.get('aux_inp_file', None) is None:
        raise ValueError(
            'ERROR: please specify auxillary input feature using --aux_inp_file'
        )

    # load the features for all images
    features, aux_inp = loadArbitraryFeatures(params, idxes)
    N = len(img_names)

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    BatchGenerator.build_eval_other_sent(BatchGenerator.model_th,
                                         checkpoint_params, model_npy)
    # a non-positive batch size would never advance the loop below
    eval_batch_size = max(1, params.get('eval_batch_size', 100))

    gen_fprop = BatchGenerator.f_eval_other

    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" %
          (checkpoint['epoch'], checkpoint['perplexity']))

    n = 0
    while n < N:
        # trailing comma: Python 2 idiom to keep the progress text on one line
        print('image %d/%d:\r' % (n, N)),

        # gather the next batch of (image feature, sentence) pairs
        batch_end = min(N, n + eval_batch_size)
        batch = []
        for idx in range(n, batch_end):
            sentence = sentRaw[idx]
            out = {
                'image': {'feat': features[:, idx]},
                'sentence': {
                    'raw': sentence,
                    'tokens': word_tokenize(sentence)
                },
                'idx': idx,
            }
            if use_aux_inp:
                out['image']['aux_inp'] = aux_inp[:, idx]
            batch.append(out)
        n = batch_end

        inp_list, _ = prepare_data(batch, wordtoix)

        # perform the work. heavy lifting happens inside
        eval_array = gen_fprop(*inp_list)

        for ix, x in enumerate(batch):
            # build up the output: the given sentence and its score
            blob['imgblobs'].append({
                'img_path': img_names[x['idx']],
                'candidate': {
                    'text': x['sentence']['raw'],
                    'logprob': float(eval_array[0, ix])
                },
            })

    # dump result struct to file
    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as f:
        json.dump(blob, f)
示例#18
0
def main(params):
    """Train the sentence generator, logging progress and checkpointing on improved validation perplexity.

    Builds the vocabulary from the training sentences, lets the generator class
    initialise the model, then runs ``max_epochs * (sentences / batch_size)``
    solver steps on randomly sampled image/sentence pairs.  Roughly once a
    minute a JSON status report is written; every ``eval_period`` epochs (and
    on the final iteration) the validation perplexity is computed and a
    checkpoint is pickled when it beats both the best value seen so far and
    the optional user threshold.

    Args:
        params: dict of run settings.  Keys read here: ``batch_size``,
            ``dataset``, ``word_count_threshold``, ``do_grad_check``,
            ``max_epochs``, ``eval_period``, ``min_ppl_or_abort``,
            ``write_checkpoint_ppl_threshold``, ``fappend``,
            ``worker_status_output_directory``,
            ``checkpoint_output_directory`` and, optionally,
            ``init_model_from``.  The dict is also forwarded to the generator,
            the cost function and the solver, and its two dropout entries are
            zeroed in place right before a gradient check.

    Returns:
        None.  The process exits through ``sys.exit()`` after a gradient check.
    """
    batch_size = params["batch_size"]
    dataset = params["dataset"]
    word_count_threshold = params["word_count_threshold"]
    do_grad_check = params["do_grad_check"]
    max_epochs = params["max_epochs"]
    host = socket.gethostname()  # hostname is used in status/checkpoint file names

    # fetch the data provider
    dp = getDataProvider(dataset)

    misc = {}  # stores various misc items that need to be passed around the framework

    # go over all training sentences and find the vocabulary we want to use, i.e. the words that occur
    # at least word_count_threshold number of times
    misc["wordtoix"], misc["ixtoword"], bias_init_vector = preProBuildWordVocab(
        dp.iterSentences("train"), word_count_threshold
    )

    # delegate the initialization of the model to the Generator class
    BatchGenerator = decodeGenerator(params)
    init_struct = BatchGenerator.init(params, misc)
    model = init_struct["model"]
    misc["update"] = init_struct["update"]
    misc["regularize"] = init_struct["regularize"]

    # force overwrite here. This is a bit of a hack, not happy about it
    model["bd"] = bias_init_vector.reshape(1, bias_init_vector.size)

    def describe(keys):
        # "name [rows x cols]" summary for the given parameter names
        return ", ".join("%s [%dx%d]" % (k, model[k].shape[0], model[k].shape[1]) for k in keys)

    print("model init done.")
    print("model has keys: " + ", ".join(model.keys()))
    print("updating: " + describe(misc["update"]))
    # this line lists the regularised parameters; it used to be mislabelled "updating"
    print("regularizing: " + describe(misc["regularize"]))
    print("number of learnable parameters total: %d" % (
        sum(model[k].shape[0] * model[k].shape[1] for k in misc["update"]),
    ))

    if params.get("init_model_from", ""):
        # load checkpoint
        # NOTE: unpickling executes arbitrary code; only point this at trusted files
        with open(params["init_model_from"], "rb") as f:
            checkpoint = pickle.load(f)
        model = checkpoint["model"]  # overwrite the model
        print(checkpoint["model"])

    # initialize the Solver and the cost function
    solver = Solver()

    def costfun(batch, model):
        # wrap the cost function to abstract some things away from the Solver
        return RNNGenCost(batch, model, params, misc)

    # calculate how many iterations we need
    num_sentences_total = dp.getSplitSize("train", ofwhat="sentences")
    num_iters_one_epoch = num_sentences_total // batch_size  # explicit floor division
    max_iters = max_epochs * num_iters_one_epoch
    eval_period_in_epochs = params["eval_period"]
    eval_period_in_iters = max(1, int(num_iters_one_epoch * eval_period_in_epochs))
    abort = False
    top_val_ppl2 = -1  # negative means "no checkpoint written yet"
    smooth_train_ppl2 = len(misc["ixtoword"])  # initially size of dictionary of confusion
    val_ppl2 = len(misc["ixtoword"])
    last_status_write_time = 0  # for writing worker job status reports
    json_worker_status = {}
    json_worker_status["params"] = params
    json_worker_status["history"] = []
    for it in xrange(max_iters):
        if abort:
            break
        t0 = time.time()
        # fetch a batch of data
        batch = [dp.sampleImageSentencePair() for _ in xrange(batch_size)]
        # evaluate cost, gradient and perform parameter update
        step_struct = solver.step(batch, model, costfun, **params)
        cost = step_struct["cost"]
        dt = time.time() - t0

        # print training statistics
        train_ppl2 = step_struct["stats"]["ppl2"]
        if it == 0:
            smooth_train_ppl2 = train_ppl2  # start out where we start out
        else:
            # smooth exponentially decaying moving average
            smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2
        epoch = it * 1.0 / num_iters_one_epoch
        print("%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, reg cost = %f, ppl2 = %.2f (smooth %.2f)" % (
            it,
            max_iters,
            dt,
            epoch,
            cost["loss_cost"],
            cost["reg_cost"],
            train_ppl2,
            smooth_train_ppl2,
        ))

        # perform gradient check if desired, with a bit of a burnin time (10 iterations)
        if it == 10 and do_grad_check:
            print("disabling dropout for gradient check...")
            params["drop_prob_encoder"] = 0
            params["drop_prob_decoder"] = 0
            solver.gradCheck(batch, model, costfun)
            print("done gradcheck, exitting.")
            sys.exit()  # hmmm. probably should exit here

        # detect if loss is exploding and kill the job if so
        total_cost = cost["total_cost"]
        if it == 0:
            total_cost0 = total_cost  # store this initial cost
        if total_cost > total_cost0 * 2:
            print("Aboring, cost seems to be exploding. Run gradcheck? Lower the learning rate?")
            abort = True  # set the abort flag, we'll break out

        # logging: write JSON files for visual inspection of the training
        tnow = time.time()
        if tnow > last_status_write_time + 60 * 1:  # every now and then lets write a report
            last_status_write_time = tnow
            jstatus = {}
            jstatus["time"] = datetime.datetime.now().isoformat()
            jstatus["iter"] = (it, max_iters)
            jstatus["epoch"] = (epoch, max_epochs)
            jstatus["time_per_batch"] = dt
            jstatus["smooth_train_ppl2"] = smooth_train_ppl2
            jstatus["val_ppl2"] = val_ppl2  # just write the last available one
            jstatus["train_ppl2"] = train_ppl2
            json_worker_status["history"].append(jstatus)
            status_file = os.path.join(params["worker_status_output_directory"], host + "_status.json")
            try:
                with open(status_file, "w") as f:
                    json.dump(json_worker_status, f)
            except Exception as e:  # best effort: a failed status write must not stop training
                print("tried to write worker status into %s but got error:" % (status_file,))
                print(e)

        # perform perplexity evaluation on the validation set and save a model checkpoint if it's good
        is_last_iter = (it + 1) == max_iters
        if (((it + 1) % eval_period_in_iters) == 0 and it < max_iters - 5) or is_last_iter:
            val_ppl2 = eval_split("val", dp, model, params, misc)  # perform the evaluation on VAL set
            print("validation perplexity = %f" % (val_ppl2,))

            # abort training if the perplexity is no good
            min_ppl_or_abort = params["min_ppl_or_abort"]
            if val_ppl2 > min_ppl_or_abort and min_ppl_or_abort > 0:
                # message now states the comparison that is actually tested (was "<")
                print("aborting job because validation perplexity %f > %f" % (val_ppl2, min_ppl_or_abort))
                abort = True  # abort the job

            write_checkpoint_ppl_threshold = params["write_checkpoint_ppl_threshold"]
            if val_ppl2 < top_val_ppl2 or top_val_ppl2 < 0:
                if val_ppl2 < write_checkpoint_ppl_threshold or write_checkpoint_ppl_threshold < 0:
                    # if we beat a previous record or if this is the first time
                    # AND we also beat the user-defined threshold or it doesnt exist
                    top_val_ppl2 = val_ppl2
                    filename = "model_checkpoint_%s_%s_%s_%.2f.p" % (dataset, host, params["fappend"], val_ppl2)
                    filepath = os.path.join(params["checkpoint_output_directory"], filename)
                    checkpoint = {}
                    checkpoint["it"] = it
                    checkpoint["epoch"] = epoch
                    checkpoint["model"] = model
                    checkpoint["params"] = params
                    checkpoint["perplexity"] = val_ppl2
                    checkpoint["wordtoix"] = misc["wordtoix"]
                    checkpoint["ixtoword"] = misc["ixtoword"]
                    try:
                        with open(filepath, "wb") as f:
                            pickle.dump(checkpoint, f)
                        print("saved checkpoint in %s" % (filepath,))
                    except Exception as e:  # best effort: keep training even if the save fails
                        # was `filepat`, which raised NameError inside this handler
                        print("tried to write checkpoint into %s but got error: " % (filepath,))
                        print(e)
示例#19
0
def main(params):
    """Caption every image listed in ``params['imgList']`` and write JSON and HTML result files.

    Loads one checkpoint (``multi_model == 0``) or a list of checkpoints that
    are combined with equal weights, runs beam-search prediction per image and
    stores the top candidate plus the remaining candidates for each image.
    Results go to ``result_struct_<fname_append>.json`` (also flushed
    periodically during the run) and ``result_<fname_append>.html`` inside
    ``params['root_path']``.

    Args:
        params: dict of run settings.  Keys read here: ``multi_model``,
            ``checkpoint_path`` (a single path, or an iterable of paths in
            multi-model mode), ``beam_size``, ``nmodels`` (multi-model only),
            ``root_path``, ``imgList`` and ``fname_append``.  The dict is also
            passed to ``loadArbitraryFeatures``.

    Returns:
        None.
    """
    single_model = params['multi_model'] == 0

    # load the checkpoint(s)
    # NOTE: unpickling executes arbitrary code; only load checkpoints you trust
    if single_model:
        checkpoint_path = params['checkpoint_path']
        print('loading checkpoint %s' % (checkpoint_path, ))
        with open(checkpoint_path, 'rb') as f:
            checkpoint = pickle.load(f)
        checkpoint_params = checkpoint['params']
        model_npy = checkpoint['model']
        checkpoint_params['use_theano'] = 1
        if 'image_feat_size' not in checkpoint_params:
            checkpoint_params['image_feat_size'] = 4096

        BatchGenerator = decodeGenerator(checkpoint_params)
        # Compile and init the theano predictor
        BatchGenerator.prepPredictor(model_npy, checkpoint_params,
                                     params['beam_size'])
        model = BatchGenerator.model_th
    else:
        BatchGenerator = []
        model_npy = []
        modelTh = []
        checkpoint_params = []
        for i, checkpoint_path in enumerate(params['checkpoint_path']):
            with open(checkpoint_path, 'rb') as f:
                checkpoint = pickle.load(f)
            model_npy.append(checkpoint['model'])
            checkpoint_params.append(checkpoint['params'])
            checkpoint_params[i]['use_theano'] = 1
            BatchGenerator.append(decodeGenerator(checkpoint_params[i]))
            zipp(model_npy[i], BatchGenerator[i].model_th)
            modelTh.append(BatchGenerator[i].model_th)
            modelTh[i]['comb_weight'] = 1.0 / params['nmodels']

        BatchGenerator[0].prepMultiPredictor(modelTh, checkpoint_params,
                                             params['beam_size'],
                                             params['nmodels'])

    # In multi-model mode `checkpoint` is the LAST checkpoint loaded above, so the
    # vocabulary and the epoch/perplexity banner below come from that one.
    misc = {}
    ixtoword = checkpoint['ixtoword']
    misc['wordtoix'] = checkpoint['wordtoix']

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = checkpoint_params
    blob['imgblobs'] = []

    # load the image list and set up feature loading
    root_path = params['root_path']
    with open(params['imgList'], 'r') as f:
        img_names_list = f.read().splitlines()

    # Lines are either "name" or "name,feature_index"; the first line decides the
    # format.  An empty list used to crash on img_names_list[0].
    if img_names_list and len(img_names_list[0].rsplit(',')) > 1:
        fields = [x.rsplit(',') for x in img_names_list]
        img_names = [parts[0] for parts in fields]
        idxes = [int(parts[1]) for parts in fields]
    else:
        img_names = img_names_list
        idxes = xrange(len(img_names_list))

    # load the features for all images
    features, aux_inp = loadArbitraryFeatures(params, idxes)

    N = len(img_names)

    # iterate over all images and predict sentences
    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (
        checkpoint['epoch'], checkpoint['perplexity']))

    kwparams = {'beam_size': params['beam_size']}
    if not single_model:
        kwparams['nmodels'] = params['nmodels']  # loop-invariant, set once

    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)

    def words_of(prediction):
        # prediction[1] holds the word indices; ix 0 is the END token, skip that
        return ' '.join([ixtoword[int(ix)] for ix in prediction[1] if ix > 0])

    for n in xrange(N):
        print('image %d/%d:' % (n, N))

        # encode the image
        if single_model:
            img = {}
            img['feat'] = features[:, n]
            if checkpoint_params.get('en_aux_inp', 0):
                img['aux_inp'] = aux_inp[:, n]
            img['local_file_path'] = img_names[n]
            # perform the work. heavy lifting happens inside
            Ys = BatchGenerator.predict([{
                'image': img
            }], model, checkpoint_params, **kwparams)
        else:
            # one input dict per model, each with that model's own features
            batch = []
            for i in xrange(params['nmodels']):
                img = {}
                img['feat'] = features[i][:, n]
                if checkpoint_params[i].get('en_aux_inp', 0):
                    img['aux_inp'] = aux_inp[i][:, n]
                img['local_file_path'] = img_names[n]
                batch.append({'image': img})
            Ys = BatchGenerator[0].predictMulti(batch, checkpoint_params,
                                                **kwparams)

        # build up the output
        img_blob = {}
        img_blob['img_path'] = img['local_file_path']

        # encode the top prediction
        top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
        top_prediction = top_predictions[0]  # these are sorted with highest on top
        candidate = words_of(top_prediction)
        print('PRED: (%f) %s' % (float(top_prediction[0]), candidate))
        img_blob['candidate'] = {
            'text': candidate,
            'logprob': float(top_prediction[0])
        }

        # save all the other candidates as well
        img_blob['candidatelist'] = [
            {'text': words_of(prediction), 'logprob': float(prediction[0])}
            for prediction in top_predictions[1:]
        ]
        blob['imgblobs'].append(img_blob)

        # periodic flush so a long run leaves partial results behind
        if (n % 5000) == 1:
            print('writing predictions to %s...' % (save_file, ))
            with open(save_file, 'w') as f:
                json.dump(blob, f)

    # dump result struct to file
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as f:
        json.dump(blob, f)

    # dump output html (joined once instead of repeated string +=)
    html_parts = []
    for entry in blob['imgblobs']:
        html_parts.append('<img src="%s" height="400"><br>' % (entry['img_path'], ))
        html_parts.append('(%f) %s <br><br>' % (entry['candidate']['logprob'],
                                                entry['candidate']['text']))
    html = ''.join(html_parts)

    html_file = 'result_%s.html' % (params['fname_append'])
    html_file = os.path.join(root_path, html_file)
    print('writing html result file to %s...' % (html_file, ))
    with open(html_file, 'w') as f:
        f.write(html)
def main(params):
  """Caption the test split, score it with BLEU and perplexity, and dump a JSON report.

  Loads a pickled checkpoint, predicts a sentence for every test image via
  ``BatchGenerator.predict_test``, writes the candidates and references into
  ``eval/`` for the ``multi-bleu.perl`` script, evaluates test perplexity with
  ``eval_split`` and finally saves everything to
  ``params['result_struct_filename']``.

  Args:
    params: dict of run settings.  Keys read here: ``checkpoint_path``,
      ``max_images``, ``dump_folder``, ``pert``, ``lda``, ``cca``,
      ``beam_size``, ``normalization`` and ``result_struct_filename``.

  Returns:
    None.
  """
  import subprocess  # local import: only needed for the shell-free mkdir/cp calls

  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  max_images = params['max_images']

  print('loading checkpoint %s' % (checkpoint_path, ))
  # NOTE: unpickling executes arbitrary code; only load checkpoints you trust
  with open(checkpoint_path, 'rb') as f:
    checkpoint = pickle.load(f)
  checkpoint_params = checkpoint['params']
  dataset = checkpoint_params['dataset']
  model = checkpoint['model']
  dump_folder = params['dump_folder']

  if dump_folder:
    print('creating dump folder ' + dump_folder)
    # argument list instead of a shell string: safe for paths with spaces/metacharacters
    subprocess.call(['mkdir', '-p', dump_folder])

  # fetch the data provider
  dp = getDataProvider(dataset, params['pert'])
  dp.load_topic_models(dataset, params['lda'])

  misc = {}
  misc['wordtoix'] = checkpoint['wordtoix']
  ixtoword = checkpoint['ixtoword']

  blob = {}  # output blob which we will dump to JSON for visualizing the results
  blob['params'] = params
  blob['checkpoint_params'] = checkpoint_params
  blob['imgblobs'] = []

  # iterate over all images in test set and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  n = 0
  all_references = []
  all_candidates = []

  # Added for CCA and perturbed dataset
  if params['cca']:
    pert_str = '_pert' if params['pert'] else ''
    ccaweights = np.loadtxt('cca/imageprojection_' + str(params['cca']) + pert_str + '.txt', delimiter=',')
    misc['ccaweights'] = ccaweights
  else:
    ccaweights = None

  # The prediction options do not depend on the image, so build them (and load
  # the idf table) once instead of once per image.
  kwparams = {
    'beam_size': params['beam_size'],
    'normalization': params['normalization'],
    'ccaweights': ccaweights,
  }
  # Added for idf normalization
  if params['normalization'] == 'idf' or params['normalization'] == 'combined':
    kwparams['idf'] = load_idf()
    kwparams['words'] = ixtoword
  else:
    kwparams['idf'] = None
    kwparams['words'] = None

  for img in dp.iterImages(split='test', max_images=max_images):
    n += 1
    print('image %d/%d:' % (n, max_images))
    references = [' '.join(x['tokens']) for x in img['sentences']]  # one string per reference sentence

    # The former LDA / non-LDA branches made exactly the same call.
    Ys = BatchGenerator.predict_test([{'image': img}], model, checkpoint_params, **kwparams)

    img_blob = {}  # we will build this up
    img_blob['img_path'] = img['local_file_path']
    img_blob['imgid'] = img['imgid']

    if dump_folder:
      # copy source file to some folder. This makes it easier to distribute results
      # into a webpage, because all images that were predicted on are in a single folder
      source_file = img['local_file_path']
      target_file = os.path.join(dump_folder, os.path.basename(img['local_file_path']))
      # best effort, like the old os.system call: a failed copy does not raise
      subprocess.call(['cp', source_file, target_file])

    # encode the human-provided references
    img_blob['references'] = []
    for gtsent in references:
      print('GT: ' + gtsent)
      img_blob['references'].append({'text': gtsent})

    # now evaluate and encode the top prediction
    top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
    top_prediction = top_predictions[0]  # these are sorted with highest on top
    candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])  # ix 0 is the END token, skip that
    print('PRED: (%f) %s' % (top_prediction[0], candidate))

    # save for later eval
    all_references.append(references)
    all_candidates.append(candidate)

    # float() keeps the value JSON-serialisable, as the sibling predictor does
    img_blob['candidate'] = {'text': candidate, 'logprob': float(top_prediction[0])}
    blob['imgblobs'].append(img_blob)

  # use perl script to eval BLEU score for fair comparison to other research work
  # first write intermediate files
  print('writing intermediate files into eval/')
  with open('eval/output', 'w') as f:
    f.write('\n'.join(all_candidates))
  # NOTE(review): assumes every image has at least 5 reference sentences;
  # fewer raises IndexError here - confirm against the dataset.
  for q in xrange(5):
    with open('eval/reference' + str(q), 'w') as f:
      f.write('\n'.join([x[q] for x in all_references]))
  # invoke the perl script to get BLEU scores
  print('invoking eval/multi-bleu.perl script...')
  owd = os.getcwd()
  os.chdir('eval')
  try:
    os.system('./multi-bleu.perl reference < output')
  finally:
    os.chdir(owd)  # always restore the working directory

  # now also evaluate test split perplexity
  gtppl = eval_split('test', dp, model, checkpoint_params, misc, eval_max_images=max_images)
  print('perplexity of ground truth words based on dictionary of %d words: %f' % (len(ixtoword), gtppl))
  blob['gtppl'] = gtppl

  # dump result struct to file
  print('saving result struct to %s' % (params['result_struct_filename'], ))
  with open(params['result_struct_filename'], 'w') as f:
    json.dump(blob, f)
def main(params):
  """Alternately train an evaluator network and a sentence generator, checkpointing after each phase.

  Builds the generator prediction graph and the evaluator graph on top of it,
  compiles gradient/update functions for both through the solver, then for
  each of ``max_epochs`` outer iterations runs ``iters_eval`` evaluator
  updates followed by ``iters_gen`` generator updates.  A checkpoint is
  written with ``dumpCheckpoint`` after the evaluator phase, after the
  generator phase, and from the SIGINT handler.

  Args:
    params: dict of run settings.  Keys read here include ``batch_size``,
      ``word_count_threshold``, ``max_epochs``, ``solver``, ``beam_size``,
      ``share_Wemb``, ``fix_Wemb``, ``eval_period``,
      ``checkpoint_file_name``, ``dataset``, ``fappend``,
      ``eval_batch_size``, ``rand_negs``, ``maxlen``, ``eval_model``,
      ``learning_rate_eval``, ``learning_rate_gen`` and
      ``write_checkpoint_ppl_threshold``.  The function also writes
      ``aux_inp_size``, ``image_feat_size``, ``vocabulary_size``,
      ``output_size`` and ``use_dropout`` into it.
  """
  batch_size = params['batch_size']
  word_count_threshold = params['word_count_threshold']
  max_epochs = params['max_epochs']
  host = socket.gethostname() # get computer hostname

  # fetch the data provider
  dp = getDataProvider(params)
  
  # Initialize the optimizer 
  solver = Solver(params['solver'])

  # feature sizes come from the data provider and are published through params
  params['aux_inp_size'] = dp.aux_inp_size
  params['image_feat_size'] = dp.img_feat_size

  print 'Image feature size is %d, and aux input size is %d'%(params['image_feat_size'],params['aux_inp_size'])

  misc = {} # stores various misc items that need to be passed around the framework

  # go over all training sentences and find the vocabulary we want to use, i.e. the words that occur
  # at least word_count_threshold number of times
  misc['wordtoix'], misc['ixtoword'], bias_init_vector = preProBuildWordVocab(dp.iterSentences('train'), word_count_threshold)
  params['vocabulary_size'] = len(misc['wordtoix'])
  params['output_size'] = len(misc['ixtoword']) # these should match though
  params['use_dropout'] = 1 

  # This initializes the model parameters and does matrix initializations  
  generator = decodeGenerator(params)
  (gen_inp_list, predLogProb, predIdx, predCand, wOut_emb, updatesLstm) = generator.build_prediction_model(
                                            generator.model_th, params, params['beam_size'])
  # keeps axes 0 and 2 only; assumes the middle axis has length 1 - TODO confirm
  wOut_emb = wOut_emb.reshape([wOut_emb.shape[0],wOut_emb.shape[2]])
  # generator-only function; not called anywhere else in this function
  f_gen_only = theano.function(gen_inp_list, [predLogProb, predIdx, wOut_emb], name='f_pred', updates=updatesLstm)
  
  modelGen = generator.model_th
  upListGen = generator.update_list
 
  # optionally hand the generator's word embedding to the evaluator
  if params['share_Wemb']:
     evaluator = decodeEvaluator(params, modelGen['Wemb'])
  else:
     evaluator = decodeEvaluator(params)
  modelEval = evaluator.model_th
  # Define the computational graph for relating the input image features and word indices to the
  # log probability cost funtion. 
  
  (use_dropout_eval, eval_inp_list,
     f_pred_fns, costs, predTh, modelEval) = evaluator.build_advers_eval(modelEval, params, gen_inp_list, wOut_emb)
  
  # Merge the evaluator and generator inputs into one list, without duplicates.
  # NOTE(review): comb_inp_list is the same list object as eval_inp_list, so the
  # appends below also extend eval_inp_list.
  comb_inp_list = eval_inp_list
  for inp in gen_inp_list:
    if inp not in comb_inp_list:
        comb_inp_list.append(inp)
  # Compile an evaluation function.. Doesn't include gradients
  # To be used for validation set evaluation
  # (f_eval is only referenced from commented-out code further down)
  f_eval= theano.function(comb_inp_list, costs, name='f_eval', updates=updatesLstm)

  # Now let's build a gradient computation graph and rmsprop update mechanism
  # A shared embedding is dropped from the evaluator's parameters, and a fixed
  # embedding is dropped from the generator's update list.
  if params['share_Wemb']:
    modelEval.pop('Wemb')
  if params['fix_Wemb']:
    upListGen.remove('Wemb')
  
  # the subset of generator parameters that actually gets updated
  modelGenUpD =  OrderedDict()
  for k in upListGen:
   modelGenUpD[k] = modelGen[k]
  # costs[0] is differentiated w.r.t. the evaluator, costs[1] w.r.t. the generator
  gradsEval = tensor.grad(costs[0], wrt=modelEval.values(),add_names=True)
  gradsGen = tensor.grad(costs[1], wrt=modelGenUpD.values(), add_names=True)
 
  lrEval = tensor.scalar(name='lrEval',dtype=config.floatX)
  f_grad_comp_eval, f_param_update_eval, zg_eval, rg_eval, ud_eval= solver.build_solver_model(lrEval, modelEval, gradsEval,
                                      comb_inp_list, costs[0], params)
  
  lrGen = tensor.scalar(name='lrGen',dtype=config.floatX)
  f_grad_comp_gen, f_param_update_gen, zg_gen, rg_gen, ud_gen = solver.build_solver_model(lrGen, modelGenUpD, gradsGen,
                                      comb_inp_list, costs[1], params)

  print 'model init done.'
  print 'model has keys: ' + ', '.join(modelGen.keys())

  # calculate how many iterations we need, One epoch is considered once going through all the sentences and not images
  # Hence in case of coco/flickr this will 5* no of images
  # NOTE(review): despite the name and the comment above, this asks for ofwhat = 'images'.
  num_sentences_total = dp.getSplitSize('train', ofwhat = 'images')
  num_iters_one_epoch = num_sentences_total / batch_size
  max_iters = max_epochs * num_iters_one_epoch
  # per outer iteration: half an epoch of evaluator steps, a quarter epoch of generator steps
  iters_eval= num_iters_one_epoch//2
  iters_gen = num_iters_one_epoch//4

  eval_period_in_epochs = params['eval_period']
  eval_period_in_iters = max(1, int(num_iters_one_epoch * eval_period_in_epochs))
  top_val_ppl2 = -1
  smooth_train_ppl2 = 0.5 # starting value; overwritten on the first step of each phase
  val_ppl2 = len(misc['ixtoword'])
  last_status_write_time = 0 # for writing worker job status reports
  # NOTE(review): json_worker_status and len_hist are filled in here but not used below.
  json_worker_status = {}
  json_worker_status['params'] = params
  json_worker_status['history'] = []

  len_hist = defaultdict(int)
  t_print_sec = 60 # minimum number of seconds between progress prints
  ## Initialize the model parameters from the checkpoint file if we are resuming training
  # NOTE(review): model_init_from and checkpoint_init are never assigned in this
  # function; unless they exist as module-level globals this branch raises NameError.
  if params['checkpoint_file_name'] != 'None':
    zipp(model_init_from,modelGen)
    #zipp(rg_init,rgGen)
    print("\nContinuing training from previous model\n. Already run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint_init['epoch'], \
      checkpoint_init['perplexity']))
  
  pos_samp = np.arange(batch_size,dtype=np.int32)
  print batch_size

  ##############################################################
  # Define signal handler to catch ctl-c or kills so that we can save the model trained till that point
  # NOTE(review): the `signal` parameter shadows the signal module inside the handler,
  # and `it` is only bound once the training loop below has started, so an interrupt
  # before the first iteration would hit an unbound name.
  def signal_handler(signal, frame):
    print('You pressed Ctrl+C! Saving Checkpoint Now before exiting!')
    filename = 'advmodel_checkpoint_%s_%s_%s_%.2f_INT.p' % (params['dataset'], host, params['fappend'], val_ppl2)
    dumpCheckpoint(filename, params, modelGen, modelEval, misc, it, val_ppl2)
    sys.exit(0)
  signal.signal(signal.SIGINT, signal_handler)
  ##############################################################

  # NOTE(review): the outer loop runs max_epochs times (not max_iters), yet `epoch`
  # is derived as if `it` counted batches; `epoch` is not read afterwards.
  for it in xrange(max_epochs):
    epoch = it * 1.0 / num_iters_one_epoch
    # Enable using dropout in training 
    use_dropout_eval.set_value(1.)
    # ---- phase 1: update the evaluator ----
    for it2 in xrange(iters_eval): 
        t0 = time.time()
        # fetch a batch of data
        batch,_ = dp.sampPosNegSentSamps(params['eval_batch_size'] - params['rand_negs'])
        real_inp_list, lenS = prepare_data(batch, misc['wordtoix'], maxlen=params['maxlen'], pos_samp=pos_samp, prep_for=params['eval_model'], rand_negs = params['rand_negs'])
        
        # evaluate cost, gradient and perform parameter update
        cost = f_grad_comp_eval(*real_inp_list)
        f_param_update_eval(params['learning_rate_eval'])
        dt = time.time() - t0
        # Track training statistics
        train_ppl2 = (np.e**(-cost)) #step_struct['stats']['ppl2']
        smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2 # smooth exponentially decaying moving average
        # the first step of the phase resets both running averages
        if it2 == 0: smooth_train_ppl2 = train_ppl2 
        if it2 == 0: smooth_train_cost = cost
        else: smooth_train_cost = 0.99 * smooth_train_cost + 0.01 * cost 
        
        tnow = time.time()
        if tnow > last_status_write_time + t_print_sec*1: # every now and then lets write a report
          print 'Eval Cnn in epoch %d: %d/%d sample done in %.3fs. Cost now is %.3f Pplx is %.3f' % (it, it2, iters_eval, dt, \
	    	smooth_train_cost,smooth_train_ppl2)
          last_status_write_time = tnow
    
    print 'Done training the descriminative model for now. Switching to Genereative model'
    print 'Eval N/W in epoch %d: Cost now is %.3f Pplx is %.3f' % (it, smooth_train_cost,smooth_train_ppl2)

    
    # checkpoint taken after the evaluator phase only
    filename = 'advmodel_checkpoint_%s_%s_%s_%d_%.2f_EVOnly.p' % (params['dataset'], host, params['fappend'],it, smooth_train_ppl2)
    dumpCheckpoint(filename, params, modelGen, modelEval, misc, it, val_ppl2)
    
    
    # Disable Cnn dropout while training gen network
    use_dropout_eval.set_value(0.)
    # ---- phase 2: update the generator ----
    for it2 in xrange(iters_gen): 
        t0 = time.time()
        # fetch a batch of data
        batch,_ = dp.sampPosNegSentSamps(params['eval_batch_size'] - params['rand_negs'])
        real_inp_list, lenS = prepare_data(batch, misc['wordtoix'], maxlen=params['maxlen'], pos_samp=pos_samp, prep_for=params['eval_model'], rand_negs = params['rand_negs'])
        #import pdb; pdb.set_trace()

        # evaluate cost, gradient and perform parameter update
        #if any([np.isnan(modelGen[m].get_value()).any() for m in modelGen]):
        #    print 'Somebodys NAN!!!'
        #    break;
        #asd = f_gen_only(real_inp_list[2],real_inp_list[3])
        
        #print it2,asd[-1].shape, real_inp_list[0].shape

        #if asd[-1].shape[0] > real_inp_list[0].shape[0]:
        #   import pdb; pdb.set_trace()


        cost = f_grad_comp_gen(*real_inp_list)

        #print it2,cost
        
        #if any([np.isnan(zg_gen[i].get_value()).any() for i in xrange(len(zg_gen))]):
        #    print 'Somebody zg is NAN!!!'
        #    break;
        #if any([np.isnan(rg_gen[i].get_value()).any() for i in xrange(len(rg_gen))]) or any([(rg_gen[i].get_value()<0).any() for i in xrange(len(rg_gen))]):
        #    print 'Somebody rg is NAN!!!'
        #    break;
        
        f_param_update_gen(params['learning_rate_gen'])
        dt = time.time() - t0
        # print training statistics
        train_ppl2 = (np.e**(-cost)) #step_struct['stats']['ppl2']
        smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2 # smooth exponentially decaying moving average
        # the first step of the phase resets both running averages
        if it2 == 0: smooth_train_ppl2 = train_ppl2 
        if it2 == 0: smooth_train_cost = cost
        else: smooth_train_cost = 0.99 * smooth_train_cost + 0.01 * cost 
        
        tnow = time.time()
        if tnow > last_status_write_time + t_print_sec*1: # every now and then lets write a report
          print 'Gen Lstm in epoch %d: %d/%d sample done in %.3fs. Cost now is %.3f Pplx is %.3f' % (it, it2, iters_gen, dt, \
	    	smooth_train_cost,smooth_train_ppl2)
          last_status_write_time = tnow
    
    print 'Done training the generative model for now. Switching to Genereative model. Final Stats are:'
    print 'Gen Lstm in epoch %d: Cost now is %.3f Pplx is %.3f' % (it, smooth_train_cost,smooth_train_ppl2)
    
    ## perform perplexity evaluation on the validation set and save a model checkpoint if it's good
    is_last_iter = (it+1) == max_iters
    # NOTE(review): the override below forces the checkpoint branch on every outer iteration.
    is_last_iter = 1
    if (((it+1) % eval_period_in_iters) == 0 and it < max_iters - 5) or is_last_iter:
      # Disable using dropout in validation 
     # use_dropout.set_value(0.)

     # val_ppl2 = eval_split_theano('val', dp, model, params, misc,f_eval) # perform the evaluation on VAL set
     # 
     # if it - params['lr_decay_st_epoch'] >= 0:
     #   params['learning_rate'] = params['learning_rate'] * params['lr_decay']
     #   params['lr_decay_st_epoch'] += 1
     # 
     # print 'validation perplexity = %f, lr = %f' % (val_ppl2, params['learning_rate'])
     # if params['sample_by_len'] == 1:
     #   print len_hist
        
      # validation is disabled above, so the smoothed training value stands in for val_ppl2
      val_ppl2 = smooth_train_ppl2
      write_checkpoint_ppl_threshold = params['write_checkpoint_ppl_threshold']
      # top_val_ppl2 is never updated (see the commented-out assignment below), so it
      # stays at -1 and this condition is always true
      if val_ppl2 < top_val_ppl2 or top_val_ppl2 < 0:
        if val_ppl2 < write_checkpoint_ppl_threshold or write_checkpoint_ppl_threshold < 0:
          # if we beat a previous record or if this is the first time
          # AND we also beat the user-defined threshold or it doesnt exist
          #top_val_ppl2 = val_ppl2
          filename = 'advmodel_checkpoint_%s_%s_%s_%d_%.2f_GenDone.p' % (params['dataset'], host, params['fappend'],it, smooth_train_ppl2)
          dumpCheckpoint(filename, params, modelGen, modelEval, misc, it, val_ppl2)
示例#22
0
def gen_from_test(params):
    """Caption the test split with a saved checkpoint, score it, and write a MIDI file.

    Steps, in order:
      1. load the pickled checkpoint named by ``params['checkpoint_path']``;
      2. predict sentences for every test image and keep the top prediction;
      3. write candidates/references under ``eval/`` and run
         ``eval/multi-bleu.perl`` on them;
      4. compute test-split perplexity through ``eval_split``;
      5. parse every predicted token as ``pitch;pos;dur`` into notes, add a
         fixed bass line, and write the result to ``params['output_file']``.

    Keys read from ``params``: ``checkpoint_path``, ``max_images``,
    ``output_file``, ``tempo``, ``dump_folder``, ``beam_size``, ``quantize``.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    max_images = params['max_images']
    fout = params['output_file']
    tempo = params['tempo']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # point this at checkpoints you trust.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    dataset = checkpoint_params['dataset']
    model = checkpoint['model']
    dump_folder = params['dump_folder']

    if dump_folder:
        print('creating dump folder ' + dump_folder)
        # NOTE(review): the shell command is built from the raw path, so a
        # folder name with spaces or shell metacharacters will misbehave.
        os.system('mkdir -p ' + dump_folder)

    # fetch the data provider
    dp = getDataProvider(dataset)

    misc = {}
    misc['wordtoix'] = checkpoint['wordtoix']
    ixtoword = checkpoint['ixtoword']

    # result struct; the JSON dump that consumed it is currently disabled
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = checkpoint_params
    blob['imgblobs'] = []

    # iterate over all images in test set and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}
    n = 0
    all_references = []
    all_candidates = []
    for img in dp.iterImages(split='test', max_images=max_images):
        n += 1
        print('image %d/%d:' % (n, max_images))
        # one space-joined string per human reference sentence
        references = [' '.join(x['tokens']) for x in img['sentences']]

        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)

        img_blob = {}  # we will build this up
        img_blob['img_path'] = img['local_file_path']
        img_blob['imgid'] = img['imgid']

        if dump_folder:
            # copy source file to some folder. This makes it easier to distribute results
            # into a webpage, because all images that were predicted on are in a single folder
            source_file = img['local_file_path']
            target_file = os.path.join(
                dump_folder, os.path.basename(img['local_file_path']))
            os.system('cp %s %s' % (source_file, target_file))

        # encode the human-provided references
        img_blob['references'] = []
        for gtsent in references:
            print('GT: ' + gtsent)
            img_blob['references'].append({'text': gtsent})

        # now evaluate and encode the top prediction
        top_predictions = Ys[0]  # predictions for the first (and only) image we passed in
        top_prediction = top_predictions[0]  # these are sorted with highest on top
        # ix 0 is the END token, skip that
        candidate = ' '.join(
            [ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        print('PRED: (%f) %s' % (top_prediction[0], candidate))

        # save for later eval
        all_references.append(references)
        all_candidates.append(candidate)

        img_blob['candidate'] = {
            'text': candidate,
            'logprob': top_prediction[0]
        }
        blob['imgblobs'].append(img_blob)

    # use perl script to eval BLEU score for fair comparison to other research work
    # first write intermediate files (closed explicitly so the perl script
    # always sees complete content)
    print('writing intermediate files into eval/')
    with open('eval/output', 'w') as out_file:
        out_file.write('\n'.join(all_candidates))
    for q in xrange(1):
        with open('eval/reference%d' % q, 'w') as ref_file:
            ref_file.write('\n'.join([x[q] for x in all_references]))
    # invoke the perl script to get BLEU scores
    print('invoking eval/multi-bleu.perl script...')
    owd = os.getcwd()
    os.chdir('eval')
    try:
        os.system('./multi-bleu.perl reference < output')
    finally:
        # always return to the original working directory
        os.chdir(owd)

    # now also evaluate test split perplexity
    gtppl = eval_split('test',
                       dp,
                       model,
                       checkpoint_params,
                       misc,
                       eval_max_images=max_images)
    print('perplexity of ground truth words based on dictionary of %d words: %f' % (
        len(ixtoword), gtppl))
    blob['gtppl'] = gtppl

    # dump result struct to file
    #  print 'saving result struct to %s' % (params['result_struct_filename'], )
    #  json.dump(blob, open(params['result_struct_filename'], 'w'))

    # NOTE(review): new_track and bass_track are not defined inside this
    # function; presumably module-level pretty_midi instruments - confirm.
    for idx, sentence in enumerate(all_candidates):
        for token in sentence.split():
            # each predicted token is expected to look like "pitch;pos;dur"
            es = token.split(';')
            pitch = int(es[0])
            pos = convert_pos(es[1], idx)
            dur = convert_dur(es[2])
            new_track.notes.append(pretty_midi.Note(90, pitch, pos, pos + dur))

    new_midi_data = pretty_midi.PrettyMIDI(initial_tempo=tempo)
    new_midi_data.instruments.append(new_track)

    # pre-set chord progression: five 8-note phrases; note i runs from
    # time i to time i + 1 at velocity 90
    phrase_a = [36, 47, 45, 43, 41, 40, 38, 43]
    phrase_b = [45, 41, 36, 43, 45, 41, 43, 43]
    bass_pitches = phrase_a + phrase_a + phrase_b + phrase_a + phrase_a
    for start, pitch in enumerate(bass_pitches):
        bass_track.notes.append(pretty_midi.Note(90, pitch, start, start + 1))

    new_midi_data.instruments.append(bass_track)
    adjust_tempo(new_midi_data)
    if params['quantize']:
        quantize(new_midi_data)
    new_midi_data.write(fout)
示例#23
0
def main(params, split):
    """Train the generator on one dataset split and checkpoint the best model.

    Builds the class vocabulary from the training sentences, initialises the
    model through the generator returned by ``decodeGenerator`` (optionally
    replacing it with the model stored in ``params['init_model_from']``), then
    runs ``max_epochs`` worth of solver steps. Periodically it writes a JSON
    status file, evaluates validation perplexity with ``eval_split`` and
    pickles a checkpoint whenever the validation perplexity improves.

    Args:
        params: configuration dict. Keys read here: ``batch_size``,
            ``dataset``, ``feature_file``, ``class_count_threshold``,
            ``do_grad_check``, ``max_epochs``, ``init_model_from`` (optional),
            ``eval_period``, ``worker_status_output_directory``,
            ``min_ppl_or_abort``, ``write_checkpoint_ppl_threshold``,
            ``fappend``, ``alpha``, ``beta``, ``checkpoint_output_directory``.
            The whole dict is also forwarded to ``solver.step`` as keyword
            arguments, and the dropout entries are zeroed before a gradient
            check.
        split: integer split id, used in the dataset JSON file name and in
            the checkpoint file name.
    """
    #import pdb; pdb.set_trace()

    batch_size = params['batch_size']
    dataset = params['dataset']
    feature_file = params['feature_file']
    class_count_threshold = params['class_count_threshold']
    do_grad_check = params['do_grad_check']
    max_epochs = params['max_epochs']
    host = socket.gethostname()  # get computer hostname

    json_file = 'dataset_mmdb_book_fps_30_samplesize_25_split_%d.json' % (
        split)

    # fetch the data provider
    dp = getDataProvider(dataset, feature_file, json_file)

    # stores various misc items that need to be passed around the framework
    misc = {}

    # go over all training classes and find the vocabulary we want to use, i.e. the classes that occur
    # at least class_count_threshold number of times
    misc['classtoix'], misc['ixtoclass'], bias_init_vector = \
        preProBuildWordVocab(dp.iterSentences('train'), class_count_threshold)

    # delegate the initialization of the model to the Generator class
    BatchGenerator = decodeGenerator(params)
    init_struct = BatchGenerator.init(params, misc)
    model = init_struct['model']
    misc['update'] = init_struct['update']
    misc['regularize'] = init_struct['regularize']

    # force overwrite here. This is a bit of a hack, not happy about it
    model['bd'] = bias_init_vector.reshape(1, bias_init_vector.size)

    print('model init done.')
    print('model has keys: ' + ', '.join(model.keys()))
    print('updating: ' + ', '.join(
        '%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1])
        for k in misc['update']))
    # this line lists the regularized parameters, so label it accordingly
    print('regularizing: ' + ', '.join(
        '%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1])
        for k in misc['regularize']))
    print('number of learnable parameters total: %d' % (sum(
        model[k].shape[0] * model[k].shape[1] for k in misc['update']), ))

    if params.get('init_model_from', ''):
        # load checkpoint
        # NOTE(review): pickle can execute arbitrary code while loading; only
        # use checkpoints you trust.
        with open(params['init_model_from'], 'rb') as init_file:
            checkpoint = pickle.load(init_file)
        model = checkpoint['model']  # overwrite the model

    # initialize the Solver and the cost function
    solver = Solver()

    def costfun(batch, model):
        # wrap the cost function to abstract some things away from the Solver
        return RNNGenCost(batch, model, params, misc)

    # calculate how many iterations we need
    num_sentences_total = dp.getSplitSize('train', ofwhat='sentences')
    # floor division: same result as the Python 2 integer '/' this replaces
    num_iters_one_epoch = num_sentences_total // batch_size
    max_iters = max_epochs * num_iters_one_epoch
    eval_period_in_epochs = params['eval_period']
    eval_period_in_iters = max(
        1, int(num_iters_one_epoch * eval_period_in_epochs))
    abort = False
    top_val_ppl2 = -1  # negative means "no validation result recorded yet"
    # initially size of dictionary of confusion
    smooth_train_ppl2 = len(misc['ixtoclass'])
    val_ppl2 = len(misc['ixtoclass'])
    last_status_write_time = 0  # for writing worker job status reports
    json_worker_status = {}
    json_worker_status['params'] = params
    json_worker_status['history'] = []
    lastsavedcheckpoint = ''
    for it in xrange(max_iters):
        if abort:
            break
        t0 = time.time()
        # fetch a batch of data
        batch = [dp.sampleImageSentencePair() for i in xrange(batch_size)]
        # evaluate cost, gradient and perform parameter update
        step_struct = solver.step(batch, model, costfun, **params)
        cost = step_struct['cost']
        dt = time.time() - t0

        # print training statistics
        train_ppl2 = step_struct['stats']['ppl2']
        # smooth exponentially decaying moving average
        smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2
        if it == 0:
            smooth_train_ppl2 = train_ppl2  # start out where we start out
        epoch = it * 1.0 / num_iters_one_epoch
        print('%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, reg cost = %f, ppl2 = %.2f (smooth %.2f)'
              % (it, max_iters, dt, epoch, cost['loss_cost'], cost['reg_cost'],
                 train_ppl2, smooth_train_ppl2))

        print('last saved checkpoint in %s' % (lastsavedcheckpoint, ))
        # perform gradient check if desired, with a bit of a burnin time (10 iterations)
        if it == 10 and do_grad_check:
            print('disabling dropout for gradient check...')
            params['drop_prob_encoder'] = 0
            params['drop_prob_decoder'] = 0
            solver.gradCheck(batch, model, costfun)
            print('done gradcheck, exitting.')
            sys.exit()  # hmmm. probably should exit here

        # detect if loss is exploding and kill the job if so
        total_cost = cost['total_cost']
        if it == 0:
            total_cost0 = total_cost  # store this initial cost
        if total_cost > total_cost0 * 2:
            print('Aboring, cost seems to be exploding. Run gradcheck? Lower the learning rate?')
            abort = True  # set the abort flag, we'll break out

        # logging: write JSON files for visual inspection of the training
        tnow = time.time()
        if tnow > last_status_write_time + 60 * 1:  # every now and then lets write a report
            last_status_write_time = tnow
            jstatus = {}
            jstatus['time'] = datetime.datetime.now().isoformat()
            jstatus['iter'] = (it, max_iters)
            jstatus['epoch'] = (epoch, max_epochs)
            jstatus['time_per_batch'] = dt
            jstatus['smooth_train_ppl2'] = smooth_train_ppl2
            jstatus['val_ppl2'] = val_ppl2  # just write the last available one
            jstatus['train_ppl2'] = train_ppl2
            json_worker_status['history'].append(jstatus)
            status_file = os.path.join(
                params['worker_status_output_directory'],
                host + '_status.json')
            try:
                with open(status_file, 'w') as status_out:
                    json.dump(json_worker_status, status_out)
            except Exception as e:  # todo be more clever here
                print('tried to write worker status into %s but got error:' % (
                    status_file, ))
                print(e)

        # perform perplexity evaluation on the validation set and save a model checkpoint if it's good
        is_last_iter = (it + 1) == max_iters
        if (((it + 1) % eval_period_in_iters) == 0
                and it < max_iters - 5) or is_last_iter:
            # perform the evaluation on VAL set
            val_ppl2 = eval_split('val', dp, model, params, misc)
            print('validation perplexity = %f' % (val_ppl2, ))

            # abort training if the perplexity is no good
            min_ppl_or_abort = params['min_ppl_or_abort']
            if val_ppl2 > min_ppl_or_abort and min_ppl_or_abort > 0:
                # the message now states the comparison that actually fired
                print('aborting job because validation perplexity %f > %f' % (
                    val_ppl2, min_ppl_or_abort))
                abort = True  # abort the job

            write_checkpoint_ppl_threshold = params[
                'write_checkpoint_ppl_threshold']
            if val_ppl2 < top_val_ppl2 or top_val_ppl2 < 0:
                if val_ppl2 < write_checkpoint_ppl_threshold or write_checkpoint_ppl_threshold < 0:
                    # if we beat a previous record or if this is the first time
                    # AND we also beat the user-defined threshold or it doesnt exist
                    top_val_ppl2 = val_ppl2

                    filename = 'model_checkpoint_%s_%s_%s_alpha_%2.2f_beta_%2.2f_split_%d.p' % (
                        dataset, host, params['fappend'], params['alpha'],
                        params['beta'], split)
                    filepath = os.path.join(
                        params['checkpoint_output_directory'], filename)
                    checkpoint = {}
                    checkpoint['it'] = it
                    checkpoint['epoch'] = epoch
                    checkpoint['model'] = model
                    checkpoint['params'] = params
                    checkpoint['perplexity'] = val_ppl2
                    checkpoint['classtoix'] = misc['classtoix']
                    checkpoint['ixtoclass'] = misc['ixtoclass']
                    checkpoint['json_file'] = json_file

                    try:
                        # runs tagged 'test' never write checkpoints
                        if not (params['fappend'] == 'test'):
                            # if it == max_iters - 1 :
                            with open(filepath, "wb") as checkpoint_out:
                                pickle.dump(checkpoint, checkpoint_out)
                            print('saved checkpoint in %s' % (filepath, ))
                            lastsavedcheckpoint = filepath
                    except Exception as e:  # todo be more clever here
                        print('tried to write checkpoint into %s but got error: ' % (
                            filepath, ))
                        print(e)
def main(params):
    """Predict sentences for a folder of images and save JSON and HTML reports.

    Reads the pickled checkpoint at ``params['checkpoint_path']``, the image
    list ``tasks.txt`` and the feature matrix ``vgg_feats.mat`` from
    ``params['root_path']``, runs prediction with ``params['beam_size']`` for
    each image, and writes ``result_struct.json`` and ``result.html`` into the
    same root folder. Every returned candidate is kept for each image.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # use checkpoints you trust.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    model = checkpoint['model']
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = checkpoint_params
    blob['imgblobs'] = []

    # load the tasks.txt file
    # TODO FIND EASY WAY TO CALL FILE WITH PROPER root
    root_path = params['root_path']
    with open(os.path.join(root_path, 'tasks.txt'), 'r') as tasks_file:
        img_names = tasks_file.read().splitlines()

    # load the features for all images
    features_path = os.path.join(root_path, 'vgg_feats.mat')
    features_struct = scipy.io.loadmat(features_path)
    features = features_struct['feats']  # this is a 4096 x N numpy array of features
    _, N = features.shape

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}
    for n in xrange(N):
        print('image %d/%d:' % (n, N))

        # encode the image
        img = {}
        img['feat'] = features[:, n]
        img['local_file_path'] = img_names[n]
        print(img['local_file_path'])

        # perform the work. heavy lifting happens inside
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)

        print(Ys)
        # build up the output
        img_blob = {}
        img_blob['img_path'] = img['local_file_path']
        img_blob['candidates'] = []

        top_predictions = Ys[0]  # predictions for the first (and only) image we passed in
        print('Number of name candidates %d' % (len(top_predictions), ))
        # TODO TIME IT. SEEMS PRETTY FAST THOUGH
        for top_prediction in top_predictions:  # these are sorted with highest on top
            # ix 0 is the END token, skip that
            words = [ixtoword[ix] for ix in top_prediction[1] if ix > 0]
            candidate = ' '.join(words)
            print(words)
            print('PRED: (%f) %s' % (top_prediction[0], candidate))

            img_blob['candidates'].append({
                'text': candidate,
                'logprob': top_prediction[0]
            })

        # Record each image exactly once. Appending inside the candidate loop
        # stored the same dict once per candidate and duplicated every image
        # in both the JSON and the HTML output.
        blob['imgblobs'].append(img_blob)

    # dump result struct to file
    save_file = os.path.join(root_path, 'result_struct.json')
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as json_out:
        json.dump(blob, json_out)

    # dump output html
    html_parts = []
    for img in blob['imgblobs']:
        html_parts.append('<img src="%s" height="400"><br>' % (img['img_path'], ))
        for cand in img['candidates']:
            html_parts.append('(%f) %s <br><br>' % (cand['logprob'],
                                                    cand['text']))
    html = ''.join(html_parts)
    html_file = os.path.join(root_path, 'result.html')
    print('writing html result file to %s...' % (html_file, ))
    with open(html_file, 'w') as html_out:
        html_out.write(html)
示例#25
0
def hold_comittee_discussion(params, com_dataset):
    """Score the committee's test sentences with every member model.

    For each checkpoint path in ``com_dataset['members_model']`` the model is
    loaded, its evaluation function is built, and the test split is pushed
    through it batch by batch; the outputs fill one row of the result array.
    The array, together with the scores from ``eval_bleu_all_cand``, is
    pickled to ``params['comResFname']`` and ``com_dataset`` is re-written as
    JSON to ``params['jsonFname']``.

    Side effects: sets ``params['comResFname']`` and
    ``com_dataset['com_evaluation']``.

    Returns:
        The ``(n_memb, n_imgs * n_sent)`` array of per-model evaluation scores.
    """
    n_memb = com_dataset['n_memb']
    n_sent = com_dataset['n_sent']
    n_imgs = len(com_dataset['images'])

    eval_array = np.zeros((n_memb, n_imgs * n_sent))
    eval_batch_size = params.get('eval_batch_size', 100)
    eval_max_images = params.get('eval_max_images', -1)
    split = 'test'

    for model_id, mod in enumerate(com_dataset['members_model']):
        # NOTE(review): pickle can execute arbitrary code while loading; only
        # use checkpoints you trust.
        with open(mod, 'rb') as checkpoint_file:
            checkpoint = pickle.load(checkpoint_file)
        checkpoint_params = checkpoint['params']
        model_npy = checkpoint['model']

        checkpoint_params['use_theano'] = 1

        if 'image_feat_size' not in checkpoint_params:
            checkpoint_params['image_feat_size'] = 4096

        # point the data provider at the committee dataset (file name only)
        checkpoint_params['data_file'] = params['jsonFname'].rsplit('/')[-1]
        dp = getDataProvider(checkpoint_params)

        # build this member's evaluation function
        BatchGenerator = decodeGenerator(checkpoint_params)
        BatchGenerator.build_eval_other_sent(BatchGenerator.model_th,
                                             checkpoint_params, model_npy)

        wordtoix = checkpoint['wordtoix']

        print('evaluating %s performance in batches of %d' % (split,
                                                              eval_batch_size))
        gen_fprop = BatchGenerator.f_eval_other
        c_id = 0  # next free column in this model's row
        for batch in dp.iterImageSentencePairBatch(
                split=split,
                max_batch_size=eval_batch_size,
                max_images=eval_max_images):
            xWd, xId, maskd, _ = dp.prepare_data(batch, wordtoix)
            n_cols = xWd.shape[1]
            eval_array[model_id, c_id:c_id + n_cols] = gen_fprop(xWd, xId,
                                                                 maskd)
            c_id += n_cols

    # Calculate oracle scores
    bleu_array = eval_bleu_all_cand(params, com_dataset)
    eval_results = {}
    eval_results['logProb_feat'] = eval_array
    eval_results['OracleBleu'] = bleu_array

    # Save the mutual evaluations
    # NOTE(review): the file name ends in .json but the content is a pickle.
    params['comResFname'] = 'committee_evalSc_%s.json' % (params['fappend'])
    com_dataset['com_evaluation'] = params['comResFname']
    with open(params['comResFname'], "wb") as results_out:
        pickle.dump(eval_results, results_out)
    with open(params['jsonFname'], 'w') as dataset_out:
        json.dump(com_dataset, dataset_out)

    return eval_array
示例#26
0
    def __init__(
        self,
        model_def_file,
        pretrained_model_file,
        mean_file,
        raw_scale,
        class_labels_file,
        bet_file,
        image_dim,
        gpu_mode,
    ):
        """Load the vision nets, the language model and the lookup data.

        Args:
            model_def_file: model definition passed to the first
                ``caffe.Classifier``.
            pretrained_model_file: weights passed to the first
                ``caffe.Classifier``.
            mean_file: not used in this method; hard-coded per-channel means
                are passed to both classifiers instead.
            raw_scale: ``raw_scale`` forwarded to both classifiers.
            class_labels_file: text file with one ``<synset_id> <names>`` row
                per line; only the first comma-separated name is kept.
            bet_file: pickle file loaded into ``self.bet``.
            image_dim: side length used for ``image_dims`` of both classifiers.
            gpu_mode: truthy selects caffe GPU mode, otherwise CPU mode.

        Also reads ``self.googlenet_args``, ``self.rnn_params`` and
        ``self.database_param``, which are not set here (presumably
        class-level attributes - confirm).
        """
        logging.info("Loading net and associated files...")
        if gpu_mode:
            caffe.set_mode_gpu()
        else:
            caffe.set_mode_cpu()

        ## load models
        # vgg16
        self.net = caffe.Classifier(
            model_def_file,
            pretrained_model_file,
            image_dims=(image_dim, image_dim),
            raw_scale=raw_scale,
            mean=np.array([103.939, 116.779, 123.68]),
            channel_swap=(2, 1, 0),
        )
        logging.info("Load vision model, %s", model_def_file)
        # googlenet
        self.net_google = caffe.Classifier(
            self.googlenet_args["model_def_file"],
            self.googlenet_args["pretrained_model_file"],
            image_dims=(image_dim, image_dim),
            raw_scale=raw_scale,
            mean=np.float32([104.0, 116.0, 122.0]),
            channel_swap=(2, 1, 0),
        )
        logging.info("Load vision model, %s", self.googlenet_args["model_def_file"])
        # language model
        self.rnn_params["beam_size"] = 10
        # NOTE(review): unpickling can execute arbitrary code; only load
        # trusted checkpoint/database/bet files.
        with open(self.rnn_params["checkpoint_path"], "rb") as checkpoint_file:
            self.rnn_checkpoint = cPickle.load(checkpoint_file)
        self.rnn_checkpoint_params = self.rnn_checkpoint["params"]
        self.rnn_model = self.rnn_checkpoint["model"]
        self.rnn_ixtoword = self.rnn_checkpoint["ixtoword"]
        # NOTE(review): this passes the whole checkpoint dict even though
        # rnn_checkpoint_params is stored just above - confirm which one
        # decodeGenerator expects.
        self.rnn_BatchGenerator = decodeGenerator(self.rnn_checkpoint)
        self.rnn_kwparams = {"beam_size": self.rnn_params["beam_size"]}
        logging.info("Load LSTM model, %s", self.rnn_params["checkpoint_path"])

        # generate N bit lookup table: number of set bits for every 16-bit value
        self.lookup = np.asarray([bin(i).count("1") for i in range(1 << 16)])

        # load reference bit model
        with open(self.database_param, "rb") as file_reader:
            self.database = cPickle.load(file_reader)
        logging.info("Load database from {}".format(self.database_param))
        logging.info("database shape {}".format(self.database["ref"].shape))

        label_rows = []
        with open(class_labels_file) as f:
            for line in f:
                fields = line.strip().split(" ")
                label_rows.append({
                    "synset_id": fields[0],
                    # keep only the first of the comma-separated names
                    "name": " ".join(fields[1:]).split(",")[0],
                })
        labels_df = pd.DataFrame(label_rows)
        self.labels = labels_df.sort("synset_id")["name"].values

        # pickles are binary data, so read the file in binary mode
        with open(bet_file, "rb") as bet_reader:
            self.bet = cPickle.load(bet_reader)
        # A bias to prefer children nodes in single-chain paths
        # I am setting the value to 0.1 as a quick, simple model.
        # We could use better psychological models here...
        self.bet["infogain"] -= np.array(self.bet["preferences"]) * 0.1
def main(params):
    """Score given image/sentence pairs with a checkpointed model.

    Reads ``params['imgList']``, where every line is
    ``image_name,sentence[,feature_index]``, loads the matching features via
    ``loadArbitraryFeatures``, evaluates the pairs in batches of
    ``params['eval_batch_size']`` (default 100) and writes
    ``result_struct_<fname_append>.json`` into ``params['root_path']``.

    Raises:
        ValueError: if the checkpoint enables ``en_aux_inp`` but
            ``params['aux_inp_file']`` is not given.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # use checkpoints you trust.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    model_npy = checkpoint['model']

    # always evaluate with the theano implementation
    checkpoint_params['use_theano'] = 1

    if 'image_feat_size' not in checkpoint_params:
        checkpoint_params['image_feat_size'] = 4096

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = checkpoint_params
    blob['imgblobs'] = []

    # load the image list
    root_path = params['root_path']
    with open(params['imgList'], 'r') as list_file:
        img_names_list = list_file.read().splitlines()

    # split every line once; the first line decides the expected layout
    fields = [line.rsplit(',') for line in img_names_list]
    n_fields = len(fields[0]) if fields else 0
    if n_fields > 2:
        img_names = [f[0] for f in fields]
        sentRaw = [f[1] for f in fields]
        idxes = [int(f[2]) for f in fields]
    elif n_fields == 2:
        img_names = [f[0] for f in fields]
        sentRaw = [f[1] for f in fields]
        idxes = xrange(len(img_names_list))
    else:
        # also reached for an empty list file
        print('ERROR: List should atleast contain image name and a corresponding sentence')
        return

    if checkpoint_params.get('en_aux_inp', 0) and (params.get('aux_inp_file', None) is None):
        raise ValueError('ERROR: please specify auxillary input feature using --aux_inp_file')

    # load the features for all images
    features, aux_inp = loadArbitraryFeatures(params, idxes)
    N = len(img_names)

    # build the evaluation function
    BatchGenerator = decodeGenerator(checkpoint_params)
    BatchGenerator.build_eval_other_sent(BatchGenerator.model_th,
                                         checkpoint_params, model_npy)
    eval_batch_size = params.get('eval_batch_size', 100)
    wordtoix = checkpoint['wordtoix']

    gen_fprop = BatchGenerator.f_eval_other

    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (
        checkpoint['epoch'], checkpoint['perplexity']))

    n = 0
    while n < N:
        print('image %d/%d:\r' % (n, N)),

        # collect up to eval_batch_size image/sentence pairs
        cbs = 0
        batch = []
        while n < N and cbs < eval_batch_size:
            out = {}
            out['image'] = {'feat': features[:, n]}
            out['sentence'] = {'raw': sentRaw[n],
                               'tokens': word_tokenize(sentRaw[n])}
            out['idx'] = n
            if checkpoint_params.get('en_aux_inp', 0):
                out['image']['aux_inp'] = aux_inp[:, n]

            cbs += 1
            n += 1
            batch.append(out)

        inp_list, _ = prepare_data(batch, wordtoix)

        # perform the work. heavy lifting happens inside
        eval_array = gen_fprop(*inp_list)

        for ix, x in enumerate(batch):
            # build up the output: the given sentence and its score
            img_blob = {}
            img_blob['img_path'] = img_names[x['idx']]
            img_blob['candidate'] = {'text': x['sentence']['raw'],
                                     'logprob': float(eval_array[0, ix])}
            blob['imgblobs'].append(img_blob)

    # dump result struct to file
    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as json_out:
        json.dump(blob, json_out)
def run(checkpoint):
    """Caption the test split with an in-memory checkpoint and score the result.

    Predicts with beam size 1 for every test image, writes the top captions
    to ``<outdir>/captions_val_<algorithm>_results.json`` and returns whatever
    ``eval_tools.metrics.run`` reports for that dataset and algorithm.

    Args:
        checkpoint: dict providing ``params``, ``model``, ``ixtoword``,
            ``algorithm`` and ``outdir``.
    """
    max_images = -1
    dump_folder = ""

    checkpoint_params = checkpoint["params"]
    dataset = checkpoint_params["dataset"]
    model = checkpoint["model"]
    beam_size = 1
    # dump_folder = params['dump_folder']

    # fetch the data provider
    dp = getDataProvider(dataset)

    ixtoword = checkpoint["ixtoword"]

    # output blob mirroring the usual result struct (not written anywhere here)
    blob = {}
    # blob['params'] = params
    blob["checkpoint_params"] = checkpoint_params
    blob["imgblobs"] = []

    # iterate over all images in test set and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {"beam_size": beam_size}
    n = 0
    captions_res = []
    for img in dp.iterImages(split="test", max_images=max_images):
        n += 1
        print("image %d/%d:" % (n, max_images))
        # one space-joined string per human reference sentence
        references = [" ".join(x["tokens"]) for x in img["sentences"]]
        Ys = BatchGenerator.predict([{"image": img}], model, checkpoint_params, **kwparams)

        img_blob = {}  # we will build this up
        img_blob["img_path"] = img["local_file_path"]
        img_blob["imgid"] = img["imgid"]
        img_blob["id"] = img["id"]

        if dump_folder:
            # copy source file to some folder. This makes it easier to distribute results
            # into a webpage, because all images that were predicted on are in a single folder
            source_file = img["local_file_path"]
            target_file = os.path.join(dump_folder, os.path.basename(img["local_file_path"]))
            os.system("cp %s %s" % (source_file, target_file))

        # encode the human-provided references; only the first one is echoed
        if references:
            print("GT: " + references[0])
        img_blob["references"] = [{"text": gtsent} for gtsent in references]

        # now evaluate and encode the top prediction
        top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
        top_prediction = top_predictions[0]  # these are sorted with highest on top
        # ix 0 is the END token, skip that
        candidate = " ".join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        print("PRED: (%f) %s" % (top_prediction[0], candidate))

        # save for later eval
        captions_res.append({"image_id": img_blob["id"], "caption": candidate})
        img_blob["candidate"] = {"text": candidate, "logprob": top_prediction[0]}
        blob["imgblobs"].append(img_blob)

    alg_name = checkpoint["algorithm"]
    res_file_name = checkpoint["outdir"] + "/captions_val_" + alg_name + "_results.json"
    # close the results file before the metrics step runs, so it is fully
    # flushed to disk by then
    with open(res_file_name, "w") as res_file:
        json.dump(captions_res, res_file)

    from eval_tools import metrics

    scores = metrics.run(dataset, alg_name, checkpoint["outdir"])

    return scores
示例#29
0
def main(video_name):
    """Caption the not-yet-captioned frames of one video and dump the result as JSON.

    The function loads a pickled checkpoint, lists the frames found under
    ``/mnt/frames/<video_name>``, drops every frame whose path already appears
    as ``img_path`` in the data returned by ``load_video_caption``, runs beam
    search (beam size 20) on each feature column and stores up to five
    candidate captions per frame.

    Args:
        video_name: name of the video. It selects the frame directory, the
            ``<video_name>.pickle`` feature file and the
            ``<video_name>_5_caption.json`` output file.
    """
    # load the checkpoint
    checkpoint_path = '/home/t-yuche/neuraltalk/models/flickr8k_cnn_lstm_v1.p'
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling runs arbitrary code; only load checkpoints you trust.
    with open(checkpoint_path, 'rb') as f:
        checkpoint = pickle.load(f)
    checkpoint_params = checkpoint['params']
    model = checkpoint['model']
    ixtoword = checkpoint['ixtoword']

    # Captions that already exist are carried over into the output unchanged.
    fei_cap_data = load_video_caption('/mnt/tags/fei-caption-keyframe',
                                      video_name)

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'checkpoint_params': checkpoint_params,
        'imgblobs': list(fei_cap_data),
    }

    # Keep only frames that have no caption yet. A set makes each membership
    # test O(1) instead of scanning the whole list of processed frames.
    frames_dir = os.path.join('/mnt/frames', video_name)
    processed_frames = set(x['img_path'] for x in fei_cap_data)
    img_names = [
        frame
        for frame in (os.path.join(frames_dir, x)
                      for x in os.listdir(frames_dir))
        if frame not in processed_frames
    ]

    # load the features for all images; after the transpose each column
    # (features[:, n]) is the feature vector of one image
    features_path = os.path.join('/mnt/tags/fei-caption-all-pickle',
                                 video_name + '.pickle')
    with open(features_path, 'rb') as f:
        features = pickle.load(f).T
    _, N = features.shape

    # NOTE(review): column n is paired with img_names[n]. This assumes the
    # feature file holds exactly the unprocessed frames, in os.listdir order;
    # if it holds more columns the lookup below raises IndexError -- confirm.

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': 20}
    for n in xrange(N):
        print('image %d/%d:' % (n, N))

        # encode the image
        img = {
            'feat': features[:, n],
            'local_file_path': img_names[n],
        }

        # perform the work. heavy lifting happens inside
        tic = time.time()
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)
        toc = time.time()
        print('image %d/%d: %f' % (n, N, toc - tic))

        # Ys[0] holds the predictions for the single image we passed in;
        # keep at most the first five of them.
        texts = []
        logprobs = []
        for top_prediction in Ys[0][:5]:
            # ix 0 is the END token, skip that
            texts.append(' '.join(
                ixtoword[ix] for ix in top_prediction[1] if ix > 0))
            logprobs.append(top_prediction[0])

        # build up the output
        blob['imgblobs'].append({
            'img_path': img['local_file_path'],
            'rnn_time': toc - tic,
            'candidate': {'text': texts, 'logprob': logprobs},
        })

    # dump result struct to file
    save_file = os.path.join('/mnt/tags/fei-caption-all',
                             video_name + '_5_caption.json')
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as f:
        json.dump(blob, f)
示例#30
0
    def predict_sentence(self, image):
        """Extract CNN features for the demo upload, caption it and render the result page.

        Steps, in order: run the Caffe network over the (hard-coded) upload
        file, save the features to ``vgg_feats.mat``, decode the top caption
        for every feature column with the RNN checkpoint, average the word
        vectors of each caption, then write ``result_struct.json``,
        ``fileNameToVector.pickle`` and ``result.html``.

        Args:
            image: not read by the current implementation; the file name to
                process is hard-coded below.

        Returns:
            The value of ``render_template("result.html", title='Results')``
            on success, or the tuple ``(False, message)`` if any step raised.
        """
        try:
            # NOTE(review): UPLOAD_FOLDER, root_path, img_names and params are
            # not defined inside this method; they are presumably module-level
            # names -- confirm. If they are missing, the prediction stage
            # raises NameError, which the handler at the bottom turns into the
            # generic failure tuple.

            ################ FEATURE EXTRACTION ##############

            cnn_model_def = self.cnn_model_def
            cnn_model_params = self.cnn_model_params
            rnn_model = self.rnn_model

            def predict(in_data, net):
                """
                Get the features for a batch of data using network

                Inputs:
                in_data: data batch
                """
                out = net.forward(**{net.inputs[0]: in_data})
                features = out[net.outputs[0]].squeeze(axis=(2, 3))
                return features

            def batch_predict(filenames, net):
                """
                Get the features for all images from filenames using a network

                Inputs:
                filenames: a list of names of image files

                Returns:
                an array of feature vectors for the images in that file
                """
                IMAGE_PATH = '/tmp/captionly_demo_uploads'

                N, C, H, W = net.blobs[net.inputs[0]].data.shape
                F = net.blobs[net.outputs[0]].data.shape[1]
                Nf = len(filenames)
                allftrs = np.zeros((Nf, F))
                for i in range(0, Nf, N):
                    in_data = np.zeros((N, C, H, W), dtype=np.float32)

                    batch_range = range(i, min(i + N, Nf))
                    batch_filenames = [filenames[j] for j in batch_range]
                    Nb = len(batch_range)

                    batch_images = np.zeros((Nb, 3, H, W))
                    for j, fname in enumerate(batch_filenames):
                        im = imread(IMAGE_PATH + '/' + fname)
                        if len(im.shape) == 2:
                            # replicate a single-channel image into 3 channels
                            im = np.tile(im[:, :, np.newaxis], (1, 1, 3))
                        # RGB -> BGR
                        im = im[:, :, (2, 1, 0)]
                        # mean subtraction
                        im = im - np.array([103.939, 116.779, 123.68])
                        # resize
                        im = imresize(im, (H, W))
                        # get channel in correct dimension
                        im = np.transpose(im, (2, 0, 1))
                        batch_images[j, :, :, :] = im

                    # insert into correct place; a short last batch leaves the
                    # remaining rows zero-filled
                    in_data[0:Nb, :, :, :] = batch_images

                    # predict features
                    ftrs = predict(in_data, net)

                    # Copy the whole batch. The return must stay outside both
                    # loops, otherwise only the first row is ever filled in.
                    allftrs[i:i + Nb, :] = ftrs[:Nb, :]

                    print('Done %d/%d files' % (i + Nb, Nf))

                return allftrs

            if self.gpu_mode:
                caffe.set_mode_gpu()
            else:
                caffe.set_mode_cpu()

            net = caffe.Net(cnn_model_def, cnn_model_params)
            caffe.set_phase_test()

            filenames = ['2015-05-17_17:28:44.2513807EGRMwN.jpg']
            allftrs = batch_predict(filenames, net)

            # save to mat file
            print("Saving file to vgg_feats.mat...")
            io.savemat(UPLOAD_FOLDER + '/vgg_feats', {'feats': allftrs.T})

            #################### PREDICTION ##################

            dim = 300
            # load the checkpoint
            checkpoint_path = rnn_model

            # load glove vect dict
            # NOTE: unpickling runs arbitrary code; only load trusted files.
            glove_dict_path = '../../vecDict.pickle'
            with open(glove_dict_path, 'rb') as handle:
                vec_dict = pickle.load(handle)

            print('loading checkpoint %s' % (checkpoint_path, ))
            with open(checkpoint_path, 'rb') as handle:
                checkpoint = pickle.load(handle)
            checkpoint_params = checkpoint['params']
            model = checkpoint['model']
            ixtoword = checkpoint['ixtoword']

            # output blob which we will dump to JSON for visualizing the results
            blob = {
                'checkpoint_params': checkpoint_params,
                'imgblobs': [],
            }

            # create the tasks.txt file: one directory entry per line, no
            # trailing newline
            allImages = os.listdir(UPLOAD_FOLDER)
            with open(os.path.join(UPLOAD_FOLDER, 'tasks.txt'), 'w') as f:
                f.write('\n'.join(allImages))

            # load the features for all images
            features_path = os.path.join(root_path, 'vgg_feats.mat')
            features_struct = scipy.io.loadmat(features_path)
            features = features_struct['feats']  # this is a 4096 x N numpy array of features
            _, N = features.shape

            fileNameToVector = {}
            # iterate over all images and predict sentences
            BatchGenerator = decodeGenerator(checkpoint_params)
            for n in xrange(N):
                print('image %d/%d:' % (n, N))

                # encode the image
                img = {
                    'feat': features[:, n],
                    'local_file_path': img_names[n],
                }

                # perform the work. heavy lifting happens inside
                kwparams = {'beam_size': params['beam_size']}
                Ys = BatchGenerator.predict([{'image': img}], model,
                                            checkpoint_params, **kwparams)

                # encode the top prediction
                top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
                top_prediction = top_predictions[0]  # these are sorted with highest on top
                # ix 0 is the END token, skip that
                candidate = ' '.join(
                    ixtoword[ix] for ix in top_prediction[1] if ix > 0)
                print('PRED: (%f) %s' % (top_prediction[0], candidate))

                # Average the vectors of the caption words that have one.
                currSentenceVector = np.zeros(dim)
                numWords = 0
                for word in candidate.split():
                    if word in vec_dict:
                        currSentenceVector += vec_dict[word].astype(float)
                        numWords += 1
                # Without any known word the vector stays all-zero instead of
                # becoming NaN through a 0/0 division.
                if numWords:
                    currSentenceVector /= numWords
                fileNameToVector[img['local_file_path']] = currSentenceVector

                # build up the output
                blob['imgblobs'].append({
                    'img_path': img['local_file_path'],
                    'candidate': {'text': candidate,
                                  'logprob': top_prediction[0]},
                })

            # dump result struct to file
            save_file = os.path.join(root_path, 'result_struct.json')
            print('writing predictions to %s...' % (save_file, ))
            with open(save_file, 'w') as f:
                json.dump(blob, f)

            # dump the fileNameToVector mapping to a pickle file
            with open('fileNameToVector.pickle', 'wb') as handle:
                pickle.dump(fileNameToVector, handle)

            # dump output html
            html_parts = []
            for img_blob in blob['imgblobs']:
                html_parts.append(
                    '<img src="%s" height="400"><br>' % (img_blob['img_path'], ))
                html_parts.append(
                    '(%f) %s <br><br>' % (img_blob['candidate']['logprob'],
                                          img_blob['candidate']['text']))
            html_file = os.path.join(root_path, 'result.html')
            print('writing html result file to %s...' % (html_file, ))
            with open(html_file, 'w') as f:
                f.write(''.join(html_parts))

            return render_template("result.html", title='Results')

        except Exception as err:
            # Top-level boundary of the request: log with traceback and report
            # a generic failure to the caller.
            logging.exception('Classification error: %s', err)
            return (False, 'Something went wrong when classifying the '
                           'image. Maybe try another one?')
示例#31
0
def main(feature):
    """Caption a single image from its pre-computed feature column.

    Only the first checkpoint of the list below and only the first column of
    ``feature`` are used.

    Args:
        feature: array indexed as ``feature[:, 0]`` to obtain the feature
            vector of the image.

    Returns:
        dict mapping the checkpoint index as a string (currently only ``'0'``)
        to a result blob with the keys ``'params'``, ``'checkpoint_params'``
        and ``'imgblobs'``.
    """
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle
    import sys

    # Make the project importable, but do not grow sys.path on every call.
    project_root = '/home/young/Desktop/ImageCaption'
    if project_root not in sys.path:
        sys.path.append(project_root)
    from imagernn.imagernn_utils import decodeGenerator

    N = 1  # number of feature columns (images) that get captioned
    num_checkpoints = 1  # only the first checkpoint below is evaluated

    checkpoint_path_top5 = [
        r'/home/young/Desktop/ImageCaption/model/model_checkpoint_coco_Caicai-PC_baseline_18.47.p',
        r'/home/young/Desktop/ImageCaption/model/model_checkpoint_coco_Caicai-PC_baseline_24.64.p',
        r'/home/young/Desktop/ImageCaption/model/model_checkpoint_coco_Caicai-PC_baseline_24.56.p',
    ]

    blob_top3 = {}  # results keyed by the checkpoint index as a string
    for i, checkpoint_path in enumerate(checkpoint_path_top5[:num_checkpoints]):
        # load the checkpoint
        # NOTE: unpickling runs arbitrary code; only load checkpoints you trust.
        with open(checkpoint_path, 'rb') as f:
            checkpoint = pickle.load(f)
        checkpoint_params = checkpoint['params']
        model = checkpoint['model']
        ixtoword = checkpoint['ixtoword']

        # output blob which we will dump to JSON for visualizing the results
        blob = {
            'params': {},
            'checkpoint_params': checkpoint_params,
            'imgblobs': [],
        }

        BatchGenerator = decodeGenerator(checkpoint_params)
        for n in range(N):
            print('image %d/%d:' % (n + 1, N))

            # encode the image
            img = {
                'feat': feature[:, n],
                'local_file_path': 'test.jpg',
            }

            # perform the work. heavy lifting happens inside
            Ys = BatchGenerator.predict([{'image': img}], model,
                                        checkpoint_params, beam_size=2)

            # encode the top prediction
            top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
            top_prediction = top_predictions[0]  # these are sorted with highest on top
            # ix 0 is the END token, skip that
            candidate = ' '.join(
                ixtoword[ix] for ix in top_prediction[1] if ix > 0)
            print('PRED: (%f) %s' % (top_prediction[0], candidate))

            # build up the output
            blob['imgblobs'].append({
                'img_path': img['local_file_path'],
                'candidate': {'text': candidate,
                              'logprob': top_prediction[0]},
            })

        blob_top3[str(i)] = blob

    return blob_top3
def hold_comittee_discussion(params, com_dataset):
    """Score the test split with every committee member and save the scores.

    For each checkpoint listed in ``com_dataset['members_model']`` the
    evaluation function ``f_eval_other`` is built and run over the test split
    in batches; its outputs fill one row of the score matrix. The matrix and
    the result of ``eval_bleu_all_cand`` are pickled to
    ``committee_evalSc_<fappend>.json`` and ``com_dataset`` is rewritten to
    ``params['jsonFname']`` with a pointer to that file.

    Args:
        params: run options. Reads ``'jsonFname'``, ``'fappend'`` and the
            optional ``'eval_batch_size'`` / ``'eval_max_images'``; the key
            ``'comResFname'`` is added in place.
        com_dataset: committee description. Reads ``'n_memb'``, ``'n_sent'``,
            ``'images'`` and ``'members_model'``; the key
            ``'com_evaluation'`` is added in place.

    Returns:
        The ``(n_memb, n_imgs * n_sent)`` array of scores, one row per member.
    """
    n_memb = com_dataset['n_memb']
    n_sent = com_dataset['n_sent']
    n_imgs = len(com_dataset['images'])

    eval_array = np.zeros((n_memb, n_imgs * n_sent))
    eval_batch_size = params.get('eval_batch_size', 100)
    eval_max_images = params.get('eval_max_images', -1)
    split = 'test'

    for model_id, mod in enumerate(com_dataset['members_model']):
        # NOTE: unpickling runs arbitrary code; only load checkpoints you trust.
        with open(mod, 'rb') as f:
            checkpoint = pickle.load(f)
        checkpoint_params = checkpoint['params']
        model_npy = checkpoint['model']
        wordtoix = checkpoint['wordtoix']

        checkpoint_params['use_theano'] = 1
        if 'image_feat_size' not in checkpoint_params:
            checkpoint_params['image_feat_size'] = 4096

        # every member is evaluated on the committee's own data file
        checkpoint_params['data_file'] = params['jsonFname'].rsplit('/')[-1]
        dp = getDataProvider(checkpoint_params)

        # build the evaluation function for this member
        BatchGenerator = decodeGenerator(checkpoint_params)
        BatchGenerator.build_eval_other_sent(BatchGenerator.model_th,
                                             checkpoint_params, model_npy)
        gen_fprop = BatchGenerator.f_eval_other

        print('evaluating %s performance in batches of %d' % (split, eval_batch_size))
        c_id = 0  # next free column in this member's row
        for batch in dp.iterImageSentencePairBatch(split=split,
                                                   max_batch_size=eval_batch_size,
                                                   max_images=eval_max_images):
            xWd, xId, maskd, _ = dp.prepare_data(batch, wordtoix)
            # presumably xWd.shape[1] is the number of sentences in the
            # batch -- confirm against prepare_data
            n_cols = xWd.shape[1]
            eval_array[model_id, c_id:c_id + n_cols] = gen_fprop(xWd, xId, maskd)
            c_id += n_cols

    # Calculate oracle scores
    bleu_array = eval_bleu_all_cand(params, com_dataset)
    eval_results = {
        'logProb_feat': eval_array,
        'OracleBleu': bleu_array,
    }

    # Save the mutual evaluations. Despite the .json suffix this file is
    # written with pickle; the name is kept because callers read it back.
    params['comResFname'] = 'committee_evalSc_%s.json' % (params['fappend'])
    com_dataset['com_evaluation'] = params['comResFname']
    with open(params['comResFname'], 'wb') as f:
        pickle.dump(eval_results, f)
    with open(params['jsonFname'], 'w') as f:
        json.dump(com_dataset, f)

    return eval_array
def main(params):
  """Caption every image of a task file and dump the predictions as JSON.

  The blobs returned by ``load_video_caption`` are placed first in the
  output. Each image then contributes up to five beam-search candidates
  together with the time the prediction took.

  Args:
    params: run options. Reads ``'checkpoint_path'``, ``'task_file'``,
      ``'feature_file'``, ``'beam_size'`` and ``'out_file'``.
  """
  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  print('loading checkpoint %s' % (checkpoint_path, ))
  # NOTE: unpickling runs arbitrary code; only load checkpoints you trust.
  with open(checkpoint_path, 'rb') as f:
    checkpoint = pickle.load(f)
  checkpoint_params = checkpoint['params']
  model = checkpoint['model']
  ixtoword = checkpoint['ixtoword']

  # load the task file: one image path per line
  task_file = params['task_file']
  with open(task_file, 'r') as f:
    img_names = f.read().splitlines()

  # The video name is the task file's base name without its last 10
  # characters -- presumably a fixed-length suffix; confirm against the
  # task-file naming scheme.
  video_name = task_file.split('/')[-1][:-10]
  caption_data = load_video_caption('/mnt/tags/fei-caption', video_name)

  # output blob which we will dump to JSON for visualizing the results
  blob = {
    'params': params,
    'checkpoint_params': checkpoint_params,
    'imgblobs': list(caption_data),
  }

  # load the features for all images; after the transpose each column
  # (features[:, n]) is the feature vector of one image
  features_path = params['feature_file']
  with open(features_path, 'rb') as f:
    features = pickle.load(f).T
  _, N = features.shape

  # iterate over all images and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  kwparams = {'beam_size': params['beam_size']}
  for n in xrange(N):
    print('image %d/%d:' % (n, N))

    # encode the image
    img = {
      'feat': features[:, n],
      'local_file_path': img_names[n],
    }

    # perform the work. heavy lifting happens inside
    tic = time.time()
    Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)
    toc = time.time()
    print('image %d/%d: %f' % (n, N, toc - tic))

    # Ys[0] holds the predictions for the single image we passed in;
    # keep at most the first five of them.
    texts = []
    logprobs = []
    for top_prediction in Ys[0][:5]:
      # ix 0 is the END token, skip that
      texts.append(' '.join(ixtoword[ix] for ix in top_prediction[1] if ix > 0))
      logprobs.append(top_prediction[0])

    # build up the output
    blob['imgblobs'].append({
      'img_path': img['local_file_path'],
      'rnn_time': toc - tic,
      'candidate': {'text': texts, 'logprob': logprobs},
    })

  # dump result struct to file
  save_file = params['out_file']
  print('writing predictions to %s...' % (save_file, ))
  with open(save_file, 'w') as f:
    json.dump(blob, f)
def main(params):
  """Rank the encoded test sentences for every test image and write the rankings.

  The ground-truth sentences of the test images are encoded with
  ``encode_to_chvob`` and then mapped to the checkpoint's word indices.
  Sentences that keep fewer than two known words are discarded. For each
  test image ``sentence_relevance`` is called and its output is written to
  two text files: one with the sentence ids only and one with id/score pairs.

  Args:
    params: run options. Reads ``'checkpoint_path'``, ``'max_images'``,
      ``'dump_folder'`` and ``'beam_size'``.
  """
  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  max_images = params['max_images']
  dump_folder = params['dump_folder']

  print('loading checkpoint %s' % (checkpoint_path, ))
  # NOTE: unpickling runs arbitrary code; only load checkpoints you trust.
  with open(checkpoint_path, 'rb') as f:
    checkpoint = pickle.load(f)
  checkpoint_params = checkpoint['params']
  dataset = checkpoint_params['dataset']
  model = checkpoint['model']
  wordtoix = checkpoint['wordtoix']

  rootpath = '/home/lgp105b/xirong/VisualSearch'
  collection = 'flickr8k'
  version = 'baidu'
  out_dir = os.path.join(rootpath, collection, 'SimilarityIndex', 'test_sent')

  if dump_folder:
    print('creating dump folder ' + dump_folder)
    # os.makedirs instead of a shell command: paths containing spaces or
    # shell metacharacters are handled correctly.
    if not os.path.isdir(dump_folder):
      os.makedirs(dump_folder)

  print("len(misc['wordtoix']): %d" % len(wordtoix))

  # get the groundtruth sentences encoded in model-dataset's chvob
  vob2idx = chinese_vob_idx(rootpath, collection, version)
  testset_filename = os.path.join(rootpath, collection, 'Annotation', 'test_dataset.txt')
  with open(testset_filename) as f:
    test_ids = set(line.strip() for line in f)

  gt_filename = os.path.join(rootpath, collection, 'seg.Flickr8k.token.Chinese.txt')
  with open(gt_filename) as f:
    all_lines = [line.strip().decode('utf-8', 'ignore') for line in f]

  # Keep the lines whose first token, minus its last two characters, is a
  # test id (presumably the token is "<image id>#<k>" -- confirm). Blank
  # lines are skipped.
  input_data = []
  for line in all_lines:
    tokens = line.split()
    if tokens and tokens[0][:-2] in test_ids:
      input_data.append(line)
  print(len(input_data))

  # ignore a word if it is not in chvob or not in wordtoix (words that occur
  # more often than the threshold)
  testid2sentences = encode_to_chvob(vob2idx, input_data)
  count_del = 0
  for sid in list(testid2sentences.keys()):
    # direct dict membership is O(1); .keys() is a linear scan in Python 2
    encoded = [wordtoix[x] for x in testid2sentences[sid] if x in wordtoix]
    if len(encoded) < 2:
      del testid2sentences[sid]
      count_del += 1
    else:
      testid2sentences[sid] = encoded
  print('%d sentences cannot encoded with misx[wordtoix]' % count_del)

  # fetch the data provider
  dp = getDataProvider(dataset)

  # iterate over all images in test set and rank the sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  kwparams = {'beam_size': params['beam_size']}
  sentid_path = os.path.join(out_dir, '%s.top20.sentid.txt' % dataset)
  score_path = os.path.join(out_dir, '%s.top20.sentid.score.txt' % dataset)

  # the with-blocks close both output files even if a prediction fails
  with open(sentid_path, 'w') as fout:
    with open(score_path, 'w') as fout_s:
      for n, img in enumerate(dp.iterImages(split='test', max_images=max_images), 1):
        filename = img['filename']
        print('image %d/%d:%s' % (n, max_images, filename))

        top_sentences = BatchGenerator.sentence_relevance(
            [{'image': img}], model, checkpoint_params, testid2sentences, **kwparams)

        # elem[0] goes to both files, elem[1] only to the score file
        id_parts = ['%s ' % filename]
        score_parts = ['%s ' % filename]
        for x in top_sentences:
          for elem in x:
            id_parts.append('%s ' % elem[0])
            score_parts.append('%s %s ' % (elem[0], elem[1]))
        output_line = ''.join(score_parts) + '\n'
        print(output_line)

        fout.write(''.join(id_parts) + '\n')
        fout_s.write(output_line)
        fout_s.flush()
示例#35
0
def main(params):

    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print 'loading checkpoint %s' % (checkpoint_path, )
    checkpoint = pickle.load(open(checkpoint_path, 'rb'))
    cp_params = checkpoint['params']

    if params['gen_model'] == None:
        model_npy = checkpoint[
            'model'] if 'model' in checkpoint else checkpoint['modelGen']
    else:
        gen_cp = pickle.load(open(params['gen_model'], 'rb'))
        model_npy = gen_cp.get('model', {})

    cp_params['use_theano'] = 1
    if params['dobeamsearch']:
        cp_params['advers_gen'] = 0

    if params['use_label_file'] == 1:
        params['poolmethod'] = cp_params['poolmethod'] if params[
            'poolmethod'] == None else params['poolmethod']
        params['labels'] = cp_params['labels'] if params[
            'labels'] == None else params['labels']
        params['featfromlbl'] = cp_params['featfromlbl'] if params[
            'featfromlbl'] == None else params['featfromlbl']
        params['uselabel'] = cp_params['uselabel'] if params[
            'uselabel'] == None else params['uselabel']
    else:
        params['uselabel'] = 0
    print 'parsed parameters:'
    print json.dumps(params, indent=2)

    if 'image_feat_size' not in cp_params:
        cp_params['image_feat_size'] = 4096

    if 'misc' in checkpoint:
        misc = checkpoint['misc']
        ixtoword = misc['ixtoword']
    else:
        misc = {}
        ixtoword = checkpoint['ixtoword']
        misc['wordtoix'] = checkpoint['wordtoix']

    cp_params['softmax_smooth_factor'] = params['softmax_smooth_factor']
    cp_params['softmax_propogate'] = params['softmax_propogate']
    cp_params['computelogprob'] = params['computelogprob']
    cp_params['greedy'] = params['greedy']
    cp_params['gen_input_noise'] = 0

    if cp_params.get('sched_sampling_mode', None) != None:
        cp_params['sched_sampling_mode'] = None

    # load the tasks.txt file and setupe feature loading
    root_path = params['root_path']

    img_names_list = open(params['imgList'], 'r').read().splitlines()
    auxidxes = []

    img_names = [x.rsplit(',')[0] for x in img_names_list]

    if len(img_names_list[0].split(',', 1)) > 1:
        if type(ast.literal_eval(img_names_list[0].split(
                ',', 1)[1].strip())) == tuple:
            idxes = [
                ast.literal_eval(x.split(',', 1)[1].strip())[0]
                for x in img_names_list
            ]
            auxidxes = [
                ast.literal_eval(x.split(',', 1)[1].strip())[1]
                for x in img_names_list
            ]
        else:
            idxes = [
                ast.literal_eval(x.split(',', 1)[1].strip())
                for x in img_names_list
            ]
    else:
        idxes = xrange(len(img_names_list))

    if cp_params.get('swap_aux') == 0 or auxidxes == []:
        features, aux_inp, feat_idx, aux_idx = loadArbitraryFeatures(
            params, idxes, auxidxes=auxidxes)
    else:
        features, aux_inp, feat_idx, aux_idx = loadArbitraryFeatures(
            params, auxidxes, auxidxes=idxes)

    ##-------------------------------- Setup the models --------------------------###########
    if cp_params.get('use_encoder_for', 0) & 1:
        imgFeatEncoder = RecurrentFeatEncoder(cp_params['image_feat_size'],
                                              cp_params['word_encoding_size'],
                                              cp_params,
                                              mdl_prefix='img_enc_',
                                              features=features.T)

        zipp(model_npy, imgFeatEncoder.model_th)
        (imgenc_use_dropout, imgFeatEnc_inp, xI,
         updatesLSTMImgFeat) = imgFeatEncoder.build_model(
             imgFeatEncoder.model_th, cp_params)
    else:
        xI = None
        imgFeatEnc_inp = []

    if cp_params.get('use_encoder_for', 0) & 2:
        auxFeatEncoder = RecurrentFeatEncoder(cp_params['aux_inp_size'],
                                              cp_params['image_encoding_size'],
                                              cp_params,
                                              mdl_prefix='aux_enc_',
                                              features=aux_inp.T)
        zipp(model_npy, auxFeatEncoder.model_th)
        (auxenc_use_dropout, auxFeatEnc_inp, xAux,
         updatesLSTMAuxFeat) = auxFeatEncoder.build_model(
             auxFeatEncoder.model_th, cp_params)
    else:
        auxFeatEnc_inp = []
        xAux = None

    # Testing to see if diversity can be achieved by weighing words
    if params['word_freq_w'] != None:
        w_freq = json.load(open(params['word_freq_w'], 'r'))
        w_logw = np.zeros(len(misc['wordtoix']), dtype=np.float32)
        for w in w_freq:
            if w in misc['wordtoix']:
                w_logw[misc['wordtoix'][w]] = w_freq[w]
        w_logw = w_logw / w_logw[1:].min()
        w_logw[0] = w_logw.max()
        w_logw = -params['word_freq_sc'] * np.log(w_logw)
    else:
        w_logw = None

    BatchGenerator = decodeGenerator(cp_params)
    # Compile and init the theano predictor
    BatchGenerator.prepPredictor(model_npy,
                                 cp_params,
                                 params['beam_size'],
                                 xI,
                                 xAux,
                                 imgFeatEnc_inp + auxFeatEnc_inp,
                                 per_word_logweight=w_logw)
    model = BatchGenerator.model_th
    if params['greedy']:
        BatchGenerator.usegumbel.set_value(0)

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = copy(cp_params)
    if cp_params.get('class_out_factoring', 0) == 1:
        blob['checkpoint_params'].pop('ixtoclsinfo')
    blob['imgblobs'] = []

    N = len(img_names)

    # iterate over all images and predict sentences
    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint['epoch'], \
      checkpoint['perplexity']))

    kwparams = {}

    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)

    for n in xrange(N):
        print 'image %d/%d:' % (n, N)

        # encode the image
        D, NN = features.shape
        img = {}
        img['feat'] = features[:, feat_idx[n]].T
        img['img_idx'] = feat_idx[n]
        if cp_params.get('en_aux_inp', 0):
            img['aux_inp'] = aux_inp(
                aux_idx[n]) if aux_inp != [] else np.zeros(
                    cp_params['aux_inp_size'], dtype=np.float32)
            img['aux_idx'] = aux_idx[n] if aux_inp != [] else []
        img['local_file_path'] = img_names[n]
        # perform the work. heavy lifting happens inside
        enc_inp_list = prepare_seq_features(
            [{
                'image': img
            }],
            use_enc_for=cp_params.get('use_encoder_for', 0),
            use_shared_mem=cp_params.get('use_shared_mem_enc', 0))
        #import pdb;pdb.set_trace()
        Ys, Ax = BatchGenerator.predict([{
            'image': img
        }],
                                        cp_params,
                                        ext_inp=enc_inp_list)

        # build up the output
        img_blob = {}
        img_blob['img_path'] = img['local_file_path']

        # encode the top prediction
        top_predictions = Ys[0] if params[
            'rescoreByLen'] == 0 else rescoreProbByLen(
                Ys[0]
            )  # take predictions for the first (and only) image we passed in
        top_predictions = sorted(top_predictions,
                                 key=lambda aa: aa[0],
                                 reverse=True)

        top_prediction = top_predictions[
            0]  # these are sorted with highest on top
        if cp_params.get('reverse_sentence', 0) == 0:
            candidate = ' '.join([
                ixtoword[int(ix)] for ix in top_prediction[1] if ix > 0
            ])  # ix 0 is the END token, skip that
        else:
            candidate = ' '.join([
                ixtoword[int(ix)] for ix in reversed(top_prediction[1])
                if ix > 0
            ])  # ix 0 is the END token, skip that
        #if candidate == '':
        #    import pdb;pdb.set_trace()
        if params['rescoreByLen'] == 0:
            print 'PRED: (%f) %s' % (float(top_prediction[0]), candidate)
        else:
            print 'PRED: (%f, %f) %s' % (float(
                top_prediction[0]), float(top_prediction[2]), candidate)
        img_blob['candidate'] = {
            'text': candidate,
            'logprob': float(top_prediction[0])
        }

        # Code to save all the other candidates
        candlist = []
        for ci in xrange(len(top_predictions) - 1):
            prediction = top_predictions[
                ci + 1]  # these are sorted with highest on top
            candidate = ' '.join([
                ixtoword[int(ix)] for ix in prediction[1] if ix > 0
            ])  # ix 0 is the END token, skip that
            candlist.append({
                'text': candidate,
                'logprob': float(prediction[0])
            })

        img_blob['candidatelist'] = candlist
        blob['imgblobs'].append(img_blob)
        if (n % 5000) == 1:
            print 'writing predictions to %s...' % (save_file, )
            json.dump(blob, open(save_file, 'w'))

    # dump result struct to file
    print 'writing predictions to %s...' % (save_file, )
    json.dump(blob, open(save_file, 'w'))
示例#36
0
def main(params):
    """Caption every image listed in ``tasks.txt`` and save JSON/HTML results.

    Loads a pickled checkpoint, reads the ``feats`` matrix from
    ``vgg_feats.mat`` under ``params['root_path']`` and, for each feature
    column, asks the checkpoint's generator for sentence predictions. The
    first prediction returned for each image is recorded.

    Args:
        params: dict providing 'checkpoint_path', 'root_path' and
            'beam_size'. The dict itself is embedded in the output JSON.

    Side effects:
        Writes ``result_struct.json`` and ``result.html`` into the root path
        and prints progress to stdout.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling can execute code embedded in the file; only load
    # checkpoints that come from a trusted source.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    model = checkpoint['model']
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # load the tasks.txt file; line n names the image stored in column n
    root_path = params['root_path']
    with open(os.path.join(root_path, 'tasks.txt'), 'r') as tasks_file:
        img_names = tasks_file.read().splitlines()

    # load the features for all images (one column per image)
    features_path = os.path.join(root_path, 'vgg_feats.mat')
    features = scipy.io.loadmat(features_path)['feats']
    _, N = features.shape

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}
    for n in xrange(N):
        print('image %d/%d:' % (n, N))

        # encode the image
        img = {
            'feat': features[:, n],
            'local_file_path': img_names[n],
        }

        # perform the work. heavy lifting happens inside
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)

        # Ys[0] holds the predictions for the first (and only) image we
        # passed in; its first entry is taken as the best one.
        top_prediction = Ys[0][0]
        logprob, word_ixs = top_prediction[0], top_prediction[1]
        # ix 0 is the END token, skip that
        candidate = ' '.join([ixtoword[ix] for ix in word_ixs if ix > 0])
        print('PRED: (%f) %s' % (logprob, candidate))

        blob['imgblobs'].append({
            'img_path': img['local_file_path'],
            'candidate': {
                'text': candidate,
                'logprob': logprob,
            },
        })

    # dump result struct to file
    save_file = os.path.join(root_path, 'result_struct.json')
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as json_file:
        json.dump(blob, json_file)

    # dump output html; collect the fragments and join once instead of
    # growing a string with +=
    html_parts = []
    for img_blob in blob['imgblobs']:
        html_parts.append('<img src="%s" height="400"><br>' %
                          (img_blob['img_path'], ))
        html_parts.append('(%f) %s <br><br>' %
                          (img_blob['candidate']['logprob'],
                           img_blob['candidate']['text']))
    html_file = os.path.join(root_path, 'result.html')
    print('writing html result file to %s...' % (html_file, ))
    with open(html_file, 'w') as html_out:
        html_out.write(''.join(html_parts))
def main(params):
    """Evaluate a checkpoint on the test split and save the results as JSON.

    For every test image the generator's first prediction is scored against
    the human reference sentences with ``evalCandidate``; the per-image
    scores are averaged, the test-split perplexity is computed with
    ``eval_split`` and everything is written to
    ``params['result_struct_filename']``.

    Args:
        params: dict providing 'checkpoint_path', 'max_images', 'beam_size'
            and 'result_struct_filename'. The dict itself is embedded in the
            output JSON.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    max_images = params['max_images']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling can execute code embedded in the file; only load
    # checkpoints that come from a trusted source.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    dataset = checkpoint_params['dataset']
    model = checkpoint['model']

    # fetch the data provider
    dp = getDataProvider(dataset)

    misc = {'wordtoix': checkpoint['wordtoix']}
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # iterate over all images in test set and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}
    all_bleu_scores = []
    test_images = dp.iterImages(split='test', max_images=max_images)
    for n, img in enumerate(test_images, 1):
        print('image %d/%d:' % (n, max_images))
        # references as a list of lists of tokens
        references = [x['tokens'] for x in img['sentences']]
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)

        img_blob = {
            'img_path': img['local_file_path'],
            'imgid': img['imgid'],
            'references': [],
        }

        # encode the human-provided references
        for gtwords in references:
            gt_text = ' '.join(gtwords)
            print('GT: ' + gt_text)
            img_blob['references'].append({'text': gt_text})

        # now evaluate and encode the top prediction: Ys[0] belongs to the
        # first (and only) image we passed in, its first entry is the best.
        top_prediction = Ys[0][0]
        candidate = [ixtoword[ix] for ix in top_prediction[1]]
        candidate_text = ' '.join(candidate)
        print('PRED: (%f) %s' % (top_prediction[0], candidate_text))
        bleu_scores = evalCandidate(candidate, references)
        print('BLEU: B-1: %f B-2: %f B-3: %f' % tuple(bleu_scores))
        img_blob['candidate'] = {
            'text': candidate_text,
            'logprob': top_prediction[0],
            'bleu': bleu_scores,
        }

        all_bleu_scores.append(bleu_scores)
        blob['imgblobs'].append(img_blob)

    print('final average bleu scores:')
    # NOTE(review): raises ZeroDivisionError when the split yields no images.
    bleu_averages = [
        sum(x[i] for x in all_bleu_scores) * 1.0 / len(all_bleu_scores)
        for i in xrange(3)
    ]
    blob['final_result'] = {'bleu': bleu_averages}
    print('FINAL BLEU: B-1: %f B-2: %f B-3: %f' % tuple(bleu_averages))

    # now also evaluate test split perplexity
    gtppl = eval_split('test',
                       dp,
                       model,
                       checkpoint_params,
                       misc,
                       eval_max_images=max_images)
    print('perplexity of ground truth words: %f' % (gtppl, ))
    blob['gtppl'] = gtppl

    # dump result struct to file
    result_path = params['result_struct_filename']
    print('saving result struct to %s' % (result_path, ))
    with open(result_path, 'w') as json_file:
        json.dump(blob, json_file)
示例#38
0
def main(scriptparams):
    """Score each image-sentence pair of a split and save its perplexity.

    Rebuilds the Theano cost graph from the checkpoint's parameters, copies
    the checkpoint weights into it, evaluates the cost one pair at a time and
    stores ``2 ** (logppl / length)`` per pair with ``np.savez`` under
    ``scorelogs/``. The corpus-level value is printed at the end.

    Args:
        scriptparams: dict providing 'checkpoint' (path of the pickled
            checkpoint), 'split' and 'maxlen'.
    """
    # NOTE: unpickling can execute code embedded in the file; only load
    # checkpoints that come from a trusted source.
    with open(scriptparams['checkpoint'], 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    # NOTE(review): the 'scorelogs' directory is not created here; np.savez
    # below presumably fails if it does not exist.
    npfilename = osp.join(
        'scorelogs',
        osp.basename(scriptparams['checkpoint']).split('.')[0] + '_logprob%s' %
        (scriptparams['split']))
    misc = checkpoint['misc']

    # fetch the data provider
    params = checkpoint['params']
    params['use_gumbel_mse'] = 0
    params['maxlen'] = scriptparams['maxlen']

    dp = getDataProvider(params)
    # the generator weights are stored under 'model' or, failing that,
    # under 'modelGen'
    if 'model' in checkpoint:
        model_init_gen_from = checkpoint['model']
    else:
        model_init_gen_from = checkpoint['modelGen']

    lstmGenerator = decodeGenerator(params)
    model, misc['update'], misc['regularize'] = (lstmGenerator.model_th,
                                                 lstmGenerator.update_list,
                                                 lstmGenerator.regularize)

    # bit 0 of 'use_encoder_for' selects a recurrent encoder on image features
    if params.get('use_encoder_for', 0) & 1:
        if params.get('encode_gt_sentences', 0):
            # NOTE(review): batch_size is not defined in this function; this
            # line raises NameError unless a module-level batch_size exists.
            xI = tensor.zeros((batch_size, params['image_encoding_size']))
            imgFeatEnc_inp = []
        else:
            imgFeatEncoder = RecurrentFeatEncoder(params['image_feat_size'],
                                                  params['word_encoding_size'],
                                                  params,
                                                  mdl_prefix='img_enc_',
                                                  features=dp.features.T)
            mdlLen = len(model.keys())
            model.update(imgFeatEncoder.model_th)
            # the encoder must not overwrite any generator parameter
            assert (len(model.keys()) == (mdlLen +
                                          len(imgFeatEncoder.model_th.keys())))
            misc['update'].extend(imgFeatEncoder.update_list)
            misc['regularize'].extend(imgFeatEncoder.regularize)
            (imgenc_use_dropout, imgFeatEnc_inp, xI,
             updatesLSTMImgFeat) = imgFeatEncoder.build_model(model, params)
    else:
        xI = None
        imgFeatEnc_inp = []

    # bit 1 of 'use_encoder_for' selects a recurrent encoder on aux inputs
    if params.get('use_encoder_for', 0) & 2:
        aux_enc_inp = model['Wemb'] if params.get('encode_gt_sentences',
                                                  0) else dp.aux_inputs.T
        hid_size = params['featenc_hidden_size']
        auxFeatEncoder = RecurrentFeatEncoder(hid_size,
                                              params['image_encoding_size'],
                                              params,
                                              mdl_prefix='aux_enc_',
                                              features=aux_enc_inp)
        mdlLen = len(model.keys())
        model.update(auxFeatEncoder.model_th)
        # the encoder must not overwrite any existing parameter
        assert (len(model.keys()) == (mdlLen +
                                      len(auxFeatEncoder.model_th.keys())))
        misc['update'].extend(auxFeatEncoder.update_list)
        misc['regularize'].extend(auxFeatEncoder.regularize)
        (auxenc_use_dropout, auxFeatEnc_inp, xAux,
         updatesLSTMAuxFeat) = auxFeatEncoder.build_model(model, params)

        if params.get('encode_gt_sentences', 0):
            # Reshape it to size (batch_size, n_gt, hidden_size)
            xAux = xAux.reshape(
                (-1, params['n_encgt_sent'], params['featenc_hidden_size']))
            # Convert it to size (batch_size, n_gt*hidden_size)
            xAux = xAux.flatten(2)
    else:
        auxFeatEnc_inp = []
        xAux = None

    attn_nw_func = None

    (use_dropout, inp_list_gen, f_pred_prob, cost, predTh,
     updatesLSTM) = lstmGenerator.build_model(model,
                                              params,
                                              xI,
                                              xAux,
                                              attn_nw=attn_nw_func)
    inp_list = imgFeatEnc_inp + auxFeatEnc_inp + inp_list_gen

    f_eval = theano.function(inp_list, cost, name='f_eval')

    # copy the checkpoint weights into the freshly built parameters
    zipp(model_init_gen_from, model)

    logppl = []
    logppln = []
    imgids = []

    # one pair per batch so that every sentence gets its own score
    for batch in dp.iterImageSentencePairBatch(split=scriptparams['split'],
                                               max_batch_size=1,
                                               max_images=-1):
        enc_inp_list = prepare_seq_features(
            batch,
            use_enc_for=params.get('use_encoder_for', 0),
            maxlen=params['maxlen'],
            use_shared_mem=params.get('use_shared_mem_enc', 0),
            enc_gt_sent=params.get('encode_gt_sentences', 0),
            n_enc_sent=params.get('n_encgt_sent', 0),
            wordtoix=misc['wordtoix'])
        gen_inp_list, lenS = prepare_data(
            batch,
            misc['wordtoix'],
            rev_sents=params.get('reverse_sentence', 0),
            use_enc_for=params.get('use_encoder_for', 0),
            use_unk_token=params.get('use_unk_token', 0))

        batch_cost = f_eval(*(enc_inp_list + gen_inp_list))
        # presumably element 1 of the cost is the summed log2 perplexity of
        # the sentence and lenS its length - confirm against build_model
        logppl.append(batch_cost[1])
        logppln.append(lenS)
        imgids.append(
            str(batch[0]['image']['cocoid']) + '_' + str(batch[0]['sentidx']))

    perplex = 2**(np.array(logppl) / np.array(logppln))
    np.savez(npfilename, pplx=perplex, keys=np.array(imgids))

    # corpus-level value over all scored sentences
    print(2**(np.array(logppl).sum() / np.array(logppln).sum()))
示例#39
0
def main(params):
  """Build the vocabulary and model, then run one solver step on one batch.

  The data provider is created for ``params['dataset']``, the vocabulary is
  built from the training sentences, the generator initialises the model and
  a single ``solver.step`` is timed and reported.

  NOTE(review): gradient checking, exploding-loss detection, worker status
  reports, validation and checkpoint writing are not performed by this
  variant, and the iteration count is hard-coded to 1; together with the
  trailing ``cuda.close()`` this looks like a profiling/debug run - confirm.

  Args:
    params: dict providing 'batch_size', 'dataset' and
      'word_count_threshold'. It is also handed to the generator and the cost
      function and expanded as keyword arguments of ``solver.step``. The
      'mode' entry is overwritten with 'CPU'.
  """
  batch_size = params['batch_size']
  dataset = params['dataset']
  word_count_threshold = params['word_count_threshold']

  params['mode'] = 'CPU'

  # fetch the data provider
  dp = getDataProvider(dataset)

  misc = {} # stores various misc items that need to be passed around the framework

  # go over all training sentences and find the vocabulary we want to use, i.e. the words that occur
  # at least word_count_threshold number of times
  misc['wordtoix'], misc['ixtoword'], bias_init_vector = preProBuildWordVocab(dp.iterSentences('train'), word_count_threshold)
  # delegate the initialization of the model to the Generator class
  BatchGenerator = decodeGenerator(params)
  init_struct = BatchGenerator.init(params, misc)
  model, misc['update'], misc['regularize'] = (init_struct['model'], init_struct['update'], init_struct['regularize'])

  bias_row = bias_init_vector.reshape(1, bias_init_vector.size)
  if params['mode'] == 'GPU':
    # force overwrite here. This is a bit of a hack, not happy about it.
    # 'mode' was set to 'CPU' above, so this is reached only if the
    # generator changed it in the meantime.
    model['bd'] = gp.garray(bias_row)
  else:
    model['bd'] = bias_row

  def describe(keys):
    # "name [rows x cols]" for every listed parameter
    return ', '.join('%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1]) for k in keys)

  print('model init done.')
  print('model has keys: ' + ', '.join(model.keys()))
  print('updating: ' + describe(misc['update']))
  # this line used to be labelled 'updating: ' as well
  print('regularizing: ' + describe(misc['regularize']))
  print('number of learnable parameters total: %d' % (sum(model[k].shape[0] * model[k].shape[1] for k in misc['update']), ))

  # initialize the Solver and the cost function
  solver = Solver()
  def costfun(batch, model):
    # wrap the cost function to abstract some things away from the Solver
    return RNNGenCost(batch, model, params, misc)

  # iterations per epoch are only used to report the fractional epoch
  num_sentences_total = dp.getSplitSize('train', ofwhat = 'sentences')
  num_iters_one_epoch = num_sentences_total / batch_size
  smooth_train_ppl2 = len(misc['ixtoword']) # initially size of dictionary of confusion
  # a full schedule would be max_epochs * num_iters_one_epoch iterations;
  # this variant runs exactly one
  max_iters = 1
  for it in xrange(max_iters):
    t0 = time.time()
    # fetch a batch of data
    batch = [dp.sampleImageSentencePair() for _ in xrange(batch_size)]
    # evaluate cost, gradient and perform parameter update
    step_struct = solver.step(batch, model, costfun, **params)
    cost = step_struct['cost']
    dt = time.time() - t0

    # print training statistics
    train_ppl2 = step_struct['stats']['ppl2']
    if it == 0:
      smooth_train_ppl2 = train_ppl2 # start out where we start out
    else:
      # smooth exponentially decaying moving average
      smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2
    epoch = it * 1.0 / num_iters_one_epoch
    print('%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, reg cost = %f, ppl2 = %.2f (smooth %.2f)' \
          % (it, max_iters, dt, epoch, cost['loss_cost'], cost['reg_cost'], \
             train_ppl2, smooth_train_ppl2))

  cuda.close()
示例#40
0
def gen_from_scratch(params):
    """Generate a chord sequence, predict notes for it and write a MIDI file.

    A chord sequence is produced by ``chord_sequence_generation.main``,
    encoded with ``two_hot_encoding`` and fed item by item to the
    checkpoint's generator. Each predicted token has the form
    ``pitch;pos;dur`` and becomes one note of the melody track; a bass track
    is derived from the chord sequence itself.

    Args:
        params: dict providing 'checkpoint_path', 'max_images',
            'output_file', 'tempo', 'dump_folder', 'beam_size' and
            'quantize'. Its 'dataset' entry is overwritten with the value
            stored in the checkpoint, and the dict is forwarded to the chord
            generator.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    max_images = params['max_images']
    fout = params['output_file']
    tempo = params['tempo']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling can execute code embedded in the file; only load
    # checkpoints that come from a trusted source.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    params['dataset'] = checkpoint_params['dataset']
    model = checkpoint['model']
    dump_folder = params['dump_folder']
    ixtoword = checkpoint['ixtoword']

    if dump_folder:
        print('creating dump folder ' + dump_folder)
        # same effect as `mkdir -p`, but without going through a shell, so
        # paths with spaces or shell metacharacters are handled correctly
        if not os.path.isdir(dump_folder):
            os.makedirs(dump_folder)

    # Generate the chord sequence
    parts, chords, num_chords, num_parts = chord_sequence_generation.main(
        params)
    imgs = two_hot_encoding(parts, chords, num_chords, num_parts)

    # iterate over all encoded chords and predict a token sequence for each
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}
    candidates = []
    for n, img in enumerate(imgs, 1):
        print('image %d/%d:' % (n, max_images))
        Ys = BatchGenerator.predict([{'image': {'feat': img}}], model,
                                    checkpoint_params, **kwparams)

        # Ys[0] belongs to the first (and only) item we passed in; its first
        # entry is taken as the best prediction.
        top_prediction = Ys[0][0]
        # ix 0 is the END token, skip that
        candidate = ' '.join(
            [ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        candidates.append(candidate)
        print('PRED: (%f) %s' % (top_prediction[0], candidate))

    # Write midi
    # NOTE(review): new_track and bass_track are not defined in this
    # function; presumably module-level pretty_midi instruments - confirm.
    for idx, sentence in enumerate(candidates):
        for token in sentence.split():
            fields = token.split(';')
            pitch = int(fields[0])
            pos = convert_pos(fields[1], idx)
            dur = convert_dur(fields[2])
            new_track.notes.append(
                pretty_midi.Note(90, pitch, pos, pos + dur))

    new_midi_data = pretty_midi.PrettyMIDI(initial_tempo=tempo)
    new_midi_data.instruments.append(new_track)

    # pre-set chord progression
    chord_names = [
        'C;Em', 'A#;F', 'Dm;Em', 'Dm;G', 'Dm;C', 'Am;Em', 'F;C', 'F;G', 'Dm;F',
        'C;C', 'C;E', 'Am;G', 'F;Em', 'F;F', 'G;G', 'Am;Am', 'Dm;Dm', 'C;A#',
        'Em;F', 'C;G', 'G#;A#', 'F;Am', 'G#;Fm', 'Am;Gm', 'F;E', 'Dm;Am',
        'Em;Em', 'G#;G#', 'Em;Am', 'C;Am', 'F;Dm', 'G#;G', 'F;A#', 'Am;G#',
        'C;D', 'G;Am', 'Am;C', 'Am;A#', 'A#;G', 'Am;F', 'A#;Am', 'E;Am',
        'Dm;E', 'A;G', 'Am;Dm', 'Em;Dm', 'C;F#m', 'Am;D', 'G#;Em', 'C;Dm',
        'C;F', 'G;C', 'A#;A#', 'Am;Caug', 'Fm;G', 'A;A'
    ]
    chord_to_pitch = {
        'C': 36,
        'C#': 37,
        'D': 38,
        'D#': 39,
        'E': 40,
        'F': 41,
        'F#': 42,
        'G': 43,
        'G#': 44,
        'A': 45,
        'A#': 46,
        'B': 47
    }
    # The bass note is the chord's root: a note letter plus optional sharp.
    # Merely stripping 'm' left 'Caug' intact and failed the pitch lookup.
    root_pattern = re.compile(r'[A-G]#?')
    for step, chord in enumerate(chords):
        first_name, second_name = chord_names[chord[0]].split(';')
        first_root = root_pattern.match(first_name).group(0)
        second_root = root_pattern.match(second_name).group(0)
        # each chord pair fills two time units, one per chord
        bass_track.notes.append(
            pretty_midi.Note(90, chord_to_pitch[first_root], 2 * step,
                             2 * step + 1))
        bass_track.notes.append(
            pretty_midi.Note(90, chord_to_pitch[second_root], 2 * step + 1,
                             2 * (step + 1)))
    new_midi_data.instruments.append(bass_track)
    adjust_tempo(new_midi_data)
    if params['quantize']:
        quantize(new_midi_data)
    new_midi_data.write(fout)
def main(params):
  """Predict sentences for a list of images with one model or an ensemble.

  With ``params['multi_model'] == 0`` a single checkpoint is loaded and its
  Theano predictor compiled; otherwise every path in
  ``params['checkpoint_path']`` is loaded and a multi-model predictor is
  prepared on the first generator. Predictions are written to
  ``result_struct_<fname_append>.json`` (also every 5000 images) and
  ``result_<fname_append>.html`` under ``params['root_path']``.

  Args:
    params: dict providing 'multi_model', 'checkpoint_path', 'beam_size',
      'root_path', 'imgList' and 'fname_append'; additionally
      'softmax_smooth_factor' and 'softmax_propogate' in single-model mode
      and 'nmodels' in multi-model mode. It is also passed to
      ``loadArbitraryFeatures`` and embedded in the output JSON.
  """
  multi_model = params['multi_model'] != 0

  # load the checkpoint(s)
  # NOTE: unpickling can execute code embedded in the file; only load
  # checkpoints that come from a trusted source.
  if not multi_model:
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    with open(checkpoint_path, 'rb') as checkpoint_file:
      checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    model_npy = checkpoint['model']
    checkpoint_params['use_theano'] = 1
    if 'image_feat_size' not in checkpoint_params:
      checkpoint_params['image_feat_size'] = 4096

    if 'misc' in checkpoint:
      misc = checkpoint['misc']
      ixtoword = misc['ixtoword']
    else:
      misc = {}
      ixtoword = checkpoint['ixtoword']
      misc['wordtoix'] = checkpoint['wordtoix']

    checkpoint_params['softmax_smooth_factor'] = params['softmax_smooth_factor']
    checkpoint_params['softmax_propogate'] = params['softmax_propogate']
    if checkpoint_params.get('class_out_factoring', 0) == 1:
      checkpoint_params['ixtoclsinfo'] = np.zeros((checkpoint_params['nClasses'], 2), dtype=np.int32)
      ixtoclsinfo = misc['ixtoclsinfo']
      checkpoint_params['ixtoclsinfo'][ixtoclsinfo[:, 0]] = ixtoclsinfo[:, 1:3]

    if checkpoint_params.get('sched_sampling_mode', None) is not None:
      checkpoint_params['sched_sampling_mode'] = None

    BatchGenerator = decodeGenerator(checkpoint_params)
    # Compile and init the theano predictor
    BatchGenerator.prepPredictor(model_npy, checkpoint_params, params['beam_size'])
    model = BatchGenerator.model_th
    primary_params = checkpoint_params
  else:
    BatchGenerator = []
    model_npy = []
    modelTh = []
    checkpoint_params = []
    ixtoword = None
    for i, checkpoint_path in enumerate(params['checkpoint_path']):
      with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
      if ixtoword is None:
        # ixtoword was never bound in this mode. Take it from the first
        # checkpoint - assumes all models share one vocabulary, TODO confirm.
        if 'misc' in checkpoint:
          ixtoword = checkpoint['misc']['ixtoword']
        else:
          ixtoword = checkpoint['ixtoword']
      model_npy.append(checkpoint['model'])
      checkpoint_params.append(checkpoint['params'])
      checkpoint_params[i]['use_theano'] = 1
      BatchGenerator.append(decodeGenerator(checkpoint_params[i]))
      zipp(model_npy[i], BatchGenerator[i].model_th)
      modelTh.append(BatchGenerator[i].model_th)
      modelTh[i]['comb_weight'] = 1.0 / params['nmodels']

    BatchGenerator[0].prepMultiPredictor(modelTh, checkpoint_params, params['beam_size'], params['nmodels'])
    # checkpoint_params is a list here, so the option look-ups below used to
    # fail with AttributeError; they now consult the first model's params.
    primary_params = checkpoint_params[0]

  # output blob which we will dump to JSON for visualizing the results
  blob = {}
  blob['params'] = params
  blob['checkpoint_params'] = copy(checkpoint_params)
  # 'ixtoclsinfo' is only ever added in single-model mode (see above); it is
  # dropped from the copy before the blob is dumped as JSON
  if not multi_model and checkpoint_params.get('class_out_factoring', 0) == 1:
    blob['checkpoint_params'].pop('ixtoclsinfo')
  blob['imgblobs'] = []

  # load the image list and set up feature loading
  root_path = params['root_path']
  with open(params['imgList'], 'r') as list_file:
    img_names_list = list_file.read().splitlines()
  auxidxes = []

  # each line is "name", "name,idx" or "name,idx,auxidx"; the layout is
  # decided by the first line
  rows = [x.rsplit(',') for x in img_names_list]
  if len(rows[0]) > 2:
    img_names = [r[0] for r in rows]
    idxes = [int(r[1]) for r in rows]
    auxidxes = [int(r[2]) for r in rows]
  elif len(rows[0]) > 1:
    img_names = [r[0] for r in rows]
    idxes = [int(r[1]) for r in rows]
  else:
    img_names = img_names_list
    idxes = xrange(len(img_names_list))

  # load the features for all images
  # NOTE(review): a missing 'swap_aux' compares unequal to 0, so the indices
  # are swapped whenever auxidxes is non-empty - confirm this is intended.
  if primary_params.get('swap_aux') == 0 or auxidxes == []:
    features, aux_inp = loadArbitraryFeatures(params, idxes, auxidxes=auxidxes)
  else:
    features, aux_inp = loadArbitraryFeatures(params, auxidxes, auxidxes=idxes)

  N = len(img_names)

  # iterate over all images and predict sentences
  # (in multi-model mode this reports the last checkpoint that was loaded)
  print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint['epoch'], \
    checkpoint['perplexity']))

  kwparams = { 'beam_size' : params['beam_size'] }

  jsonFname = 'result_struct_%s.json' % (params['fname_append'] )
  save_file = os.path.join(root_path, jsonFname)

  for n in xrange(N):
    print('image %d/%d:' % (n, N))

    # encode the image
    if not multi_model:
      img = {}
      img['feat'] = features[:, n]
      if checkpoint_params.get('en_aux_inp', 0):
        img['aux_inp'] = aux_inp[:, n]
      img['local_file_path'] = img_names[n]
      # perform the work. heavy lifting happens inside
      Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)
    else:
      kwparams['nmodels'] = params['nmodels']
      batch = []
      # one entry per model, each with that model's own feature set
      for i in xrange(params['nmodels']):
        img = {}
        img['feat'] = features[i][:, n]
        if checkpoint_params[i].get('en_aux_inp', 0):
          img['aux_inp'] = aux_inp[i][:, n]
        img['local_file_path'] = img_names[n]
        batch.append({'image': img})
      Ys = BatchGenerator[0].predictMulti(batch, checkpoint_params, **kwparams)

    # build up the output
    img_blob = {}
    img_blob['img_path'] = img['local_file_path']

    # encode the top prediction
    top_predictions = Ys[0] # take predictions for the first (and only) image we passed in
    top_prediction = top_predictions[0] # these are sorted with highest on top
    word_ixs = top_prediction[1]
    if primary_params.get('reverse_sentence', 0) != 0:
      # the model emits the sentence back to front
      word_ixs = reversed(word_ixs)
    # ix 0 is the END token, skip that
    candidate = ' '.join([ixtoword[int(ix)] for ix in word_ixs if ix > 0])
    print('PRED: (%f) %s' % (float(top_prediction[0]), candidate))
    img_blob['candidate'] = {'text': candidate, 'logprob': float(top_prediction[0])}

    # save all the other candidates (never reversed, as before)
    candlist = []
    for prediction in top_predictions[1:]:
      text = ' '.join([ixtoword[int(ix)] for ix in prediction[1] if ix > 0])
      candlist.append({'text': text, 'logprob': float(prediction[0])})

    img_blob['candidatelist'] = candlist
    blob['imgblobs'].append(img_blob)
    if (n % 5000) == 1:
      # periodic snapshot so a long run leaves partial results behind
      print('writing predictions to %s...' % (save_file, ))
      with open(save_file, 'w') as json_file:
        json.dump(blob, json_file)

  # dump result struct to file
  print('writing predictions to %s...' % (save_file, ))
  with open(save_file, 'w') as json_file:
    json.dump(blob, json_file)

  # dump output html; collect the fragments and join once instead of
  # growing a string with +=
  html_parts = []
  for img_blob in blob['imgblobs']:
    html_parts.append('<img src="%s" height="400"><br>' % (img_blob['img_path'], ))
    html_parts.append('(%f) %s <br><br>' % (img_blob['candidate']['logprob'], img_blob['candidate']['text']))

  html_file = 'result_%s.html' % (params['fname_append'])
  html_file = os.path.join(root_path, html_file)
  print('writing html result file to %s...' % (html_file, ))
  with open(html_file, 'w') as html_out:
    html_out.write(''.join(html_parts))
def main(params):
    """Build the generator/evaluator Theano graphs and run adversarial training.

    The function
      1. builds (or restores) the vocabulary,
      2. constructs the caption generator and the evaluator and compiles
         their prediction / gradient / update functions,
      3. alternates evaluator updates and generator updates for
         ``max_epochs * (train_size / batch_size)`` iterations, and
      4. periodically logs costs, tracks validation metrics and dumps
         checkpoints, finishing with a ``*_GenDone.p`` checkpoint.

    Args:
        params: dict of run options.  Keys read here include
            ``batch_size``, ``word_count_threshold``, ``max_epochs``,
            ``solver``, ``checkpoint_file_name`` (the string ``'None'``
            means "start from scratch"), ``t_eval_only``,
            ``use_encoder_for``, ``encode_gt_sentences``, ``n_encgt_sent``,
            ``featenc_hidden_size``, ``image_encoding_size``,
            ``n_gen_samples``, ``use_mle_train``, ``met_to_track``,
            ``share_Wemb``, ``fix_Wemb``, ``eval_w_clip``,
            ``gen_feature_matching``, ``eval_period``,
            ``write_checkpoint_ppl_threshold``, ``dataset``, ``fappend``,
            ``rev_eval``, ``eval_loss``, ``maxlen``, ``eval_model``,
            ``use_shared_mem_enc``, ``learning_rate_eval`` and
            ``learning_rate_gen``.  The dict is also written to
            (``aux_inp_size``, ``image_feat_size``, ``vocabulary_size``,
            ``output_size``).

    NOTE(review): ``checkpoint_init``, ``model_init_gen_from``,
    ``model_init_eval_from``, ``bias_init_inter_class`` and ``host`` are used
    below but never assigned inside this function; presumably they are
    module-level names -- confirm.  The ``t_eval_only != 0`` path also reaches
    code that needs ``generator`` / ``f_gen_only`` / ``f_grad_comp_gen``,
    which are only created when ``t_eval_only == 0``.
    """
    batch_size = params['batch_size']
    word_count_threshold = params['word_count_threshold']
    max_epochs = params['max_epochs']

    # fetch the data provider
    dp = getDataProvider(params)

    # Initialize the optimizer
    solver = Solver(params['solver'])

    params['aux_inp_size'] = dp.aux_inp_size
    params['image_feat_size'] = dp.img_feat_size

    print('Image feature size is %d, and aux input size is %d' % (
        params['image_feat_size'], params['aux_inp_size']))

    misc = {
    }  # stores various misc items that need to be passed around the framework

    if params['checkpoint_file_name'] == 'None':
        # go over all training sentences and find the vocabulary we want to use, i.e. the words that occur
        # at least word_count_threshold number of times
        misc['wordtoix'], misc[
            'ixtoword'], bias_init_vector = preProBuildWordVocab(
                dp.iterSentences('train'), word_count_threshold)
    else:
        # Load Vocabulary from the checkpoint
        misc = checkpoint_init['misc']

    params['vocabulary_size'] = len(misc['wordtoix'])
    params['output_size'] = len(misc['ixtoword'])  # these should match though

    # This initializes the generator model parameters and does matrix initializations
    if params['t_eval_only'] == 0:
        generator = decodeGenerator(params)
        # Build the computational graph

        if params['use_encoder_for'] & 2:
            # Bit 1 of 'use_encoder_for' enables a recurrent encoder for the
            # auxiliary input; its parameters are merged into the generator's.
            aux_enc_inp = generator.model_th['Wemb'] if params[
                'encode_gt_sentences'] else dp.aux_inputs.T
            hid_size = params['featenc_hidden_size']
            auxFeatEncoder = RecurrentFeatEncoder(
                hid_size,
                params['image_encoding_size'],
                params,
                mdl_prefix='aux_enc_',
                features=aux_enc_inp)
            mdlLen = len(generator.model_th.keys())
            generator.model_th.update(auxFeatEncoder.model_th)
            # Guard against parameter-name collisions between the two models.
            assert (len(generator.model_th.keys()) == (
                mdlLen + len(auxFeatEncoder.model_th.keys())))
            (auxenc_use_dropout, auxFeatEnc_inp, xAux,
             updatesLSTMAuxFeat) = auxFeatEncoder.build_model(
                 generator.model_th, params)

            # NOTE(review): xI / imgFeatEnc_inp are only assigned inside this
            # branch when 'encode_gt_sentences' is truthy -- confirm the other
            # combination is never used.
            if params['encode_gt_sentences']:
                # Reshape it size(batch_size, n_gt, hidden_size)
                xAux = xAux.reshape((-1, params['n_encgt_sent'],
                                     params['featenc_hidden_size']))
                # Convert it to size (batch_size, n_gt*hidden_size)
                xAux = xAux.flatten(2)
                xI = tensor.zeros((batch_size, params['image_encoding_size']))
                imgFeatEnc_inp = []
        else:
            auxFeatEnc_inp = []
            imgFeatEnc_inp = []
            xAux = None
            xI = None

        (gen_inp_list, predLogProb, predIdx, predCand, gen_out, updatesLstm,
         seq_lengths) = generator.build_prediction_model(generator.model_th,
                                                         params,
                                                         xI=xI,
                                                         xAux=xAux)
        gen_inp_list = imgFeatEnc_inp + auxFeatEnc_inp + gen_inp_list
        gen_out = gen_out.reshape([
            gen_out.shape[0], -1, params['n_gen_samples'],
            params['vocabulary_size']
        ])
        #convert updates lstm to a tuple, this is to help merge it with grad updates
        updatesLstm = [(k, v) for k, v in updatesLstm.iteritems()]
        f_gen_only = theano.function(
            gen_inp_list, [predLogProb, predIdx, gen_out, seq_lengths],
            name='f_pred',
            updates=updatesLstm)

        modelGen = generator.model_th
        upListGen = generator.update_list

        if params['use_mle_train']:
            # Optional extra training path for the generator (used further
            # down together with getRandBatchByLen / prepare_data).
            (use_dropout_genTF, inp_list_genTF, _, cost_genTF, _,
             updatesLSTM_genTF) = generator.build_model(
                 generator.model_th, params)
            f_eval_genTF = theano.function(inp_list_genTF,
                                           cost_genTF,
                                           name='f_eval')
            grads_genTF = tensor.grad(cost_genTF[0],
                                      wrt=modelGen.values(),
                                      add_names=True)
            lr_genTF = tensor.scalar(name='lr', dtype=config.floatX)
            f_grad_genTF, f_update_genTF, zg_genTF, rg_genTF, ud_genTF = solver.build_solver_model(
                lr_genTF, modelGen, grads_genTF, inp_list_genTF, cost_genTF,
                params)
    else:
        modelGen = []
        updatesLstm = []

    if params['met_to_track'] != []:
        trackMetargs = {'eval_metric': params['met_to_track']}
        refToks, scr_info = eval_prep_refs('val', dp, params['met_to_track'])
        trackMetargs['refToks'] = refToks
        trackMetargs['scr_info'] = scr_info

    # Initialize the evaluator model
    if params['share_Wemb']:
        evaluator = decodeEvaluator(params, modelGen['Wemb'])
    else:
        evaluator = decodeEvaluator(params)
    modelEval = evaluator.model_th

    if params['t_eval_only'] == 0:
        # Build the evaluator graph to evaluate reference and generated captions
        if params.get('upd_eval_ref', 0):
            (refeval_inp_list, ref_f_pred_fns, ref_costs, ref_predTh,
             ref_modelEval) = evaluator.build_advers_eval(modelEval, params)
        (eval_inp_list, f_pred_fns, costs, predTh,
         modelEval) = evaluator.build_advers_eval(modelEval, params,
                                                  gen_inp_list, gen_out,
                                                  updatesLstm, seq_lengths)
    else:
        # Build the evaluator graph to evaluate only reference captions
        (eval_inp_list, f_pred_fns, costs, predTh,
         modelEval) = evaluator.build_advers_eval(modelEval, params)

    # force overwrite here. The bias to the softmax is initialized to reflect word frequencies
    if params['t_eval_only'] == 0:  # and 0:
        if params['checkpoint_file_name'] == 'None':
            modelGen['bd'].set_value(bias_init_vector.astype(config.floatX))
            if params.get('class_out_factoring', 0) == 1:
                modelGen['bdCls'].set_value(
                    bias_init_inter_class.astype(config.floatX))

    # Union of evaluator and generator inputs (evaluator inputs first).
    # Note that this extends eval_inp_list in place.
    comb_inp_list = eval_inp_list
    if params['t_eval_only'] == 0:
        for inp in gen_inp_list:
            if inp not in comb_inp_list:
                comb_inp_list.append(inp)

    # Compile an evaluation function.. Doesn't include gradients
    # To be used for validation set evaluation or debug purposes
    if params['t_eval_only'] == 0:
        f_eval = theano.function(comb_inp_list,
                                 costs[:1],
                                 name='f_eval',
                                 updates=updatesLstm)
    else:
        f_eval = theano.function(comb_inp_list, costs[:1], name='f_eval')

    if params['share_Wemb']:
        modelEval.pop('Wemb')
    if params['fix_Wemb']:
        upListGen.remove('Wemb')

    #-------------------------------------------------------------------------------------------------------------------------
    # Now let's build a gradient computation graph and update mechanism
    #-------------------------------------------------------------------------------------------------------------------------
    # First compute gradient on the evaluator params w.r.t cost
    if params.get('upd_eval_ref', 0):
        gradsEval_ref = tensor.grad(ref_costs[0],
                                    wrt=modelEval.values(),
                                    add_names=True)
    gradsEval = tensor.grad(costs[0], wrt=modelEval.values(), add_names=True)

    # Update functions for the evaluator
    lrEval = tensor.scalar(name='lrEval', dtype=config.floatX)
    if params.get('upd_eval_ref', 0):
        f_grad_comp_eval_ref, f_param_update_eval_ref, _, _, _ = solver.build_solver_model(
            lrEval,
            modelEval,
            gradsEval_ref,
            refeval_inp_list,
            ref_costs[0],
            params,
            w_clip=params['eval_w_clip'])
    f_grad_comp_eval, f_param_update_eval, zg_eval, rg_eval, ud_eval = solver.build_solver_model(
        lrEval,
        modelEval,
        gradsEval,
        comb_inp_list,
        costs[:1],
        params,
        updatesLstm,
        w_clip=params['eval_w_clip'])

    # Now compute gradient on the generator params w.r.t the cost
    if params['t_eval_only'] == 0:
        gradsGen = tensor.grad(costs[1], wrt=modelGen.values(), add_names=True)
        lrGen = tensor.scalar(name='lrGen', dtype=config.floatX)
        # Update functions for the generator.  The generator function takes
        # all combined inputs except the last one, unless
        # 'gen_feature_matching' is 1 (then the last input is kept as well).
        f_grad_comp_gen, f_param_update_gen, zg_gen, rg_gen, ud_gen = solver.build_solver_model(
            lrGen, modelGen, gradsGen,
            comb_inp_list[:(len(comb_inp_list) - 1 +
                            params['gen_feature_matching'])], costs[1], params,
            updatesLstm)

    #-------------------------------------------------------------------------------------------------------------------------
    # If we want to track some metrics during the training, initialize stuff for that now
    #-------------------------------------------------------------------------------------------------------------------------
    print('model init done.')
    if params['t_eval_only'] == 0:
        print('Gen model has keys: ' + ', '.join(modelGen.keys()))
    print('Eval model has keys: ' + ', '.join(modelEval.keys()))

    # Calculate how many iterations we need; the epoch length is derived from
    # the size of the 'train' split counted in images.
    num_sentences_total = dp.getSplitSize('train', ofwhat='images')
    num_iters_one_epoch = num_sentences_total / batch_size
    max_iters = max_epochs * num_iters_one_epoch
    skip_first = 20  # multiplier on the minimum evaluator-only steps of the first iteration
    iters_eval = 5
    iters_gen = 1

    cost_eval_iter = []
    cost_gen_iter = []
    trackSc_array = []

    eval_period_in_epochs = params['eval_period']
    eval_period_in_iters = max(
        1, int(num_iters_one_epoch * eval_period_in_epochs))
    top_val_ppl2 = -1
    smooth_train_ppl2 = 0.5
    smooth_train_cost = 0.0
    smooth_train_cost_gen = 1.0
    val_ppl2 = len(misc['ixtoword'])
    last_status_write_time = 0  # for writing worker job status reports
    json_worker_status = {}
    json_worker_status['params'] = params
    json_worker_status['history'] = []
    write_checkpoint_ppl_threshold = params['write_checkpoint_ppl_threshold']
    iter_out_file = os.path.join(
        'logs', 'advmodel_checkpoint_%s_%s_%s_log.npz' %
        (params['dataset'], host, params['fappend']))

    len_hist = defaultdict(int)
    t_print_sec = 30  # minimum number of seconds between two status prints
    ## Initialize the model parameters from the checkpoint file if we are resuming training
    if params['checkpoint_file_name'] != 'None':
        if params['t_eval_only'] != 1:
            print('\n Now initing gen Model:')
            zipp(model_init_gen_from, modelGen)
        if 'trackers' in checkpoint_init:
            trackSc_array = checkpoint_init['trackers'].get('trackScores', [])
        print('\n Now initing Eval Model:')
        zipp(model_init_eval_from, modelEval)
        #zipp(rg_init,rgGen)
        print("\nContinuing training from previous model\n. Already run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint_init['epoch'], \
          checkpoint_init['perplexity']))

    ##############################################################
    # Define signal handler to catch ctl-c or kills so that we can save the model trained till that point
    # (currently not registered -- see the commented-out line below).
    def signal_handler(signal, frame):
        print('You pressed Ctrl+C! Saving Checkpoint Now before exiting!')
        filename = 'advmodel_checkpoint_%s_%s_%s_%.2f_INT.p' % (
            params['dataset'], host, params['fappend'], val_ppl2)
        dumpCheckpoint(filename, params, modelGen, modelEval, misc, it,
                       val_ppl2)
        sys.exit(0)

    #signal.signal(signal.SIGINT, signal_handler)
    ##############################################################

    # Baseline pass before any training: track metrics, show a few samples
    # and measure the evaluator/generator accuracies on the validation split.
    generator.usegumbel.set_value(1)
    if params['met_to_track'] != []:
        tsc_max, tsc_mean, tsc_min = eval_gen_samps(f_gen_only, dp, params,
                                                    misc, params['rev_eval'],
                                                    **trackMetargs)
        trackSc_array.append((0, {
            evm + '_max': tsc_max[i]
            for i, evm in enumerate(params['met_to_track'])
        }))
        trackSc_array[-1][1].update({
            evm + '_mean': tsc_mean[i]
            for i, evm in enumerate(params['met_to_track'])
        })
        trackSc_array[-1][1].update({
            evm + '_min': tsc_min[i]
            for i, evm in enumerate(params['met_to_track'])
        })

    disp_some_gen_samps(f_gen_only, dp, params, misc, n_samp=5)
    evaluator.use_noise.set_value(1.)
    eval_acc, gen_acc = eval_discrm_gen('val', dp, params, f_pred_fns[0], misc)
    # Re-enable sampling
    generator.usegumbel.set_value(1)

    np.savez(iter_out_file,
             eval_cost=np.array(cost_eval_iter),
             gen_cost=np.array(cost_gen_iter),
             tracksc=np.array(trackSc_array))
    smooth_train_cost = 0.0

    print('###################### NOW BEGINNING TRAINING #################################')

    for it in xrange(max_iters):
        t0 = time.time()
        # Enable using dropout in training
        evaluator.use_noise.set_value(1.)
        dt = 0.
        it2 = 0
        # Evaluator phase: keep updating the evaluator until it has taken at
        # least iters_eval * skip_first steps AND the (periodically refreshed)
        # accuracies satisfy eval_acc > 60 and gen_acc < 45.
        while eval_acc <= 60. or gen_acc >= 45. or it2 < iters_eval * skip_first:
            # fetch a batch of data
            t1 = time.time()

            s_probs = [
                0.6, 0.4, 0.0
            ] if params['eval_loss'] == 'contrastive' else [1.0, 0.0, 0.0]
            batch = dp.sampAdversBatch(batch_size,
                                       n_sent=params['n_gen_samples'],
                                       probs=s_probs)
            cnn_inps = prepare_adv_data(batch,
                                        misc['wordtoix'],
                                        maxlen=params['maxlen'],
                                        prep_for=params['eval_model'])

            enc_inp_list = prepare_seq_features(
                batch,
                use_enc_for=params['use_encoder_for'],
                maxlen=params['maxlen'],
                use_shared_mem=params['use_shared_mem_enc'],
                enc_gt_sent=params['encode_gt_sentences'],
                n_enc_sent=params['n_encgt_sent'],
                wordtoix=misc['wordtoix'])
            eval_cost = f_grad_comp_eval(*(cnn_inps + enc_inp_list))

            # Drop into the debugger as soon as the evaluator cost diverges.
            if np.isnan(eval_cost[0]):
                import pdb
                pdb.set_trace()
            f_param_update_eval(params['learning_rate_eval'])

            # Track training statistics
            smooth_train_cost = 0.99 * smooth_train_cost + 0.01 * eval_cost[
                0] if it > 0 else eval_cost[0]
            dt2 = time.time() - t1
            if it2 % 500 == 499:
                gb = 0.  #modelGen['gumb_temp'].get_value() if params['use_gumbel_mse'] == 1 else 0
                print('Iter %d/%d Eval Only Iter %d/%d, done. in %.3fs. Eval Cost is %.6f' % (
                    it, max_iters, it2, iters_eval * skip_first, dt2,
                    smooth_train_cost))
            if it2 % 100 == 99:
                # Refresh the accuracies that control the loop condition.
                eval_acc, gen_acc = eval_discrm_gen('val',
                                                    dp,
                                                    params,
                                                    f_pred_fns[0],
                                                    misc,
                                                    n_eval=500)
            it2 += 1

        evaluator.use_noise.set_value(1.)

        # Only the very first outer iteration uses the larger warm-up
        # multiplier; afterwards the minimum is iters_eval evaluator steps.
        skip_first = 1

        # Generator phase: one gradient step on a fresh adversarial batch.
        s_probs = [
            1.0, 0.0, 0.0
        ] if params['eval_loss'] == 'contrastive' else [1.0, 0.0, 0.0]
        batch = dp.sampAdversBatch(batch_size,
                                   n_sent=params['n_gen_samples'],
                                   probs=s_probs)
        cnn_inps = prepare_adv_data(batch,
                                    misc['wordtoix'],
                                    maxlen=params['maxlen'],
                                    prep_for=params['eval_model'])
        enc_inp_list = prepare_seq_features(
            batch,
            use_enc_for=params['use_encoder_for'],
            maxlen=params['maxlen'],
            use_shared_mem=params['use_shared_mem_enc'],
            enc_gt_sent=params['encode_gt_sentences'],
            n_enc_sent=params['n_encgt_sent'],
            wordtoix=misc['wordtoix'])

        # Same input trimming as used when f_grad_comp_gen was compiled.
        gen_cost = f_grad_comp_gen(
            *(cnn_inps[:(len(cnn_inps) - 1 + params['gen_feature_matching'])] +
              enc_inp_list))
        f_param_update_gen(params['learning_rate_gen'])

        if params['use_mle_train']:
            # Extra generator step on a random batch, with usegumbel switched
            # off and the generator learning rate divided by 50.
            generator.usegumbel.set_value(0)
            batch, l = dp.getRandBatchByLen(batch_size)
            gen_inp_list, lenS = prepare_data(batch, misc['wordtoix'],
                                              params['maxlen'])
            cost_genMLE = f_grad_genTF(*gen_inp_list)
            f_update_genTF(np.float32(params['learning_rate_gen'] / 50.0))
            generator.usegumbel.set_value(1)

        dt = time.time() - t0
        # print training statistics
        smooth_train_cost_gen = gen_cost if it == 0 else 0.99 * smooth_train_cost_gen + 0.01 * gen_cost

        tnow = time.time()
        if tnow > last_status_write_time + t_print_sec * 1:  # every now and then lets write a report
            gb = 0.  #modelGen['gumb_temp'].get_value() if params['use_gumbel_mse'] == 1 else 0
            print('Iter %d/%d done. in %.3fs. Eval Cost is %.6f, Gen Cost is %.6f, temp: %.4f' % (it, max_iters, dt, \
             smooth_train_cost, smooth_train_cost_gen, gb))
            last_status_write_time = tnow

        cost_eval_iter.append(smooth_train_cost)
        cost_gen_iter.append(smooth_train_cost_gen)

        if it % 500 == 499:
            # Run the generator on the validation set and compute some metrics
            generator.usegumbel.set_value(1)
            if params['met_to_track'] != []:
                tsc_max, tsc_mean, tsc_min = eval_gen_samps(
                    f_gen_only, dp, params, misc, params['rev_eval'],
                    **trackMetargs)
                trackSc_array.append((it, {
                    evm + '_max': tsc_max[i]
                    for i, evm in enumerate(params['met_to_track'])
                }))
                trackSc_array[-1][1].update({
                    evm + '_mean': tsc_mean[i]
                    for i, evm in enumerate(params['met_to_track'])
                })
                trackSc_array[-1][1].update({
                    evm + '_min': tsc_min[i]
                    for i, evm in enumerate(params['met_to_track'])
                })

            disp_some_gen_samps(f_gen_only, dp, params, misc, n_samp=5)
            generator.usegumbel.set_value(1)
            # Remember the generator accuracy measured before the refresh below.
            top_val_ppl2 = gen_acc
        if it % 500 == 499:
            eval_acc, gen_acc = eval_discrm_gen('val',
                                                dp,
                                                params,
                                                f_pred_fns[0],
                                                misc,
                                                n_eval=500)
        if it % 1000 == 999:
            filename = 'advmodel_checkpoint_%s_%s_%s_%d_%.2f_genacc.p' % (
                params['dataset'], host, params['fappend'], it, gen_acc)
            dumpCheckpoint(filename, params, modelGen, modelEval, misc, it,
                           gen_acc)
        if it % 500 == 499:
            np.savez(iter_out_file,
                     eval_cost=np.array(cost_eval_iter),
                     gen_cost=np.array(cost_gen_iter),
                     tracksc=np.array(trackSc_array))

    # Final checkpoint once training is done.  The latest generator accuracy
    # lives in ``gen_acc``; the previously used name ``g_acc`` is never
    # assigned in this function and raised NameError here.
    filename = 'advmodel_checkpoint_%s_%s_%s_%d_%.2f_GenDone.p' % (
        params['dataset'], host, params['fappend'], it, gen_acc)
    dumpCheckpoint(filename, params, modelGen, modelEval, misc, it, gen_acc)
示例#43
0
def main(params):
  """Caption every image listed in ``tasks.txt`` and write JSON + HTML reports.

  Reads from ``params['root_path']``:
    * ``tasks.txt``     -- one image file name per line
    * ``vgg_feats.mat`` -- a MATLAB file whose ``feats`` entry holds one
                           feature column per image (indexed ``[:, n]``)
  and writes ``result_struct.json`` and ``result.html`` into the same
  directory.

  Args:
    params: dict with the keys ``checkpoint_path`` (pickled checkpoint
      containing ``params``, ``model`` and ``ixtoword``), ``root_path`` and
      ``beam_size``.
  """
  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  print('loading checkpoint %s' % (checkpoint_path, ))
  # NOTE(security): unpickling runs arbitrary code -- only load trusted checkpoints.
  with open(checkpoint_path, 'rb') as checkpoint_file:
    checkpoint = pickle.load(checkpoint_file)
  checkpoint_params = checkpoint['params']
  model = checkpoint['model']
  ixtoword = checkpoint['ixtoword']

  # output blob which we will dump to JSON for visualizing the results
  blob = {
    'params': params,
    'checkpoint_params': checkpoint_params,
    'imgblobs': [],
  }

  # load the tasks.txt file
  root_path = params['root_path']
  with open(os.path.join(root_path, 'tasks.txt'), 'r') as tasks_file:
    img_names = tasks_file.read().splitlines()

  # load the features for all images
  features_path = os.path.join(root_path, 'vgg_feats.mat')
  features_struct = scipy.io.loadmat(features_path)
  features = features_struct['feats']
  _, N = features.shape  # one column per image

  # iterate over all images and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  kwparams = {'beam_size': params['beam_size']}  # identical for every image
  for n in xrange(N):
    print('image %d/%d:' % (n, N))

    # encode the image
    img = {
      'feat': features[:, n],
      'local_file_path': img_names[n],
    }

    # perform the work. heavy lifting happens inside
    Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)

    # encode the top prediction
    top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
    top_prediction = top_predictions[0]  # these are sorted with highest on top
    candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])  # ix 0 is the END token, skip that
    # Convert to a builtin float so that json.dump never sees a numpy scalar.
    logprob = float(top_prediction[0])
    print('PRED: (%f) %s' % (logprob, candidate))

    blob['imgblobs'].append({
      'img_path': img['local_file_path'],
      'candidate': {'text': candidate, 'logprob': logprob},
    })

  # dump result struct to file
  save_file = os.path.join(root_path, 'result_struct.json')
  print('writing predictions to %s...' % (save_file, ))
  with open(save_file, 'w') as out_file:
    json.dump(blob, out_file)

  # dump output html
  html_parts = []
  for img_blob in blob['imgblobs']:
    html_parts.append('<img src="%s" height="400"><br>' % (img_blob['img_path'], ))
    html_parts.append('(%f) %s <br><br>' % (img_blob['candidate']['logprob'], img_blob['candidate']['text']))
  html_file = os.path.join(root_path, 'result.html')
  print('writing html result file to %s...' % (html_file, ))
  with open(html_file, 'w') as out_file:
    out_file.write(''.join(html_parts))
def main(params):
    """Caption the images of a task file and dump up to five candidates each.

    Args:
        params: dict with the keys
            ``checkpoint_path`` -- pickled checkpoint holding ``params``,
                                   ``model`` and ``ixtoword``;
            ``task_file``       -- text file with one image name per line;
            ``feature_file``    -- pickled 2-D feature matrix; it is
                                   transposed after loading so that column
                                   ``n`` is the feature vector of image ``n``;
            ``beam_size``       -- forwarded to ``BatchGenerator.predict``;
            ``out_file``        -- destination of the JSON result.

    For every image the result stores the image path, the time spent in
    ``predict`` (``rnn_time``) and parallel ``text`` / ``logprob`` lists
    for the best ``min(5, number of predictions)`` candidates.
    """
    # load the checkpoint
    checkpoint_path = params["checkpoint_path"]
    print("loading checkpoint %s" % (checkpoint_path,))
    # NOTE(security): unpickling runs arbitrary code -- only load trusted files.
    with open(checkpoint_path, "rb") as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint["params"]
    model = checkpoint["model"]
    ixtoword = checkpoint["ixtoword"]

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        "params": params,
        "checkpoint_params": checkpoint_params,
        "imgblobs": [],
    }

    # load the task file
    with open(params["task_file"], "r") as task_file:
        img_names = task_file.read().splitlines()

    # load the features for all images; pickles must be read in binary mode
    with open(params["feature_file"], "rb") as feature_file:
        features = pickle.load(feature_file)
    features = features.T
    _, N = features.shape  # one column per image after the transpose

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {"beam_size": params["beam_size"]}  # identical for every image
    for n in xrange(N):
        print("image %d/%d:" % (n, N))

        # encode the image
        img = {
            "feat": features[:, n],
            "local_file_path": img_names[n],
        }

        # perform the work. heavy lifting happens inside
        tic = time.time()
        Ys = BatchGenerator.predict([{"image": img}], model, checkpoint_params, **kwparams)
        toc = time.time()

        print("image %d/%d: %f" % (n, N, toc - tic))

        # build up the output
        texts = []
        logprobs = []
        top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
        for top_prediction in top_predictions[:5]:
            # ix 0 is the END token, skip that
            texts.append(" ".join([ixtoword[ix] for ix in top_prediction[1] if ix > 0]))
            # builtin float keeps json.dump independent of numpy scalar types
            logprobs.append(float(top_prediction[0]))

        blob["imgblobs"].append({
            "img_path": img["local_file_path"],
            "rnn_time": toc - tic,
            "candidate": {"text": texts, "logprob": logprobs},
        })

    # dump result struct to file
    save_file = params["out_file"]
    print("writing predictions to %s..." % (save_file,))
    with open(save_file, "w") as out_file:
        json.dump(blob, out_file)
    # dump output html
    """
示例#45
0
文件: test.py 项目: r-shyam/mv-lstm
def main(params, splitno, model_file):
    """Evaluate a checkpoint on the test split and return video-level accuracy.

    The data provider yields clips ("blocks"); consecutive clips that share
    the same ``filename`` belong to one video.  Every clip is classified by
    taking the most frequent per-row argmax of the prediction, and a video
    is labelled with the most frequent label among its clips.

    Side effects: appends the per-video records to
    ``./status/mmdb_stat_split_<splitno>.json`` and the summary to
    ``./status/mmdb_split_result_split_<splitno>.json``.

    Args:
        params: dict; only ``max_blocks`` (maximum number of clips to
            evaluate) is read here.
        splitno: integer split id, used in the output file names.
        model_file: path of the pickled checkpoint, which must contain
            ``params`` (with ``dataset`` and ``feature_file``),
            ``json_file`` and ``model``.

    Returns:
        Fraction of videos whose predicted label equals the ground truth
        (``0.0`` when the test split yields no clips).
    """
    checkpoint_path = model_file
    max_blocks = params['max_blocks']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(security): unpickling runs arbitrary code -- only load trusted checkpoints.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    dataset = checkpoint_params['dataset']
    feature_file = checkpoint_params['feature_file']
    json_file = checkpoint['json_file']
    model = checkpoint['model']

    # fetch the data provider
    dp = getDataProvider(dataset, feature_file, json_file)

    # iterate over all videos in test set and predict class labels
    BatchGenerator = decodeGenerator(checkpoint_params)

    stat = []  # one record per finished video

    def score_video(video_name, gt_label, block_labels):
        """Record the majority-vote label of one video; return True if correct."""
        video_label = max_occurrences(block_labels)[0]
        stat.append({
            'video_name': video_name,
            'gt_label': gt_label,
            'pred_label': int(video_label),
        })
        return video_label == gt_label

    n = 0
    correct = 0
    video_count = 0
    prev_video_name = ''
    prev_gt_video_label = 0
    pred_video_label = []  # clip-level predictions of the video being accumulated

    for img in dp.iterImagesContext(split='test', max_images=max_blocks):
        n += 1
        print('clip %d/%d:' % (n, max_blocks))
        gt_video_label = img['sentences'][0]['tokens'][0]
        current_video_name = img['filename']

        Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params)
        pred_frame_labels = np.argmax(Ys[0], axis=1)
        current_pred_video_label = max_occurrences(pred_frame_labels)[0]

        # impl based on action recog using visual attn paper - http://arxiv.org/abs/1511.04119
        if n > 1 and current_video_name != prev_video_name:
            # A new video starts: close the previous one first.
            if score_video(prev_video_name, prev_gt_video_label, pred_video_label):
                correct += 1
            video_count += 1
            pred_video_label = []

        pred_video_label.append(current_pred_video_label)
        prev_video_name = current_video_name
        prev_gt_video_label = gt_video_label

    # Close the video that was still being accumulated when the loop ended.
    # This must happen even when that video consists of a single clip;
    # the guard keeps an empty test split from being scored at all.
    if pred_video_label:
        if score_video(prev_video_name, prev_gt_video_label, pred_video_label):
            correct += 1
        video_count += 1

    with open("./status/mmdb_stat_split_%d.json" % (splitno), 'a') as stat_file:
        json.dump(stat, stat_file)
    accuracy = correct / float(video_count) if video_count else 0.0

    result = {'split': splitno, 'accuracy': accuracy}
    with open("./status/mmdb_split_result_split_%d.json" % (splitno), 'a') as result_file:
        json.dump(result, result_file)

    return accuracy
def main(params):
  """Caption the test split with a saved checkpoint and score the captions with BLEU.

  Loads the pickled checkpoint named by params['checkpoint_path'], prepares the
  theano predictor, calls the generator's predict() on every image yielded by
  the data provider's 'test' split, writes candidate/reference files under
  eval/ for multi-bleu.perl, and finally dumps a JSON result struct.

  params keys read here: 'checkpoint_path', 'max_images', 'dump_folder',
  'beam_size', 'result_struct_filename'.
  """
  import subprocess  # local import: used to run mkdir/cp with argv lists instead of shell strings

  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  max_images = params['max_images']
  dump_folder = params['dump_folder']
  beam_size = params['beam_size']

  print('loading checkpoint %s' % (checkpoint_path, ))
  # NOTE(review): unpickling runs arbitrary code; only load checkpoints you trust.
  with open(checkpoint_path, 'rb') as f:
    checkpoint = pickle.load(f)
  checkpoint_params = checkpoint['params']
  model_npy = checkpoint['model']
  ixtoword = checkpoint['ixtoword']

  # this script always uses the theano predictor, whatever the checkpoint recorded
  checkpoint_params['use_theano'] = 1

  if 'image_feat_size' not in checkpoint_params:
    checkpoint_params['image_feat_size'] = 4096

  if dump_folder:
    print('creating dump folder ' + dump_folder)
    # argv list (no shell) so folder names with spaces or metacharacters are safe
    subprocess.call(['mkdir', '-p', dump_folder])

  # fetch the data provider
  dp = getDataProvider(checkpoint_params)

  # output blob which we will dump to JSON for visualizing the results
  blob = {
    'params': params,
    'checkpoint_params': checkpoint_params,
    'imgblobs': [],
  }

  # compile and init the theano predictor
  BatchGenerator = decodeGenerator(checkpoint_params)
  BatchGenerator.prepPredictor(model_npy, checkpoint_params, beam_size)
  model = BatchGenerator.model_th
  print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint['epoch'],
    checkpoint['perplexity']))

  def to_text(word_ixs):
    # ix 0 is the END token, skip that
    return ' '.join([ixtoword[int(ix)] for ix in word_ixs if ix > 0])

  # iterate over all images in test set and predict sentences
  all_references = []
  all_candidates = []
  kwparams = {'beam_size': beam_size}
  for n, img in enumerate(dp.iterImages(split='test', max_images=max_images), 1):
    print('image %d/%d:' % (n, max_images))
    references = [' '.join(x['tokens']) for x in img['sentences']]  # one string per reference sentence

    Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)

    img_blob = {}  # we will build this up
    img_blob['img_path'] = img['local_file_path']
    img_blob['imgid'] = img['imgid']

    if dump_folder:
      # copy source file to some folder. This makes it easier to distribute results
      # into a webpage, because all images that were predicted on are in a single folder
      source_file = img['local_file_path']
      target_file = os.path.join(dump_folder, os.path.basename(source_file))
      subprocess.call(['cp', source_file, target_file])

    # encode the human-provided references
    for gtsent in references:
      print('GT: ' + gtsent)
    img_blob['references'] = [{'text': gtsent} for gtsent in references]

    # now evaluate and encode the top prediction
    top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
    top_prediction = top_predictions[0]  # these are sorted with highest on top
    candidate = to_text(top_prediction[1])
    print('PRED: (%f) %s' % (top_prediction[0], candidate))

    # save for later eval
    all_references.append(references)
    all_candidates.append(candidate)

    img_blob['candidate'] = {'text': candidate, 'logprob': float(top_prediction[0])}

    # also keep every remaining beam candidate, in the order predict() returned them
    img_blob['candidatelist'] = [
      {'text': to_text(prediction[1]), 'logprob': float(prediction[0])}
      for prediction in top_predictions[1:]
    ]

    blob['imgblobs'].append(img_blob)

  # use perl script to eval BLEU score for fair comparison to other research work
  # first write intermediate files
  print('writing intermediate files into eval/')
  with open('eval/output', 'w') as f:
    f.write('\n'.join(all_candidates))
  # the evaluation uses exactly the first 5 references of every image
  for q in range(5):
    with open('eval/reference%d' % q, 'w') as f:
      f.write('\n'.join([x[q] for x in all_references]))
  # invoke the perl script to get BLEU scores
  print('invoking eval/multi-bleu.perl script...')
  owd = os.getcwd()
  os.chdir('eval')
  try:
    os.system('./multi-bleu.perl reference < output')
  finally:
    os.chdir(owd)  # always restore the working directory

  # NOTE: test-split perplexity evaluation is disabled in this script.

  # dump result struct to file
  print('saving result struct to %s' % (params['result_struct_filename'], ))
  with open(params['result_struct_filename'], 'w') as f:
    json.dump(blob, f)
# Example #47
# 0
def main(params):
  """Train a sentence generator and checkpoint it on validation perplexity.

  Builds the vocabulary from the 'train' sentences, initializes the model via
  the generator selected by decodeGenerator(params), then runs
  max_epochs * (num_train_sentences // batch_size) solver steps. Per-iteration
  statistics go to <outdir>/<generator>.csv, validation rows to
  <outdir>/<generator>_val.csv, and a JSON status file is rewritten roughly
  once a minute. A checkpoint is pickled whenever validation perplexity
  improves (subject to params['write_checkpoint_ppl_threshold']).

  params keys read here: 'batch_size', 'dataset', 'word_count_threshold',
  'do_grad_check', 'max_epochs', 'init_model_from' (optional), 'eval_period',
  'outdir', 'generator', 'worker_status_output_directory', 'min_ppl_or_abort',
  'write_checkpoint_ppl_threshold', 'fappend'. The whole dict is also forwarded
  to solver.step() as keyword arguments.
  """
  import csv

  batch_size = params['batch_size']
  dataset = params['dataset']
  word_count_threshold = params['word_count_threshold']
  do_grad_check = params['do_grad_check']
  max_epochs = params['max_epochs']

  # fetch the data provider
  dp = getDataProvider(dataset)

  misc = {}  # stores various misc items that need to be passed around the framework

  # go over all training sentences and find the vocabulary we want to use, i.e. the words that occur
  # at least word_count_threshold number of times
  misc['wordtoix'], misc['ixtoword'], bias_init_vector = preProBuildWordVocab(dp.iterSentences('train'), word_count_threshold)

  # delegate the initialization of the model to the Generator class
  BatchGenerator = decodeGenerator(params)
  init_struct = BatchGenerator.init(params, misc)
  model, misc['update'], misc['regularize'] = (init_struct['model'], init_struct['update'], init_struct['regularize'])

  # force overwrite here. This is a bit of a hack, not happy about it
  model['bd'] = bias_init_vector.reshape(1, bias_init_vector.size)

  def describe(keys):
    # "name [rows x cols]" for each listed model parameter
    return ', '.join('%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1]) for k in keys)

  print('model init done.')
  print('model has keys: ' + ', '.join(model.keys()))
  print('updating: ' + describe(misc['update']))
  print('regularizing: ' + describe(misc['regularize']))
  print('number of learnable parameters total: %d' % (sum(model[k].shape[0] * model[k].shape[1] for k in misc['update']), ))

  if params.get('init_model_from', ''):
    # load checkpoint
    # NOTE(review): unpickling runs arbitrary code; only load checkpoints you trust.
    with open(params['init_model_from'], 'rb') as f:
      checkpoint = pickle.load(f)
    model = checkpoint['model']  # overwrite the model

  # initialize the Solver and the cost function
  solver = Solver()
  def costfun(batch, model):
    # wrap the cost function to abstract some things away from the Solver
    return RNNGenCost(batch, model, params, misc)

  def snapshot(it, epoch, val_ppl2):
    # fresh dict describing the current training state, for evaluation or pickling
    return {
      'it': it,
      'epoch': epoch,
      'model': model,
      'params': params,
      'perplexity': val_ppl2,
      'wordtoix': misc['wordtoix'],
      'ixtoword': misc['ixtoword'],
      'algorithm': params['generator'],
      'outdir': params['outdir'],
    }

  # calculate how many iterations we need
  num_sentences_total = dp.getSplitSize('train', ofwhat='sentences')
  num_iters_one_epoch = num_sentences_total // batch_size  # explicit floor division
  max_iters = max_epochs * num_iters_one_epoch
  eval_period_in_epochs = params['eval_period']
  eval_period_in_iters = max(1, int(num_iters_one_epoch * eval_period_in_epochs))
  abort = False
  top_val_ppl2 = -1
  smooth_train_ppl2 = len(misc['ixtoword'])  # initially size of dictionary of confusion
  val_ppl2 = len(misc['ixtoword'])
  last_status_write_time = 0  # for writing worker job status reports
  json_worker_status = {}
  json_worker_status['params'] = params
  json_worker_status['history'] = []

  train_csv_path = os.path.join(params['outdir'], params['generator'] + '.csv')
  val_csv_path = os.path.join(params['outdir'], params['generator'] + '_val.csv')

  # the with-block guarantees both CSV logs are closed, even on abort or sys.exit()
  with open(train_csv_path, 'wb') as csvfile, open(val_csv_path, 'wb') as csv_val_file:
    csvout = csv.writer(csvfile, delimiter=',', quotechar='"')
    csv_val_out = csv.writer(csv_val_file, delimiter=',', quotechar='"')

    for it in xrange(max_iters):
      if abort: break
      t0 = time.time()
      # fetch a batch of data
      batch = [dp.sampleImageSentencePair() for i in xrange(batch_size)]
      # evaluate cost, gradient and perform parameter update
      step_struct = solver.step(batch, model, costfun, **params)
      cost = step_struct['cost']
      dt = time.time() - t0

      # print training statistics
      train_ppl2 = step_struct['stats']['ppl2']
      smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2  # smooth exponentially decaying moving average
      if it == 0: smooth_train_ppl2 = train_ppl2  # start out where we start out
      epoch = it * 1.0 / num_iters_one_epoch
      print('%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, reg cost = %f, ppl2 = %.2f (smooth %.2f)'
            % (it, max_iters, dt, epoch, cost['loss_cost'], cost['reg_cost'],
               train_ppl2, smooth_train_ppl2))

      csvout.writerow([it, max_iters, dt, epoch, cost['loss_cost'], cost['reg_cost'], train_ppl2, smooth_train_ppl2])
      csvfile.flush()

      # `host` is a module-level name defined outside this function
      if host != 'oliver-Aurora-R4':
        sys.stdout.flush()

      # perform gradient check if desired, with a bit of a burnin time (10 iterations)
      if it == 10 and do_grad_check:
        print('disabling dropout for gradient check...')
        params['drop_prob_encoder'] = 0
        params['drop_prob_decoder'] = 0
        solver.gradCheck(batch, model, costfun)
        print('done gradcheck, exitting.')
        sys.exit()

      # detect if loss is exploding and kill the job if so
      total_cost = cost['total_cost']
      if it == 0:
        total_cost0 = total_cost  # store this initial cost
      if total_cost > total_cost0 * 2:
        print('Aborting, cost seems to be exploding. Run gradcheck? Lower the learning rate?')
        abort = True  # set the abort flag, we'll break out

      # logging: write JSON files for visual inspection of the training
      tnow = time.time()
      if tnow > last_status_write_time + 60*1:  # every now and then lets write a report
        last_status_write_time = tnow
        jstatus = {}
        jstatus['time'] = datetime.datetime.now().isoformat()
        jstatus['iter'] = (it, max_iters)
        jstatus['epoch'] = (epoch, max_epochs)
        jstatus['time_per_batch'] = dt
        jstatus['smooth_train_ppl2'] = smooth_train_ppl2
        jstatus['val_ppl2'] = val_ppl2  # just write the last available one
        jstatus['train_ppl2'] = train_ppl2
        json_worker_status['history'].append(jstatus)
        status_file = os.path.join(params['worker_status_output_directory'], host + '_status.json')
        try:
          with open(status_file, 'w') as f:
            json.dump(json_worker_status, f)
        except Exception as e:  # best effort: a failed status write must not kill training
          print('tried to write worker status into %s but got error:' % (status_file, ))
          print(e)

      # perform perplexity evaluation on the validation set and save a model checkpoint if it's good
      is_last_iter = (it+1) == max_iters
      if (((it+1) % eval_period_in_iters) == 0 and it < max_iters - 5) or is_last_iter:
        val_ppl2 = eval_split('val', dp, model, params, misc)  # perform the evaluation on VAL set
        print('validation perplexity = %f' % (val_ppl2, ))

        if is_last_iter:
          scores = eval_sentence_predictions.run(snapshot(it, epoch, val_ppl2))
          csv_val_out.writerow([it, max_iters, dt, epoch, val_ppl2] + [scores[i] for i in range(7)])
          csv_val_file.flush()
          omail.send('job finished'+params['generator'],'done')

        # abort training if the perplexity is no good
        min_ppl_or_abort = params['min_ppl_or_abort']
        if val_ppl2 > min_ppl_or_abort and min_ppl_or_abort > 0:
          print('aborting job because validation perplexity %f > %f' % (val_ppl2, min_ppl_or_abort))
          abort = True  # abort the job

        write_checkpoint_ppl_threshold = params['write_checkpoint_ppl_threshold']
        if val_ppl2 < top_val_ppl2 or top_val_ppl2 < 0:
          if val_ppl2 < write_checkpoint_ppl_threshold or write_checkpoint_ppl_threshold < 0:
            # if we beat a previous record or if this is the first time
            # AND we also beat the user-defined threshold or it doesnt exist
            top_val_ppl2 = val_ppl2
            filename = 'model_%s_checkpoint_%s_%s_%s_%.2f.p' % (params['generator'], dataset, host, params['fappend'], val_ppl2)
            filepath = os.path.join(params['outdir'], filename)
            checkpoint = snapshot(it, epoch, val_ppl2)

            try:
              with open(filepath, "wb") as f:
                pickle.dump(checkpoint, f)
              print('saved checkpoint in %s' % (filepath, ))
            except Exception as e:  # best effort: report the failure and keep training
              print('tried to write checkpoint into %s but got error: ' % (filepath, ))
              print(e)

            scores = eval_sentence_predictions.run(checkpoint)
            csv_val_out.writerow([it, max_iters, dt, epoch, val_ppl2] + [scores[i] for i in range(7)])
            csv_val_file.flush()
# Example #48
# 0
def main(params):
    """Caption the test split with a saved checkpoint and score the captions with BLEU.

    Loads the pickled checkpoint named by params['checkpoint_path'], prepares
    the theano predictor, calls the generator's predict() on every image
    yielded by the data provider's 'test' split, writes candidate/reference
    files under eval/ for multi-bleu.perl, and dumps a JSON result struct.

    NOTE(review): this variant overwrites every image's features with uniform
    random noise before predicting (see the loop below), so the captions do not
    depend on image content. That looks like an ablation or debugging switch;
    confirm it is intended before trusting the scores.

    params keys read here: 'checkpoint_path', 'max_images', 'dump_folder',
    'beam_size', 'result_struct_filename'.
    """
    import subprocess  # local import: used to run mkdir/cp with argv lists instead of shell strings

    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    max_images = params['max_images']
    dump_folder = params['dump_folder']
    beam_size = params['beam_size']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(review): unpickling runs arbitrary code; only load checkpoints you trust.
    with open(checkpoint_path, 'rb') as f:
        checkpoint = pickle.load(f)
    checkpoint_params = checkpoint['params']
    model_npy = checkpoint['model']
    ixtoword = checkpoint['ixtoword']

    # this script always uses the theano predictor, whatever the checkpoint recorded
    checkpoint_params['use_theano'] = 1

    if 'image_feat_size' not in checkpoint_params:
        checkpoint_params['image_feat_size'] = 4096

    if dump_folder:
        print('creating dump folder ' + dump_folder)
        # argv list (no shell) so folder names with spaces or metacharacters are safe
        subprocess.call(['mkdir', '-p', dump_folder])

    # fetch the data provider
    dp = getDataProvider(checkpoint_params)

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # compile and init the theano predictor
    BatchGenerator = decodeGenerator(checkpoint_params)
    BatchGenerator.prepPredictor(model_npy, checkpoint_params, beam_size)
    model = BatchGenerator.model_th
    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint['epoch'],
      checkpoint['perplexity']))

    def to_text(word_ixs):
        # ix 0 is the END token, skip that
        return ' '.join([ixtoword[int(ix)] for ix in word_ixs if ix > 0])

    # iterate over all images in test set and predict sentences
    all_references = []
    all_candidates = []
    kwparams = {'beam_size': beam_size}
    for n, img in enumerate(dp.iterImages(split='test', max_images=max_images), 1):
        print('image %d/%d:' % (n, max_images))
        references = [' '.join(x['tokens']) for x in img['sentences']]  # one string per reference sentence

        # replace the real features with random noise of the same shape (see docstring note)
        img['feat'] = np.random.rand(*img['feat'].shape)

        Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)

        img_blob = {}  # we will build this up
        img_blob['img_path'] = img['local_file_path']
        img_blob['imgid'] = img['imgid']

        if dump_folder:
            # copy source file to some folder. This makes it easier to distribute results
            # into a webpage, because all images that were predicted on are in a single folder
            source_file = img['local_file_path']
            target_file = os.path.join(dump_folder, os.path.basename(source_file))
            subprocess.call(['cp', source_file, target_file])

        # encode the human-provided references
        for gtsent in references:
            print('GT: ' + gtsent)
        img_blob['references'] = [{'text': gtsent} for gtsent in references]

        # now evaluate and encode the top prediction
        top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
        top_prediction = top_predictions[0]  # these are sorted with highest on top
        candidate = to_text(top_prediction[1])
        print('PRED: (%f) %s' % (top_prediction[0], candidate))

        # save for later eval
        all_references.append(references)
        all_candidates.append(candidate)

        img_blob['candidate'] = {
            'text': candidate,
            'logprob': float(top_prediction[0]),
        }

        # also keep every remaining beam candidate, in the order predict() returned them
        img_blob['candidatelist'] = [
            {'text': to_text(prediction[1]), 'logprob': float(prediction[0])}
            for prediction in top_predictions[1:]
        ]

        blob['imgblobs'].append(img_blob)

    # use perl script to eval BLEU score for fair comparison to other research work
    # first write intermediate files
    print('writing intermediate files into eval/')
    with open('eval/output', 'w') as f:
        f.write('\n'.join(all_candidates))
    # the evaluation uses exactly the first 5 references of every image
    for q in range(5):
        with open('eval/reference%d' % q, 'w') as f:
            f.write('\n'.join([x[q] for x in all_references]))
    # invoke the perl script to get BLEU scores
    print('invoking eval/multi-bleu.perl script...')
    owd = os.getcwd()
    os.chdir('eval')
    try:
        os.system('./multi-bleu.perl reference < output')
    finally:
        os.chdir(owd)  # always restore the working directory

    # NOTE: test-split perplexity evaluation is disabled in this script.

    # dump result struct to file
    print('saving result struct to %s' % (params['result_struct_filename'], ))
    with open(params['result_struct_filename'], 'w') as f:
        json.dump(blob, f)
def main(params):
  """Caption the test split and report per-image and average BLEU plus perplexity.

  Loads the pickled checkpoint named by params['checkpoint_path'], predicts a
  sentence for every image of the 'test' split, scores each top prediction
  with evalCandidate() against the tokenized references, averages the three
  returned scores, evaluates test-split perplexity with eval_split(), and
  dumps everything as JSON to params['result_struct_filename'].

  params keys read here: 'checkpoint_path', 'max_images', 'beam_size',
  'result_struct_filename'.
  """
  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  max_images = params['max_images']

  print('loading checkpoint %s' % (checkpoint_path, ))
  # NOTE(review): unpickling runs arbitrary code; only load checkpoints you trust.
  with open(checkpoint_path, 'rb') as f:
    checkpoint = pickle.load(f)
  checkpoint_params = checkpoint['params']
  dataset = checkpoint_params['dataset']
  model = checkpoint['model']

  # fetch the data provider
  dp = getDataProvider(dataset)

  misc = {}
  misc['wordtoix'] = checkpoint['wordtoix']
  ixtoword = checkpoint['ixtoword']

  blob = {}  # output blob which we will dump to JSON for visualizing the results
  blob['params'] = params
  blob['checkpoint_params'] = checkpoint_params
  blob['imgblobs'] = []

  # iterate over all images in test set and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  # prediction options do not change between images, so build them once
  kwparams = {
    'tanhC_version': checkpoint_params.get('tanhC_version', 0),
    'beam_size': params['beam_size'],
    'generator': checkpoint_params['generator'],
  }
  all_bleu_scores = []
  for n, img in enumerate(dp.iterImages(split='test', max_images=max_images), 1):
    print('image %d/%d:' % (n, max_images))
    references = [x['tokens'] for x in img['sentences']]  # as list of lists of tokens
    Ys = BatchGenerator.predict([{'image': img}], model, **kwparams)

    img_blob = {}  # we will build this up
    img_blob['img_path'] = img['local_file_path']
    img_blob['imgid'] = img['imgid']

    # encode the human-provided references
    img_blob['references'] = []
    for gtwords in references:
      gtsent = ' '.join(gtwords)
      print('GT: ' + gtsent)
      img_blob['references'].append({'text': gtsent})

    # now evaluate and encode the top prediction
    top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
    top_prediction = top_predictions[0]  # these are sorted with highest on top
    candidate = [ixtoword[ix] for ix in top_prediction[1]]
    candidate_text = ' '.join(candidate)
    print('PRED: (%f) %s' % (top_prediction[0], candidate_text))
    bleu_scores = evalCandidate(candidate, references)
    print('BLEU: B-1: %f B-2: %f B-3: %f' % tuple(bleu_scores))
    img_blob['candidate'] = {'text': candidate_text, 'logprob': top_prediction[0], 'bleu': bleu_scores}

    all_bleu_scores.append(bleu_scores)
    blob['imgblobs'].append(img_blob)

  print('final average bleu scores:')
  num_scored = len(all_bleu_scores)
  if num_scored:
    bleu_averages = [sum(x[i] for x in all_bleu_scores)*1.0/num_scored for i in range(3)]
  else:
    # an empty test split used to crash here with ZeroDivisionError
    print('no test images were scored, reporting 0.0 for all BLEU averages')
    bleu_averages = [0.0, 0.0, 0.0]
  blob['final_result'] = {'bleu': bleu_averages}
  print('FINAL BLEU: B-1: %f B-2: %f B-3: %f' % tuple(bleu_averages))

  # now also evaluate test split perplexity
  gtppl = eval_split('test', dp, model, checkpoint_params, misc, eval_max_images=max_images)
  print('perplexity of ground truth words: %f' % (gtppl, ))
  blob['gtppl'] = gtppl

  # dump result struct to file
  print('saving result struct to %s' % (params['result_struct_filename'], ))
  with open(params['result_struct_filename'], 'w') as f:
    json.dump(blob, f)
def main(params):
    """Caption every image listed in <root_path>/img/tasks.txt and write JSON + HTML results.

    Loads the pickled checkpoint named by params['checkpoint_path'], reads
    pre-computed features from <root_path>/self_img_vgg_feats.npy, predicts a
    sentence for each feature column, then writes <root_path>/result_struct.json
    and <root_path>/result.html.

    params keys read here: 'checkpoint_path', 'root_path', 'beam_size'.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(review): unpickling runs arbitrary code; only load checkpoints you trust.
    with open(checkpoint_path, 'rb') as f:
        checkpoint = pickle.load(f, encoding='latin1')
    checkpoint_params = checkpoint['params']
    model = checkpoint['model']
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # load the tasks.txt file: one image file name per line
    root_path = params['root_path']
    with open(os.path.join(root_path, 'img', 'tasks.txt'), 'r') as f:
        img_names = f.read().splitlines()

    # load the features for all images
    features_path = os.path.join(root_path, 'self_img_vgg_feats.npy')
    # after the transpose each column is one image; the original comment
    # describes the result as a 4096 x N array - TODO confirm against the extractor
    features = np.load(features_path).T
    _, N = features.shape

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}
    for n in range(N):
        print('image %d/%d:' % (n, N))

        # encode the image
        img = {
            'feat': features[:, n],
            'local_file_path': img_names[n],
        }

        # perform the work. heavy lifting happens inside
        Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)

        # encode the top prediction
        top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
        top_prediction = top_predictions[0]  # these are sorted with highest on top
        # ix 0 is the END token, skip that
        candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        print('PRED: (%f) %s' % (top_prediction[0], candidate))

        # build up the output; float() keeps the logprob JSON-serializable
        # even if predict() hands back a numpy scalar
        blob['imgblobs'].append({
            'img_path': img['local_file_path'],
            'candidate': {
                'text': candidate,
                'logprob': float(top_prediction[0]),
            },
        })

    # dump result struct to file
    save_file = os.path.join(root_path, 'result_struct.json')
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as f:
        json.dump(blob, f)

    # dump output html: one <img> tag plus its caption per image
    html_parts = []
    for img in blob['imgblobs']:
        html_parts.append('<img src="%s" height="400"><br>' % ('img/' + img['img_path'], ))
        html_parts.append('(%f) %s <br><br>' % (img['candidate']['logprob'],
                                              img['candidate']['text']))
    html_file = os.path.join(root_path, 'result.html')
    print('writing html result file to %s...' % (html_file, ))
    with open(html_file, 'w') as f:
        f.write(''.join(html_parts))
def main(params):
  """Caption the test split with a saved checkpoint, then report BLEU and perplexity.

  Loads the pickled checkpoint named by params['checkpoint_path'], predicts a
  sentence for every image of the 'test' split, writes candidate/reference
  files under eval/ and runs multi-bleu.perl on them, evaluates test-split
  perplexity with eval_split(), and dumps a JSON result struct to
  params['result_struct_filename'].

  params keys read here: 'checkpoint_path', 'max_images', 'beam_size',
  'result_struct_filename'.
  """
  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  max_images = params['max_images']

  print('loading checkpoint %s' % (checkpoint_path, ))
  # NOTE(review): unpickling runs arbitrary code; only load checkpoints you trust.
  with open(checkpoint_path, 'rb') as f:
    checkpoint = pickle.load(f)
  checkpoint_params = checkpoint['params']
  dataset = checkpoint_params['dataset']
  model = checkpoint['model']

  # fetch the data provider
  dp = getDataProvider(dataset)

  misc = {}
  misc['wordtoix'] = checkpoint['wordtoix']
  ixtoword = checkpoint['ixtoword']

  blob = {}  # output blob which we will dump to JSON for visualizing the results
  blob['params'] = params
  blob['checkpoint_params'] = checkpoint_params
  blob['imgblobs'] = []

  # iterate over all images in test set and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  kwparams = {'beam_size': params['beam_size']}
  all_references = []
  all_candidates = []
  for n, img in enumerate(dp.iterImages(split='test', max_images=max_images), 1):
    print('image %d/%d:' % (n, max_images))
    references = [' '.join(x['tokens']) for x in img['sentences']]  # one string per reference sentence
    Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)

    img_blob = {}  # we will build this up
    img_blob['img_path'] = img['local_file_path']
    img_blob['imgid'] = img['imgid']

    # encode the human-provided references
    for gtsent in references:
      print('GT: ' + gtsent)
    img_blob['references'] = [{'text': gtsent} for gtsent in references]

    # now evaluate and encode the top prediction
    top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
    top_prediction = top_predictions[0]  # these are sorted with highest on top
    # ix 0 is the END token, skip that
    candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])
    print('PRED: (%f) %s' % (top_prediction[0], candidate))

    # save for later eval
    all_references.append(references)
    all_candidates.append(candidate)

    # float() keeps the logprob JSON-serializable even for numpy scalars
    img_blob['candidate'] = {'text': candidate, 'logprob': float(top_prediction[0])}
    blob['imgblobs'].append(img_blob)

  # use perl script to eval BLEU score for fair comparison to other research work
  # first write intermediate files
  print('writing intermediate files into eval/')
  with open('eval/output', 'w') as f:
    f.write('\n'.join(all_candidates))
  # the evaluation uses exactly the first 5 references of every image
  for q in range(5):
    with open('eval/reference%d' % q, 'w') as f:
      f.write('\n'.join([x[q] for x in all_references]))
  # invoke the perl script to get BLEU scores
  print('invoking eval/multi-bleu.perl script...')
  owd = os.getcwd()
  os.chdir('eval')
  try:
    os.system('./multi-bleu.perl reference < output')
  finally:
    os.chdir(owd)  # always restore the working directory

  # now also evaluate test split perplexity
  gtppl = eval_split('test', dp, model, checkpoint_params, misc, eval_max_images=max_images)
  print('perplexity of ground truth words based on dictionary of %d words: %f' % (len(ixtoword), gtppl))
  blob['gtppl'] = gtppl

  # dump result struct to file
  print('saving result struct to %s' % (params['result_struct_filename'], ))
  with open(params['result_struct_filename'], 'w') as f:
    json.dump(blob, f)
def main(params):
  """Caption every image listed in <root_path>/tasks.txt, keeping all beam candidates.

  Loads the pickled checkpoint named by params['checkpoint_path'], reads
  pre-computed features from <root_path>/vgg_feats.mat, predicts sentences for
  each feature column, and records every returned candidate (text + logprob)
  per image. Results go to <root_path>/result_struct.json and
  <root_path>/result.html.

  params keys read here: 'checkpoint_path', 'root_path', 'beam_size'.
  """
  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  print('loading checkpoint %s' % (checkpoint_path, ))
  # NOTE(review): unpickling runs arbitrary code; only load checkpoints you trust.
  with open(checkpoint_path, 'rb') as f:
    checkpoint = pickle.load(f)
  checkpoint_params = checkpoint['params']
  model = checkpoint['model']
  ixtoword = checkpoint['ixtoword']

  # output blob which we will dump to JSON for visualizing the results
  blob = {}
  blob['params'] = params
  blob['checkpoint_params'] = checkpoint_params
  blob['imgblobs'] = []

  # load the tasks.txt file: one image path per line
  # TODO FIND EASY WAY TO CALL FILE WITH PROPER root
  root_path = params['root_path']
  with open(os.path.join(root_path, 'tasks.txt'), 'r') as f:
    img_names = f.read().splitlines()

  # load the features for all images
  features_path = os.path.join(root_path, 'vgg_feats.mat')
  features_struct = scipy.io.loadmat(features_path)
  # each column is one image; the original comment describes this as a
  # 4096 x N array - TODO confirm against the feature extractor
  features = features_struct['feats']
  _, N = features.shape

  # iterate over all images and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  kwparams = {'beam_size': params['beam_size']}
  for n in xrange(N):
    print('image %d/%d:' % (n, N))

    # encode the image
    img = {}
    img['feat'] = features[:, n]
    img['local_file_path'] = img_names[n]
    print(img['local_file_path'])

    # perform the work. heavy lifting happens inside
    Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)

    print(Ys)
    # build up the output
    img_blob = {}
    img_blob['img_path'] = img['local_file_path']
    img_blob['candidates'] = []

    top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
    print('Number of name candidates %d' % (len(top_predictions), ))
    # TODO TIME IT. SEEMS PRETTY FAST THOUGH
    for top_prediction in top_predictions:  # these are sorted with highest on top
      words = [ixtoword[ix] for ix in top_prediction[1] if ix > 0]  # ix 0 is the END token, skip that
      candidate = ' '.join(words)
      print(words)
      print('PRED: (%f) %s' % (top_prediction[0], candidate))

      # float() keeps the logprob JSON-serializable even for numpy scalars
      img_blob['candidates'].append({'text': candidate, 'logprob': float(top_prediction[0])})

    # record the image exactly once, after all of its candidates are collected;
    # appending inside the candidate loop duplicated the entry per candidate
    blob['imgblobs'].append(img_blob)

  # dump result struct to file
  save_file = os.path.join(root_path, 'result_struct.json')
  print('writing predictions to %s...' % (save_file, ))
  with open(save_file, 'w') as f:
    json.dump(blob, f)

  # dump output html: one <img> tag followed by all of that image's candidates
  html_parts = []
  for img in blob['imgblobs']:
    html_parts.append('<img src="%s" height="400"><br>' % (img['img_path'], ))
    for cand in img['candidates']:
      html_parts.append('(%f) %s <br><br>' % (cand['logprob'], cand['text']))
  html_file = os.path.join(root_path, 'result.html')
  print('writing html result file to %s...' % (html_file, ))
  with open(html_file, 'w') as f:
    f.write(''.join(html_parts))

if __name__ == "__main__":

  # command-line entry point: one positional checkpoint path plus two options
  parser = argparse.ArgumentParser()
  parser.add_argument(
    'checkpoint_path',
    type=str,
    help='the input checkpoint')
  parser.add_argument(
    '-r', '--root_path',
    default='example_images',
    type=str,
    help='folder with the images, tasks.txt file, and corresponding vgg_feats.mat file')
  parser.add_argument(
    '-b', '--beam_size',
    type=int,
    default=1,
    help='beam size in inference. 1 indicates greedy per-word max procedure. Good value is approx 20 or so, and more = better.')

  args = parser.parse_args()
  params = vars(args)  # main() expects a plain dict, not a Namespace
  # echo the effective settings before doing any work
  print('parsed parameters:')
  print(json.dumps(params, indent=2))
  main(params)
# Example #53
# 0
def main(video_name):

  # load the checkpoint
  checkpoint_path = '/home/t-yuche/neuraltalk/models/flickr8k_cnn_lstm_v1.p'
  print 'loading checkpoint %s' % (checkpoint_path, )
  checkpoint = pickle.load(open(checkpoint_path, 'rb'))
  checkpoint_params = checkpoint['params']
  dataset = checkpoint_params['dataset']
  model = checkpoint['model']
  misc = {}
  misc['wordtoix'] = checkpoint['wordtoix']
  ixtoword = checkpoint['ixtoword']

  # output blob which we will dump to JSON for visualizing the results
  blob = {} 
  blob['checkpoint_params'] = checkpoint_params
  blob['imgblobs'] = []

  # load the tasks.txt file
  root_path = os.path.join('/mnt/frames', video_name)
  all_frames = [os.path.join('/mnt/frames/', video_name, x) for x in os.listdir(os.path.join('/mnt/frames', video_name))]

  # Load unprocessed frames to filenames
  fei_cap_data = load_video_caption('/mnt/tags/fei-caption-keyframe', video_name)
  processed_frames = [x['img_path'] for x in fei_cap_data]
  blob['imgblobs'] = blob['imgblobs'] + fei_cap_data

  img_names = []
  for frame in all_frames:
    if frame not in processed_frames:
      img_names += [frame]

  # load the features for all images
  '''
  features_path = os.path.join(root_path, 'vgg_feats.mat')
  features_struct = scipy.io.loadmat(features_path)
  features = features_struct['feats'] # this is a 4096 x N numpy array of features
  print features_struct['feats'] # this is a 4096 x N numpy array of features
  D,N = features.shape
  '''
  features_path = os.path.join('/mnt/tags/fei-caption-all-pickle', video_name + '.pickle')
  features = pickle.load(open(features_path))
  features = features.T
  #features = features_struct['feats'] # this is a 4096 x N numpy array of features
  D,N = features.shape

  # iterate over all images and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  for n in xrange(N):
    print 'image %d/%d:' % (n, N)

    # encode the image
    img = {}
    img['feat'] = features[:, n]
    img['local_file_path'] = img_names[n]

    # perform the work. heavy lifting happens inside
    kwparams = { 'beam_size' : 20 }
    tic = time.time()
    Ys = BatchGenerator.predict([{'image':img}], model, checkpoint_params, **kwparams)
    toc = time.time()

    print 'image %d/%d: %f' % (n, N, toc-tic)
    # build up the output
    img_blob = {}
    img_blob['img_path'] = img['local_file_path']
    img_blob['rnn_time'] = (toc-tic)
    img_blob['candidate'] = {'text': [], 'logprob': []}
    # encode the top prediction
    top_predictions = Ys[0] # take predictions for the first (and only) image we passed in
    for i in xrange(min(5, len(top_predictions))):
        top_prediction = top_predictions[i]  
        candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0]) # ix 0 is the END token, skip that
        #print '%f PRED: (%f) %s' % (img_blob['rnn_time'], top_prediction[0], candidate)
        img_blob['candidate']['text'] += [candidate]
        img_blob['candidate']['logprob'] += [top_prediction[0]]
    '''
    top_prediction = top_predictions[0] # these are sorted with highest on top
    candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0]) # ix 0 is the END token, skip that
    print 'PRED: (%f) %s' % (top_prediction[0], candidate)
    '''    
    #img_blob['candidate'] = {'text': candidate, 'logprob': top_prediction[0]}    
    blob['imgblobs'].append(img_blob)

  # dump result struct to file
  #save_file = os.path.join(root_path, 'result_struct.json')
  save_file = os.path.join('/mnt/tags/fei-caption-all', video_name + '_5_caption.json')
  print 'writing predictions to %s...' % (save_file, )
  json.dump(blob, open(save_file, 'w'))

  # dump output html
  '''