Exemplo n.º 1
0
def loadfeatsandlabels(args):
    """Load visual features, class labels and the id -> feature-index map.

    Attributes read from ``args``:
      visfeats   -- nothing is loaded unless this equals 1.
      imgidlabel -- path of a text file. On every line the first
                    whitespace-separated token, minus its first character, is
                    the feature index; the second token, minus its first and
                    last characters, is the id used as the dictionary key.
      feats      -- list of feature files, or None for "no features". Each
                    file is handed to ``loadArbitraryFeatures`` through
                    ``params['feat_file']``.
      featNames  -- optional names parallel to ``feats``; ``'feat_<i>'`` is
                    used when it is None.
      clslabels  -- list of text files; each contributes the list of its lines.

    Returns:
      ``(f_list, cLabls, featN, cocoIdtoFeatIdx)``. When ``args.visfeats`` is
      not 1 the result is four empty lists.
    """
    if args.visfeats != 1:
        return [], [], [], []

    cocoIdtoFeatIdx = {}
    with open(args.imgidlabel, 'r') as id_file:
        for imgL in id_file.read().splitlines():
            tokens = imgL.split()
            cocoIdtoFeatIdx[int(tokens[1][1:-1])] = int(tokens[0][1:])

    # Now load the features. The project loader is imported lazily so that a
    # run without feature files does not need the imagernn package at all.
    f_list = []
    featN = []
    if args.feats is not None:
        from imagernn.data_provider import loadArbitraryFeatures
        # The same params dict is reused for every file, only 'feat_file'
        # changes between calls.
        params = {}
        for i, f in enumerate(args.feats):
            params['feat_file'] = f
            feat, _, _, _ = loadArbitraryFeatures(params)
            f_list.append(feat)
            if args.featNames is not None:
                featN.append(args.featNames[i])
            else:
                featN.append('feat_' + str(i))

    cLabls = []
    for label_path in args.clslabels:
        with open(label_path, 'r') as label_file:
            cLabls.append(label_file.read().splitlines())
    return f_list, cLabls, featN, cocoIdtoFeatIdx
Exemplo n.º 2
0
def main(params):
  """Generate captions for a list of images and write JSON and HTML results.

  Keys read from ``params``: 'multi_model', 'checkpoint_path' (a single path,
  or a list of paths when 'multi_model' is non-zero), 'softmax_smooth_factor',
  'softmax_propogate', 'beam_size', 'nmodels', 'root_path', 'imgList' and
  'fname_append'. ``params`` is also passed on to ``loadArbitraryFeatures``.

  Outputs, both written into ``params['root_path']``:
    result_struct_<fname_append>.json -- params, checkpoint params and one
        entry per image with the best candidate and the remaining candidates.
    result_<fname_append>.html -- one <img> tag plus the best caption per image.

  NOTE(review): the multi-model branch looks incomplete. It leaves
  ``checkpoint_params`` as a list (later code calls ``.get`` on it) and never
  assigns ``ixtoword``, so the code after the branch would fail for
  'multi_model' != 0. Confirm before relying on that mode.
  NOTE(security): checkpoints are read with ``pickle.load``; only load files
  from a trusted source.
  """

  # load the checkpoint
  if params['multi_model'] == 0:
    checkpoint_path = params['checkpoint_path']
    print 'loading checkpoint %s' % (checkpoint_path, )
    checkpoint = pickle.load(open(checkpoint_path, 'rb'))
    checkpoint_params = checkpoint['params']
    model_npy = checkpoint['model']
    # The theano flag is forced on regardless of what the checkpoint stored.
    checkpoint_params['use_theano'] = 1
    # Checkpoints without an explicit feature size get 4096.
    if 'image_feat_size' not in  checkpoint_params:
        checkpoint_params['image_feat_size'] = 4096 

    # Two checkpoint layouts are accepted: vocabulary maps stored under
    # 'misc', or stored directly at the top level of the checkpoint.
    if 'misc' in checkpoint:
      misc = checkpoint['misc']
      ixtoword = misc['ixtoword']
    else:
      misc = {}
      ixtoword = checkpoint['ixtoword']
      misc['wordtoix'] = checkpoint['wordtoix']

    # Command-line softmax settings override the values saved in the checkpoint.
    checkpoint_params['softmax_smooth_factor'] = params['softmax_smooth_factor']
    checkpoint_params['softmax_propogate'] = params['softmax_propogate']
    if checkpoint_params.get('class_out_factoring',0) == 1:
      # Build an (nClasses, 2) int32 table: the row given by column 0 of
      # misc['ixtoclsinfo'] receives columns 1 and 2 of that entry.
      checkpoint_params['ixtoclsinfo'] = np.zeros((checkpoint_params['nClasses'],2),dtype=np.int32)
      ixtoclsinfo = misc['ixtoclsinfo']
      checkpoint_params['ixtoclsinfo'][ixtoclsinfo[:,0]] = ixtoclsinfo[:,1:3]

    # Any scheduled-sampling mode stored in the checkpoint is cleared here.
    if checkpoint_params.get('sched_sampling_mode',None) !=None:
        checkpoint_params['sched_sampling_mode'] = None

    BatchGenerator = decodeGenerator(checkpoint_params)
    # Compile and init the theano predictor 
    BatchGenerator.prepPredictor(model_npy, checkpoint_params, params['beam_size'])
    model = BatchGenerator.model_th
  else:
    # Multi-model mode: one generator, parameter set and model per checkpoint.
    BatchGenerator = []
    model_npy = []
    modelTh = []
    checkpoint_params = []
    for i,checkpoint_path in enumerate(params['checkpoint_path']):
        checkpoint = pickle.load(open(checkpoint_path, 'rb'))
        model_npy.append(checkpoint['model'])
        checkpoint_params.append(checkpoint['params'])
        checkpoint_params[i]['use_theano'] = 1
        BatchGenerator.append(decodeGenerator(checkpoint_params[i]))
        zipp(model_npy[i],BatchGenerator[i].model_th)
        modelTh.append(BatchGenerator[i].model_th)
        # Every model gets the same combination weight, 1/nmodels.
        modelTh[i]['comb_weight'] = 1.0/params['nmodels']

    # The first generator drives prediction for all models.
    BatchGenerator[0].prepMultiPredictor(modelTh,checkpoint_params,params['beam_size'],params['nmodels'])

  # output blob which we will dump to JSON for visualizing the results
  blob = {} 
  blob['params'] = params
  blob['checkpoint_params'] = copy(checkpoint_params)
  # NOTE(review): checkpoint_params is a list in multi-model mode, so this
  # .get() call (and the ones further down) raises AttributeError there.
  if checkpoint_params.get('class_out_factoring',0) == 1:
    # Dropped from the copy stored in the blob; presumably because the numpy
    # table cannot be dumped to JSON -- TODO confirm.
    blob['checkpoint_params'].pop('ixtoclsinfo')
  blob['imgblobs'] = []

  # load the image list file and set up feature loading
  root_path = params['root_path']
  img_names_list = open(params['imgList'], 'r').read().splitlines()
  auxidxes = []

  # The layout is decided from the FIRST line only and then applied to all
  # lines: "name,idx,auxidx", "name,idx", or just "name" (index = line number).
  if len(img_names_list[0].rsplit(',')) > 2:
    img_names = [x.rsplit(',')[0] for x in img_names_list]
    idxes = [int(x.rsplit(',')[1]) for x in img_names_list]
    auxidxes = [int(x.rsplit(',')[2]) for x in img_names_list]
  elif len(img_names_list[0].rsplit(',')) > 1:
    img_names = [x.rsplit(',')[0] for x in img_names_list]
    idxes = [int(x.rsplit(',')[1]) for x in img_names_list]
  else:
    img_names = img_names_list
    idxes = xrange(len(img_names_list))

  #if checkpoint_params.get('en_aux_inp',0) and (params.get('aux_inp_file','None') == 'None'):
  #  raise ValueError('ERROR: please specify auxillary input feature using --aux_inp_file')
  #  return
  # load the features for all images
  # When 'swap_aux' is anything other than 0 (including missing) and aux
  # indices exist, the two index lists are passed in swapped roles.
  if checkpoint_params.get('swap_aux') == 0 or auxidxes == []:
    features, aux_inp = loadArbitraryFeatures(params, idxes, auxidxes=auxidxes)
  else:
    features, aux_inp = loadArbitraryFeatures(params, auxidxes, auxidxes=idxes)

  N = len(img_names) 

  # iterate over all images and predict sentences
  # In multi-model mode `checkpoint` is the LAST checkpoint that was loaded.
  print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint['epoch'], \
    checkpoint['perplexity']))

  kwparams = { 'beam_size' : params['beam_size'] }

  jsonFname = 'result_struct_%s.json' % (params['fname_append'] ) 
  save_file = os.path.join(root_path, jsonFname)

  for n in xrange(N):
    print 'image %d/%d:' % (n, N)

    # encode the image
    if params['multi_model'] == 0:
        # D and NN are not used afterwards.
        D,NN = features.shape
        img = {}
        # Column n of the feature matrix belongs to image n.
        img['feat'] = features[:, n]
        if checkpoint_params.get('en_aux_inp',0):
            img['aux_inp'] = aux_inp[:, n]
        img['local_file_path'] =img_names[n]
        # perform the work. heavy lifting happens inside
        Ys = BatchGenerator.predict([{'image':img}], model, checkpoint_params, **kwparams)
    else:
        # One input dict per model, each taken from that model's own features.
        kwparams['nmodels'] = params['nmodels']
        batch = []
        for i in xrange(params['nmodels']):
            img = {}
            img['feat'] = features[i][:, n]
            if checkpoint_params[i].get('en_aux_inp',0):
                img['aux_inp'] = aux_inp[i][:, n]
            img['local_file_path'] =img_names[n]
            batch.append({'image':img})
        Ys = BatchGenerator[0].predictMulti(batch, checkpoint_params, **kwparams)

    # build up the output
    img_blob = {}
    img_blob['img_path'] = img['local_file_path']

    # encode the top prediction
    top_predictions = Ys[0] # take predictions for the first (and only) image we passed in
    top_prediction = top_predictions[0] # these are sorted with highest on top
    # Each prediction is indexed as [0] = score, [1] = word indices.
    if checkpoint_params.get('reverse_sentence',0) == 0:
        candidate = ' '.join([ixtoword[int(ix)] for ix in top_prediction[1] if ix > 0]) # ix 0 is the END token, skip that
    else:
        candidate = ' '.join([ixtoword[int(ix)] for ix in reversed(top_prediction[1]) if ix > 0]) # ix 0 is the END token, skip that
    print 'PRED: (%f) %s' % (float(top_prediction[0]), candidate)
    img_blob['candidate'] = {'text': candidate, 'logprob': float(top_prediction[0])}    

    # Code to save all the other candidates 
    # NOTE(review): unlike the top prediction, these are never reversed when
    # 'reverse_sentence' is set -- confirm whether that is intended.
    candlist = []
    for ci in xrange(len(top_predictions)-1):
        prediction = top_predictions[ci+1] # these are sorted with highest on top
        candidate = ' '.join([ixtoword[int(ix)] for ix in prediction[1] if ix > 0]) # ix 0 is the END token, skip that
        candlist.append({'text': candidate, 'logprob': float(prediction[0])})

    img_blob['candidatelist'] = candlist
    blob['imgblobs'].append(img_blob)
    # Intermediate dump of everything so far at n = 1, 5001, 10001, ...
    if (n%5000) == 1:
        print 'writing predictions to %s...' % (save_file, )
        json.dump(blob, open(save_file, 'w'))

  # dump result struct to file
  print 'writing predictions to %s...' % (save_file, )
  json.dump(blob, open(save_file, 'w'))

  # dump output html
  # Paths and captions are inserted into the markup without HTML escaping.
  html = ''
  for img in blob['imgblobs']:
    html += '<img src="%s" height="400"><br>' % (img['img_path'], )
    html += '(%f) %s <br><br>' % (img['candidate']['logprob'], img['candidate']['text'])

  html_file = 'result_%s.html' % (params['fname_append']) 
  html_file = os.path.join(root_path, html_file)
  print 'writing html result file to %s...' % (html_file, )
  open(html_file, 'w').write(html)
Exemplo n.º 3
0
def main(params):
    """Re-score caption candidates with an evaluator model and save the result.

    Keys read from ``params``: 'checkpoint_path', 'candDb' (path of a candidate
    JSON, or None to build it with ``mergeRes(params)``), 'dataset' ('coco',
    'msr-vtt' or 'lsmdc'), 'lblF', 'poolmethod', 'use_label_file', 'labels',
    'featfromlbl', 'uselabel', 'eval_model', 'aux_inp_file', 'fname_append' and
    'root_path'. Several of them are overwritten in place with values taken
    from the checkpoint when they are None.

    Each candidate receives a 'logprob' score; per image the best candidate is
    stored under 'candidate' and the descending score order under 'candsort'.
    The updated database is written to
    ``<root_path>/<eval_model>_reranked_<fname_append>.json``.

    NOTE(review): for a 'dataset' value other than the three handled ones,
    ``features``, ``feat_idx`` and ``objId2Imgid`` are never assigned and the
    code below fails with NameError.
    NOTE(security): the checkpoint is read with ``pickle.load``; only load
    files from a trusted source.
    """
    checkpoint_path = params['checkpoint_path']
    print 'loading checkpoint %s' % (checkpoint_path, )
    checkpoint = pickle.load(open(checkpoint_path, 'rb'))
    cp_params = checkpoint['params']
    model_npy = checkpoint['model']

    # Load the candidates db generated from rnn's
    if params['candDb'] != None:
        candDb = json.load(open(params['candDb'], 'r'))
    else:
        candDb = mergeRes(params)

    # The vocabulary map is either at the checkpoint top level or under 'misc'.
    wordtoix = checkpoint[
        'wordtoix'] if 'wordtoix' in checkpoint else checkpoint['misc'][
            'wordtoix']

    # Read labels and build cocoid to imgid Map
    if params['dataset'] == 'coco':
        lbls = open(params['lblF'], 'r').read().splitlines()
        objId2Imgid = {}
        for lb in lbls:
            # Key: second token without its first and last character, as a
            # normalised integer string. Value: first token without its first
            # character, as an int.
            objId2Imgid[str(int(lb.split()[1][1:-1]))] = int(lb.split()[0][1:])
        features, aux_inp, feat_idx, aux_idx = loadArbitraryFeatures(
            params, Ellipsis)

    elif params['dataset'] == 'msr-vtt':
        img_names_list = open(params['lblF'], 'r').read().splitlines()
        auxidxes = []
        img_names = [x.rsplit(',')[0] for x in img_names_list]
        # Key: file name up to the first '.', value: line number in the list.
        objId2Imgid = {imn.split('.')[0]: i for i, imn in enumerate(img_names)}
        # Everything after the first comma is parsed as a Python literal. The
        # FIRST line decides the layout: a tuple means (idx, auxidx), any
        # other literal is used directly as the index.
        if len(img_names_list[0].split(',', 1)) > 1:
            if type(
                    ast.literal_eval(img_names_list[0].split(
                        ',', 1)[1].strip())) == tuple:
                idxes = [
                    ast.literal_eval(x.split(',', 1)[1].strip())[0]
                    for x in img_names_list
                ]
                auxidxes = [
                    ast.literal_eval(x.split(',', 1)[1].strip())[1]
                    for x in img_names_list
                ]
            else:
                idxes = [
                    ast.literal_eval(x.split(',', 1)[1].strip())
                    for x in img_names_list
                ]
        else:
            idxes = xrange(len(img_names_list))
        # Fall back to the checkpoint's pooling method when none was given.
        params['poolmethod'] = cp_params['poolmethod'] if params[
            'poolmethod'] == None else params['poolmethod']
        features, aux_inp, feat_idx, aux_idx = loadArbitraryFeatures(
            params, idxes, auxidxes=auxidxes)

    elif params['dataset'] == 'lsmdc':
        if params['use_label_file'] == 1:
            # Each unset option falls back to the value stored in the checkpoint.
            params['poolmethod'] = cp_params['poolmethod'] if params[
                'poolmethod'] == None else params['poolmethod']
            params['labels'] = cp_params['labels'] if params[
                'labels'] == None else params['labels']
            params['featfromlbl'] = cp_params['featfromlbl'] if params[
                'featfromlbl'] == None else params['featfromlbl']
            params['uselabel'] = cp_params['uselabel'] if params[
                'uselabel'] == None else params['uselabel']
        else:
            params['uselabel'] = 0
        # Lines are "name,idx"; the key is the file's base name up to the
        # first '.', the value is the line number in the list.
        img_names_list = open(params['lblF'], 'r').read().splitlines()
        img_names = [x.rsplit(',')[0] for x in img_names_list]
        idxes = [int(x.rsplit(',')[1]) for x in img_names_list]
        auxidxes = []
        objId2Imgid = {
            osp.basename(imn).split('.')[0]: i
            for i, imn in enumerate(img_names)
        }

        #import pdb;pdb.set_trace()
        features, aux_inp, feat_idx, aux_idx = loadArbitraryFeatures(
            params, idxes, auxidxes=auxidxes)

    # Bit 0 of 'use_encoder_for' enables a recurrent encoder over the image
    # features; its weights are taken from the checkpoint via zipp().
    if cp_params.get('use_encoder_for', 0) & 1:
        imgFeatEncoder = RecurrentFeatEncoder(cp_params['image_feat_size'],
                                              cp_params['sent_encoding_size'],
                                              cp_params,
                                              mdl_prefix='img_enc_',
                                              features=features.T)
        zipp(model_npy, imgFeatEncoder.model_th)
        (imgenc_use_dropout, imgFeatEnc_inp, xI,
         updatesLSTMImgFeat) = imgFeatEncoder.build_model(
             imgFeatEncoder.model_th, cp_params)
    else:
        xI = None
        imgFeatEnc_inp = []

    # The command-line evaluator type is only used when the checkpoint does
    # not name one itself.
    if 'eval_model' not in cp_params:
        cp_params['eval_model'] = params['eval_model']
        print 'Using evaluator module: ', cp_params['eval_model']

    #find the number of candidates per image and max sentence len
    batch_size = 0
    maxlen = 0
    for i, img in enumerate(candDb['imgblobs']):
        for ids, cand in enumerate(img['candidatelist']):
            tks = cand['text'].split(' ')
            # Also tokenize the candidates
            candDb['imgblobs'][i]['candidatelist'][ids]['tokens'] = tks
            if len(tks) > maxlen:
                maxlen = len(tks)
        if batch_size < len(img['candidatelist']):
            batch_size = len(img['candidatelist'])

    # Get all images to this batch size!
    # HACK!!
    # The maxlen measured above is discarded; a fixed value of 24 is used.
    maxlen = 24
    cp_params['maxlen'] = maxlen

    cp_params['batch_size'] = batch_size
    print maxlen

    # This initializes the model parameters and does matrix initializations
    cp_params['mode'] = 'predict'
    evalModel = decodeEvaluator(cp_params)
    model = evalModel.model_th

    # Define the computational graph for relating the input image features and word indices to the
    # log probability cost function.
    (use_dropout, inp_list_eval, f_pred_fns, cost, predTh,
     modelUpd) = evalModel.build_model(model,
                                       cp_params,
                                       xI=xI,
                                       prior_inp_list=imgFeatEnc_inp)

    # Encoder inputs come first, evaluator inputs after them; the per-batch
    # input list in the main loop is assembled in the same order.
    inp_list = imgFeatEnc_inp + inp_list_eval

    # Initialize the model parameters from the checkpoint file. For the 'cnn'
    # evaluator the model returned by build_model is the one that is filled.
    model = modelUpd if cp_params['eval_model'] == 'cnn' else model
    zipp(model_npy, model)
    print("\nPredicting using model %s, run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint_path, checkpoint['epoch'], \
      checkpoint['perplexity']))

    # np.arange(1) is the single-element array [0]; other evaluators get [].
    pos_samp = np.arange(
        1, dtype=np.int32) if cp_params['eval_model'] == 'cnn' else []

    # Disable dropout for prediction
    use_dropout.set_value(0.)
    if cp_params.get('use_encoder_for', 0) & 1:
        imgenc_use_dropout.set_value(0.)
    N = len(candDb['imgblobs'])
    # stats[k] counts how often the candidate at list position k scored best.
    stats = np.zeros((batch_size))
    #################### Main Loop ############################################
    for i, img in enumerate(candDb['imgblobs']):
        # fetch a batch of data
        print 'image %d/%d  \r' % (i, N),
        batch = []
        cbatch_len = len(img['candidatelist'])
        # Object id: the part of the file's base name after the last '_' and
        # before the first '.'; for coco it is normalised through int().
        objid = osp.basename(img['img_path']).split('_')[-1].split('.')[0]
        if params['dataset'] == 'coco':
            objid = str(int(objid))

        for s in img['candidatelist']:
            batch.append({
                'sentence': s,
                'image': {
                    'feat': features[:, feat_idx[objId2Imgid[objid]]].T,
                    'img_idx': feat_idx[objId2Imgid[objid]]
                }
            })
            # The auxiliary input is stored on the batch entry itself, not
            # inside its 'image' dict.
            if params['aux_inp_file'] != None:
                batch[-1]['aux_inp'] = aux_inp[:,
                                               aux_idx[objId2Imgid[objid]]].T

        # Only the 'cnn' evaluator gets the batch padded to batch_size, by
        # repeating the last candidate; padded entries carry no 'image' key.
        if cbatch_len < batch_size and (cp_params['eval_model'] == 'cnn'):
            for z in xrange(batch_size - cbatch_len):
                batch.append({'sentence': img['candidatelist'][-1]})

        enc_inp_list = prepare_seq_features(
            batch,
            use_enc_for=cp_params.get('use_encoder_for', 0),
            use_shared_mem=cp_params.get('use_shared_mem_enc', 0),
            pos_samp=pos_samp)
        eval_inp_list, lenS = prepare_data(batch,
                                           wordtoix,
                                           maxlen=maxlen,
                                           pos_samp=pos_samp,
                                           prep_for=cp_params['eval_model'],
                                           use_enc_for=cp_params.get(
                                               'use_encoder_for', 0))

        real_inp_list = enc_inp_list + eval_inp_list

        #import pdb;pdb.set_trace()
        # Score all candidates of this image and drop the scores of padding.
        # NOTE(review): if the prediction function returns a single value,
        # np.squeeze gives a 0-d array and the slice below fails -- confirm
        # whether one-candidate batches can occur.
        scrs = np.squeeze(f_pred_fns[1](*real_inp_list))
        scrs = scrs[:cbatch_len]  # + scrs[:,cbatch_len:].sum()/cbatch_len
        for si, s in enumerate(img['candidatelist']):
            candDb['imgblobs'][i]['candidatelist'][si]['logprob'] = float(
                scrs[si])
            # The temporary token list is removed again before saving.
            candDb['imgblobs'][i]['candidatelist'][si].pop('tokens')
        bestcand = scrs.argmax()
        stats[bestcand] += 1.0
        candDb['imgblobs'][i]['candidate'] = candDb['imgblobs'][i][
            'candidatelist'][bestcand]
        # Candidate positions ordered from highest to lowest score.
        srtidx = np.argsort(scrs)[::-1]
        candDb['imgblobs'][i]['candsort'] = list(srtidx)

    print ""
    jsonFname = '%s_reranked_%s.json' % (cp_params['eval_model'],
                                         params['fname_append'])
    save_file = os.path.join(params['root_path'], jsonFname)
    json.dump(candDb, open(save_file, 'w'))
    print 'Written to file %s' % save_file
    print 'Final stats are:'
    # Percentage of images for which each candidate position scored best.
    print stats * 100.0 / N
Exemplo n.º 4
0
def main(params):
    """Generate captions for a list of inputs and write them to a JSON file.

    Keys read from ``params``: 'checkpoint_path', 'gen_model', 'dobeamsearch',
    'use_label_file', 'poolmethod', 'labels', 'featfromlbl', 'uselabel',
    'softmax_smooth_factor', 'softmax_propogate', 'computelogprob', 'greedy',
    'root_path', 'imgList', 'word_freq_w', 'word_freq_sc', 'beam_size',
    'fname_append' and 'rescoreByLen'. Some of them are overwritten in place
    with checkpoint values when they are None.

    Output: ``<root_path>/result_struct_<fname_append>.json`` holding params,
    checkpoint params and, per input, the best candidate plus the remaining
    candidates.

    NOTE(security): checkpoints are read with ``pickle.load``; only load files
    from a trusted source.
    """

    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print 'loading checkpoint %s' % (checkpoint_path, )
    checkpoint = pickle.load(open(checkpoint_path, 'rb'))
    cp_params = checkpoint['params']

    # Generator weights come from this checkpoint ('model', else 'modelGen')
    # unless a separate generator checkpoint is given. In that case a missing
    # 'model' entry silently yields an empty dict.
    if params['gen_model'] == None:
        model_npy = checkpoint[
            'model'] if 'model' in checkpoint else checkpoint['modelGen']
    else:
        gen_cp = pickle.load(open(params['gen_model'], 'rb'))
        model_npy = gen_cp.get('model', {})

    cp_params['use_theano'] = 1
    if params['dobeamsearch']:
        cp_params['advers_gen'] = 0

    if params['use_label_file'] == 1:
        # Each unset option falls back to the value stored in the checkpoint.
        params['poolmethod'] = cp_params['poolmethod'] if params[
            'poolmethod'] == None else params['poolmethod']
        params['labels'] = cp_params['labels'] if params[
            'labels'] == None else params['labels']
        params['featfromlbl'] = cp_params['featfromlbl'] if params[
            'featfromlbl'] == None else params['featfromlbl']
        params['uselabel'] = cp_params['uselabel'] if params[
            'uselabel'] == None else params['uselabel']
    else:
        params['uselabel'] = 0
    print 'parsed parameters:'
    print json.dumps(params, indent=2)

    # Checkpoints without an explicit feature size get 4096.
    if 'image_feat_size' not in cp_params:
        cp_params['image_feat_size'] = 4096

    # Two checkpoint layouts are accepted: vocabulary maps stored under
    # 'misc', or stored directly at the top level of the checkpoint.
    if 'misc' in checkpoint:
        misc = checkpoint['misc']
        ixtoword = misc['ixtoword']
    else:
        misc = {}
        ixtoword = checkpoint['ixtoword']
        misc['wordtoix'] = checkpoint['wordtoix']

    # Command-line decoding settings override the values saved in the checkpoint.
    cp_params['softmax_smooth_factor'] = params['softmax_smooth_factor']
    cp_params['softmax_propogate'] = params['softmax_propogate']
    cp_params['computelogprob'] = params['computelogprob']
    cp_params['greedy'] = params['greedy']
    cp_params['gen_input_noise'] = 0

    # Any scheduled-sampling mode stored in the checkpoint is cleared here.
    if cp_params.get('sched_sampling_mode', None) != None:
        cp_params['sched_sampling_mode'] = None

    # load the image list file and set up feature loading
    root_path = params['root_path']

    img_names_list = open(params['imgList'], 'r').read().splitlines()
    auxidxes = []

    img_names = [x.rsplit(',')[0] for x in img_names_list]

    # Everything after the first comma is parsed as a Python literal. The
    # FIRST line decides the layout: a tuple means (idx, auxidx), any other
    # literal is used directly as the index; with no comma the line number
    # is the index.
    if len(img_names_list[0].split(',', 1)) > 1:
        if type(ast.literal_eval(img_names_list[0].split(
                ',', 1)[1].strip())) == tuple:
            idxes = [
                ast.literal_eval(x.split(',', 1)[1].strip())[0]
                for x in img_names_list
            ]
            auxidxes = [
                ast.literal_eval(x.split(',', 1)[1].strip())[1]
                for x in img_names_list
            ]
        else:
            idxes = [
                ast.literal_eval(x.split(',', 1)[1].strip())
                for x in img_names_list
            ]
    else:
        idxes = xrange(len(img_names_list))

    # When 'swap_aux' is anything other than 0 (including missing) and aux
    # indices exist, the two index lists are passed in swapped roles.
    if cp_params.get('swap_aux') == 0 or auxidxes == []:
        features, aux_inp, feat_idx, aux_idx = loadArbitraryFeatures(
            params, idxes, auxidxes=auxidxes)
    else:
        features, aux_inp, feat_idx, aux_idx = loadArbitraryFeatures(
            params, auxidxes, auxidxes=idxes)

    ##-------------------------------- Setup the models --------------------------###########
    # Bit 0 of 'use_encoder_for' enables a recurrent encoder over the image
    # features, bit 1 one over the auxiliary input. Encoder weights are taken
    # from the checkpoint via zipp().
    if cp_params.get('use_encoder_for', 0) & 1:
        imgFeatEncoder = RecurrentFeatEncoder(cp_params['image_feat_size'],
                                              cp_params['word_encoding_size'],
                                              cp_params,
                                              mdl_prefix='img_enc_',
                                              features=features.T)

        zipp(model_npy, imgFeatEncoder.model_th)
        (imgenc_use_dropout, imgFeatEnc_inp, xI,
         updatesLSTMImgFeat) = imgFeatEncoder.build_model(
             imgFeatEncoder.model_th, cp_params)
    else:
        xI = None
        imgFeatEnc_inp = []

    if cp_params.get('use_encoder_for', 0) & 2:
        auxFeatEncoder = RecurrentFeatEncoder(cp_params['aux_inp_size'],
                                              cp_params['image_encoding_size'],
                                              cp_params,
                                              mdl_prefix='aux_enc_',
                                              features=aux_inp.T)
        zipp(model_npy, auxFeatEncoder.model_th)
        (auxenc_use_dropout, auxFeatEnc_inp, xAux,
         updatesLSTMAuxFeat) = auxFeatEncoder.build_model(
             auxFeatEncoder.model_th, cp_params)
    else:
        auxFeatEnc_inp = []
        xAux = None

    # Testing to see if diversity can be achieved by weighing words
    if params['word_freq_w'] != None:
        w_freq = json.load(open(params['word_freq_w'], 'r'))
        w_logw = np.zeros(len(misc['wordtoix']), dtype=np.float32)
        for w in w_freq:
            if w in misc['wordtoix']:
                w_logw[misc['wordtoix'][w]] = w_freq[w]
        # Normalise by the smallest value among indices >= 1, give index 0
        # the largest value, then turn into a scaled negative log weight.
        # NOTE(review): a vocabulary word missing from the frequency file
        # keeps the value 0, which would make this a division by zero and a
        # log of zero -- confirm the file covers the whole vocabulary.
        w_logw = w_logw / w_logw[1:].min()
        w_logw[0] = w_logw.max()
        w_logw = -params['word_freq_sc'] * np.log(w_logw)
    else:
        w_logw = None

    BatchGenerator = decodeGenerator(cp_params)
    # Compile and init the theano predictor
    BatchGenerator.prepPredictor(model_npy,
                                 cp_params,
                                 params['beam_size'],
                                 xI,
                                 xAux,
                                 imgFeatEnc_inp + auxFeatEnc_inp,
                                 per_word_logweight=w_logw)
    # `model` is not used again in this function.
    model = BatchGenerator.model_th
    if params['greedy']:
        BatchGenerator.usegumbel.set_value(0)

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = copy(cp_params)
    if cp_params.get('class_out_factoring', 0) == 1:
        blob['checkpoint_params'].pop('ixtoclsinfo')
    blob['imgblobs'] = []

    N = len(img_names)

    # iterate over all images and predict sentences
    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint['epoch'], \
      checkpoint['perplexity']))

    # kwparams is not used afterwards.
    kwparams = {}

    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)

    for n in xrange(N):
        print 'image %d/%d:' % (n, N)

        # encode the image
        # D and NN are not used afterwards.
        D, NN = features.shape
        img = {}
        # feat_idx maps the n-th list entry to its column in the feature matrix.
        img['feat'] = features[:, feat_idx[n]].T
        img['img_idx'] = feat_idx[n]
        if cp_params.get('en_aux_inp', 0):
            # Without auxiliary features a zero vector of 'aux_inp_size' is used.
            # NOTE(review): aux_inp is CALLED here, while the encoder setup
            # above uses aux_inp.T -- confirm whether indexing was intended.
            # Also, `aux_inp != []` may compare element-wise if aux_inp is a
            # numpy array -- TODO confirm its type.
            img['aux_inp'] = aux_inp(
                aux_idx[n]) if aux_inp != [] else np.zeros(
                    cp_params['aux_inp_size'], dtype=np.float32)
            img['aux_idx'] = aux_idx[n] if aux_inp != [] else []
        img['local_file_path'] = img_names[n]
        # perform the work. heavy lifting happens inside
        enc_inp_list = prepare_seq_features(
            [{
                'image': img
            }],
            use_enc_for=cp_params.get('use_encoder_for', 0),
            use_shared_mem=cp_params.get('use_shared_mem_enc', 0))
        #import pdb;pdb.set_trace()
        Ys, Ax = BatchGenerator.predict([{
            'image': img
        }],
                                        cp_params,
                                        ext_inp=enc_inp_list)

        # build up the output
        img_blob = {}
        img_blob['img_path'] = img['local_file_path']

        # encode the top prediction
        top_predictions = Ys[0] if params[
            'rescoreByLen'] == 0 else rescoreProbByLen(
                Ys[0]
            )  # take predictions for the first (and only) image we passed in
        # Sort explicitly by score (element 0), best first.
        top_predictions = sorted(top_predictions,
                                 key=lambda aa: aa[0],
                                 reverse=True)

        top_prediction = top_predictions[
            0]  # these are sorted with highest on top
        if cp_params.get('reverse_sentence', 0) == 0:
            candidate = ' '.join([
                ixtoword[int(ix)] for ix in top_prediction[1] if ix > 0
            ])  # ix 0 is the END token, skip that
        else:
            candidate = ' '.join([
                ixtoword[int(ix)] for ix in reversed(top_prediction[1])
                if ix > 0
            ])  # ix 0 is the END token, skip that
        #if candidate == '':
        #    import pdb;pdb.set_trace()
        if params['rescoreByLen'] == 0:
            print 'PRED: (%f) %s' % (float(top_prediction[0]), candidate)
        else:
            # With rescoring a third element is printed as well; presumably
            # rescoreProbByLen appends it -- TODO confirm.
            print 'PRED: (%f, %f) %s' % (float(
                top_prediction[0]), float(top_prediction[2]), candidate)
        img_blob['candidate'] = {
            'text': candidate,
            'logprob': float(top_prediction[0])
        }

        # Code to save all the other candidates
        # NOTE(review): unlike the top prediction, these are never reversed
        # when 'reverse_sentence' is set -- confirm whether that is intended.
        candlist = []
        for ci in xrange(len(top_predictions) - 1):
            prediction = top_predictions[
                ci + 1]  # these are sorted with highest on top
            candidate = ' '.join([
                ixtoword[int(ix)] for ix in prediction[1] if ix > 0
            ])  # ix 0 is the END token, skip that
            candlist.append({
                'text': candidate,
                'logprob': float(prediction[0])
            })

        img_blob['candidatelist'] = candlist
        blob['imgblobs'].append(img_blob)
        # Intermediate dump of everything so far at n = 1, 5001, 10001, ...
        if (n % 5000) == 1:
            print 'writing predictions to %s...' % (save_file, )
            json.dump(blob, open(save_file, 'w'))

    # dump result struct to file
    print 'writing predictions to %s...' % (save_file, )
    json.dump(blob, open(save_file, 'w'))
def main(params):
  checkpoint_path = params['checkpoint_path']
  print 'loading checkpoint %s' % (checkpoint_path, )
  checkpoint = pickle.load(open(checkpoint_path, 'rb'))
  checkpoint_params = checkpoint['params']
  model_npy = checkpoint['model']
  
  # Load the candidates db generated from rnn's
  candDb = json.load(open(params['candDb'],'r'))
  wordtoix = checkpoint['wordtoix']

  #find the number of candidates per image and max sentence len
  batch_size = 0
  maxlen = 0
  for i,img in enumerate(candDb['imgblobs']):
    for ids,cand in enumerate(img['candidatelist']):
        tks = cand['text'].split(' ')
        # Also tokenize the candidates
        candDb['imgblobs'][i]['candidatelist'][ids]['tokens'] = tks
        if len(tks) > maxlen:
            maxlen = len(tks)
    if batch_size < len(img['candidatelist']):
        batch_size = len(img['candidatelist'])

  # Get all images to this batch size!
  # HACK!!
  maxlen = 24
  checkpoint_params['maxlen'] = maxlen
 
  checkpoint_params['batch_size'] = batch_size
  print maxlen

  # go over all training sentences and find the vocabulary we want to use, i.e. the words that occur
  # at least word_count_threshold number of times
  
  # This initializes the model parameters and does matrix initializations  
  checkpoint_params['mode'] = 'predict' 
  evalModel = decodeEvaluator(checkpoint_params)
  model = evalModel.model_th
  
  # Define the computational graph for relating the input image features and word indices to the
  # log probability cost funtion. 
  (use_dropout, inp_list,
     f_pred_fns, cost, predTh, model) = evalModel.build_model(model, checkpoint_params)

  # Add the regularization cost. Since this is specific to trainig and doesn't get included when we 
  # evaluate the cost on test or validation data, we leave it here outside the model definition

  # Now let's build a gradient computation graph and rmsprop update mechanism
  # calculate how many iterations we need, One epoch is considered once going through all the sentences and not images
  # Hence in case of coco/flickr this will 5* no of images
  ## Initialize the model parameters from the checkpoint file if we are resuming training
  zipp(model_npy,model)
  print("\nPredicting using model %s, run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint_path, checkpoint['epoch'], \
    checkpoint['perplexity']))
  
  pos_samp = np.arange(1,dtype=np.int32)
  
  features,_ = loadArbitraryFeatures(params, -1)

  #Disable using dropout in training 
  use_dropout.set_value(0.)
  N = len(candDb['imgblobs'])
  #################### Main Loop ############################################
  for i,img in enumerate(candDb['imgblobs']):
    # fetch a batch of data
    print 'image %d/%d  \r' % (i, N),
    batch = []
    cbatch_len  = len(img['candidatelist'])
    for s in img['candidatelist']:
        batch.append({'sentence':s})
    if cbatch_len < batch_size:
        for z in xrange(batch_size - cbatch_len):
            batch.append({'sentence':img['candidatelist'][-1]})

    batch[0]['image'] = {'feat':features[:, img['imgid']]}
    real_inp_list, lenS = prepare_data(batch, wordtoix, maxlen=maxlen, pos_samp=pos_samp, prep_for=checkpoint_params['eval_model'])
    
    # evaluate cost, gradient and perform parameter update
    scrs = np.squeeze(f_pred_fns[1](*real_inp_list))
    scrs = scrs[:cbatch_len] # + scrs[:,cbatch_len:].sum()/cbatch_len
    for si,s in enumerate(img['candidatelist']):
        candDb['imgblobs'][i]['candidatelist'][si]['logprob'] = float(scrs[si])
        candDb['imgblobs'][i]['candidatelist'][si].pop('tokens')
    bestcand = scrs.argmax()
    candDb['imgblobs'][i]['candidate'] = candDb['imgblobs'][i]['candidatelist'][bestcand]
    srtidx = np.argsort(scrs)[::-1]
    candDb['imgblobs'][i]['candsort'] = list(srtidx)
    #import pdb;pdb.set_trace()
    # print training statistics

  print ""
  jsonFname = '%s_reranked_%s.json' % (checkpoint_params['eval_model'],params['fname_append'])
  save_file = os.path.join(params['root_path'], jsonFname)
  json.dump(candDb, open(save_file, 'w'))
def main(params):

  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  print 'loading checkpoint %s' % (checkpoint_path, )
  checkpoint = pickle.load(open(checkpoint_path, 'rb'))
  checkpoint_params = checkpoint['params']

  model_npy = checkpoint['model']
  misc = {}
  misc['wordtoix'] = checkpoint['wordtoix']
  ixtoword = checkpoint['ixtoword']

  if 'use_theano' not in  checkpoint_params:
    checkpoint_params['use_theano'] = 1
  
  checkpoint_params['use_theano'] = 1

  if 'image_feat_size' not in  checkpoint_params:
    checkpoint_params['image_feat_size'] = 4096 

  # output blob which we will dump to JSON for visualizing the results
  blob = {} 
  blob['params'] = params
  blob['checkpoint_params'] = checkpoint_params
  blob['imgblobs'] = []

  # load the tasks.txt file
  root_path = params['root_path']
  img_names_list = open(params['imgList'], 'r').read().splitlines()

  if len(img_names_list[0].rsplit(',')) > 2: 
    img_names = [x.rsplit (',')[0] for x in img_names_list]
    sentRaw = [x.rsplit (',')[1] for x in img_names_list]
    idxes = [int(x.rsplit (',')[2]) for x in img_names_list]
  elif len(img_names_list[0].rsplit(',')) == 2:
    img_names = [x.rsplit (',')[0] for x in img_names_list]
    sentRaw = [x.rsplit (',')[1] for x in img_names_list]
    idxes = xrange(len(img_names_list))
  else:
    print 'ERROR: List should atleast contain image name and a corresponding sentence'
    return

  if checkpoint_params.get('en_aux_inp',0) and (params.get('aux_inp_file',None) == None):
    raise ValueError('ERROR: please specify auxillary input feature using --aux_inp_file')
    return
  # load the features for all images
  features, aux_inp = loadArbitraryFeatures(params, idxes)

  D,NN = features.shape
  N = len(img_names) 

  # iterate over all images and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  BatchGenerator.build_eval_other_sent(BatchGenerator.model_th, checkpoint_params,model_npy)
  eval_batch_size = params.get('eval_batch_size',100)
  wordtoix = checkpoint['wordtoix']
  
  gen_fprop = BatchGenerator.f_eval_other
  
  print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint['epoch'], \
    checkpoint['perplexity']))
  
  n = 0
  
  while n < N:
    print('image %d/%d:\r' % (n, N)),
    
    cbs = 0
    # encode the image
    batch = []
    while n < N and cbs < eval_batch_size:
        out = {}
        out['image'] = {'feat':features[:, n]}
        out['sentence'] = {'raw': sentRaw[n],'tokens':word_tokenize(sentRaw[n])}
        out['idx'] = n
        if checkpoint_params.get('en_aux_inp',0):
            out['image']['aux_inp'] = aux_inp[:, n]

        cbs += 1
        n += 1
        batch.append(out)
    
    inp_list, lenS = prepare_data(batch,wordtoix)

    # perform the work. heavy lifting happens inside
    eval_array = gen_fprop(*inp_list)

    for ix,x in enumerate(batch):
        # build up the output
        img_blob = {}
        img_blob['img_path'] = img_names[x['idx']]
        # encode the top prediction
        img_blob['candidate'] = {'text': x['sentence']['raw'], 'logprob': float(eval_array[0,ix])}
        blob['imgblobs'].append(img_blob)

  # dump result struct to file
  jsonFname = 'result_struct_%s.json' % (params['fname_append'] ) 
  save_file = os.path.join(root_path, jsonFname)
  print 'writing predictions to %s...' % (save_file, )
  json.dump(blob, open(save_file, 'w'))
def main(params):

    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print 'loading checkpoint %s' % (checkpoint_path, )
    checkpoint = pickle.load(open(checkpoint_path, 'rb'))
    checkpoint_params = checkpoint['params']

    model_npy = checkpoint['model']
    misc = {}
    misc['wordtoix'] = checkpoint['wordtoix']
    ixtoword = checkpoint['ixtoword']

    if 'use_theano' not in checkpoint_params:
        checkpoint_params['use_theano'] = 1

    checkpoint_params['use_theano'] = 1

    if 'image_feat_size' not in checkpoint_params:
        checkpoint_params['image_feat_size'] = 4096

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = checkpoint_params
    blob['imgblobs'] = []

    # load the tasks.txt file
    root_path = params['root_path']
    img_names_list = open(params['imgList'], 'r').read().splitlines()

    if len(img_names_list[0].rsplit(',')) > 2:
        img_names = [x.rsplit(',')[0] for x in img_names_list]
        sentRaw = [x.rsplit(',')[1] for x in img_names_list]
        idxes = [int(x.rsplit(',')[2]) for x in img_names_list]
    elif len(img_names_list[0].rsplit(',')) == 2:
        img_names = [x.rsplit(',')[0] for x in img_names_list]
        sentRaw = [x.rsplit(',')[1] for x in img_names_list]
        idxes = xrange(len(img_names_list))
    else:
        print 'ERROR: List should atleast contain image name and a corresponding sentence'
        return

    if checkpoint_params.get('en_aux_inp', 0) and (params.get(
            'aux_inp_file', None) == None):
        raise ValueError(
            'ERROR: please specify auxillary input feature using --aux_inp_file'
        )
        return
    # load the features for all images
    features, aux_inp = loadArbitraryFeatures(params, idxes)

    D, NN = features.shape
    N = len(img_names)

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    BatchGenerator.build_eval_other_sent(BatchGenerator.model_th,
                                         checkpoint_params, model_npy)
    eval_batch_size = params.get('eval_batch_size', 100)
    wordtoix = checkpoint['wordtoix']

    gen_fprop = BatchGenerator.f_eval_other

    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint['epoch'], \
      checkpoint['perplexity']))

    n = 0

    while n < N:
        print('image %d/%d:\r' % (n, N)),

        cbs = 0
        # encode the image
        batch = []
        while n < N and cbs < eval_batch_size:
            out = {}
            out['image'] = {'feat': features[:, n]}
            out['sentence'] = {
                'raw': sentRaw[n],
                'tokens': word_tokenize(sentRaw[n])
            }
            out['idx'] = n
            if checkpoint_params.get('en_aux_inp', 0):
                out['image']['aux_inp'] = aux_inp[:, n]

            cbs += 1
            n += 1
            batch.append(out)

        inp_list, lenS = prepare_data(batch, wordtoix)

        # perform the work. heavy lifting happens inside
        eval_array = gen_fprop(*inp_list)

        for ix, x in enumerate(batch):
            # build up the output
            img_blob = {}
            img_blob['img_path'] = img_names[x['idx']]
            # encode the top prediction
            img_blob['candidate'] = {
                'text': x['sentence']['raw'],
                'logprob': float(eval_array[0, ix])
            }
            blob['imgblobs'].append(img_blob)

    # dump result struct to file
    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)
    print 'writing predictions to %s...' % (save_file, )
    json.dump(blob, open(save_file, 'w'))
Exemplo n.º 8
0
def main(params):

    # load the checkpoint
    if params['multi_model'] == 0:
        checkpoint_path = params['checkpoint_path']
        print 'loading checkpoint %s' % (checkpoint_path, )
        checkpoint = pickle.load(open(checkpoint_path, 'rb'))
        checkpoint_params = checkpoint['params']
        model_npy = checkpoint['model']
        checkpoint_params['use_theano'] = 1
        if 'image_feat_size' not in checkpoint_params:
            checkpoint_params['image_feat_size'] = 4096

        BatchGenerator = decodeGenerator(checkpoint_params)
        # Compile and init the theano predictor
        BatchGenerator.prepPredictor(model_npy, checkpoint_params,
                                     params['beam_size'])
        model = BatchGenerator.model_th
    else:
        BatchGenerator = []
        model_npy = []
        modelTh = []
        checkpoint_params = []
        for i, checkpoint_path in enumerate(params['checkpoint_path']):
            checkpoint = pickle.load(open(checkpoint_path, 'rb'))
            model_npy.append(checkpoint['model'])
            checkpoint_params.append(checkpoint['params'])
            checkpoint_params[i]['use_theano'] = 1
            BatchGenerator.append(decodeGenerator(checkpoint_params[i]))
            zipp(model_npy[i], BatchGenerator[i].model_th)
            modelTh.append(BatchGenerator[i].model_th)
            modelTh[i]['comb_weight'] = 1.0 / params['nmodels']

        BatchGenerator[0].prepMultiPredictor(modelTh, checkpoint_params,
                                             params['beam_size'],
                                             params['nmodels'])

    misc = {}
    ixtoword = checkpoint['ixtoword']
    misc['wordtoix'] = checkpoint['wordtoix']

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = checkpoint_params
    blob['imgblobs'] = []

    # load the tasks.txt file and setupe feature loading
    root_path = params['root_path']
    img_names_list = open(params['imgList'], 'r').read().splitlines()

    if len(img_names_list[0].rsplit(',')) > 1:
        img_names = [x.rsplit(',')[0] for x in img_names_list]
        idxes = [int(x.rsplit(',')[1]) for x in img_names_list]
    else:
        img_names = img_names_list
        idxes = xrange(len(img_names_list))

    #if checkpoint_params.get('en_aux_inp',0) and (params.get('aux_inp_file','None') == 'None'):
    #  raise ValueError('ERROR: please specify auxillary input feature using --aux_inp_file')
    #  return
    # load the features for all images
    features, aux_inp = loadArbitraryFeatures(params, idxes)

    N = len(img_names)

    # iterate over all images and predict sentences
    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint['epoch'], \
      checkpoint['perplexity']))

    kwparams = {'beam_size': params['beam_size']}

    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)

    for n in xrange(N):
        print 'image %d/%d:' % (n, N)

        # encode the image
        if params['multi_model'] == 0:
            D, NN = features.shape
            img = {}
            img['feat'] = features[:, n]
            if checkpoint_params.get('en_aux_inp', 0):
                img['aux_inp'] = aux_inp[:, n]
            img['local_file_path'] = img_names[n]
            # perform the work. heavy lifting happens inside
            Ys = BatchGenerator.predict([{
                'image': img
            }], model, checkpoint_params, **kwparams)
        else:
            kwparams['nmodels'] = params['nmodels']
            batch = []
            for i in xrange(params['nmodels']):
                img = {}
                img['feat'] = features[i][:, n]
                if checkpoint_params[i].get('en_aux_inp', 0):
                    img['aux_inp'] = aux_inp[i][:, n]
                img['local_file_path'] = img_names[n]
                batch.append({'image': img})
            Ys = BatchGenerator[0].predictMulti(batch, checkpoint_params,
                                                **kwparams)

        # build up the output
        img_blob = {}
        img_blob['img_path'] = img['local_file_path']

        # encode the top prediction
        top_predictions = Ys[
            0]  # take predictions for the first (and only) image we passed in
        top_prediction = top_predictions[
            0]  # these are sorted with highest on top
        candidate = ' '.join([
            ixtoword[int(ix)] for ix in top_prediction[1] if ix > 0
        ])  # ix 0 is the END token, skip that
        print 'PRED: (%f) %s' % (float(top_prediction[0]), candidate)
        img_blob['candidate'] = {
            'text': candidate,
            'logprob': float(top_prediction[0])
        }

        # Code to save all the other candidates
        candlist = []
        for ci in xrange(len(top_predictions) - 1):
            prediction = top_predictions[
                ci + 1]  # these are sorted with highest on top
            candidate = ' '.join([
                ixtoword[int(ix)] for ix in prediction[1] if ix > 0
            ])  # ix 0 is the END token, skip that
            candlist.append({
                'text': candidate,
                'logprob': float(prediction[0])
            })

        img_blob['candidatelist'] = candlist
        blob['imgblobs'].append(img_blob)
        if (n % 5000) == 1:
            print 'writing predictions to %s...' % (save_file, )
            json.dump(blob, open(save_file, 'w'))

    # dump result struct to file
    print 'writing predictions to %s...' % (save_file, )
    json.dump(blob, open(save_file, 'w'))

    # dump output html
    html = ''
    for img in blob['imgblobs']:
        html += '<img src="%s" height="400"><br>' % (img['img_path'], )
        html += '(%f) %s <br><br>' % (img['candidate']['logprob'],
                                      img['candidate']['text'])

    html_file = 'result_%s.html' % (params['fname_append'])
    html_file = os.path.join(root_path, html_file)
    print 'writing html result file to %s...' % (html_file, )
    open(html_file, 'w').write(html)