def loadfeatsandlabels(args):
    """Load feature matrices, class-label lists and the id -> feature-index map.

    Args:
        args: Parsed options object. ``args.visfeats`` is always read; when it
            equals 1 the attributes ``imgidlabel``, ``feats``, ``featNames``
            and ``clslabels`` are read as well.

    Returns:
        A 4-tuple ``(f_list, cLabls, featN, cocoIdtoFeatIdx)``:
        ``f_list`` holds one feature object per entry of ``args.feats``,
        ``cLabls`` holds the lines of every file in ``args.clslabels``,
        ``featN`` holds one display name per feature file, and
        ``cocoIdtoFeatIdx`` maps the integer id parsed from each line of
        ``args.imgidlabel`` to its integer feature index.
        When ``args.visfeats != 1`` four empty lists are returned.
    """
    if args.visfeats != 1:
        return [], [], [], []

    # Each line is whitespace separated: the first token (minus its leading
    # character) is the feature index, the second token (minus its first and
    # last characters) is the id.
    cocoIdtoFeatIdx = {}
    with open(args.imgidlabel, 'r') as id_file:
        for imgL in id_file.read().splitlines():
            fields = imgL.split()
            cocoIdtoFeatIdx[int(fields[1][1:-1])] = int(fields[0][1:])

    # Now load the features:
    params = {}
    f_list = []
    featN = []
    if args.feats is not None:
        # Imported lazily so the label-only path works without imagernn.
        from imagernn.data_provider import loadArbitraryFeatures
        for i, feat_file in enumerate(args.feats):
            params['feat_file'] = feat_file
            feat, _, _, _ = loadArbitraryFeatures(params)
            f_list.append(feat)
            if args.featNames is not None:
                featN.append(args.featNames[i])
            else:
                featN.append('feat_' + str(i))

    cLabls = []
    for label_path in args.clslabels:
        with open(label_path, 'r') as label_file:
            cLabls.append(label_file.read().splitlines())

    return f_list, cLabls, featN, cocoIdtoFeatIdx
def main(params):
    """Caption every listed image and write JSON and HTML result files.

    With ``params['multi_model'] == 0`` a single checkpoint is loaded and its
    predictor compiled; otherwise ``params['checkpoint_path']`` is iterated as
    a list of checkpoints and a multi-model predictor is prepared on the first
    generator. Results go to ``result_struct_<fname_append>.json`` and
    ``result_<fname_append>.html`` inside ``params['root_path']``.

    Args:
        params: dict of options. Keys read directly here: ``multi_model``,
            ``checkpoint_path``, ``softmax_smooth_factor``,
            ``softmax_propogate``, ``beam_size``, ``nmodels``, ``root_path``,
            ``imgList`` and ``fname_append``. The dict is also forwarded to
            ``loadArbitraryFeatures``.
    """
    single_model = params['multi_model'] == 0

    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # trusted checkpoint files.
    if single_model:
        checkpoint_path = params['checkpoint_path']
        print('loading checkpoint %s' % (checkpoint_path, ))
        with open(checkpoint_path, 'rb') as ckpt_file:
            checkpoint = pickle.load(ckpt_file)
        checkpoint_params = checkpoint['params']
        model_npy = checkpoint['model']
        checkpoint_params['use_theano'] = 1
        if 'image_feat_size' not in checkpoint_params:
            checkpoint_params['image_feat_size'] = 4096

        # The vocabulary lives either under 'misc' or at the checkpoint root.
        if 'misc' in checkpoint:
            misc = checkpoint['misc']
            ixtoword = misc['ixtoword']
        else:
            misc = {}
            ixtoword = checkpoint['ixtoword']
            misc['wordtoix'] = checkpoint['wordtoix']

        checkpoint_params['softmax_smooth_factor'] = params['softmax_smooth_factor']
        checkpoint_params['softmax_propogate'] = params['softmax_propogate']

        if checkpoint_params.get('class_out_factoring', 0) == 1:
            checkpoint_params['ixtoclsinfo'] = np.zeros(
                (checkpoint_params['nClasses'], 2), dtype=np.int32)
            ixtoclsinfo = misc['ixtoclsinfo']
            checkpoint_params['ixtoclsinfo'][ixtoclsinfo[:, 0]] = ixtoclsinfo[:, 1:3]
        if checkpoint_params.get('sched_sampling_mode', None) is not None:
            checkpoint_params['sched_sampling_mode'] = None

        BatchGenerator = decodeGenerator(checkpoint_params)
        # Compile and init the theano predictor
        BatchGenerator.prepPredictor(model_npy, checkpoint_params,
                                     params['beam_size'])
        model = BatchGenerator.model_th
        lead_params = checkpoint_params
    else:
        BatchGenerator = []
        model_npy = []
        modelTh = []
        checkpoint_params = []
        for i, checkpoint_path in enumerate(params['checkpoint_path']):
            with open(checkpoint_path, 'rb') as ckpt_file:
                checkpoint = pickle.load(ckpt_file)
            model_npy.append(checkpoint['model'])
            checkpoint_params.append(checkpoint['params'])
            checkpoint_params[i]['use_theano'] = 1
            BatchGenerator.append(decodeGenerator(checkpoint_params[i]))
            zipp(model_npy[i], BatchGenerator[i].model_th)
            modelTh.append(BatchGenerator[i].model_th)
            modelTh[i]['comb_weight'] = 1.0 / params['nmodels']
        BatchGenerator[0].prepMultiPredictor(modelTh, checkpoint_params,
                                             params['beam_size'],
                                             params['nmodels'])
        # Previously `ixtoword` was never bound on this path and `.get` was
        # called on the params list, so ensemble mode always crashed.
        # NOTE(review): assumes all ensemble members share one vocabulary and
        # that the first model's flags apply to the ensemble - confirm.
        if 'misc' in checkpoint:
            ixtoword = checkpoint['misc']['ixtoword']
        else:
            ixtoword = checkpoint['ixtoword']
        lead_params = checkpoint_params[0]

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = copy(checkpoint_params)
    if single_model and checkpoint_params.get('class_out_factoring', 0) == 1:
        # Drop the numpy table from the copy that is dumped to JSON.
        blob['checkpoint_params'].pop('ixtoclsinfo')
    blob['imgblobs'] = []

    # load the tasks.txt file and set up feature loading
    root_path = params['root_path']
    with open(params['imgList'], 'r') as list_file:
        img_names_list = list_file.read().splitlines()

    # Lines are "name", "name,idx" or "name,idx,auxidx"; the first line
    # decides the layout for the whole file.
    auxidxes = []
    n_fields = len(img_names_list[0].rsplit(','))
    if n_fields > 1:
        rows = [x.rsplit(',') for x in img_names_list]
        img_names = [r[0] for r in rows]
        idxes = [int(r[1]) for r in rows]
        if n_fields > 2:
            auxidxes = [int(r[2]) for r in rows]
    else:
        img_names = img_names_list
        idxes = xrange(len(img_names_list))

    # load the features for all images
    if lead_params.get('swap_aux') == 0 or not auxidxes:
        features, aux_inp = loadArbitraryFeatures(params, idxes,
                                                  auxidxes=auxidxes)
    else:
        features, aux_inp = loadArbitraryFeatures(params, auxidxes,
                                                  auxidxes=idxes)

    N = len(img_names)

    # iterate over all images and predict sentences
    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n"
          % (checkpoint['epoch'], checkpoint['perplexity']))

    kwparams = {'beam_size': params['beam_size']}
    if not single_model:
        kwparams['nmodels'] = params['nmodels']

    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)

    def decode(token_ids):
        # ix 0 is the END token, skip that
        return ' '.join([ixtoword[int(ix)] for ix in token_ids if ix > 0])

    for n in xrange(N):
        print('image %d/%d:' % (n, N))

        # encode the image
        if single_model:
            img = {}
            img['feat'] = features[:, n]
            if checkpoint_params.get('en_aux_inp', 0):
                img['aux_inp'] = aux_inp[:, n]
            img['local_file_path'] = img_names[n]
            # perform the work. heavy lifting happens inside
            Ys = BatchGenerator.predict([{'image': img}], model,
                                        checkpoint_params, **kwparams)
        else:
            batch = []
            for i in xrange(params['nmodels']):
                img = {}
                img['feat'] = features[i][:, n]
                if checkpoint_params[i].get('en_aux_inp', 0):
                    img['aux_inp'] = aux_inp[i][:, n]
                img['local_file_path'] = img_names[n]
                batch.append({'image': img})
            Ys = BatchGenerator[0].predictMulti(batch, checkpoint_params,
                                                **kwparams)

        # build up the output
        img_blob = {}
        img_blob['img_path'] = img_names[n]

        # encode the top prediction
        top_predictions = Ys[0]  # predictions for the single image passed in
        top_prediction = top_predictions[0]  # sorted with highest on top
        if lead_params.get('reverse_sentence', 0) == 0:
            candidate = decode(top_prediction[1])
        else:
            candidate = decode(reversed(top_prediction[1]))
        print('PRED: (%f) %s' % (float(top_prediction[0]), candidate))
        img_blob['candidate'] = {'text': candidate,
                                 'logprob': float(top_prediction[0])}

        # Save all the other candidates.
        # NOTE(review): these are not reversed even when 'reverse_sentence'
        # is set, unlike the top candidate - confirm whether intended.
        candlist = []
        for prediction in top_predictions[1:]:
            candlist.append({'text': decode(prediction[1]),
                             'logprob': float(prediction[0])})
        img_blob['candidatelist'] = candlist
        blob['imgblobs'].append(img_blob)

        # Periodic checkpoint of the results so far.
        if (n % 5000) == 1:
            print('writing predictions to %s...' % (save_file, ))
            with open(save_file, 'w') as out_file:
                json.dump(blob, out_file)

    # dump result struct to file
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as out_file:
        json.dump(blob, out_file)

    # dump output html
    html_parts = []
    for entry in blob['imgblobs']:
        html_parts.append('<img src="%s" height="400"><br>' % (entry['img_path'], ))
        html_parts.append('(%f) %s <br><br>' % (entry['candidate']['logprob'],
                                                entry['candidate']['text']))
    html = ''.join(html_parts)
    html_file = 'result_%s.html' % (params['fname_append'])
    html_file = os.path.join(root_path, html_file)
    print('writing html result file to %s...' % (html_file, ))
    with open(html_file, 'w') as out_file:
        out_file.write(html)
def main(params):
    """Re-rank caption candidates with an evaluator model and save the result.

    Loads the evaluator checkpoint, obtains the candidate database (from
    ``params['candDb']`` or ``mergeRes(params)``), scores every candidate of
    every image, stores the scores and the sort order back into the database
    and writes it to ``<eval_model>_reranked_<fname_append>.json`` under
    ``params['root_path']``.

    Args:
        params: dict of options. Keys read directly here: ``checkpoint_path``,
            ``candDb``, ``dataset`` (``'coco'``, ``'msr-vtt'`` or ``'lsmdc'``),
            ``lblF``, ``poolmethod``, ``use_label_file``, ``labels``,
            ``featfromlbl``, ``uselabel``, ``eval_model``, ``aux_inp_file``,
            ``fname_append`` and ``root_path``. Some keys are overwritten with
            checkpoint defaults; the dict is forwarded to
            ``loadArbitraryFeatures``.

    Raises:
        ValueError: if ``params['dataset']`` is not one of the supported names.
    """
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(review): pickle.load can execute arbitrary code; only use trusted
    # checkpoint files.
    with open(checkpoint_path, 'rb') as ckpt_file:
        checkpoint = pickle.load(ckpt_file)
    cp_params = checkpoint['params']
    model_npy = checkpoint['model']

    # Load the candidates db generated from rnn's
    if params['candDb'] is not None:
        with open(params['candDb'], 'r') as cand_file:
            candDb = json.load(cand_file)
    else:
        candDb = mergeRes(params)

    if 'wordtoix' in checkpoint:
        wordtoix = checkpoint['wordtoix']
    else:
        wordtoix = checkpoint['misc']['wordtoix']

    def read_lines(path):
        with open(path, 'r') as fh:
            return fh.read().splitlines()

    def fill_from_checkpoint(keys):
        # Options left as None fall back to the checkpoint's value.
        for key in keys:
            if params[key] is None:
                params[key] = cp_params[key]

    # Read labels and build the object-id -> row-index map
    dataset = params['dataset']
    if dataset == 'coco':
        objId2Imgid = {}
        for lb in read_lines(params['lblF']):
            fields = lb.split()
            objId2Imgid[str(int(fields[1][1:-1]))] = int(fields[0][1:])
        features, aux_inp, feat_idx, aux_idx = loadArbitraryFeatures(
            params, Ellipsis)
    elif dataset == 'msr-vtt':
        img_names_list = read_lines(params['lblF'])
        auxidxes = []
        img_names = [x.rsplit(',')[0] for x in img_names_list]
        objId2Imgid = {imn.split('.')[0]: i for i, imn in enumerate(img_names)}
        if len(img_names_list[0].split(',', 1)) > 1:
            # Everything after the first comma is a Python literal: either a
            # single index or an (index, aux_index) tuple. Parse each line once.
            parsed = [ast.literal_eval(x.split(',', 1)[1].strip())
                      for x in img_names_list]
            if isinstance(parsed[0], tuple):
                idxes = [p[0] for p in parsed]
                auxidxes = [p[1] for p in parsed]
            else:
                idxes = parsed
        else:
            idxes = xrange(len(img_names_list))
        fill_from_checkpoint(['poolmethod'])
        features, aux_inp, feat_idx, aux_idx = loadArbitraryFeatures(
            params, idxes, auxidxes=auxidxes)
    elif dataset == 'lsmdc':
        if params['use_label_file'] == 1:
            fill_from_checkpoint(['poolmethod', 'labels', 'featfromlbl',
                                  'uselabel'])
        else:
            params['uselabel'] = 0
        img_names_list = read_lines(params['lblF'])
        rows = [x.rsplit(',') for x in img_names_list]
        img_names = [r[0] for r in rows]
        idxes = [int(r[1]) for r in rows]
        auxidxes = []
        objId2Imgid = {
            osp.basename(imn).split('.')[0]: i
            for i, imn in enumerate(img_names)
        }
        features, aux_inp, feat_idx, aux_idx = loadArbitraryFeatures(
            params, idxes, auxidxes=auxidxes)
    else:
        # Previously this fell through and failed later with a NameError.
        raise ValueError('Unsupported dataset: %s' % (dataset, ))

    use_img_encoder = cp_params.get('use_encoder_for', 0) & 1
    if use_img_encoder:
        imgFeatEncoder = RecurrentFeatEncoder(cp_params['image_feat_size'],
                                              cp_params['sent_encoding_size'],
                                              cp_params,
                                              mdl_prefix='img_enc_',
                                              features=features.T)
        zipp(model_npy, imgFeatEncoder.model_th)
        (imgenc_use_dropout, imgFeatEnc_inp, xI,
         updatesLSTMImgFeat) = imgFeatEncoder.build_model(
             imgFeatEncoder.model_th, cp_params)
    else:
        xI = None
        imgFeatEnc_inp = []

    if 'eval_model' not in cp_params:
        cp_params['eval_model'] = params['eval_model']
    print('Using evaluator module:  %s' % (cp_params['eval_model'], ))

    # Tokenize the candidates and find the largest candidate list, which
    # becomes the batch size.
    batch_size = 0
    for img in candDb['imgblobs']:
        for cand in img['candidatelist']:
            cand['tokens'] = cand['text'].split(' ')
        batch_size = max(batch_size, len(img['candidatelist']))

    # HACK!! the sentence length is fixed rather than derived from the data.
    maxlen = 24
    cp_params['maxlen'] = maxlen
    cp_params['batch_size'] = batch_size
    print(maxlen)

    # This initializes the model parameters and does matrix initializations
    cp_params['mode'] = 'predict'
    evalModel = decodeEvaluator(cp_params)
    model = evalModel.model_th

    # Define the computational graph relating the input image features and
    # word indices to the score.
    (use_dropout, inp_list_eval, f_pred_fns, cost, predTh,
     modelUpd) = evalModel.build_model(model,
                                       cp_params,
                                       xI=xI,
                                       prior_inp_list=imgFeatEnc_inp)

    # Initialize the model parameters from the checkpoint file
    model = modelUpd if cp_params['eval_model'] == 'cnn' else model
    zipp(model_npy, model)

    print("\nPredicting using model %s, run for %0.2f epochs with validation perplx at %0.3f\n"
          % (checkpoint_path, checkpoint['epoch'], checkpoint['perplexity']))

    is_cnn = cp_params['eval_model'] == 'cnn'
    pos_samp = np.arange(1, dtype=np.int32) if is_cnn else []

    # Disable dropout for prediction
    use_dropout.set_value(0.)
    if use_img_encoder:
        imgenc_use_dropout.set_value(0.)

    N = len(candDb['imgblobs'])
    stats = np.zeros((batch_size))
    #################### Main Loop ############################################
    for i, img in enumerate(candDb['imgblobs']):
        # fetch a batch of data
        print('image %d/%d \r' % (i, N)),
        candidates = img['candidatelist']
        cbatch_len = len(candidates)
        objid = osp.basename(img['img_path']).split('_')[-1].split('.')[0]
        if dataset == 'coco':
            objid = str(int(objid))
        img_pos = objId2Imgid[objid]
        feat_col = feat_idx[img_pos]

        batch = []
        for s in candidates:
            entry = {
                'sentence': s,
                'image': {
                    'feat': features[:, feat_col].T,
                    'img_idx': feat_col
                }
            }
            if params['aux_inp_file'] is not None:
                entry['aux_inp'] = aux_inp[:, aux_idx[img_pos]].T
            batch.append(entry)

        # The cnn evaluator needs full batches: pad with the last candidate.
        if cbatch_len < batch_size and is_cnn:
            for _ in xrange(batch_size - cbatch_len):
                batch.append({'sentence': candidates[-1]})

        enc_inp_list = prepare_seq_features(
            batch,
            use_enc_for=cp_params.get('use_encoder_for', 0),
            use_shared_mem=cp_params.get('use_shared_mem_enc', 0),
            pos_samp=pos_samp)
        eval_inp_list, lenS = prepare_data(
            batch,
            wordtoix,
            maxlen=maxlen,
            pos_samp=pos_samp,
            prep_for=cp_params['eval_model'],
            use_enc_for=cp_params.get('use_encoder_for', 0))

        real_inp_list = enc_inp_list + eval_inp_list

        # np.squeeze returns a 0-d array for a single score; atleast_1d keeps
        # the slice below valid in that case.
        scrs = np.atleast_1d(np.squeeze(f_pred_fns[1](*real_inp_list)))
        scrs = scrs[:cbatch_len]
        for si, cand in enumerate(candidates):
            cand['logprob'] = float(scrs[si])
            cand.pop('tokens')

        bestcand = scrs.argmax()
        stats[bestcand] += 1.0
        img['candidate'] = candidates[bestcand]
        # tolist() yields plain Python ints, which json can always serialize.
        img['candsort'] = np.argsort(scrs)[::-1].tolist()

    # print statistics
    print("")
    jsonFname = '%s_reranked_%s.json' % (cp_params['eval_model'],
                                         params['fname_append'])
    save_file = os.path.join(params['root_path'], jsonFname)
    with open(save_file, 'w') as out_file:
        json.dump(candDb, out_file)
    print('Written to file %s' % save_file)
    print('Final stats are:')
    print(stats * 100.0 / N)
def main(params):
    """Generate captions for listed images, optionally through feature encoders.

    Loads the checkpoint (and optionally a separate generator checkpoint from
    ``params['gen_model']``), builds the optional image / auxiliary feature
    encoders and the predictor, predicts for every image in
    ``params['imgList']`` and writes ``result_struct_<fname_append>.json``
    under ``params['root_path']``.

    Args:
        params: dict of options. Keys read directly here: ``checkpoint_path``,
            ``gen_model``, ``dobeamsearch``, ``use_label_file``,
            ``poolmethod``, ``labels``, ``featfromlbl``, ``uselabel``,
            ``softmax_smooth_factor``, ``softmax_propogate``,
            ``computelogprob``, ``greedy``, ``root_path``, ``imgList``,
            ``word_freq_w``, ``word_freq_sc``, ``beam_size``,
            ``rescoreByLen`` and ``fname_append``. Some keys are overwritten
            with checkpoint defaults; the dict is forwarded to
            ``loadArbitraryFeatures``.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(review): pickle.load can execute arbitrary code; only use trusted
    # checkpoint files.
    with open(checkpoint_path, 'rb') as ckpt_file:
        checkpoint = pickle.load(ckpt_file)
    cp_params = checkpoint['params']

    if params['gen_model'] is None:
        if 'model' in checkpoint:
            model_npy = checkpoint['model']
        else:
            model_npy = checkpoint['modelGen']
    else:
        with open(params['gen_model'], 'rb') as gen_file:
            gen_cp = pickle.load(gen_file)
        model_npy = gen_cp.get('model', {})

    cp_params['use_theano'] = 1
    if params['dobeamsearch']:
        cp_params['advers_gen'] = 0

    if params['use_label_file'] == 1:
        # Options left as None fall back to the checkpoint's value.
        for key in ('poolmethod', 'labels', 'featfromlbl', 'uselabel'):
            if params[key] is None:
                params[key] = cp_params[key]
    else:
        params['uselabel'] = 0

    print('parsed parameters:')
    print(json.dumps(params, indent=2))

    if 'image_feat_size' not in cp_params:
        cp_params['image_feat_size'] = 4096

    # The vocabulary lives either under 'misc' or at the checkpoint root.
    if 'misc' in checkpoint:
        misc = checkpoint['misc']
        ixtoword = misc['ixtoword']
    else:
        misc = {}
        ixtoword = checkpoint['ixtoword']
        misc['wordtoix'] = checkpoint['wordtoix']

    cp_params['softmax_smooth_factor'] = params['softmax_smooth_factor']
    cp_params['softmax_propogate'] = params['softmax_propogate']
    cp_params['computelogprob'] = params['computelogprob']
    cp_params['greedy'] = params['greedy']
    cp_params['gen_input_noise'] = 0
    if cp_params.get('sched_sampling_mode', None) is not None:
        cp_params['sched_sampling_mode'] = None

    # load the tasks.txt file and set up feature loading
    root_path = params['root_path']
    with open(params['imgList'], 'r') as list_file:
        img_names_list = list_file.read().splitlines()

    auxidxes = []
    img_names = [x.rsplit(',')[0] for x in img_names_list]
    if len(img_names_list[0].split(',', 1)) > 1:
        # Everything after the first comma is a Python literal: either a
        # single index or an (index, aux_index) tuple. Parse each line once.
        parsed = [ast.literal_eval(x.split(',', 1)[1].strip())
                  for x in img_names_list]
        if isinstance(parsed[0], tuple):
            idxes = [p[0] for p in parsed]
            auxidxes = [p[1] for p in parsed]
        else:
            idxes = parsed
    else:
        idxes = xrange(len(img_names_list))

    if cp_params.get('swap_aux') == 0 or not auxidxes:
        features, aux_inp, feat_idx, aux_idx = loadArbitraryFeatures(
            params, idxes, auxidxes=auxidxes)
    else:
        features, aux_inp, feat_idx, aux_idx = loadArbitraryFeatures(
            params, auxidxes, auxidxes=idxes)

    ##------------------------ Setup the models --------------------------##
    encoder_flags = cp_params.get('use_encoder_for', 0)
    if encoder_flags & 1:
        imgFeatEncoder = RecurrentFeatEncoder(cp_params['image_feat_size'],
                                              cp_params['word_encoding_size'],
                                              cp_params,
                                              mdl_prefix='img_enc_',
                                              features=features.T)
        zipp(model_npy, imgFeatEncoder.model_th)
        (imgenc_use_dropout, imgFeatEnc_inp, xI,
         updatesLSTMImgFeat) = imgFeatEncoder.build_model(
             imgFeatEncoder.model_th, cp_params)
    else:
        xI = None
        imgFeatEnc_inp = []

    if encoder_flags & 2:
        auxFeatEncoder = RecurrentFeatEncoder(cp_params['aux_inp_size'],
                                              cp_params['image_encoding_size'],
                                              cp_params,
                                              mdl_prefix='aux_enc_',
                                              features=aux_inp.T)
        zipp(model_npy, auxFeatEncoder.model_th)
        (auxenc_use_dropout, auxFeatEnc_inp, xAux,
         updatesLSTMAuxFeat) = auxFeatEncoder.build_model(
             auxFeatEncoder.model_th, cp_params)
    else:
        auxFeatEnc_inp = []
        xAux = None

    # Testing to see if diversity can be achieved by weighing words
    if params['word_freq_w'] is not None:
        with open(params['word_freq_w'], 'r') as freq_file:
            w_freq = json.load(freq_file)
        wordtoix = misc['wordtoix']
        w_logw = np.zeros(len(wordtoix), dtype=np.float32)
        for w in w_freq:
            if w in wordtoix:
                w_logw[wordtoix[w]] = w_freq[w]
        # Normalize by the smallest weight past index 0, then give index 0
        # the largest weight before taking the scaled negative log.
        w_logw = w_logw / w_logw[1:].min()
        w_logw[0] = w_logw.max()
        w_logw = -params['word_freq_sc'] * np.log(w_logw)
    else:
        w_logw = None

    BatchGenerator = decodeGenerator(cp_params)
    # Compile and init the theano predictor
    BatchGenerator.prepPredictor(model_npy,
                                 cp_params,
                                 params['beam_size'],
                                 xI,
                                 xAux,
                                 imgFeatEnc_inp + auxFeatEnc_inp,
                                 per_word_logweight=w_logw)
    if params['greedy']:
        BatchGenerator.usegumbel.set_value(0)

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = copy(cp_params)
    if cp_params.get('class_out_factoring', 0) == 1:
        blob['checkpoint_params'].pop('ixtoclsinfo')
    blob['imgblobs'] = []

    N = len(img_names)

    # iterate over all images and predict sentences
    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n"
          % (checkpoint['epoch'], checkpoint['perplexity']))

    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)

    # An empty list is the "no auxiliary input" sentinel. Checked by type so
    # an array is never compared element-wise against [].
    has_aux = not (isinstance(aux_inp, list) and len(aux_inp) == 0)

    def decode(token_ids):
        # ix 0 is the END token, skip that
        return ' '.join([ixtoword[int(ix)] for ix in token_ids if ix > 0])

    for n in xrange(N):
        print('image %d/%d:' % (n, N))

        # encode the image
        img = {}
        img['feat'] = features[:, feat_idx[n]].T
        img['img_idx'] = feat_idx[n]
        if cp_params.get('en_aux_inp', 0):
            if has_aux:
                # Was `aux_inp(aux_idx[n])`, i.e. a call on an array-like
                # (this function also uses `aux_inp.T`); index it the same
                # way as `features` above.
                img['aux_inp'] = aux_inp[:, aux_idx[n]].T
                img['aux_idx'] = aux_idx[n]
            else:
                img['aux_inp'] = np.zeros(cp_params['aux_inp_size'],
                                          dtype=np.float32)
                img['aux_idx'] = []
        img['local_file_path'] = img_names[n]

        # perform the work. heavy lifting happens inside
        enc_inp_list = prepare_seq_features(
            [{'image': img}],
            use_enc_for=cp_params.get('use_encoder_for', 0),
            use_shared_mem=cp_params.get('use_shared_mem_enc', 0))
        Ys, _ = BatchGenerator.predict([{'image': img}],
                                       cp_params,
                                       ext_inp=enc_inp_list)

        # build up the output
        img_blob = {}
        img_blob['img_path'] = img['local_file_path']

        # Predictions for the first (and only) image passed in, optionally
        # rescored by length, then sorted with the highest score on top.
        if params['rescoreByLen'] == 0:
            top_predictions = Ys[0]
        else:
            top_predictions = rescoreProbByLen(Ys[0])
        top_predictions = sorted(top_predictions,
                                 key=lambda aa: aa[0],
                                 reverse=True)
        top_prediction = top_predictions[0]

        if cp_params.get('reverse_sentence', 0) == 0:
            candidate = decode(top_prediction[1])
        else:
            candidate = decode(reversed(top_prediction[1]))

        if params['rescoreByLen'] == 0:
            print('PRED: (%f) %s' % (float(top_prediction[0]), candidate))
        else:
            print('PRED: (%f, %f) %s' % (float(top_prediction[0]),
                                         float(top_prediction[2]), candidate))
        img_blob['candidate'] = {
            'text': candidate,
            'logprob': float(top_prediction[0])
        }

        # Save all the other candidates.
        # NOTE(review): these are not reversed even when 'reverse_sentence'
        # is set, unlike the top candidate - confirm whether intended.
        candlist = []
        for prediction in top_predictions[1:]:
            candlist.append({
                'text': decode(prediction[1]),
                'logprob': float(prediction[0])
            })
        img_blob['candidatelist'] = candlist
        blob['imgblobs'].append(img_blob)

        # Periodic checkpoint of the results so far.
        if (n % 5000) == 1:
            print('writing predictions to %s...' % (save_file, ))
            with open(save_file, 'w') as out_file:
                json.dump(blob, out_file)

    # dump result struct to file
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as out_file:
        json.dump(blob, out_file)
def main(params):
    """Re-rank the caption candidates in ``params['candDb']`` with an evaluator.

    Loads the evaluator checkpoint and the candidate database, scores every
    candidate of every image, writes the scores, the best candidate and the
    sort order back into the database and saves it as
    ``<eval_model>_reranked_<fname_append>.json`` under
    ``params['root_path']``.

    Args:
        params: dict of options. Keys read directly here: ``checkpoint_path``,
            ``candDb``, ``fname_append`` and ``root_path``. The dict is also
            forwarded to ``loadArbitraryFeatures``.
    """
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(review): pickle.load can execute arbitrary code; only use trusted
    # checkpoint files.
    with open(checkpoint_path, 'rb') as ckpt_file:
        checkpoint = pickle.load(ckpt_file)
    checkpoint_params = checkpoint['params']
    model_npy = checkpoint['model']

    # Load the candidates db generated from rnn's
    with open(params['candDb'], 'r') as cand_file:
        candDb = json.load(cand_file)

    wordtoix = checkpoint['wordtoix']

    # Tokenize the candidates and find the largest candidate list, which
    # becomes the batch size.
    batch_size = 0
    for img in candDb['imgblobs']:
        for cand in img['candidatelist']:
            cand['tokens'] = cand['text'].split(' ')
        batch_size = max(batch_size, len(img['candidatelist']))

    # HACK!! the sentence length is fixed rather than derived from the data.
    maxlen = 24
    checkpoint_params['maxlen'] = maxlen
    checkpoint_params['batch_size'] = batch_size
    print(maxlen)

    # This initializes the model parameters and does matrix initializations
    checkpoint_params['mode'] = 'predict'
    evalModel = decodeEvaluator(checkpoint_params)
    model = evalModel.model_th

    # Define the computational graph relating the input image features and
    # word indices to the score.
    (use_dropout, inp_list, f_pred_fns, cost, predTh,
     model) = evalModel.build_model(model, checkpoint_params)

    # Initialize the model parameters from the checkpoint file
    zipp(model_npy, model)

    print("\nPredicting using model %s, run for %0.2f epochs with validation perplx at %0.3f\n"
          % (checkpoint_path, checkpoint['epoch'], checkpoint['perplexity']))

    pos_samp = np.arange(1, dtype=np.int32)

    features, _ = loadArbitraryFeatures(params, -1)

    # Disable dropout for prediction
    use_dropout.set_value(0.)

    N = len(candDb['imgblobs'])
    #################### Main Loop ############################################
    for i, img in enumerate(candDb['imgblobs']):
        # fetch a batch of data
        print('image %d/%d \r' % (i, N)),
        candidates = img['candidatelist']
        cbatch_len = len(candidates)

        batch = [{'sentence': s} for s in candidates]
        # Pad short batches with the last candidate up to batch_size.
        for _ in xrange(batch_size - cbatch_len):
            batch.append({'sentence': candidates[-1]})
        # Only the first batch entry carries the image feature.
        batch[0]['image'] = {'feat': features[:, img['imgid']]}

        real_inp_list, lenS = prepare_data(
            batch,
            wordtoix,
            maxlen=maxlen,
            pos_samp=pos_samp,
            prep_for=checkpoint_params['eval_model'])

        # np.squeeze returns a 0-d array for a single score; atleast_1d keeps
        # the slice below valid in that case.
        scrs = np.atleast_1d(np.squeeze(f_pred_fns[1](*real_inp_list)))
        scrs = scrs[:cbatch_len]
        for si, cand in enumerate(candidates):
            cand['logprob'] = float(scrs[si])
            cand.pop('tokens')

        bestcand = scrs.argmax()
        img['candidate'] = candidates[bestcand]
        # tolist() yields plain Python ints, which json can always serialize.
        img['candsort'] = np.argsort(scrs)[::-1].tolist()

    print("")
    jsonFname = '%s_reranked_%s.json' % (checkpoint_params['eval_model'],
                                         params['fname_append'])
    save_file = os.path.join(params['root_path'], jsonFname)
    with open(save_file, 'w') as out_file:
        json.dump(candDb, out_file)
def main(params):
    """Score a given sentence for each listed image and dump the scores as JSON.

    Every line of ``params['imgList']`` must be ``name,sentence`` or
    ``name,sentence,idx``. Each sentence is evaluated against its image in
    batches and the score is written to ``result_struct_<fname_append>.json``
    under ``params['root_path']``.

    Args:
        params: dict of options. Keys read directly here: ``checkpoint_path``,
            ``root_path``, ``imgList``, ``aux_inp_file`` (optional),
            ``eval_batch_size`` (optional, default 100) and ``fname_append``.
            The dict is also forwarded to ``loadArbitraryFeatures``.

    Raises:
        ValueError: if the checkpoint enables auxiliary input but no
            ``aux_inp_file`` was given, or if ``eval_batch_size`` is not
            positive.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(review): pickle.load can execute arbitrary code; only use trusted
    # checkpoint files.
    with open(checkpoint_path, 'rb') as ckpt_file:
        checkpoint = pickle.load(ckpt_file)
    checkpoint_params = checkpoint['params']
    model_npy = checkpoint['model']
    wordtoix = checkpoint['wordtoix']

    checkpoint_params['use_theano'] = 1
    if 'image_feat_size' not in checkpoint_params:
        checkpoint_params['image_feat_size'] = 4096

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = checkpoint_params
    blob['imgblobs'] = []

    # load the tasks.txt file
    root_path = params['root_path']
    with open(params['imgList'], 'r') as list_file:
        img_names_list = list_file.read().splitlines()

    # The first line decides the layout for the whole file.
    n_fields = len(img_names_list[0].rsplit(','))
    if n_fields < 2:
        print('ERROR: List should atleast contain image name and a corresponding sentence')
        return
    rows = [x.rsplit(',') for x in img_names_list]
    img_names = [r[0] for r in rows]
    sentRaw = [r[1] for r in rows]
    if n_fields > 2:
        idxes = [int(r[2]) for r in rows]
    else:
        idxes = xrange(len(img_names_list))

    if checkpoint_params.get('en_aux_inp', 0) and params.get('aux_inp_file', None) is None:
        raise ValueError('ERROR: please specify auxillary input feature using --aux_inp_file')

    # load the features for all images
    features, aux_inp = loadArbitraryFeatures(params, idxes)

    N = len(img_names)

    # Build the sentence-evaluation function
    BatchGenerator = decodeGenerator(checkpoint_params)
    BatchGenerator.build_eval_other_sent(BatchGenerator.model_th,
                                         checkpoint_params, model_npy)
    eval_batch_size = params.get('eval_batch_size', 100)
    if eval_batch_size < 1:
        # A non-positive batch size previously made the loop below spin
        # forever because no image was ever consumed.
        raise ValueError('eval_batch_size must be a positive integer')
    gen_fprop = BatchGenerator.f_eval_other

    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n"
          % (checkpoint['epoch'], checkpoint['perplexity']))

    use_aux = checkpoint_params.get('en_aux_inp', 0)
    for start in xrange(0, N, eval_batch_size):
        print('image %d/%d:\r' % (start, N)),

        # encode the images of this batch
        batch = []
        for n in xrange(start, min(start + eval_batch_size, N)):
            out = {}
            out['image'] = {'feat': features[:, n]}
            out['sentence'] = {'raw': sentRaw[n],
                               'tokens': word_tokenize(sentRaw[n])}
            out['idx'] = n
            if use_aux:
                out['image']['aux_inp'] = aux_inp[:, n]
            batch.append(out)

        inp_list, lenS = prepare_data(batch, wordtoix)

        # perform the work. heavy lifting happens inside
        eval_array = gen_fprop(*inp_list)

        # build up the output: one score per batch entry, read from row 0
        for ix, x in enumerate(batch):
            img_blob = {}
            img_blob['img_path'] = img_names[x['idx']]
            img_blob['candidate'] = {'text': x['sentence']['raw'],
                                     'logprob': float(eval_array[0, ix])}
            blob['imgblobs'].append(img_blob)

    # dump result struct to file
    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as out_file:
        json.dump(blob, out_file)
def main(params):
    """Evaluate one supplied sentence per image and save the scores as JSON.

    ``params['imgList']`` lists ``name,sentence`` or ``name,sentence,idx``
    lines. Sentences are scored in batches of ``eval_batch_size`` and the
    results are written to ``result_struct_<fname_append>.json`` under
    ``params['root_path']``.

    Args:
        params: dict of options. Keys read directly here: ``checkpoint_path``,
            ``root_path``, ``imgList``, ``aux_inp_file`` (optional),
            ``eval_batch_size`` (optional, default 100) and ``fname_append``.
            The dict is also forwarded to ``loadArbitraryFeatures``.

    Raises:
        ValueError: if the checkpoint enables auxiliary input but no
            ``aux_inp_file`` was given, or if ``eval_batch_size`` is not
            positive.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(review): pickle.load can execute arbitrary code; only use trusted
    # checkpoint files.
    with open(checkpoint_path, 'rb') as ckpt_file:
        checkpoint = pickle.load(ckpt_file)
    checkpoint_params = checkpoint['params']
    model_npy = checkpoint['model']
    wordtoix = checkpoint['wordtoix']

    checkpoint_params['use_theano'] = 1
    if 'image_feat_size' not in checkpoint_params:
        checkpoint_params['image_feat_size'] = 4096

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # load the tasks.txt file
    root_path = params['root_path']
    with open(params['imgList'], 'r') as list_file:
        img_names_list = list_file.read().splitlines()

    # The first line decides the layout for the whole file.
    first_line_fields = len(img_names_list[0].rsplit(','))
    if first_line_fields > 2:
        split_lines = [x.rsplit(',') for x in img_names_list]
        idxes = [int(parts[2]) for parts in split_lines]
    elif first_line_fields == 2:
        split_lines = [x.rsplit(',') for x in img_names_list]
        idxes = xrange(len(img_names_list))
    else:
        print('ERROR: List should atleast contain image name and a corresponding sentence')
        return
    img_names = [parts[0] for parts in split_lines]
    sentRaw = [parts[1] for parts in split_lines]

    aux_enabled = checkpoint_params.get('en_aux_inp', 0)
    if aux_enabled and params.get('aux_inp_file', None) is None:
        raise ValueError(
            'ERROR: please specify auxillary input feature using --aux_inp_file'
        )

    # load the features for all images
    features, aux_inp = loadArbitraryFeatures(params, idxes)

    N = len(img_names)

    # Build the sentence-evaluation function
    BatchGenerator = decodeGenerator(checkpoint_params)
    BatchGenerator.build_eval_other_sent(BatchGenerator.model_th,
                                         checkpoint_params, model_npy)
    eval_batch_size = params.get('eval_batch_size', 100)
    if eval_batch_size < 1:
        # With a non-positive batch size the inner loop never runs and `n`
        # never advances, so the outer loop would never terminate.
        raise ValueError('eval_batch_size must be a positive integer')
    gen_fprop = BatchGenerator.f_eval_other

    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n"
          % (checkpoint['epoch'], checkpoint['perplexity']))

    n = 0
    while n < N:
        print('image %d/%d:\r' % (n, N)),

        # Collect up to eval_batch_size images into one batch.
        batch = []
        while n < N and len(batch) < eval_batch_size:
            entry = {
                'image': {'feat': features[:, n]},
                'sentence': {
                    'raw': sentRaw[n],
                    'tokens': word_tokenize(sentRaw[n])
                },
                'idx': n,
            }
            if aux_enabled:
                entry['image']['aux_inp'] = aux_inp[:, n]
            batch.append(entry)
            n += 1

        inp_list, lenS = prepare_data(batch, wordtoix)

        # perform the work. heavy lifting happens inside
        eval_array = gen_fprop(*inp_list)

        # build up the output: one score per batch entry, read from row 0
        for ix, entry in enumerate(batch):
            blob['imgblobs'].append({
                'img_path': img_names[entry['idx']],
                'candidate': {
                    'text': entry['sentence']['raw'],
                    'logprob': float(eval_array[0, ix])
                },
            })

    # dump result struct to file
    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as out_file:
        json.dump(blob, out_file)
def main(params):
    """Generate captions for a list of images and save them as JSON and HTML.

    Works in two modes selected by ``params['multi_model']``:

    * ``0``: a single checkpoint at ``params['checkpoint_path']`` is loaded
      and used through ``prepPredictor`` / ``predict``.
    * otherwise: ``params['checkpoint_path']`` is iterated as a collection of
      checkpoint paths; every model gets an equal ``comb_weight`` of
      ``1.0 / params['nmodels']`` and prediction goes through
      ``prepMultiPredictor`` / ``predictMulti`` of the first generator. The
      vocabulary (``ixtoword``) and the reported epoch/perplexity come from
      the last checkpoint loaded.

    Other keys read from ``params``: ``beam_size``, ``nmodels`` (multi-model
    mode), ``root_path``, ``imgList`` and ``fname_append``. ``params`` is also
    forwarded as-is to ``loadArbitraryFeatures``.

    Each line of the ``imgList`` file is either ``image_name`` or
    ``image_name,feature_index``; the first line decides which layout is used.

    Outputs, both written under ``params['root_path']``:
    ``result_struct_<fname_append>.json`` (also rewritten periodically while
    predicting, so partial results survive an interruption) and
    ``result_<fname_append>.html``.

    Returns:
        None.
    """
    multi_model = params['multi_model'] != 0

    # load the checkpoint(s)
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    if not multi_model:
        checkpoint_path = params['checkpoint_path']
        print('loading checkpoint %s' % (checkpoint_path,))
        with open(checkpoint_path, 'rb') as ckpt_file:
            checkpoint = pickle.load(ckpt_file)
        checkpoint_params = checkpoint['params']
        model_npy = checkpoint['model']
        checkpoint_params['use_theano'] = 1
        if 'image_feat_size' not in checkpoint_params:
            checkpoint_params['image_feat_size'] = 4096

        BatchGenerator = decodeGenerator(checkpoint_params)
        # Compile and init the theano predictor
        BatchGenerator.prepPredictor(model_npy, checkpoint_params,
                                     params['beam_size'])
        model = BatchGenerator.model_th
    else:
        BatchGenerator = []
        model_npy = []
        modelTh = []
        checkpoint_params = []
        for i, checkpoint_path in enumerate(params['checkpoint_path']):
            with open(checkpoint_path, 'rb') as ckpt_file:
                checkpoint = pickle.load(ckpt_file)
            model_npy.append(checkpoint['model'])
            checkpoint_params.append(checkpoint['params'])
            checkpoint_params[i]['use_theano'] = 1
            BatchGenerator.append(decodeGenerator(checkpoint_params[i]))
            # copy the stored weights into the freshly built theano model
            zipp(model_npy[i], BatchGenerator[i].model_th)
            modelTh.append(BatchGenerator[i].model_th)
            modelTh[i]['comb_weight'] = 1.0 / params['nmodels']

        BatchGenerator[0].prepMultiPredictor(modelTh, checkpoint_params,
                                             params['beam_size'],
                                             params['nmodels'])

    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # load the tasks.txt file and set up feature loading
    root_path = params['root_path']
    with open(params['imgList'], 'r') as list_file:
        img_names_list = list_file.read().splitlines()

    # The first line decides the layout; an empty list is treated as a plain
    # (empty) list of image names instead of raising IndexError.
    if img_names_list and len(img_names_list[0].split(',')) > 1:
        rows = [line.split(',') for line in img_names_list]
        img_names = [row[0] for row in rows]
        idxes = [int(row[1]) for row in rows]
    else:
        img_names = img_names_list
        idxes = xrange(len(img_names_list))

    # Disabled check (kept for reference):
    # if checkpoint_params.get('en_aux_inp',0) and (params.get('aux_inp_file','None') == 'None'):
    #     raise ValueError('ERROR: please specify auxillary input feature using --aux_inp_file')

    # load the features for all images
    features, aux_inp = loadArbitraryFeatures(params, idxes)
    N = len(img_names)

    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n"
          % (checkpoint['epoch'], checkpoint['perplexity']))

    kwparams = {'beam_size': params['beam_size']}
    if multi_model:
        kwparams['nmodels'] = params['nmodels']

    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)

    def decode(prediction):
        """Turn one (logprob, word indices) prediction into an output dict."""
        # ix 0 is the END token, skip that
        text = ' '.join(
            [ixtoword[int(ix)] for ix in prediction[1] if ix > 0])
        return {'text': text, 'logprob': float(prediction[0])}

    def dump_results():
        """Write the current result blob to the JSON file."""
        print('writing predictions to %s...' % (save_file,))
        with open(save_file, 'w') as out_file:
            json.dump(blob, out_file)

    # iterate over all images and predict sentences
    for n in xrange(N):
        print('image %d/%d:' % (n, N))

        # encode the image
        if not multi_model:
            img = {'feat': features[:, n]}
            if checkpoint_params.get('en_aux_inp', 0):
                img['aux_inp'] = aux_inp[:, n]
            img['local_file_path'] = img_names[n]
            # perform the work. heavy lifting happens inside
            Ys = BatchGenerator.predict([{'image': img}], model,
                                        checkpoint_params, **kwparams)
        else:
            # one input per model, each with that model's own features
            batch = []
            for i in xrange(params['nmodels']):
                img = {'feat': features[i][:, n]}
                if checkpoint_params[i].get('en_aux_inp', 0):
                    img['aux_inp'] = aux_inp[i][:, n]
                img['local_file_path'] = img_names[n]
                batch.append({'image': img})
            Ys = BatchGenerator[0].predictMulti(batch, checkpoint_params,
                                                **kwparams)

        # take predictions for the first (and only) image we passed in;
        # these are sorted with highest on top
        top_predictions = Ys[0]
        best = decode(top_predictions[0])
        print('PRED: (%f) %s' % (best['logprob'], best['text']))

        blob['imgblobs'].append({
            'img_path': img_names[n],
            'candidate': best,
            # all the remaining (lower ranked) candidates
            'candidatelist': [decode(p) for p in top_predictions[1:]],
        })

        # periodic checkpoint of the results gathered so far
        if (n % 5000) == 1:
            dump_results()

    # dump result struct to file
    dump_results()

    # dump output html
    html = ''.join(
        '<img src="%s" height="400"><br>(%f) %s <br><br>' % (
            entry['img_path'],
            entry['candidate']['logprob'],
            entry['candidate']['text'])
        for entry in blob['imgblobs'])
    html_file = os.path.join(root_path,
                             'result_%s.html' % (params['fname_append']))
    print('writing html result file to %s...' % (html_file,))
    with open(html_file, 'w') as out_file:
        out_file.write(html)