def __init__(self, checkpoint_path):
    """Load a pickled checkpoint and prepare the caption decoder.

    Args:
        checkpoint_path: path to a pickle file containing a dict with the
            keys 'model', 'params' and 'ixtoword'.

    Raises:
        IOError/OSError: if the checkpoint file cannot be opened.
        KeyError: if one of the expected keys is missing.
    """
    # NOTE: unpickling can execute arbitrary code; only load checkpoints
    # that come from a trusted source.
    # The context manager closes the file even if unpickling fails.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)

    self.model = checkpoint['model']
    self.checkpoint_params = checkpoint['params']
    self.decoder = decodeGenerator(self.checkpoint_params)
    self.ixtoword = checkpoint['ixtoword']
    # Default keyword arguments stored for later prediction calls.
    self.kwparams = {'beam_size': 5}
def RNNGenCost(batch, model, params, misc):
    """Cost function: return cost, gradients and perplexity for one batch.

    Args:
        batch: list of image-sentence pairs; the ground-truth words of each
            pair are read from pair["sentence"]["tokens"].
        model: mapping from parameter name to parameter matrix.
        params: settings dict; "regc" is the L2 regularization strength and
            the whole dict is passed to decodeGenerator.
        misc: dict providing "wordtoix" (word -> index) and, when regc > 0,
            "regularize" (names of the parameters to regularize).

    Returns:
        dict with
          "cost":  {"reg_cost", "loss_cost", "total_cost"} (per-batch averages),
          "grad":  gradients returned by the generator, divided by batch size,
          "stats": {"ppl2": 2 ** (mean negative log2 probability)}.
    """
    regc = params["regc"]  # regularization cost
    generator = decodeGenerator(params)
    wordtoix = misc["wordtoix"]

    # Forward the RNN on each image-sentence pair. The generator returns a
    # list of score matrices (one row per predicted word) and a list of cache
    # objects that are needed for backprop.
    Ys, gen_caches = generator.forward(batch, model, params, misc,
                                       predict_mode=False)

    # Softmax cost for every sentence, and the gradient on top of the scores.
    loss_cost = 0.0
    dYs = []
    logppl = 0.0
    logppln = 0
    for i, pair in enumerate(batch):
        # Ground-truth indices we expect to see; unknown words are dropped.
        gtix = [wordtoix[w] for w in pair["sentence"]["tokens"]
                if w in wordtoix]
        gtix.append(0)  # the END token must be predicted at the end
        rows = np.arange(len(gtix))

        # Row-wise softmax, shifted by the row maximum for numerical stability.
        Y = Ys[i]
        e = np.exp(Y - np.amax(Y, axis=1, keepdims=True))
        P = e / np.sum(e, axis=1, keepdims=True)

        # Probability assigned to each ground-truth word; the small constant
        # keeps the logs finite.
        p_true = 1e-20 + P[rows, gtix]
        loss_cost += -np.sum(np.log(p_true))
        logppl += -np.sum(np.log2(p_true))  # accumulate log2 perplexities
        logppln += len(gtix)

        # Softmax derivative in place: dL/dY = P - onehot(gtix). Every
        # (row, column) pair is unique, so one indexed subtraction suffices.
        P[rows, gtix] -= 1
        dYs.append(P)

    # Backprop the RNN.
    grads = generator.backward(dYs, gen_caches)

    # Add L2 regularization cost and gradients.
    reg_cost = 0.0
    if regc > 0:
        for p in misc["regularize"]:
            mat = model[p]
            reg_cost += 0.5 * regc * np.sum(mat * mat)
            grads[p] += regc * mat

    # Normalize the cost and gradient by the batch size.
    batch_size = len(batch)
    reg_cost /= batch_size
    loss_cost /= batch_size
    for k in grads:
        grads[k] /= batch_size

    return {
        "cost": {
            "reg_cost": reg_cost,
            "loss_cost": loss_cost,
            "total_cost": loss_cost + reg_cost,
        },
        "grad": grads,
        "stats": {"ppl2": 2 ** (logppl / logppln)},
    }
def main(params):
    """Caption every image listed in tasks.txt and write JSON/HTML results.

    Reads <root_path>/tasks.txt (one image path per line) and the 'feats'
    entry of <root_path>/vgg_feats.mat (one feature column per image), runs
    beam-search prediction per image, then writes
    <root_path>/result_struct.json and <root_path>/result.html.

    The names checkpoint_params, model and ixtoword are not defined here;
    they must be available from the enclosing (module) scope.

    Args:
        params: dict providing at least 'root_path' and 'beam_size'. It is
            also stored verbatim in the JSON output.
    """
    # Output blob which we will dump to JSON for visualizing the results.
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = checkpoint_params
    blob['imgblobs'] = []

    # Load the tasks.txt file.
    root_path = params['root_path']
    with open(os.path.join(root_path, 'tasks.txt'), 'r') as tasks_file:
        img_names = tasks_file.read().splitlines()

    # Load the features for all images.
    features_path = os.path.join(root_path, 'vgg_feats.mat')
    features_struct = scipy.io.loadmat(features_path)
    features = features_struct['feats']  # one column of features per image
    _, N = features.shape

    # Iterate over all images and predict sentences.
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}  # same for every image
    for n in xrange(N):
        print('image %d/%d:' % (n, N))

        # Encode the image.
        img = {}
        img['feat'] = features[:, n]
        img['local_file_path'] = img_names[n]

        # Perform the work; the heavy lifting happens inside.
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)

        # Build up the output.
        img_blob = {}
        img_blob['img_path'] = img['local_file_path']

        # Encode the top prediction.
        top_predictions = Ys[0]  # predictions for the only image passed in
        top_prediction = top_predictions[0]  # sorted with highest on top
        # ix 0 is the END token, skip that.
        candidate = ' '.join(
            [ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        print('PRED: (%f) %s' % (top_prediction[0], candidate))
        img_blob['candidate'] = {'text': candidate,
                                 'logprob': top_prediction[0]}
        blob['imgblobs'].append(img_blob)

    # Dump the result struct to file; `with` guarantees flush and close.
    save_file = os.path.join(root_path, 'result_struct.json')
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as json_file:
        json.dump(blob, json_file)

    # Dump the output html, collecting fragments and joining once.
    html_parts = []
    for img_blob in blob['imgblobs']:
        html_parts.append(
            '<img src="%s" height="400"><br>' % (img_blob['img_path'], ))
        html_parts.append(
            '(%f) %s <br><br>' % (img_blob['candidate']['logprob'],
                                  img_blob['candidate']['text']))
    html = ''.join(html_parts)

    html_file = os.path.join(root_path, 'result.html')
    print('writing html result file to %s...' % (html_file, ))
    with open(html_file, 'w') as out_file:
        out_file.write(html)
def RNNGenCost(batch, model, params, misc):
    """Cost function: return cost, gradients and perplexity for one batch.

    Args:
        batch: list of image-sentence pairs; pair['sentence'] is a string
            that is split on whitespace to obtain the ground-truth words.
        model: mapping from parameter name to parameter matrix.
        params: settings dict; 'regc' is the L2 regularization strength and
            the whole dict is passed to decodeGenerator.
        misc: dict providing 'wordtoix' (word -> index) and, when regc > 0,
            'regularize' (names of the parameters to regularize).

    Returns:
        dict with
          'cost':  {'reg_cost', 'loss_cost', 'total_cost'} (per-batch averages),
          'grad':  gradients returned by the generator, divided by batch size,
          'stats': {'ppl2': 2 ** (mean negative log2 probability)}.
    """
    regc = params['regc']  # regularization cost
    generator = decodeGenerator(params)
    wordtoix = misc['wordtoix']

    # Forward the RNN on each image-sentence pair. The generator returns a
    # list of score matrices (one row per predicted word) and a list of cache
    # objects that are needed for backprop.
    Ys, gen_caches = generator.forward(batch, model, params, misc,
                                       predict_mode=False)

    # Softmax cost for every sentence, and the gradient on top of the scores.
    loss_cost = 0.0
    dYs = []
    logppl = 0.0
    logppln = 0
    for i, pair in enumerate(batch):
        # Ground-truth indices we expect to see; unknown words are dropped.
        gtix = [wordtoix[w] for w in pair['sentence'].split()
                if w in wordtoix]
        gtix.append(0)  # the END token must be predicted at the end
        rows = np.arange(len(gtix))

        # Row-wise softmax, shifted by the row maximum for numerical stability.
        Y = Ys[i]
        e = np.exp(Y - np.amax(Y, axis=1, keepdims=True))
        P = e / np.sum(e, axis=1, keepdims=True)

        # Probability assigned to each ground-truth word; the small constant
        # keeps the logs finite.
        p_true = 1e-20 + P[rows, gtix]
        loss_cost += -np.sum(np.log(p_true))
        logppl += -np.sum(np.log2(p_true))  # accumulate log2 perplexities
        logppln += len(gtix)

        # Softmax derivative in place: dL/dY = P - onehot(gtix). Every
        # (row, column) pair is unique, so one indexed subtraction suffices.
        P[rows, gtix] -= 1
        dYs.append(P)

    # Backprop the RNN.
    grads = generator.backward(dYs, gen_caches)

    # Add L2 regularization cost and gradients.
    reg_cost = 0.0
    if regc > 0:
        for p in misc['regularize']:
            mat = model[p]
            reg_cost += 0.5 * regc * np.sum(mat * mat)
            grads[p] += regc * mat

    # Normalize the cost and gradient by the batch size.
    batch_size = len(batch)
    reg_cost /= batch_size
    loss_cost /= batch_size
    for k in grads:
        grads[k] /= batch_size

    return {
        'cost': {
            'reg_cost': reg_cost,
            'loss_cost': loss_cost,
            'total_cost': loss_cost + reg_cost,
        },
        'grad': grads,
        'stats': {'ppl2': 2 ** (logppl / logppln)},
    }
def __init__(self, cpfile, taskfile):
    """Load the checkpoint and the list of image names for this component.

    Args:
        cpfile: path to a pickle file containing a dict with the keys
            'params', 'model', 'wordtoix' and 'ixtoword'.
        taskfile: path to a text file; each line becomes one entry of
            self.img_names.

    Raises:
        IOError/OSError: if either file cannot be opened.
        KeyError: if an expected checkpoint key is missing.
    """
    super(RNNComponent, self).__init__()

    # NOTE: unpickling can execute arbitrary code; only load checkpoints
    # that come from a trusted source.
    with open(cpfile, "rb") as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)

    self.params = checkpoint["params"]
    self.dataset = self.params["dataset"]
    self.model = checkpoint["model"]
    self.misc = {}
    self.misc["wordtoix"] = checkpoint["wordtoix"]
    self.ixtoword = checkpoint["ixtoword"]
    self.BatchGenerator = decodeGenerator(self.params)

    with open(taskfile, "r") as task_file:
        self.img_names = task_file.read().splitlines()
def predict(self, features):
    """Return the highest-ranked caption for the first column of `features`.

    Args:
        features: 2-D indexable array; only column 0 is used.

    Returns:
        The words of the top prediction joined by single spaces, with
        index 0 (the END token) left out.
    """
    # NOTE(review): decodeGenerator receives CHECKPOINT_PATH here, while the
    # predict call below is given self.checkpoint_params -- confirm which
    # argument decodeGenerator expects.
    decoder = decodeGenerator(CHECKPOINT_PATH)

    image = {'feat': features[:, 0]}
    search_options = {'beam_size': self.BEAM_SIZE}
    predictions = decoder.predict([{'image': image}],
                                  self.language_model,
                                  self.checkpoint_params,
                                  **search_options)

    # predictions[0] belongs to the only image passed in; its first entry is
    # the best-ranked one, and element [1] of that entry holds word indices.
    best = predictions[0][0]
    words = []
    for ix in best[1]:
        if ix > 0:  # ix 0 is the END token, skip that
            words.append(self.ixtoword[ix])
    return ' '.join(words)
def get_sentences(file_name, feats_path):
    """Return the top beam-search caption for the image in a feature file.

    The names checkpoint_params, model and ixtoword are not defined here;
    they must be available from the enclosing (module) scope.

    Args:
        file_name: stored as the image's 'local_file_path'.
        feats_path: path to a .mat file whose 'feats' entry holds the image
            features; only column 0 is used.

    Returns:
        The caption as a str, words joined by single spaces, with index 0
        (the END token) left out.
    """
    feats = scipy.io.loadmat(feats_path)['feats']
    # Unpacking doubles as a check that the array is 2-D; a single image
    # (one column) is expected.
    feat_dim, num_images = feats.shape

    decoder = decodeGenerator(checkpoint_params)
    image = {'feat': feats[:, 0], 'local_file_path': file_name}

    search_options = {'beam_size': 30}  # beam size set here
    predictions = decoder.predict([{'image': image}], model,
                                  checkpoint_params, **search_options)

    # predictions[0] belongs to the only image passed in; entries are sorted
    # with the highest-scoring one first.
    best = predictions[0][0]
    words = []
    for ix in best[1]:
        if ix > 0:  # ix 0 is the END token, skip that
            words.append(ixtoword[ix])
    return str(' '.join(words))
def main(params):
    """Caption the test split, score it with BLEU and dump result files.

    Steps: load the pickled checkpoint, predict a caption for every test
    image, write eval/output and eval/reference0..4, run
    eval/multi-bleu.perl, write the result struct and the caption list as
    JSON, then call eval_tools.metrics.run.

    Args:
        params: dict providing 'checkpoint_path', 'max_images',
            'dump_folder', 'beam_size', 'result_struct_filename' and
            'out_dir'.
    """
    # Load the checkpoint.
    checkpoint_path = params['checkpoint_path']
    max_images = params['max_images']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    dataset = checkpoint_params['dataset']
    model = checkpoint['model']
    dump_folder = params['dump_folder']

    if dump_folder:
        print('creating dump folder ' + dump_folder)
        os.system('mkdir -p ' + dump_folder)

    # Fetch the data provider.
    dp = getDataProvider(dataset)

    misc = {}
    misc['wordtoix'] = checkpoint['wordtoix']
    ixtoword = checkpoint['ixtoword']

    # Output blob which we will dump to JSON for visualizing the results.
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = checkpoint_params
    blob['imgblobs'] = []

    # Iterate over all images in the test set and predict sentences.
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}  # same for every image
    n = 0
    all_references = []
    all_candidates = []
    captions_res = []
    for img in dp.iterImages(split='test', max_images=max_images):
        n += 1
        print('image %d/%d:' % (n, max_images))
        # One space-joined string per reference sentence.
        references = [' '.join(x['tokens']) for x in img['sentences']]
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)

        img_blob = {}  # we will build this up
        img_blob['img_path'] = img['local_file_path']
        img_blob['imgid'] = img['imgid']
        img_blob['id'] = img['id']

        if dump_folder:
            # Copy the source file to the dump folder. This makes it easier
            # to distribute results into a webpage, because all images that
            # were predicted on are in a single folder.
            source_file = img['local_file_path']
            target_file = os.path.join(
                dump_folder, os.path.basename(img['local_file_path']))
            os.system('cp %s %s' % (source_file, target_file))

        # Encode the human-provided references; only the first is printed.
        if references:
            print('GT: ' + references[0])
        img_blob['references'] = [{'text': gtsent} for gtsent in references]

        # Now evaluate and encode the top prediction.
        top_predictions = Ys[0]  # predictions for the only image passed in
        top_prediction = top_predictions[0]  # sorted with highest on top
        # ix 0 is the END token, skip that.
        candidate = ' '.join(
            [ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        print('PRED: (%f) %s' % (top_prediction[0], candidate))

        # Save for later eval.
        all_references.append(references)
        all_candidates.append(candidate)
        captions_res.append({'image_id': img_blob['id'],
                             'caption': candidate})

        img_blob['candidate'] = {'text': candidate,
                                 'logprob': top_prediction[0]}
        blob['imgblobs'].append(img_blob)

    # Use the perl script to eval the BLEU score for fair comparison to other
    # research work. First write the intermediate files; each is closed
    # before the script runs so its content is fully on disk.
    print('writing intermediate files into eval/')
    with open('eval/output', 'w') as out_file:
        out_file.write('\n'.join(all_candidates))
    # Reads the first five references of every image.
    for q in xrange(5):
        with open('eval/reference%d' % q, 'w') as ref_file:
            ref_file.write('\n'.join([x[q] for x in all_references]))

    # Invoke the perl script to get BLEU scores.
    print('invoking eval/multi-bleu.perl script...')
    owd = os.getcwd()
    os.chdir('eval')
    try:
        os.system('./multi-bleu.perl reference < output')
    finally:
        os.chdir(owd)  # always restore the working directory

    # # now also evaluate test split perplexity
    # gtppl = eval_split('test', dp, model, checkpoint_params, misc, eval_max_images = max_images)
    # print 'perplexity of ground truth words based on dictionary of %d words: %f' % (len(ixtoword), gtppl)
    # blob['gtppl'] = gtppl

    # Dump the result struct to file.
    print('saving result struct to %s' % (params['result_struct_filename'], ))
    with open(params['result_struct_filename'], 'w') as struct_file:
        json.dump(blob, struct_file)

    # The algorithm name is the second '_'-separated piece of the checkpoint
    # path. The caption file is closed before metrics.run is called.
    alg_name = params['checkpoint_path'].split('_')[1]
    res_file_name = (params['out_dir'] + '/captions_val_' + alg_name +
                     '_results.json')
    with open(res_file_name, 'w') as res_file:
        json.dump(captions_res, res_file)

    from eval_tools import metrics
    metrics.run(dataset, alg_name, params['out_dir'])
def main(params):
    """Caption the test split, score it with BLEU and perplexity, dump JSON.

    Steps: load the pickled checkpoint, predict a caption for every test
    image, write eval/output and eval/reference0..4, run
    eval/multi-bleu.perl, evaluate test-split perplexity with eval_split,
    then write the result struct as JSON.

    Args:
        params: dict providing 'checkpoint_path', 'max_images', 'beam_size'
            and 'result_struct_filename'.
    """
    # Load the checkpoint.
    checkpoint_path = params['checkpoint_path']
    max_images = params['max_images']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    dataset = checkpoint_params['dataset']
    model = checkpoint['model']

    # Fetch the data provider.
    dp = getDataProvider(dataset)

    misc = {}
    misc['wordtoix'] = checkpoint['wordtoix']
    ixtoword = checkpoint['ixtoword']

    # Output blob which we will dump to JSON for visualizing the results.
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = checkpoint_params
    blob['imgblobs'] = []

    # Iterate over all images in the test set and predict sentences.
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}  # same for every image
    n = 0
    all_references = []
    all_candidates = []
    for img in dp.iterImages(split='test', max_images=max_images):
        n += 1
        print('image %d/%d:' % (n, max_images))
        # One space-joined string per reference sentence.
        references = [' '.join(x['tokens']) for x in img['sentences']]
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)

        img_blob = {}  # we will build this up
        img_blob['img_path'] = img['local_file_path']
        img_blob['imgid'] = img['imgid']

        # Encode the human-provided references.
        img_blob['references'] = []
        for gtsent in references:
            print('GT: ' + gtsent)
            img_blob['references'].append({'text': gtsent})

        # Now evaluate and encode the top prediction.
        top_predictions = Ys[0]  # predictions for the only image passed in
        top_prediction = top_predictions[0]  # sorted with highest on top
        # ix 0 is the END token, skip that.
        candidate = ' '.join(
            [ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        print('PRED: (%f) %s' % (top_prediction[0], candidate))

        # Save for later eval.
        all_references.append(references)
        all_candidates.append(candidate)

        img_blob['candidate'] = {'text': candidate,
                                 'logprob': top_prediction[0]}
        blob['imgblobs'].append(img_blob)

    # Use the perl script to eval the BLEU score for fair comparison to other
    # research work. First write the intermediate files; each is closed
    # before the script runs so its content is fully on disk.
    print('writing intermediate files into eval/')
    with open('eval/output', 'w') as out_file:
        out_file.write('\n'.join(all_candidates))
    # Reads the first five references of every image.
    for q in xrange(5):
        with open('eval/reference%d' % q, 'w') as ref_file:
            ref_file.write('\n'.join([x[q] for x in all_references]))

    # Invoke the perl script to get BLEU scores.
    print('invoking eval/multi-bleu.perl script...')
    owd = os.getcwd()
    os.chdir('eval')
    try:
        os.system('./multi-bleu.perl reference < output')
    finally:
        os.chdir(owd)  # always restore the working directory

    # Now also evaluate test split perplexity.
    gtppl = eval_split('test', dp, model, checkpoint_params, misc,
                       eval_max_images=max_images)
    print('perplexity of ground truth words based on dictionary of %d words: %f'
          % (len(ixtoword), gtppl))
    blob['gtppl'] = gtppl

    # Dump the result struct to file; `with` guarantees flush and close.
    print('saving result struct to %s' % (params['result_struct_filename'], ))
    with open(params['result_struct_filename'], 'w') as struct_file:
        json.dump(blob, struct_file)
def main(params):
    """Caption images from a pickled feature matrix and dump the top-5 results.

    Loads the checkpoint, reads image names from params['task_file'] and a
    pickled feature array from params['feature_file'] (transposed so that
    each column is one image), predicts captions with beam search, and writes
    a JSON blob to params['out_file']. For every image the blob stores up to
    five candidates (text and logprob lists) plus the prediction time.

    Args:
        params: dict providing 'checkpoint_path', 'root_path', 'task_file',
            'feature_file', 'beam_size' and 'out_file'.
    """
    # Load the checkpoint.
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    dataset = checkpoint_params['dataset']
    model = checkpoint['model']
    misc = {}
    misc['wordtoix'] = checkpoint['wordtoix']
    ixtoword = checkpoint['ixtoword']

    # Output blob which we will dump to JSON for visualizing the results.
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = checkpoint_params
    blob['imgblobs'] = []

    # Load the task file (one image path per line).
    root_path = params['root_path']  # only referenced by disabled code paths
    task_file = params['task_file']
    with open(task_file, 'r') as names_file:
        img_names = names_file.read().splitlines()

    # Load the features for all images. An earlier variant that read
    # <root_path>/vgg_feats.mat through scipy.io.loadmat was disabled in
    # favor of this pickled array.
    features_path = params['feature_file']
    # Binary mode is what pickle expects; the original opened the file in
    # the default text mode.
    with open(features_path, 'rb') as features_file:
        features = pickle.load(features_file)
    features = features.T  # after the transpose each column is one image
    D, N = features.shape

    # Iterate over all images and predict sentences.
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}  # same for every image
    for n in xrange(N):
        print('image %d/%d:' % (n, N))

        # Encode the image.
        img = {}
        img['feat'] = features[:, n]
        img['local_file_path'] = img_names[n]

        # Perform the work; the heavy lifting happens inside. Time it.
        tic = time.time()
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)
        toc = time.time()

        print('image %d/%d: %f' % (n, N, toc - tic))

        # Build up the output.
        img_blob = {}
        img_blob['img_path'] = img['local_file_path']
        img_blob['rnn_time'] = (toc - tic)
        img_blob['candidate'] = {'text': [], 'logprob': []}

        # Encode up to five of the leading predictions.
        top_predictions = Ys[0]  # predictions for the only image passed in
        for i in xrange(min(5, len(top_predictions))):
            top_prediction = top_predictions[i]
            # ix 0 is the END token, skip that.
            candidate = ' '.join(
                [ixtoword[ix] for ix in top_prediction[1] if ix > 0])
            img_blob['candidate']['text'].append(candidate)
            img_blob['candidate']['logprob'].append(top_prediction[0])

        blob['imgblobs'].append(img_blob)

    # Dump the result struct to file; `with` guarantees flush and close.
    save_file = params['out_file']
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as json_file:
        json.dump(blob, json_file)

    # NOTE(review): the original block ended by opening a ''' string (a
    # disabled HTML dump) that is never closed in the visible source; that
    # dangling opener is dropped here. Confirm nothing after this function
    # depended on it.
def main(params):
    """Train the caption generator with Theano, validating and checkpointing.

    Stages, in order: read the data provider and derive feature sizes; build
    (or restore) the vocabulary; create the solver and generator; optionally
    add image/aux feature encoders and an attention network; build the cost
    graph with L2 regularization; compile the eval and gradient functions;
    then run the training loop, which periodically writes a JSON status
    report, evaluates on the 'val' split and pickles a checkpoint when the
    validation score improves.

    `params` is read throughout and also mutated (e.g. 'aux_inp_size',
    'vocabulary_size', 'learning_rate', 'lr_decay_st_epoch').

    The names checkpoint_init, model_init_from, rg_init and sentTagMap are
    used but not defined in this function; they must come from the enclosing
    scope.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    source, so the nesting of some statements is inferred -- confirm against
    the original layout where flagged below.
    """
    batch_size = params['batch_size']
    word_count_threshold = params['word_count_threshold']
    max_epochs = params['max_epochs']
    host = socket.gethostname()  # get computer hostname

    #--------------------------------- Init data provider and load data+features #---------------------------------#
    # fetch the data provider
    dp = getDataProvider(params)
    # When ground-truth sentences are encoded, the aux input size is derived
    # from the encoder hidden size; otherwise it comes from the data provider.
    params['aux_inp_size'] = params['featenc_hidden_size'] * params[
        'n_encgt_sent'] if params['encode_gt_sentences'] else dp.aux_inp_size
    params['featenc_hidden_size'] = params['featenc_hidden_size'] if params[
        'encode_gt_sentences'] else params['aux_inp_size']

    params['image_feat_size'] = dp.img_feat_size
    print 'Image feature size is %d, and aux input size is %d' % (
        params['image_feat_size'], params['aux_inp_size'])

    #--------------------------------- Preprocess sentences and build Vocabulary #---------------------------------#
    misc = {
    }  # stores various misc items that need to be passed around the framework
    # go over all training sentences and find the vocabulary we want to use, i.e. the words that occur
    # at least word_count_threshold number of times
    if params['checkpoint_file_name'] == 'None':
        if params['class_out_factoring'] == 0:
            misc['wordtoix'], misc[
                'ixtoword'], bias_init_vector = preProBuildWordVocab(
                    dp.iterSentences('train'), word_count_threshold)
        else:
            [misc['wordtoix'], misc['classes']
             ], [misc['ixtoword'], misc['clstotree'], misc['ixtoclsinfo']
                 ], [bias_init_vector, bias_init_inter_class
                     ] = preProBuildWordVocab(dp.iterSentences('train'),
                                              word_count_threshold, params)
            params['nClasses'] = bias_init_inter_class.shape[0]
            params['ixtoclsinfo'] = misc['ixtoclsinfo']
    else:
        # Resuming: reuse the vocabulary stored in the checkpoint.
        misc = checkpoint_init['misc']
        params['nClasses'] = checkpoint_init['params']['nClasses']
        if 'ixtoclsinfo' in misc:
            params['ixtoclsinfo'] = misc['ixtoclsinfo']

    params['vocabulary_size'] = len(misc['wordtoix'])
    params['output_size'] = len(misc['ixtoword'])  # these should match though
    print len(misc['wordtoix']), len(misc['ixtoword'])

    #------------------------------ Initialize the solver/generator and build forward path #-----------------------#
    # Initialize the optimizer
    solver = Solver(params['solver'])
    # This initializes the model parameters and does matrix initializations
    lstmGenerator = decodeGenerator(params)
    model, misc['update'], misc['regularize'] = (lstmGenerator.model_th,
                                                 lstmGenerator.update_list,
                                                 lstmGenerator.regularize)

    # force overwrite here. The bias to the softmax is initialized to reflect word frequencies
    # This is a bit of a hack
    if params['checkpoint_file_name'] == 'None':
        model['bd'].set_value(bias_init_vector.astype(config.floatX))
        if params['class_out_factoring'] == 1:
            model['bdCls'].set_value(
                bias_init_inter_class.astype(config.floatX))

    #----------------- If we are using feature encoders -----------------------
    # This mode can now also be used for encoding GT sentences.
    # 'use_encoder_for' is tested as a bit mask: bit 1 selects the image
    # encoder branch, bit 2 the aux encoder branch.
    if params['use_encoder_for'] & 1:
        if params['encode_gt_sentences']:
            xI = tensor.zeros((batch_size, params['image_encoding_size']))
            imgFeatEnc_inp = []
            # NOTE(review): imgenc_use_dropout is not assigned on this path,
            # yet the training loop reads it whenever bit 1 is set -- confirm
            # the two flags are never enabled together.
        else:
            imgFeatEncoder = RecurrentFeatEncoder(params['image_feat_size'],
                                                  params['word_encoding_size'],
                                                  params,
                                                  mdl_prefix='img_enc_',
                                                  features=dp.features.T)
            mdlLen = len(model.keys())
            model.update(imgFeatEncoder.model_th)
            # Guards against parameter-name collisions when merging models.
            assert (len(model.keys()) == (mdlLen +
                                          len(imgFeatEncoder.model_th.keys())))
            misc['update'].extend(imgFeatEncoder.update_list)
            misc['regularize'].extend(imgFeatEncoder.regularize)
            (imgenc_use_dropout, imgFeatEnc_inp, xI,
             updatesLSTMImgFeat) = imgFeatEncoder.build_model(model, params)
    else:
        xI = None
        imgFeatEnc_inp = []

    if params['use_encoder_for'] & 2:
        aux_enc_inp = model['Wemb'] if params[
            'encode_gt_sentences'] else dp.aux_inputs.T
        hid_size = params['featenc_hidden_size']
        auxFeatEncoder = RecurrentFeatEncoder(hid_size,
                                              params['image_encoding_size'],
                                              params,
                                              mdl_prefix='aux_enc_',
                                              features=aux_enc_inp)
        mdlLen = len(model.keys())
        model.update(auxFeatEncoder.model_th)
        # Guards against parameter-name collisions when merging models.
        assert (len(model.keys()) == (mdlLen +
                                      len(auxFeatEncoder.model_th.keys())))
        misc['update'].extend(auxFeatEncoder.update_list)
        misc['regularize'].extend(auxFeatEncoder.regularize)
        (auxenc_use_dropout, auxFeatEnc_inp, xAux,
         updatesLSTMAuxFeat) = auxFeatEncoder.build_model(model, params)

        if params['encode_gt_sentences']:
            # Reshape it size(batch_size, n_gt, hidden_size)
            xAux = xAux.reshape(
                (-1, params['n_encgt_sent'], params['featenc_hidden_size']))
            # Convert it to size (batch_size, n_gt*hidden_size
            xAux = xAux.flatten(2)
    else:
        auxFeatEnc_inp = []
        xAux = None

    #--------------------------------- Initialize the Attention Network #-------------------------------#
    if params['use_attn'] != None:
        attnModel = AttentionNetwork(params['image_feat_size'],
                                     params['hidden_size'],
                                     params,
                                     mdl_prefix='attn_mlp_')
        mdlLen = len(model.keys())
        model.update(attnModel.model_th)
        # Guards against parameter-name collisions when merging models.
        assert (len(model.keys()) == (mdlLen +
                                      len(attnModel.model_th.keys())))
        misc['update'].extend(attnModel.update_list)
        misc['regularize'].extend(attnModel.regularize)
        attn_nw_func = attnModel.build_model
    else:
        attn_nw_func = None

    #--------------------------------- Build the language model graph #---------------------------------#
    # Define the computational graph for relating the input image features and word indices to the
    # log probability cost funtion.
    (use_dropout, inp_list_gen, f_pred_prob, cost, predTh,
     updatesLSTM) = lstmGenerator.build_model(model,
                                              params,
                                              xI,
                                              xAux,
                                              attn_nw=attn_nw_func)

    # Encoder inputs come first, then the generator inputs; the training loop
    # assembles real_inp_list in the same order.
    inp_list = imgFeatEnc_inp + auxFeatEnc_inp + inp_list_gen
    #--------------------------------- Cost function and gradient computations setup #---------------------------------#
    costGrad = cost[0]
    # Add class uncertainity to final cost
    #if params['class_out_factoring'] == 1:
    #  costGrad += cost[2]
    # Add the regularization cost. Since this is specific to trainig and doesn't get included when we
    # evaluate the cost on test or validation data, we leave it here outside the model definition
    if params['regc'] > 0.:
        # This shared variable is rebound to 0. two lines below, so it has no
        # effect on the graph.
        reg_cost = theano.shared(numpy_floatX(0.), name='reg_c')
        reg_c = tensor.as_tensor_variable(numpy_floatX(params['regc']),
                                          name='reg_c')
        reg_cost = 0.
        for p in misc['regularize']:
            reg_cost += (model[p]**2).sum()
            # NOTE(review): the collapsed source does not show whether this
            # scaling sits inside or after the loop; as placed here it
            # rescales the running sum on every iteration -- confirm.
            reg_cost *= 0.5 * reg_c
        costGrad += (reg_cost / params['batch_size'])

    # Compile an evaluation function.. Doesn't include gradients
    # To be used for validation set evaluation
    f_eval = theano.function(inp_list, cost, name='f_eval')

    # Now let's build a gradient computation graph and rmsprop update mechanism
    grads = tensor.grad(costGrad, wrt=model.values())
    lr = tensor.scalar(name='lr', dtype=config.floatX)
    f_grad_shared, f_update, zg, rg, ud = solver.build_solver_model(
        lr, model, grads, inp_list, cost, params)

    print 'model init done.'
    print 'model has keys: ' + ', '.join(model.keys())
    #print 'updating: ' + ', '.join( '%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1]) for k in misc['update'])
    #print 'updating: ' + ', '.join( '%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1]) for k in misc['regularize'])
    #print 'number of learnable parameters total: %d' % (sum(model[k].shape[0] * model[k].shape[1] for k in misc['update']), )

    #-------------------------------- Intialize the prediction path if needed by evaluator ----------------------------#
    evalKwargs = {
        'eval_metric': params['eval_metric'],
        'f_gen': lstmGenerator.predict,
        'beamsize': params['eval_beamsize']
    }
    if params['eval_metric'] != 'perplex':
        lstmGenerator.prepPredictor(None, params, params['eval_beamsize'])
        refToks, scr_info = eval_prep_refs('val', dp, params['eval_metric'])
        evalKwargs['refToks'] = refToks
        evalKwargs['scr_info'] = scr_info
        # For these metrics a larger validation score counts as better.
        valMetOp = operator.gt
    else:
        # For perplexity a smaller validation score counts as better.
        valMetOp = operator.lt

    if params['met_to_track'] != []:
        trackMetargs = {
            'eval_metric': params['met_to_track'],
            'f_gen': lstmGenerator.predict,
            'beamsize': params['eval_beamsize']
        }
        lstmGenerator.prepPredictor(None, params, params['eval_beamsize'])
        refToks, scr_info = eval_prep_refs('val', dp, params['met_to_track'])
        trackMetargs['refToks'] = refToks
        trackMetargs['scr_info'] = scr_info

    #--------------------------------- Iterations and Logging intializations ------------------------------------------#
    # calculate how many iterations we need, One epoch is considered once going through all the sentences and not images
    # Hence in case of coco/flickr this will 5* no of images
    num_sentences_total = dp.getSplitSize('train', ofwhat='sentences')
    # NOTE(review): under Python 2 this is integer division if both operands
    # are ints -- confirm that truncation is intended.
    num_iters_one_epoch = num_sentences_total / batch_size
    max_iters = max_epochs * num_iters_one_epoch
    eval_period_in_epochs = params['eval_period']
    eval_period_in_iters = max(
        1, int(num_iters_one_epoch * eval_period_in_epochs))
    top_val_sc = -1  # negative means no validation score recorded yet
    smooth_train_ppl2 = len(
        misc['ixtoword'])  # initially size of dictionary of confusion
    val_sc = len(misc['ixtoword'])
    last_status_write_time = 0  # for writing worker job status reports
    json_worker_status = {}
    #json_worker_status['params'] = params
    json_worker_status['history'] = []

    len_hist = defaultdict(int)

    #Initialize Tracking the perplexity of train and val, with iters.
    train_perplex = []
    val_perplex = []
    trackSc_array = []

    #-------------------------------------- Load previously saved model ------------------------------------------------#
    #- Initialize the model parameters from the checkpoint file if we are resuming training
    if params['checkpoint_file_name'] != 'None':
        zipp(model_init_from, model)
        if params['restore_grads'] == 1:
            zipp(rg_init, rg)
        #Copy trackers from previous checkpoint
        if 'trackers' in checkpoint_init:
            train_perplex = checkpoint_init['trackers']['train_perplex']
            val_perplex = checkpoint_init['trackers']['val_perplex']
            trackSc_array = checkpoint_init['trackers'].get('trackScores', [])
        print(
            """\nContinuing training from previous model\n. Already run for %0.2f epochs with validation perplx at %0.3f\n"""
            % (checkpoint_init['epoch'], checkpoint_init['perplexity']))

    #-------------------------------------- MAIN LOOP ----------------------------------------------------------------#
    for it in xrange(max_iters):
        t0 = time.time()
        # Enable using dropout in training
        use_dropout.set_value(float(params['use_dropout']))
        if params['use_encoder_for'] & 1:
            imgenc_use_dropout.set_value(float(params['use_dropout']))
        if params['use_encoder_for'] & 2:
            auxenc_use_dropout.set_value(float(params['use_dropout']))

        epoch = it * 1.0 / num_iters_one_epoch
        #-------------------------------------- Prepare batch-------------------------------------------#
        # fetch a batch of data
        if params['sample_by_len'] == 0:
            batch = [dp.sampleImageSentencePair() for i in xrange(batch_size)]
        else:
            batch, l = dp.getRandBatchByLen(batch_size)
            len_hist[l] += 1

        enc_inp_list = prepare_seq_features(
            batch,
            use_enc_for=params['use_encoder_for'],
            maxlen=params['maxlen'],
            use_shared_mem=params['use_shared_mem_enc'],
            enc_gt_sent=params['encode_gt_sentences'],
            n_enc_sent=params['n_encgt_sent'],
            wordtoix=misc['wordtoix'])

        if params['use_pos_tag'] != 'None':
            gen_inp_list, lenS = prepare_data(
                batch,
                misc['wordtoix'],
                params['maxlen'],
                sentTagMap,
                misc['ixtoword'],
                rev_sents=params['reverse_sentence'],
                use_enc_for=params['use_encoder_for'],
                use_unk_token=params['use_unk_token'])
        else:
            gen_inp_list, lenS = prepare_data(
                batch,
                misc['wordtoix'],
                params['maxlen'],
                rev_sents=params['reverse_sentence'],
                use_enc_for=params['use_encoder_for'],
                use_unk_token=params['use_unk_token'])

        if params['sched_sampling_mode'] != None:
            gen_inp_list.append(epoch)

        real_inp_list = enc_inp_list + gen_inp_list

        #import ipdb; ipdb.set_trace()
        #---------------------------------- Compute cost and apply gradients ---------------------------#
        # evaluate cost, gradient and perform parameter update
        cost = f_grad_shared(*real_inp_list)
        f_update(params['learning_rate'])
        dt = time.time() - t0

        # print training statistics
        train_ppl2 = (2**(cost[1] / lenS))  #step_struct['stats']['ppl2']
        # smooth exponentially decaying moving average
        smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2
        if it == 0:
            smooth_train_ppl2 = train_ppl2  # start out where we start out

        total_cost = cost[0]
        if it == 0:
            smooth_cost = total_cost  # start out where we start out
        smooth_cost = 0.99 * smooth_cost + 0.01 * total_cost

        #print '%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, reg cost = %f, ppl2 = %.2f (smooth %.2f)' \
        #      % (it, max_iters, dt, epoch, cost['loss_cost'], cost['reg_cost'], \
        #         train_ppl2, smooth_train_ppl2)

        #---------------------------------- Write a report into a json file ---------------------------#
        tnow = time.time()
        if tnow > last_status_write_time + 60 * 1:  # every now and then lets write a report
            print '%d/%d batch done in %.3fs. at epoch %.2f. Cost now is %.3f and pplx is %.3f' \
                    % (it, max_iters, dt, epoch, smooth_cost, smooth_train_ppl2)
            last_status_write_time = tnow
            jstatus = {}
            jstatus['time'] = datetime.datetime.now().isoformat()
            jstatus['iter'] = (it, max_iters)
            jstatus['epoch'] = (epoch, max_epochs)
            jstatus['time_per_batch'] = dt
            jstatus['smooth_train_ppl2'] = smooth_train_ppl2
            jstatus['val_sc'] = val_sc  # just write the last available one
            jstatus['val_metric'] = params[
                'eval_metric']  # just write the last available one
            jstatus['train_ppl2'] = train_ppl2
            #if params['class_out_factoring'] == 1:
            #  jstatus['class_cost'] = float(cost[2])
            json_worker_status['history'].append(jstatus)
            status_file = os.path.join(
                params['worker_status_output_directory'],
                host + '_status.json')
            #import pdb; pdb.set_trace()
            # Best effort: a failed status write is reported, not fatal.
            # The file object opened here is never explicitly closed.
            try:
                json.dump(json_worker_status, open(status_file, 'w'))
            except Exception, e:  # todo be more clever here
                print 'tried to write worker status into %s but got error:' % (
                    status_file, )
                print e

        #--------------------------------- VALIDATION ---------------------------#
        #- perform perplexity evaluation on the validation set and save a model checkpoint if it's good
        is_last_iter = (it + 1) == max_iters
        # Evaluate every eval_period_in_iters iterations, except within the
        # last five iterations, and always on the final iteration.
        if (((it + 1) % eval_period_in_iters) == 0
                and it < max_iters - 5) or is_last_iter:
            # Disable using dropout in validation
            use_dropout.set_value(0.)
            if params['use_encoder_for'] & 1:
                imgenc_use_dropout.set_value(0.)
            if params['use_encoder_for'] & 2:
                auxenc_use_dropout.set_value(0.)

            # perform the evaluation on VAL set
            val_sc = eval_split_theano('val', dp, model, params, misc, f_eval,
                                       **evalKwargs)
            val_sc = val_sc[0]
            val_perplex.append((it, val_sc))
            train_perplex.append((it, smooth_train_ppl2))

            if params['met_to_track'] != []:
                track_sc = eval_split_theano('val', dp, model, params, misc,
                                             f_eval, **trackMetargs)
                trackSc_array.append((it, {
                    evm: track_sc[i]
                    for i, evm in enumerate(params['met_to_track'])
                }))

            # NOTE(review): nesting of this decay step is inferred; as placed
            # it runs once per validation, and bumping the start epoch means
            # the rate decays at most once per epoch.
            if epoch - params['lr_decay_st_epoch'] >= 0:
                params['learning_rate'] = params['learning_rate'] * params[
                    'lr_decay']
                params['lr_decay_st_epoch'] += 1

            print 'validation %s = %f, lr = %f' % (
                params['eval_metric'], val_sc, params['learning_rate'])
            #if params['sample_by_len'] == 1:
            #  print len_hist

            #----------------------------- SAVE THE MODEL -------------------#
            write_checkpoint_ppl_threshold = params[
                'write_checkpoint_ppl_threshold']
            if valMetOp(val_sc, top_val_sc) or top_val_sc < 0:
                if valMetOp(val_sc, write_checkpoint_ppl_threshold
                            ) or write_checkpoint_ppl_threshold < 0:
                    # if we beat a previous record or if this is the first time
                    # AND we also beat the user-defined threshold or it doesnt exist
                    top_val_sc = val_sc
                    filename = 'model_checkpoint_%s_%s_%s_%s%.2f.p' % (
                        params['dataset'], host, params['fappend'],
                        params['eval_metric'][:3], val_sc)
                    filepath = os.path.join(
                        params['checkpoint_output_directory'], filename)
                    model_npy = unzip(model)
                    rgrads_npy = unzip(rg)
                    checkpoint = {}
                    checkpoint['it'] = it
                    checkpoint['epoch'] = epoch
                    checkpoint['model'] = model_npy
                    checkpoint['rgrads'] = rgrads_npy
                    checkpoint['params'] = params
                    checkpoint['perplexity'] = val_sc
                    checkpoint['misc'] = misc
                    checkpoint['trackers'] = {
                        'train_perplex': train_perplex,
                        'val_perplex': val_perplex,
                        'trackScores': trackSc_array
                    }
                    # Best effort: a failed checkpoint write is reported, not
                    # fatal. The file object opened here is never explicitly
                    # closed.
                    try:
                        pickle.dump(checkpoint, open(filepath, "wb"))
                        print 'saved checkpoint in %s' % (filepath, )
                    except Exception, e:  # todo be more clever here
                        print 'tried to write checkpoint into %s but got error: ' % (
                            filepath, )
                        print e
def main(params):
    """Build the sentence generator and run a single CPU training step.

    Keys read from ``params``: ``batch_size``, ``dataset``,
    ``word_count_threshold`` and ``max_epochs``.  ``params['mode']`` is
    forced to ``'CPU'``.  The whole dict is also handed to
    ``decodeGenerator``, ``BatchGenerator.init``, ``RNNGenCost`` and, as
    keyword arguments, to ``solver.step``.

    In this variant gradient checking, exploding-cost detection, status
    logging, validation and checkpointing are all disabled, and the
    iteration count is pinned to one step (see the NOTE below).
    """
    batch_size = params['batch_size']
    dataset = params['dataset']
    word_count_threshold = params['word_count_threshold']
    max_epochs = params['max_epochs']
    params['mode'] = 'CPU'

    # fetch the data provider
    dp = getDataProvider(dataset)

    # stores various misc items that need to be passed around the framework
    misc = {}

    # go over all training sentences and find the vocabulary we want to use,
    # i.e. the words that occur at least word_count_threshold times
    misc['wordtoix'], misc['ixtoword'], bias_init_vector = \
        preProBuildWordVocab(dp.iterSentences('train'), word_count_threshold)

    # delegate the initialization of the model to the Generator class
    BatchGenerator = decodeGenerator(params)
    init_struct = BatchGenerator.init(params, misc)
    model = init_struct['model']
    misc['update'] = init_struct['update']
    misc['regularize'] = init_struct['regularize']

    # force overwrite of the decoder bias with the vocabulary-derived vector
    bias_row = bias_init_vector.reshape(1, bias_init_vector.size)
    if params['mode'] == 'GPU':
        # unreachable while the mode is forced to 'CPU' above
        model['bd'] = gp.garray(bias_row)
    else:
        model['bd'] = bias_row

    def describe(keys):
        # "name [rows x cols]" summary for a list of parameter names
        return ', '.join('%s [%dx%d]' % (k, model[k].shape[0],
                                         model[k].shape[1]) for k in keys)

    print('model init done.')
    print('model has keys: ' + ', '.join(model.keys()))
    print('updating: ' + describe(misc['update']))
    # was mislabelled 'updating: ' a second time
    print('regularizing: ' + describe(misc['regularize']))
    print('number of learnable parameters total: %d' % (sum(
        model[k].shape[0] * model[k].shape[1] for k in misc['update']), ))

    # initialize the Solver and the cost function
    solver = Solver()

    def costfun(batch, model):
        # wrap the cost function to abstract some things away from the Solver
        return RNNGenCost(batch, model, params, misc)

    # iterations per epoch; floor division keeps the Python 2 integer result
    num_sentences_total = dp.getSplitSize('train', ofwhat='sentences')
    num_iters_one_epoch = num_sentences_total // batch_size

    # NOTE(review): the original computes max_epochs * num_iters_one_epoch
    # and then immediately overrides it with 1, so only one step ever runs.
    # This looks like a debugging/benchmarking leftover; kept as-is, confirm
    # before restoring the full schedule.
    max_iters = 1
    smooth_train_ppl2 = len(misc['ixtoword'])  # initially size of dictionary

    for it in range(max_iters):
        t0 = time.time()
        # fetch a batch of data
        batch = [dp.sampleImageSentencePair() for _ in range(batch_size)]
        # evaluate cost, gradient and perform parameter update
        step_struct = solver.step(batch, model, costfun, **params)
        cost = step_struct['cost']
        dt = time.time() - t0

        # print training statistics
        train_ppl2 = step_struct['stats']['ppl2']
        if it == 0:
            smooth_train_ppl2 = train_ppl2  # start out where we start out
        else:
            # exponentially decaying moving average
            smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2
        epoch = it * 1.0 / num_iters_one_epoch
        print('%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, '
              'reg cost = %f, ppl2 = %.2f (smooth %.2f)'
              % (it, max_iters, dt, epoch, cost['loss_cost'],
                 cost['reg_cost'], train_ppl2, smooth_train_ppl2))

    # presumably releases the CUDA context opened elsewhere - TODO confirm
    cuda.close()
def main(params):
    """Caption the test images with a saved checkpoint and score the result.

    Keys read from ``params``: ``checkpoint_path``, ``max_images``,
    ``dump_folder`` (falsy to disable image copying), ``beam_size`` and
    ``result_struct_filename``.

    For every test image the top beam-search candidate is recorded next to
    the human references.  Candidates and references are then written to
    ``eval/`` and scored with ``eval/multi-bleu.perl``, the test perplexity
    is computed with ``eval_split``, and everything is dumped as JSON.
    """
    import shutil  # only needed here, for copying images into dump_folder

    # load the checkpoint
    checkpoint_path = params["checkpoint_path"]
    max_images = params["max_images"]
    print("loading checkpoint %s" % (checkpoint_path,))
    # NOTE: unpickling can execute arbitrary code; load trusted files only.
    with open(checkpoint_path, "rb") as f:
        checkpoint = pickle.load(f)
    checkpoint_params = checkpoint["params"]
    model = checkpoint["model"]

    dump_folder = params["dump_folder"]
    if dump_folder:
        print("creating dump folder " + dump_folder)
        # replaces a shell "mkdir -p" that broke on paths with spaces
        if not os.path.isdir(dump_folder):
            os.makedirs(dump_folder)

    # the data provider is hard-wired to the example images here, not to
    # the dataset recorded in the checkpoint
    dp = getDataProvider("example_images")

    misc = {"wordtoix": checkpoint["wordtoix"]}
    ixtoword = checkpoint["ixtoword"]

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        "params": params,
        "checkpoint_params": checkpoint_params,
        "imgblobs": [],
    }

    # iterate over all images in test set and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {"beam_size": params["beam_size"]}
    all_references = []
    all_candidates = []
    n = 0
    for img in dp.iterImages(split="test", max_images=max_images):
        n += 1
        print("image %d/%d:" % (n, max_images))
        references = [" ".join(x["tokens"]) for x in img["sentences"]]
        Ys = BatchGenerator.predict([{"image": img}], model,
                                    checkpoint_params, **kwparams)

        img_blob = {}  # we will build this up
        img_blob["img_path"] = img["local_file_path"]
        img_blob["imgid"] = img["imgid"]

        if dump_folder:
            # copy source file to some folder. This makes it easier to
            # distribute results into a webpage, because all images that
            # were predicted on are in a single folder
            source_file = img["local_file_path"]
            target_file = os.path.join(dump_folder,
                                       os.path.basename(source_file))
            try:
                shutil.copy(source_file, target_file)
            except (IOError, OSError, shutil.Error) as e:
                # the old shell "cp" never aborted the run; stay best-effort
                print("could not copy %s to %s: %s"
                      % (source_file, target_file, e))

        # encode the human-provided references
        img_blob["references"] = []
        for gtsent in references:
            print("GT: " + gtsent)
            img_blob["references"].append({"text": gtsent})

        # now evaluate and encode the top prediction
        top_predictions = Ys[0]  # predictions for the only image passed in
        top_prediction = top_predictions[0]  # sorted with highest on top
        # ix 0 is the END token, skip that
        candidate = " ".join(
            [ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        print("PRED: (%f) %s" % (top_prediction[0], candidate))

        # save for later eval
        all_references.append(references)
        all_candidates.append(candidate)

        img_blob["candidate"] = {"text": candidate,
                                 "logprob": top_prediction[0]}
        blob["imgblobs"].append(img_blob)

    # use perl script to eval BLEU score for fair comparison to other
    # research work; first write intermediate files
    print("writing intermediate files into eval/")
    with open("eval/output", "w") as f:
        f.write("\n".join(all_candidates))
    # exactly five references per image are expected
    for q in range(5):
        with open("eval/reference" + str(q), "w") as f:
            f.write("\n".join([x[q] for x in all_references]))

    # invoke the perl script to get BLEU scores
    print("invoking eval/multi-bleu.perl script...")
    owd = os.getcwd()
    os.chdir("eval")
    try:
        os.system("./multi-bleu.perl reference < output")
    finally:
        os.chdir(owd)  # always restore the working directory

    # now also evaluate test split perplexity
    gtppl = eval_split("test", dp, model, checkpoint_params, misc,
                       eval_max_images=max_images)
    print("perplexity of ground truth words based on dictionary of "
          "%d words: %f" % (len(ixtoword), gtppl))
    blob["gtppl"] = gtppl

    # dump result struct to file
    print("saving result struct to %s" % (params["result_struct_filename"],))
    with open(params["result_struct_filename"], "w") as f:
        json.dump(blob, f)
def main(params):
    """Caption the test images with a theano checkpoint and run BLEU.

    Keys read from ``params``: ``checkpoint_path``, ``max_images``,
    ``dump_folder`` (falsy to disable image copying), ``beam_size`` and
    ``result_struct_filename``.

    For every test image the top beam-search candidate and all remaining
    candidates are recorded next to the human references.  Candidates and
    references are written to ``eval/`` and scored with
    ``eval/multi-bleu.perl``, and the collected results are dumped as JSON.
    Test-split perplexity is not computed in this variant.
    """
    import shutil  # only needed here, for copying images into dump_folder

    # load the checkpoint
    checkpoint_path = params["checkpoint_path"]
    max_images = params["max_images"]
    print("loading checkpoint %s" % (checkpoint_path,))
    # NOTE: unpickling can execute arbitrary code; load trusted files only.
    with open(checkpoint_path, "rb") as f:
        checkpoint = pickle.load(f)
    checkpoint_params = checkpoint["params"]
    model_npy = checkpoint["model"]
    dump_folder = params["dump_folder"]

    # the theano path is always used (the original's "if missing" guard was
    # followed by this same unconditional assignment)
    checkpoint_params["use_theano"] = 1
    if "image_feat_size" not in checkpoint_params:
        checkpoint_params["image_feat_size"] = 4096

    if dump_folder:
        print("creating dump folder " + dump_folder)
        # replaces a shell "mkdir -p" that broke on paths with spaces
        if not os.path.isdir(dump_folder):
            os.makedirs(dump_folder)

    # fetch the data provider
    dp = getDataProvider(checkpoint_params)

    ixtoword = checkpoint["ixtoword"]

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        "params": params,
        "checkpoint_params": checkpoint_params,
        "imgblobs": [],
    }

    # iterate over all images in test set and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    if checkpoint_params["use_theano"] == 1:
        # Compile and init the theano predictor
        BatchGenerator.prepPredictor(model_npy, checkpoint_params,
                                     params["beam_size"])
        model = BatchGenerator.model_th
    print("\nUsing model run for %0.2f epochs with validation perplx at "
          "%0.3f\n" % (checkpoint["epoch"], checkpoint["perplexity"]))

    kwparams = {"beam_size": params["beam_size"]}
    all_references = []
    all_candidates = []
    n = 0
    for img in dp.iterImages(split="test", max_images=max_images):
        n += 1
        print("image %d/%d:" % (n, max_images))
        references = [" ".join(x["tokens"]) for x in img["sentences"]]

        # NOTE(review): the real image features are overwritten with uniform
        # random noise before prediction, so captions do not depend on the
        # image content.  Kept as in the original; looks like an ablation or
        # debugging leftover - confirm before trusting the BLEU numbers.
        img["feat"] = np.random.rand(*img["feat"].shape)
        Ys = BatchGenerator.predict([{"image": img}], model,
                                    checkpoint_params, **kwparams)

        img_blob = {}  # we will build this up
        img_blob["img_path"] = img["local_file_path"]
        img_blob["imgid"] = img["imgid"]

        if dump_folder:
            # copy source file to some folder. This makes it easier to
            # distribute results into a webpage, because all images that
            # were predicted on are in a single folder
            source_file = img["local_file_path"]
            target_file = os.path.join(dump_folder,
                                       os.path.basename(source_file))
            try:
                shutil.copy(source_file, target_file)
            except (IOError, OSError, shutil.Error) as e:
                # the old shell "cp" never aborted the run; stay best-effort
                print("could not copy %s to %s: %s"
                      % (source_file, target_file, e))

        # encode the human-provided references
        img_blob["references"] = []
        for gtsent in references:
            print("GT: " + gtsent)
            img_blob["references"].append({"text": gtsent})

        # now evaluate and encode the top prediction
        top_predictions = Ys[0]  # predictions for the only image passed in
        top_prediction = top_predictions[0]  # sorted with highest on top
        # ix 0 is the END token, skip that; int() matches the lookup used
        # for the remaining candidates below
        candidate = " ".join(
            [ixtoword[int(ix)] for ix in top_prediction[1] if ix > 0])
        print("PRED: (%f) %s" % (top_prediction[0], candidate))

        # save for later eval
        all_references.append(references)
        all_candidates.append(candidate)

        img_blob["candidate"] = {"text": candidate,
                                 "logprob": float(top_prediction[0])}

        # save all the other candidates (everything after the top one)
        candlist = []
        for prediction in top_predictions[1:]:
            text = " ".join(
                [ixtoword[int(ix)] for ix in prediction[1] if ix > 0])
            candlist.append({"text": text, "logprob": float(prediction[0])})
        img_blob["candidatelist"] = candlist

        blob["imgblobs"].append(img_blob)

    # use perl script to eval BLEU score for fair comparison to other
    # research work; first write intermediate files
    print("writing intermediate files into eval/")
    with open("eval/output", "w") as f:
        f.write("\n".join(all_candidates))
    # exactly five references per image are expected
    for q in range(5):
        with open("eval/reference" + str(q), "w") as f:
            f.write("\n".join([x[q] for x in all_references]))

    # invoke the perl script to get BLEU scores
    print("invoking eval/multi-bleu.perl script...")
    owd = os.getcwd()
    os.chdir("eval")
    try:
        os.system("./multi-bleu.perl reference < output")
    finally:
        os.chdir(owd)  # always restore the working directory

    # dump result struct to file
    print("saving result struct to %s" % (params["result_struct_filename"],))
    with open(params["result_struct_filename"], "w") as f:
        json.dump(blob, f)
# Interactive captioning demo: load the VGG feature extractor and a trained
# checkpoint once, then repeatedly ask for an image path and extract its
# features.
PJT_ROOT = '/works/neuraltalk/'
MODEL_ROOT = '/storage/models/vgg/'
PATH_MODEL_DEF_FILE = '%s/vgg_layer16_deploy_feature_relu7.prototxt' % MODEL_ROOT
PATH_MODEL = '%s/vgg_layer16.caffemodel' % MODEL_ROOT
WITH_GPU = 0

path_imgs = []
print("Feature Extraction for %d images starting now" % (len(path_imgs)))
net = caffe_load_model(PATH_MODEL_DEF_FILE, PATH_MODEL, WITH_GPU)
# (two leftover pdb.set_trace() breakpoints were removed from this script)

params = {}
params['beam_size'] = 10
params['checkpoint_path'] = '%s/cv/coco/model_checkpoint_coco_SKP1002596MN001.local_baseline_11.14.p' % PJT_ROOT
# NOTE: unpickling can execute arbitrary code; load trusted files only.
with open(params['checkpoint_path'], 'rb') as _checkpoint_file:
    checkpoint = pickle.load(_checkpoint_file)
checkpoint_params = checkpoint['params']
# pass the training params, as every other call in this file does; the
# original handed over the whole checkpoint dict
BatchGenerator = decodeGenerator(checkpoint_params)
model = checkpoint['model']
ixtoword = checkpoint['ixtoword']

while True:
    path_imgs = []
    path_imgs.append(raw_input("Input image: "))
    start_time = time.time()
    features = caffe_extract_feats(net, path_imgs)
    print("Encoding in %.2f sec." % (time.time() - start_time))
    img = {}
    img['feat'] = features[:, 0]
    kwparams = {'beam_size': params['beam_size']}
    # NOTE(review): the visible loop body ends here - no prediction is run
    # on `img` and the loop has no exit; the rest of the script appears to
    # be missing from this file.
def main(params):
    """Score given (image, sentence) pairs with a trained theano model.

    Keys read from ``params``: ``checkpoint_path``, ``root_path``,
    ``imgList``, ``fname_append`` and, optionally, ``aux_inp_file`` and
    ``eval_batch_size`` (default 100).  ``params`` is also passed to
    ``loadArbitraryFeatures``.

    ``imgList`` names a text file with one comma-separated record per line:
    ``image,sentence`` or ``image,sentence,feature_index``.  The format is
    decided from the first line.  Fields are split on every comma, so
    sentences must not contain commas.

    The value ``eval_array[0, i]`` returned by the compiled evaluation
    function is stored as the ``logprob`` of each sentence, and the results
    are written to ``<root_path>/result_struct_<fname_append>.json``.

    Raises:
        ValueError: the checkpoint enables ``en_aux_inp`` but no
            ``aux_inp_file`` was given.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling can execute arbitrary code; load trusted files only.
    with open(checkpoint_path, 'rb') as f:
        checkpoint = pickle.load(f)
    checkpoint_params = checkpoint['params']
    model_npy = checkpoint['model']
    wordtoix = checkpoint['wordtoix']

    # the theano path is always used (the original's "if missing" guard was
    # followed by this same unconditional assignment)
    checkpoint_params['use_theano'] = 1
    if 'image_feat_size' not in checkpoint_params:
        checkpoint_params['image_feat_size'] = 4096

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # load the tasks.txt file
    root_path = params['root_path']
    with open(params['imgList'], 'r') as f:
        img_names_list = f.read().splitlines()

    # split every record once; an empty file is reported like a malformed one
    rows = [x.rsplit(',') for x in img_names_list]
    n_fields = len(rows[0]) if rows else 0
    if n_fields > 2:
        idxes = [int(r[2]) for r in rows]
    elif n_fields == 2:
        idxes = range(len(rows))
    else:
        print('ERROR: List should atleast contain image name and a corresponding sentence')
        return
    img_names = [r[0] for r in rows]
    sentRaw = [r[1] for r in rows]

    if (checkpoint_params.get('en_aux_inp', 0)
            and params.get('aux_inp_file', None) is None):
        raise ValueError(
            'ERROR: please specify auxillary input feature using --aux_inp_file'
        )

    # load the features for all images; columns are indexed per image below
    features, aux_inp = loadArbitraryFeatures(params, idxes)
    N = len(img_names)

    # build the evaluation function for scoring arbitrary sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    BatchGenerator.build_eval_other_sent(BatchGenerator.model_th,
                                         checkpoint_params, model_npy)
    eval_batch_size = params.get('eval_batch_size', 100)
    gen_fprop = BatchGenerator.f_eval_other

    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n"
          % (checkpoint['epoch'], checkpoint['perplexity']))

    use_aux = checkpoint_params.get('en_aux_inp', 0)
    n = 0
    while n < N:
        # trailing comma kept from the original Python 2 progress print
        print('image %d/%d:\r' % (n, N)),

        # gather up to eval_batch_size (image, sentence) pairs
        batch = []
        while n < N and len(batch) < eval_batch_size:
            out = {
                'image': {'feat': features[:, n]},
                'sentence': {
                    'raw': sentRaw[n],
                    'tokens': word_tokenize(sentRaw[n]),
                },
                'idx': n,
            }
            if use_aux:
                out['image']['aux_inp'] = aux_inp[:, n]
            batch.append(out)
            n += 1

        inp_list, lenS = prepare_data(batch, wordtoix)

        # perform the work. heavy lifting happens inside
        eval_array = gen_fprop(*inp_list)

        for ix, x in enumerate(batch):
            blob['imgblobs'].append({
                'img_path': img_names[x['idx']],
                'candidate': {
                    'text': x['sentence']['raw'],
                    'logprob': float(eval_array[0, ix]),
                },
            })

    # dump result struct to file
    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as f:
        json.dump(blob, f)
def main(params):
    """Train the RNN sentence generator, logging status and checkpointing.

    Keys read from ``params``: ``batch_size``, ``dataset``,
    ``word_count_threshold``, ``do_grad_check``, ``max_epochs``,
    ``eval_period`` (in epochs), ``worker_status_output_directory``,
    ``min_ppl_or_abort``, ``write_checkpoint_ppl_threshold``, ``fappend``,
    ``checkpoint_output_directory`` and, optionally, ``init_model_from``.
    The whole dict is also handed to ``decodeGenerator``,
    ``BatchGenerator.init``, ``RNNGenCost``, ``eval_split`` and, as keyword
    arguments, to ``solver.step``.

    Each iteration samples a batch and takes one solver step.  A JSON status
    report is written at most once a minute.  Every ``eval_period`` epochs,
    and on the last iteration, validation perplexity is evaluated and a
    checkpoint is pickled if it beats both the best value so far and the
    user threshold.  Training stops early if the cost more than doubles
    relative to the first iteration or validation perplexity exceeds
    ``min_ppl_or_abort``.  When ``do_grad_check`` is set, a gradient check
    runs at iteration 10 and the process exits.
    """
    batch_size = params["batch_size"]
    dataset = params["dataset"]
    word_count_threshold = params["word_count_threshold"]
    do_grad_check = params["do_grad_check"]
    max_epochs = params["max_epochs"]
    host = socket.gethostname()  # get computer hostname

    # fetch the data provider
    dp = getDataProvider(dataset)

    # stores various misc items that need to be passed around the framework
    misc = {}

    # go over all training sentences and find the vocabulary we want to use,
    # i.e. the words that occur at least word_count_threshold times
    misc["wordtoix"], misc["ixtoword"], bias_init_vector = \
        preProBuildWordVocab(dp.iterSentences("train"), word_count_threshold)

    # delegate the initialization of the model to the Generator class
    BatchGenerator = decodeGenerator(params)
    init_struct = BatchGenerator.init(params, misc)
    model = init_struct["model"]
    misc["update"] = init_struct["update"]
    misc["regularize"] = init_struct["regularize"]

    # force overwrite here. This is a bit of a hack, not happy about it
    model["bd"] = bias_init_vector.reshape(1, bias_init_vector.size)

    def describe(keys):
        # "name [rows x cols]" summary for a list of parameter names
        return ", ".join("%s [%dx%d]" % (k, model[k].shape[0],
                                         model[k].shape[1]) for k in keys)

    print("model init done.")
    print("model has keys: " + ", ".join(model.keys()))
    print("updating: " + describe(misc["update"]))
    # was mislabelled "updating: " a second time
    print("regularizing: " + describe(misc["regularize"]))
    print("number of learnable parameters total: %d" % (
        sum(model[k].shape[0] * model[k].shape[1] for k in misc["update"]),))

    if params.get("init_model_from", ""):
        # load checkpoint and overwrite the freshly initialised model
        # NOTE: unpickling can execute arbitrary code; trusted files only.
        with open(params["init_model_from"], "rb") as f:
            checkpoint = pickle.load(f)
        model = checkpoint["model"]
        print(checkpoint["model"])

    # initialize the Solver and the cost function
    solver = Solver()

    def costfun(batch, model):
        # wrap the cost function to abstract some things away from the Solver
        return RNNGenCost(batch, model, params, misc)

    # calculate how many iterations we need; floor division keeps the
    # Python 2 integer result
    num_sentences_total = dp.getSplitSize("train", ofwhat="sentences")
    num_iters_one_epoch = num_sentences_total // batch_size
    max_iters = max_epochs * num_iters_one_epoch
    eval_period_in_epochs = params["eval_period"]
    eval_period_in_iters = max(
        1, int(num_iters_one_epoch * eval_period_in_epochs))

    abort = False
    top_val_ppl2 = -1  # negative means "no validation result yet"
    # initially size of dictionary of confusion
    smooth_train_ppl2 = len(misc["ixtoword"])
    val_ppl2 = len(misc["ixtoword"])
    last_status_write_time = 0  # for writing worker job status reports
    json_worker_status = {"params": params, "history": []}

    for it in range(max_iters):
        if abort:
            break
        t0 = time.time()
        # fetch a batch of data
        batch = [dp.sampleImageSentencePair() for _ in range(batch_size)]
        # evaluate cost, gradient and perform parameter update
        step_struct = solver.step(batch, model, costfun, **params)
        cost = step_struct["cost"]
        dt = time.time() - t0

        # print training statistics
        train_ppl2 = step_struct["stats"]["ppl2"]
        if it == 0:
            smooth_train_ppl2 = train_ppl2  # start out where we start out
        else:
            # exponentially decaying moving average
            smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2
        epoch = it * 1.0 / num_iters_one_epoch
        print("%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, "
              "reg cost = %f, ppl2 = %.2f (smooth %.2f)" % (
                  it, max_iters, dt, epoch, cost["loss_cost"],
                  cost["reg_cost"], train_ppl2, smooth_train_ppl2))

        # perform gradient check if desired, with a bit of a burnin time
        # (10 iterations)
        if it == 10 and do_grad_check:
            print("disabling dropout for gradient check...")
            params["drop_prob_encoder"] = 0
            params["drop_prob_decoder"] = 0
            solver.gradCheck(batch, model, costfun)
            print("done gradcheck, exitting.")
            sys.exit()

        # detect if loss is exploding and kill the job if so
        total_cost = cost["total_cost"]
        if it == 0:
            total_cost0 = total_cost  # store this initial cost
        if total_cost > total_cost0 * 2:
            print("Aboring, cost seems to be exploding. Run gradcheck? "
                  "Lower the learning rate?")
            abort = True  # set the abort flag, we'll break out

        # logging: write JSON files for visual inspection of the training
        tnow = time.time()
        if tnow > last_status_write_time + 60 * 1:  # at most once a minute
            last_status_write_time = tnow
            jstatus = {
                "time": datetime.datetime.now().isoformat(),
                "iter": (it, max_iters),
                "epoch": (epoch, max_epochs),
                "time_per_batch": dt,
                "smooth_train_ppl2": smooth_train_ppl2,
                "val_ppl2": val_ppl2,  # just write the last available one
                "train_ppl2": train_ppl2,
            }
            json_worker_status["history"].append(jstatus)
            status_file = os.path.join(
                params["worker_status_output_directory"],
                host + "_status.json")
            try:
                with open(status_file, "w") as f:
                    json.dump(json_worker_status, f)
            except Exception as e:  # best effort: never kill training
                print("tried to write worker status into %s but got error:"
                      % (status_file,))
                print(e)

        # perform perplexity evaluation on the validation set and save a
        # model checkpoint if it's good
        is_last_iter = (it + 1) == max_iters
        if (((it + 1) % eval_period_in_iters) == 0
                and it < max_iters - 5) or is_last_iter:
            val_ppl2 = eval_split("val", dp, model, params, misc)
            print("validation perplexity = %f" % (val_ppl2,))

            # abort training if the perplexity is no good
            min_ppl_or_abort = params["min_ppl_or_abort"]
            if val_ppl2 > min_ppl_or_abort and min_ppl_or_abort > 0:
                # message used to print '<' although the condition is '>'
                print("aborting job because validation perplexity %f > %f"
                      % (val_ppl2, min_ppl_or_abort))
                abort = True  # abort the job

            write_checkpoint_ppl_threshold = \
                params["write_checkpoint_ppl_threshold"]
            # if we beat a previous record or if this is the first time
            # AND we also beat the user-defined threshold or it doesnt exist
            is_record = val_ppl2 < top_val_ppl2 or top_val_ppl2 < 0
            passes_threshold = (val_ppl2 < write_checkpoint_ppl_threshold
                                or write_checkpoint_ppl_threshold < 0)
            if is_record and passes_threshold:
                top_val_ppl2 = val_ppl2
                filename = "model_checkpoint_%s_%s_%s_%.2f.p" % (
                    dataset, host, params["fappend"], val_ppl2)
                filepath = os.path.join(
                    params["checkpoint_output_directory"], filename)
                checkpoint = {
                    "it": it,
                    "epoch": epoch,
                    "model": model,
                    "params": params,
                    "perplexity": val_ppl2,
                    "wordtoix": misc["wordtoix"],
                    "ixtoword": misc["ixtoword"],
                }
                try:
                    with open(filepath, "wb") as f:
                        pickle.dump(checkpoint, f)
                    print("saved checkpoint in %s" % (filepath,))
                except Exception as e:  # best effort: never kill training
                    # the original referenced the undefined name `filepat`
                    # here, turning any write error into a NameError
                    print("tried to write checkpoint into %s but got "
                          "error: " % (filepath,))
                    print(e)
def main(params):
    """Caption a list of images with one theano model or an ensemble.

    Keys read from ``params``: ``multi_model``, ``checkpoint_path`` (one
    path, or a list of paths when ``multi_model`` is non-zero),
    ``beam_size``, ``nmodels`` (ensemble mode only), ``root_path``,
    ``imgList`` and ``fname_append``.  ``params`` is also passed to
    ``loadArbitraryFeatures``.

    ``imgList`` names a text file with one image per line, optionally
    followed by ``,feature_index``.  The format is decided from the first
    line.

    For every image the top beam-search candidate and all remaining
    candidates are stored.  Results are written to
    ``<root_path>/result_struct_<fname_append>.json`` (also periodically
    while running) and to ``<root_path>/result_<fname_append>.html``.
    """
    multi_model = params['multi_model'] != 0

    # load the checkpoint(s)
    # NOTE: unpickling can execute arbitrary code; load trusted files only.
    if not multi_model:
        checkpoint_path = params['checkpoint_path']
        print('loading checkpoint %s' % (checkpoint_path, ))
        with open(checkpoint_path, 'rb') as f:
            checkpoint = pickle.load(f)
        checkpoint_params = checkpoint['params']
        model_npy = checkpoint['model']

        checkpoint_params['use_theano'] = 1
        if 'image_feat_size' not in checkpoint_params:
            checkpoint_params['image_feat_size'] = 4096

        BatchGenerator = decodeGenerator(checkpoint_params)
        # Compile and init the theano predictor
        BatchGenerator.prepPredictor(model_npy, checkpoint_params,
                                     params['beam_size'])
        model = BatchGenerator.model_th
    else:
        BatchGenerator = []
        model_npy = []
        modelTh = []
        checkpoint_params = []
        for i, checkpoint_path in enumerate(params['checkpoint_path']):
            with open(checkpoint_path, 'rb') as f:
                checkpoint = pickle.load(f)
            model_npy.append(checkpoint['model'])
            checkpoint_params.append(checkpoint['params'])
            checkpoint_params[i]['use_theano'] = 1
            BatchGenerator.append(decodeGenerator(checkpoint_params[i]))
            zipp(model_npy[i], BatchGenerator[i].model_th)
            modelTh.append(BatchGenerator[i].model_th)
            # every model gets the same weight in the combination
            modelTh[i]['comb_weight'] = 1.0 / params['nmodels']

        BatchGenerator[0].prepMultiPredictor(modelTh, checkpoint_params,
                                             params['beam_size'],
                                             params['nmodels'])

    # in ensemble mode `checkpoint` is the last one loaded, so its
    # vocabulary and statistics are the ones used below
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # load the tasks.txt file and set up feature loading
    root_path = params['root_path']
    with open(params['imgList'], 'r') as f:
        img_names_list = f.read().splitlines()

    if len(img_names_list[0].rsplit(',')) > 1:
        rows = [x.rsplit(',') for x in img_names_list]
        img_names = [r[0] for r in rows]
        idxes = [int(r[1]) for r in rows]
    else:
        img_names = img_names_list
        idxes = range(len(img_names_list))

    # load the features for all images
    features, aux_inp = loadArbitraryFeatures(params, idxes)
    N = len(img_names)

    # iterate over all images and predict sentences
    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n"
          % (checkpoint['epoch'], checkpoint['perplexity']))

    kwparams = {'beam_size': params['beam_size']}
    if multi_model:
        kwparams['nmodels'] = params['nmodels']

    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)

    def dump_blob():
        # write everything collected so far, closing the file afterwards
        print('writing predictions to %s...' % (save_file, ))
        with open(save_file, 'w') as out:
            json.dump(blob, out)

    def decode(prediction):
        # ix 0 is the END token, skip that
        return ' '.join(
            [ixtoword[int(ix)] for ix in prediction[1] if ix > 0])

    for n in range(N):
        print('image %d/%d:' % (n, N))

        # encode the image and run the predictor(s)
        if not multi_model:
            img = {'feat': features[:, n]}
            if checkpoint_params.get('en_aux_inp', 0):
                img['aux_inp'] = aux_inp[:, n]
            img['local_file_path'] = img_names[n]
            # perform the work. heavy lifting happens inside
            Ys = BatchGenerator.predict([{'image': img}], model,
                                        checkpoint_params, **kwparams)
        else:
            # one input per ensemble member, each with its own features
            batch = []
            for i in range(params['nmodels']):
                img = {'feat': features[i][:, n]}
                if checkpoint_params[i].get('en_aux_inp', 0):
                    img['aux_inp'] = aux_inp[i][:, n]
                img['local_file_path'] = img_names[n]
                batch.append({'image': img})
            Ys = BatchGenerator[0].predictMulti(batch, checkpoint_params,
                                                **kwparams)

        # build up the output
        img_blob = {'img_path': img['local_file_path']}

        # encode the top prediction
        top_predictions = Ys[0]  # predictions for the only image passed in
        top_prediction = top_predictions[0]  # sorted with highest on top
        candidate = decode(top_prediction)
        print('PRED: (%f) %s' % (float(top_prediction[0]), candidate))
        img_blob['candidate'] = {
            'text': candidate,
            'logprob': float(top_prediction[0]),
        }

        # save all the other candidates (everything after the top one)
        img_blob['candidatelist'] = [
            {'text': decode(prediction), 'logprob': float(prediction[0])}
            for prediction in top_predictions[1:]
        ]

        blob['imgblobs'].append(img_blob)
        if (n % 5000) == 1:
            dump_blob()  # periodic intermediate dump

    # dump result struct to file
    dump_blob()

    # dump output html; pieces are joined once, not concatenated per image
    html_parts = []
    for entry in blob['imgblobs']:
        html_parts.append('<img src="%s" height="400"><br>'
                          % (entry['img_path'], ))
        html_parts.append('(%f) %s <br><br>' % (
            entry['candidate']['logprob'], entry['candidate']['text']))
    html = ''.join(html_parts)

    html_file = 'result_%s.html' % (params['fname_append'])
    html_file = os.path.join(root_path, html_file)
    print('writing html result file to %s...' % (html_file, ))
    with open(html_file, 'w') as f:
        f.write(html)
def main(params):
    """Evaluate a checkpoint on the test split with optional CCA/LDA/idf.

    Keys read from ``params``: ``checkpoint_path``, ``max_images``,
    ``dump_folder`` (falsy to disable image copying), ``pert``, ``lda``,
    ``cca``, ``beam_size``, ``normalization`` and
    ``result_struct_filename``.

    For every test image the top candidate returned by
    ``BatchGenerator.predict_test`` is recorded next to the human
    references.  Candidates and references are then written to ``eval/``
    and scored with ``eval/multi-bleu.perl``, the test perplexity is
    computed with ``eval_split``, and everything is dumped as JSON.
    """
    import shutil  # only needed here, for copying images into dump_folder

    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    max_images = params['max_images']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling can execute arbitrary code; load trusted files only.
    with open(checkpoint_path, 'rb') as f:
        checkpoint = pickle.load(f)
    checkpoint_params = checkpoint['params']
    dataset = checkpoint_params['dataset']
    model = checkpoint['model']

    dump_folder = params['dump_folder']
    if dump_folder:
        print('creating dump folder ' + dump_folder)
        # replaces a shell "mkdir -p" that broke on paths with spaces
        if not os.path.isdir(dump_folder):
            os.makedirs(dump_folder)

    # fetch the data provider
    dp = getDataProvider(dataset, params['pert'])
    dp.load_topic_models(dataset, params['lda'])

    misc = {'wordtoix': checkpoint['wordtoix']}
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    BatchGenerator = decodeGenerator(checkpoint_params)

    # Added for CCA and perturbed dataset
    if params['cca']:
        pert_str = '_pert' if params['pert'] else ''
        ccaweights = np.loadtxt(
            'cca/imageprojection_' + str(params['cca']) + pert_str + '.txt',
            delimiter=',')
        misc['ccaweights'] = ccaweights
    else:
        ccaweights = None

    # prediction options are the same for every image, so build them once;
    # the original rebuilt them (and re-ran load_idf) per image
    use_idf = params['normalization'] in ('idf', 'combined')
    kwparams = {
        'beam_size': params['beam_size'],
        'normalization': params['normalization'],
        'ccaweights': ccaweights,
        # Added for idf normalization
        'idf': load_idf() if use_idf else None,
        'words': ixtoword if use_idf else None,
    }

    # iterate over all images in test set and predict sentences
    all_references = []
    all_candidates = []
    n = 0
    for img in dp.iterImages(split='test', max_images=max_images):
        n += 1
        print('image %d/%d:' % (n, max_images))
        references = [' '.join(x['tokens']) for x in img['sentences']]

        # NOTE(review): the original branched on params['lda'] here, but
        # both branches made this identical call - confirm whether the
        # non-LDA path was meant to use a different predictor.
        Ys = BatchGenerator.predict_test([{'image': img}], model,
                                         checkpoint_params, **kwparams)

        img_blob = {}  # we will build this up
        img_blob['img_path'] = img['local_file_path']
        img_blob['imgid'] = img['imgid']

        if dump_folder:
            # copy source file to some folder. This makes it easier to
            # distribute results into a webpage, because all images that
            # were predicted on are in a single folder
            source_file = img['local_file_path']
            target_file = os.path.join(dump_folder,
                                       os.path.basename(source_file))
            try:
                shutil.copy(source_file, target_file)
            except (IOError, OSError, shutil.Error) as e:
                # the old shell "cp" never aborted the run; stay best-effort
                print('could not copy %s to %s: %s'
                      % (source_file, target_file, e))

        # encode the human-provided references
        img_blob['references'] = []
        for gtsent in references:
            print('GT: ' + gtsent)
            img_blob['references'].append({'text': gtsent})

        # now evaluate and encode the top prediction
        top_predictions = Ys[0]  # predictions for the only image passed in
        top_prediction = top_predictions[0]  # sorted with highest on top
        # ix 0 is the END token, skip that
        candidate = ' '.join(
            [ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        print('PRED: (%f) %s' % (top_prediction[0], candidate))

        # save for later eval
        all_references.append(references)
        all_candidates.append(candidate)

        img_blob['candidate'] = {'text': candidate,
                                 'logprob': top_prediction[0]}
        blob['imgblobs'].append(img_blob)

    # use perl script to eval BLEU score for fair comparison to other
    # research work; first write intermediate files
    print('writing intermediate files into eval/')
    with open('eval/output', 'w') as f:
        f.write('\n'.join(all_candidates))
    # exactly five references per image are expected
    for q in range(5):
        with open('eval/reference' + str(q), 'w') as f:
            f.write('\n'.join([x[q] for x in all_references]))

    # invoke the perl script to get BLEU scores
    print('invoking eval/multi-bleu.perl script...')
    owd = os.getcwd()
    os.chdir('eval')
    try:
        os.system('./multi-bleu.perl reference < output')
    finally:
        os.chdir(owd)  # always restore the working directory

    # now also evaluate test split perplexity
    gtppl = eval_split('test', dp, model, checkpoint_params, misc,
                       eval_max_images=max_images)
    print('perplexity of ground truth words based on dictionary of '
          '%d words: %f' % (len(ixtoword), gtppl))
    blob['gtppl'] = gtppl

    # dump result struct to file
    print('saving result struct to %s' % (params['result_struct_filename'], ))
    with open(params['result_struct_filename'], 'w') as f:
        json.dump(blob, f)
def main(params):
    """Alternately train an adversarial evaluator and a sentence generator.

    Builds the Theano graphs for a generator (``decodeGenerator``) and an
    evaluator (``decodeEvaluator``), asks ``Solver`` for gradient/update
    functions for each, and then, once per outer pass, runs ``iters_eval``
    evaluator updates followed by ``iters_gen`` generator updates. A
    checkpoint is dumped after each phase and on SIGINT.

    params: dict of run options. Read here: 'batch_size',
        'word_count_threshold', 'max_epochs', 'solver', 'beam_size',
        'share_Wemb', 'fix_Wemb', 'eval_period', 'checkpoint_file_name',
        'dataset', 'fappend', 'eval_batch_size', 'rand_negs', 'maxlen',
        'eval_model', 'learning_rate_eval', 'learning_rate_gen' and
        'write_checkpoint_ppl_threshold'. Written here: 'aux_inp_size',
        'image_feat_size', 'vocabulary_size', 'output_size', 'use_dropout'.

    Returns None; results are the checkpoint files written by
    ``dumpCheckpoint``.
    """
    batch_size = params['batch_size']
    word_count_threshold = params['word_count_threshold']
    max_epochs = params['max_epochs']
    host = socket.gethostname() # get computer hostname

    # fetch the data provider
    dp = getDataProvider(params)
    # Initialize the optimizer
    solver = Solver(params['solver'])

    # feature sizes come from the data provider and are published via params
    params['aux_inp_size'] = dp.aux_inp_size
    params['image_feat_size'] = dp.img_feat_size

    print 'Image feature size is %d, and aux input size is %d'%(params['image_feat_size'],params['aux_inp_size'])

    misc = {} # stores various misc items that need to be passed around the framework

    # go over all training sentences and find the vocabulary we want to use, i.e. the words that occur
    # at least word_count_threshold number of times
    # NOTE(review): bias_init_vector is returned here but never used below.
    misc['wordtoix'], misc['ixtoword'], bias_init_vector = preProBuildWordVocab(dp.iterSentences('train'), word_count_threshold)
    params['vocabulary_size'] = len(misc['wordtoix'])
    params['output_size'] = len(misc['ixtoword']) # these should match though
    params['use_dropout'] = 1

    # This initializes the model parameters and does matrix initializations
    generator = decodeGenerator(params)
    (gen_inp_list, predLogProb, predIdx, predCand, wOut_emb, updatesLstm) = generator.build_prediction_model( generator.model_th, params, params['beam_size'])
    # keep axes 0 and 2 only -- presumably axis 1 has size 1; TODO confirm
    wOut_emb = wOut_emb.reshape([wOut_emb.shape[0],wOut_emb.shape[2]])
    # generator-only forward function (only referenced from debugging code)
    f_gen_only = theano.function(gen_inp_list, [predLogProb, predIdx, wOut_emb], name='f_pred', updates=updatesLstm)

    modelGen = generator.model_th
    upListGen = generator.update_list

    # optionally hand the generator's word embedding to the evaluator
    if params['share_Wemb']:
        evaluator = decodeEvaluator(params, modelGen['Wemb'])
    else:
        evaluator = decodeEvaluator(params)
    modelEval = evaluator.model_th

    # Define the computational graph for relating the input image features and word indices to the
    # log probability cost function.
    (use_dropout_eval, eval_inp_list, f_pred_fns, costs, predTh, modelEval) = evaluator.build_advers_eval(modelEval, params, gen_inp_list, wOut_emb)

    # Merge the generator inputs into the evaluator input list, skipping
    # duplicates. comb_inp_list is the same list object as eval_inp_list, so
    # eval_inp_list is extended in place.
    comb_inp_list = eval_inp_list
    for inp in gen_inp_list:
        if inp not in comb_inp_list:
            comb_inp_list.append(inp)

    # Compile an evaluation function. Doesn't include gradients
    # To be used for validation set evaluation
    f_eval= theano.function(comb_inp_list, costs, name='f_eval', updates=updatesLstm)

    # Now let's build a gradient computation graph and rmsprop update mechanism
    # A shared embedding is dropped from the evaluator's parameter dict, and a
    # fixed embedding is dropped from the generator's update list.
    if params['share_Wemb']:
        modelEval.pop('Wemb')
    if params['fix_Wemb']:
        upListGen.remove('Wemb')

    # only the parameters named in the update list get generator gradients
    modelGenUpD = OrderedDict()
    for k in upListGen:
        modelGenUpD[k] = modelGen[k]
    # costs[0] drives the evaluator, costs[1] drives the generator
    gradsEval = tensor.grad(costs[0], wrt=modelEval.values(),add_names=True)
    gradsGen = tensor.grad(costs[1], wrt=modelGenUpD.values(), add_names=True)

    lrEval = tensor.scalar(name='lrEval',dtype=config.floatX)
    f_grad_comp_eval, f_param_update_eval, zg_eval, rg_eval, ud_eval= solver.build_solver_model(lrEval, modelEval, gradsEval, comb_inp_list, costs[0], params)

    lrGen = tensor.scalar(name='lrGen',dtype=config.floatX)
    f_grad_comp_gen, f_param_update_gen, zg_gen, rg_gen, ud_gen = solver.build_solver_model(lrGen, modelGenUpD, gradsGen, comb_inp_list, costs[1], params)

    print 'model init done.'
    print 'model has keys: ' + ', '.join(modelGen.keys())

    # calculate how many iterations we need, One epoch is considered once going through all the sentences and not images
    # Hence in case of coco/flickr this will 5* no of images
    # NOTE(review): the size requested below is ofwhat='images', which does
    # not match the comment above -- confirm which one is intended.
    num_sentences_total = dp.getSplitSize('train', ofwhat = 'images')
    # Python 2 '/' on two ints floors
    num_iters_one_epoch = num_sentences_total / batch_size
    max_iters = max_epochs * num_iters_one_epoch
    # evaluator gets half an epoch of updates per pass, generator a quarter
    iters_eval= num_iters_one_epoch//2
    iters_gen = num_iters_one_epoch//4
    eval_period_in_epochs = params['eval_period']
    eval_period_in_iters = max(1, int(num_iters_one_epoch * eval_period_in_epochs))
    top_val_ppl2 = -1
    smooth_train_ppl2 = 0.5 # placeholder; reset on the first step of each phase
    val_ppl2 = len(misc['ixtoword'])
    last_status_write_time = 0 # for writing worker job status reports
    json_worker_status = {}
    json_worker_status['params'] = params
    json_worker_status['history'] = []
    len_hist = defaultdict(int)
    t_print_sec = 60 # minimum seconds between progress reports
    ## Initialize the model parameters from the checkpoint file if we are resuming training
    # NOTE(review): model_init_from and checkpoint_init are not defined in
    # this function; presumably module-level names -- verify, otherwise this
    # branch raises NameError.
    if params['checkpoint_file_name'] != 'None':
        zipp(model_init_from,modelGen)
        print("\nContinuing training from previous model\n. Already run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint_init['epoch'], \
            checkpoint_init['perplexity']))

    pos_samp = np.arange(batch_size,dtype=np.int32)
    print batch_size
    ##############################################################
    # Define signal handler to catch ctl-c or kills so that we can save the model trained till that point
    # NOTE(review): the handler reads `it`, which is only bound once the loop
    # below has started; a SIGINT before that would fail inside the handler.
    # Its first parameter also shadows the `signal` module inside the handler
    # (harmless here, the module is not used there).
    def signal_handler(signal, frame):
        print('You pressed Ctrl+C! Saving Checkpoint Now before exiting!')
        filename = 'advmodel_checkpoint_%s_%s_%s_%.2f_INT.p' % (params['dataset'], host, params['fappend'], val_ppl2)
        dumpCheckpoint(filename, params, modelGen, modelEval, misc, it, val_ppl2)
        sys.exit(0)
    signal.signal(signal.SIGINT, signal_handler)
    ##############################################################

    # One outer pass = one evaluator phase followed by one generator phase.
    for it in xrange(max_epochs):
        # NOTE(review): `it` counts outer passes (bounded by max_epochs), yet
        # it is divided by iterations-per-epoch here; `epoch` is not used
        # afterwards.
        epoch = it * 1.0 / num_iters_one_epoch
        # Enable using dropout in training
        use_dropout_eval.set_value(1.)
        for it2 in xrange(iters_eval):
            t0 = time.time()
            # fetch a batch of data
            batch,_ = dp.sampPosNegSentSamps(params['eval_batch_size'] - params['rand_negs'])
            real_inp_list, lenS = prepare_data(batch, misc['wordtoix'], maxlen=params['maxlen'], pos_samp=pos_samp, prep_for=params['eval_model'], rand_negs = params['rand_negs'])

            # evaluate cost, gradient and perform parameter update
            cost = f_grad_comp_eval(*real_inp_list)
            f_param_update_eval(params['learning_rate_eval'])
            dt = time.time() - t0

            # Track training statistics
            train_ppl2 = (np.e**(-cost))
            smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2 # smooth exponentially decaying moving average
            # the first step of the phase restarts both running averages
            if it2 == 0: smooth_train_ppl2 = train_ppl2
            if it2 == 0:
                smooth_train_cost = cost
            else:
                smooth_train_cost = 0.99 * smooth_train_cost + 0.01 * cost

            tnow = time.time()
            if tnow > last_status_write_time + t_print_sec*1: # every now and then lets write a report
                print 'Eval Cnn in epoch %d: %d/%d sample done in %.3fs. Cost now is %.3f Pplx is %.3f' % (it, it2, iters_eval, dt, \
                    smooth_train_cost,smooth_train_ppl2)
                last_status_write_time = tnow

        print 'Done training the descriminative model for now. Switching to Genereative model'
        print 'Eval N/W in epoch %d: Cost now is %.3f Pplx is %.3f' % (it, smooth_train_cost,smooth_train_ppl2)
        # checkpoint taken after the evaluator phase only
        filename = 'advmodel_checkpoint_%s_%s_%s_%d_%.2f_EVOnly.p' % (params['dataset'], host, params['fappend'],it, smooth_train_ppl2)
        dumpCheckpoint(filename, params, modelGen, modelEval, misc, it, val_ppl2)

        # Disable Cnn dropout while training gen network
        use_dropout_eval.set_value(0.)
        for it2 in xrange(iters_gen):
            t0 = time.time()
            # fetch a batch of data
            batch,_ = dp.sampPosNegSentSamps(params['eval_batch_size'] - params['rand_negs'])
            real_inp_list, lenS = prepare_data(batch, misc['wordtoix'], maxlen=params['maxlen'], pos_samp=pos_samp, prep_for=params['eval_model'], rand_negs = params['rand_negs'])

            # evaluate cost, gradient and perform parameter update
            # (the NaN checks on the model and on zg_gen/rg_gen that used to
            # surround these two calls are disabled)
            cost = f_grad_comp_gen(*real_inp_list)
            f_param_update_gen(params['learning_rate_gen'])
            dt = time.time() - t0

            # print training statistics
            train_ppl2 = (np.e**(-cost))
            smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2 # smooth exponentially decaying moving average
            if it2 == 0: smooth_train_ppl2 = train_ppl2
            if it2 == 0:
                smooth_train_cost = cost
            else:
                smooth_train_cost = 0.99 * smooth_train_cost + 0.01 * cost

            tnow = time.time()
            if tnow > last_status_write_time + t_print_sec*1: # every now and then lets write a report
                print 'Gen Lstm in epoch %d: %d/%d sample done in %.3fs. Cost now is %.3f Pplx is %.3f' % (it, it2, iters_gen, dt, \
                    smooth_train_cost,smooth_train_ppl2)
                last_status_write_time = tnow

        print 'Done training the generative model for now. Switching to Genereative model. Final Stats are:'
        print 'Gen Lstm in epoch %d: Cost now is %.3f Pplx is %.3f' % (it, smooth_train_cost,smooth_train_ppl2)

        ## perform perplexity evaluation on the validation set and save a model checkpoint if it's good
        is_last_iter = (it+1) == max_iters
        # NOTE(review): the next line overrides the one above, so the block
        # below runs on every outer pass.
        is_last_iter = 1
        if (((it+1) % eval_period_in_iters) == 0 and it < max_iters - 5) or is_last_iter:
            # The real validation pass (eval_split_theano on 'val' plus
            # learning-rate decay) is disabled; the smoothed training
            # perplexity of the generator phase stands in for it.
            val_ppl2 = smooth_train_ppl2

            write_checkpoint_ppl_threshold = params['write_checkpoint_ppl_threshold']
            if val_ppl2 < top_val_ppl2 or top_val_ppl2 < 0:
                if val_ppl2 < write_checkpoint_ppl_threshold or write_checkpoint_ppl_threshold < 0:
                    # if we beat a previous record or if this is the first time
                    # AND we also beat the user-defined threshold or it doesnt exist
                    # NOTE(review): top_val_ppl2 is never updated (the
                    # assignment is disabled), so it stays at -1 and the outer
                    # test is always true.
                    filename = 'advmodel_checkpoint_%s_%s_%s_%d_%.2f_GenDone.p' % (params['dataset'], host, params['fappend'],it, smooth_train_ppl2)
                    dumpCheckpoint(filename, params, modelGen, modelEval, misc, it, val_ppl2)
def gen_from_test(params):
    """Predict a sequence for every test item, score it, and write a MIDI file.

    Loads a pickled checkpoint, runs beam-search prediction on each item of
    the 'test' split, writes candidates/references into ``eval/`` and runs
    ``multi-bleu.perl`` on them, reports test perplexity, and finally turns
    the predicted tokens (``pitch;position;duration``) into notes on a melody
    track that is written, together with a fixed bass line, to
    ``params['output_file']``.

    params: dict with keys 'checkpoint_path', 'max_images', 'output_file',
        'tempo', 'dump_folder', 'beam_size' and 'quantize'.

    Returns None.
    """
    checkpoint_path = params['checkpoint_path']
    max_images = params['max_images']
    fout = params['output_file']
    tempo = params['tempo']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(security): unpickling executes arbitrary code; only load
    # checkpoints you produced yourself.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    dataset = checkpoint_params['dataset']
    model = checkpoint['model']
    dump_folder = params['dump_folder']

    if dump_folder:
        print('creating dump folder ' + dump_folder)
        # NOTE(review): shell-built command; breaks on paths with spaces.
        os.system('mkdir -p ' + dump_folder)

    # fetch the data provider
    dp = getDataProvider(dataset)

    misc = {'wordtoix': checkpoint['wordtoix']}
    ixtoword = checkpoint['ixtoword']

    # Result struct in the layout used for JSON visualisation. The JSON dump
    # itself is currently disabled, so blob is built but not written.
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # iterate over all images in test set and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}
    all_references = []
    all_candidates = []
    images = dp.iterImages(split='test', max_images=max_images)
    for n, img in enumerate(images, 1):
        print('image %d/%d:' % (n, max_images))
        # one space-joined string per reference sentence
        references = [' '.join(x['tokens']) for x in img['sentences']]
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)

        img_blob = {
            'img_path': img['local_file_path'],
            'imgid': img['imgid'],
        }

        if dump_folder:
            # copy source file to some folder. This makes it easier to
            # distribute results into a webpage, because all images that were
            # predicted on are in a single folder
            source_file = img['local_file_path']
            target_file = os.path.join(
                dump_folder, os.path.basename(img['local_file_path']))
            os.system('cp %s %s' % (source_file, target_file))

        # encode the human-provided references
        img_blob['references'] = []
        for gtsent in references:
            print('GT: ' + gtsent)
            img_blob['references'].append({'text': gtsent})

        # now evaluate and encode the top prediction
        top_predictions = Ys[0]  # predictions for the only image passed in
        top_prediction = top_predictions[0]  # sorted with highest on top
        # ix 0 is the END token, skip that
        candidate = ' '.join(
            [ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        print('PRED: (%f) %s' % (top_prediction[0], candidate))

        # save for later eval
        all_references.append(references)
        all_candidates.append(candidate)

        img_blob['candidate'] = {
            'text': candidate,
            'logprob': top_prediction[0],
        }
        blob['imgblobs'].append(img_blob)

    # use perl script to eval BLEU score for fair comparison to other research
    # work. The files are closed explicitly so their contents are on disk
    # before the script reads them.
    print('writing intermediate files into eval/')
    with open('eval/output', 'w') as out_file:
        out_file.write('\n'.join(all_candidates))
    for q in xrange(1):  # only the first reference of every item is used
        with open('eval/reference%d' % q, 'w') as ref_file:
            ref_file.write('\n'.join([x[q] for x in all_references]))

    # invoke the perl script to get BLEU scores
    print('invoking eval/multi-bleu.perl script...')
    owd = os.getcwd()
    os.chdir('eval')
    try:
        os.system('./multi-bleu.perl reference < output')
    finally:
        os.chdir(owd)  # always restore the working directory

    # now also evaluate test split perplexity
    gtppl = eval_split('test', dp, model, checkpoint_params, misc,
                       eval_max_images=max_images)
    print('perplexity of ground truth words based on dictionary of %d words: %f'
          % (len(ixtoword), gtppl))
    blob['gtppl'] = gtppl

    # Convert every predicted token "pitch;position;duration" into a note.
    # NOTE(review): new_track and bass_track are not created in this function;
    # presumably module-level pretty_midi instruments -- verify.
    for idx, candidate in enumerate(all_candidates):
        for token in candidate.split():
            fields = token.split(';')
            pitch = int(fields[0])
            pos = convert_pos(fields[1], idx)
            dur = convert_dur(fields[2])
            new_track.notes.append(
                pretty_midi.Note(90, pitch, pos, pos + dur))

    new_midi_data = pretty_midi.PrettyMIDI(initial_tempo=tempo)
    new_midi_data.instruments.append(new_track)

    # pre-set chord progression: one bass note per time unit, 40 in total
    bass_pitches = [
        36, 47, 45, 43, 41, 40, 38, 43,
        36, 47, 45, 43, 41, 40, 38, 43,
        45, 41, 36, 43, 45, 41, 43, 43,
        36, 47, 45, 43, 41, 40, 38, 43,
        36, 47, 45, 43, 41, 40, 38, 43,
    ]
    for start, pitch in enumerate(bass_pitches):
        bass_track.notes.append(pretty_midi.Note(90, pitch, start, start + 1))
    new_midi_data.instruments.append(bass_track)

    adjust_tempo(new_midi_data)
    if params['quantize']:
        quantize(new_midi_data)
    new_midi_data.write(fout)
def main(params, split):
    """Train the class-sequence model on one dataset split and checkpoint it.

    Builds the vocabulary of classes from the training data, initialises the
    model through the generator class, then runs ``max_epochs`` worth of
    solver steps. Every minute a JSON status file is written; at each
    evaluation period the validation perplexity is computed and, if it is the
    best so far and below the configured threshold, a checkpoint is pickled.

    params: dict of run options (batch size, dataset, solver settings, output
        directories, ...). It is also forwarded to ``solver.step`` as keyword
        arguments.
    split: integer split id; selects the dataset json file and is embedded in
        the checkpoint file name.

    Returns None. Calls ``sys.exit`` after the gradient check when
    ``params['do_grad_check']`` is set.
    """
    batch_size = params['batch_size']
    dataset = params['dataset']
    feature_file = params['feature_file']
    class_count_threshold = params['class_count_threshold']
    do_grad_check = params['do_grad_check']
    max_epochs = params['max_epochs']
    host = socket.gethostname()  # get computer hostname

    json_file = 'dataset_mmdb_book_fps_30_samplesize_25_split_%d.json' % (
        split)
    # fetch the data provider
    dp = getDataProvider(dataset, feature_file, json_file)

    misc = {}  # misc items that need to be passed around the framework

    # go over all training classes and find the vocabulary we want to use,
    # i.e. the classes that occur at least class_count_threshold times
    misc['classtoix'], misc['ixtoclass'], bias_init_vector = \
        preProBuildWordVocab(dp.iterSentences('train'), class_count_threshold)

    # delegate the initialization of the model to the Generator class
    BatchGenerator = decodeGenerator(params)
    init_struct = BatchGenerator.init(params, misc)
    model = init_struct['model']
    misc['update'] = init_struct['update']
    misc['regularize'] = init_struct['regularize']

    # force overwrite here. This is a bit of a hack, not happy about it
    model['bd'] = bias_init_vector.reshape(1, bias_init_vector.size)

    print('model init done.')
    print('model has keys: ' + ', '.join(model.keys()))
    print('updating: ' + ', '.join(
        '%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1])
        for k in misc['update']))
    # this line lists the regularized parameters, so label it accordingly
    print('regularizing: ' + ', '.join(
        '%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1])
        for k in misc['regularize']))
    print('number of learnable parameters total: %d' % (
        sum(model[k].shape[0] * model[k].shape[1] for k in misc['update']), ))

    if params.get('init_model_from', ''):
        # load checkpoint
        # NOTE(security): unpickling executes arbitrary code; only load
        # checkpoints you produced yourself.
        with open(params['init_model_from'], 'rb') as checkpoint_file:
            checkpoint = pickle.load(checkpoint_file)
        model = checkpoint['model']  # overwrite the model

    # initialize the Solver and the cost function
    solver = Solver()

    def costfun(batch, model):
        # wrap the cost function to abstract some things away from the Solver
        return RNNGenCost(batch, model, params, misc)

    # calculate how many iterations we need
    num_sentences_total = dp.getSplitSize('train', ofwhat='sentences')
    # floor division: same result as Python 2 '/' on ints, stated explicitly
    num_iters_one_epoch = num_sentences_total // batch_size
    max_iters = max_epochs * num_iters_one_epoch
    eval_period_in_epochs = params['eval_period']
    eval_period_in_iters = max(
        1, int(num_iters_one_epoch * eval_period_in_epochs))
    abort = False
    top_val_ppl2 = -1
    # initially size of dictionary of confusion
    smooth_train_ppl2 = len(misc['ixtoclass'])
    val_ppl2 = len(misc['ixtoclass'])
    last_status_write_time = 0  # for writing worker job status reports
    json_worker_status = {'params': params, 'history': []}
    lastsavedcheckpoint = ''

    for it in xrange(max_iters):
        if abort:
            break
        t0 = time.time()

        # fetch a batch of data
        batch = [dp.sampleImageSentencePair() for _ in xrange(batch_size)]
        # evaluate cost, gradient and perform parameter update
        step_struct = solver.step(batch, model, costfun, **params)
        cost = step_struct['cost']
        dt = time.time() - t0

        # print training statistics
        train_ppl2 = step_struct['stats']['ppl2']
        if it == 0:
            # start out where we start out
            smooth_train_ppl2 = train_ppl2
        else:
            # smooth exponentially decaying moving average
            smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2
        epoch = it * 1.0 / num_iters_one_epoch
        print('%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, reg cost = %f, ppl2 = %.2f (smooth %.2f)'
              % (it, max_iters, dt, epoch, cost['loss_cost'],
                 cost['reg_cost'], train_ppl2, smooth_train_ppl2))
        print('last saved checkpoint in %s' % (lastsavedcheckpoint, ))

        # perform gradient check if desired, with a bit of a burnin time
        # (10 iterations)
        if it == 10 and do_grad_check:
            print('disabling dropout for gradient check...')
            params['drop_prob_encoder'] = 0
            params['drop_prob_decoder'] = 0
            solver.gradCheck(batch, model, costfun)
            print('done gradcheck, exitting.')
            sys.exit()

        # detect if loss is exploding and kill the job if so
        total_cost = cost['total_cost']
        if it == 0:
            total_cost0 = total_cost  # store this initial cost
        if total_cost > total_cost0 * 2:
            print('Aboring, cost seems to be exploding. Run gradcheck? Lower the learning rate?')
            abort = True  # set the abort flag, we'll break out

        # logging: write JSON files for visual inspection of the training
        tnow = time.time()
        if tnow > last_status_write_time + 60 * 1:
            # every now and then lets write a report
            last_status_write_time = tnow
            jstatus = {
                'time': datetime.datetime.now().isoformat(),
                'iter': (it, max_iters),
                'epoch': (epoch, max_epochs),
                'time_per_batch': dt,
                'smooth_train_ppl2': smooth_train_ppl2,
                'val_ppl2': val_ppl2,  # just write the last available one
                'train_ppl2': train_ppl2,
            }
            json_worker_status['history'].append(jstatus)
            status_file = os.path.join(
                params['worker_status_output_directory'],
                host + '_status.json')
            # best effort: a failed status write must not stop training
            try:
                with open(status_file, 'w') as status_out:
                    json.dump(json_worker_status, status_out)
            except Exception as e:  # todo be more clever here
                print('tried to write worker status into %s but got error:'
                      % (status_file, ))
                print(e)

        # perform perplexity evaluation on the validation set and save a
        # model checkpoint if it's good
        is_last_iter = (it + 1) == max_iters
        is_eval_iter = ((it + 1) % eval_period_in_iters) == 0
        if (is_eval_iter and it < max_iters - 5) or is_last_iter:
            # perform the evaluation on VAL set
            val_ppl2 = eval_split('val', dp, model, params, misc)
            print('validation perplexity = %f' % (val_ppl2, ))

            # abort training if the perplexity is no good
            min_ppl_or_abort = params['min_ppl_or_abort']
            if val_ppl2 > min_ppl_or_abort and min_ppl_or_abort > 0:
                # message now states the comparison that actually triggered
                print('aborting job because validation perplexity %f > %f'
                      % (val_ppl2, min_ppl_or_abort))
                abort = True  # abort the job

            write_checkpoint_ppl_threshold = params[
                'write_checkpoint_ppl_threshold']
            beat_record = val_ppl2 < top_val_ppl2 or top_val_ppl2 < 0
            beat_threshold = (val_ppl2 < write_checkpoint_ppl_threshold
                              or write_checkpoint_ppl_threshold < 0)
            if beat_record and beat_threshold:
                # we beat a previous record (or this is the first time) AND we
                # also beat the user-defined threshold (or it doesnt exist)
                top_val_ppl2 = val_ppl2
                filename = 'model_checkpoint_%s_%s_%s_alpha_%2.2f_beta_%2.2f_split_%d.p' % (
                    dataset, host, params['fappend'], params['alpha'],
                    params['beta'], split)
                filepath = os.path.join(
                    params['checkpoint_output_directory'], filename)
                checkpoint = {
                    'it': it,
                    'epoch': epoch,
                    'model': model,
                    'params': params,
                    'perplexity': val_ppl2,
                    'classtoix': misc['classtoix'],
                    'ixtoclass': misc['ixtoclass'],
                    'json_file': json_file,
                }
                # runs tagged 'test' never write checkpoints; only report a
                # save when a file was actually written
                if params['fappend'] != 'test':
                    try:
                        with open(filepath, 'wb') as checkpoint_out:
                            pickle.dump(checkpoint, checkpoint_out)
                        print('saved checkpoint in %s' % (filepath, ))
                        lastsavedcheckpoint = filepath
                    except Exception as e:  # todo be more clever here
                        print('tried to write checkpoint into %s but got error: '
                              % (filepath, ))
                        print(e)
def main(params):
    """Caption every image listed in ``tasks.txt`` and dump JSON + HTML results.

    Loads a pickled checkpoint, reads image names from
    ``<root_path>/tasks.txt`` and features from ``<root_path>/vgg_feats.mat``
    (variable 'feats', one column per image), runs beam-search prediction for
    each image and records all returned candidates. Results are written to
    ``<root_path>/result_struct.json`` and ``<root_path>/result.html``.

    params: dict with keys 'checkpoint_path', 'root_path' and 'beam_size'.

    Returns None.
    """
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE(security): unpickling executes arbitrary code; only load
    # checkpoints you produced yourself.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    model = checkpoint['model']
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # load the tasks.txt file
    # TODO FIND EASY WAY TO CALL FILE WITH PROPER root
    root_path = params['root_path']
    with open(os.path.join(root_path, 'tasks.txt'), 'r') as tasks_file:
        img_names = tasks_file.read().splitlines()

    # load the features for all images
    features_path = os.path.join(root_path, 'vgg_feats.mat')
    features_struct = scipy.io.loadmat(features_path)
    features = features_struct['feats']  # 4096 x N numpy array of features
    _, num_images = features.shape

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}
    for n in xrange(num_images):
        print('image %d/%d:' % (n, num_images))

        # encode the image
        img = {
            'feat': features[:, n],
            'local_file_path': img_names[n],
        }
        print(img['local_file_path'])

        # perform the work. heavy lifting happens inside
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)
        print(Ys)

        # build up the output: keep every candidate, not just the best one
        img_blob = {
            'img_path': img['local_file_path'],
            'candidates': [],
        }
        top_predictions = Ys[0]  # predictions for the only image passed in
        # report the count, as the label says, rather than the whole list
        print('Number of name candidates %d' % len(top_predictions))
        # TODO TIME IT. SEEMS PRETTY FAST THOUGH
        for top_prediction in top_predictions:  # sorted, highest on top
            # ix 0 is the END token, skip that
            words = [ixtoword[ix] for ix in top_prediction[1] if ix > 0]
            candidate = ' '.join(words)
            print(words)
            print('PRED: (%f) %s' % (top_prediction[0], candidate))
            img_blob['candidates'].append({
                'text': candidate,
                'logprob': top_prediction[0],
            })
        blob['imgblobs'].append(img_blob)

    # dump result struct to file
    save_file = os.path.join(root_path, 'result_struct.json')
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as json_out:
        json.dump(blob, json_out)

    # dump output html; collect the pieces and join once instead of growing a
    # string with +=
    html_parts = []
    for img_blob in blob['imgblobs']:
        html_parts.append(
            '<img src="%s" height="400"><br>' % (img_blob['img_path'], ))
        for cand in img_blob['candidates']:
            html_parts.append(
                '(%f) %s <br><br>' % (cand['logprob'], cand['text']))
    html = ''.join(html_parts)
    html_file = os.path.join(root_path, 'result.html')
    print('writing html result file to %s...' % (html_file, ))
    with open(html_file, 'w') as html_out:
        html_out.write(html)
def hold_comittee_discussion(params, com_dataset):
    """Score the committee's candidate sentences with every member model.

    For each checkpoint listed in ``com_dataset['members_model']`` the model
    is loaded, its ``f_eval_other`` function is built, and the 'test' split of
    the committee data file is scored batch by batch. Row ``i`` of the result
    holds the scores produced by member ``i``.

    Side effects: sets ``params['comResFname']`` and
    ``com_dataset['com_evaluation']``, pickles the scores together with the
    oracle BLEU array into that file, and rewrites ``params['jsonFname']``
    with the updated ``com_dataset``.

    params: dict with 'jsonFname' and 'fappend'; optional 'eval_batch_size'
        (default 100) and 'eval_max_images' (default -1).
    com_dataset: dict with 'n_memb', 'n_sent', 'images' and 'members_model'.

    Returns the ``(n_memb, n_imgs * n_sent)`` array of scores.
    """
    n_memb = com_dataset['n_memb']
    n_sent = com_dataset['n_sent']
    n_imgs = len(com_dataset['images'])

    eval_array = np.zeros((n_memb, n_imgs * n_sent))
    eval_batch_size = params.get('eval_batch_size', 100)
    eval_max_images = params.get('eval_max_images', -1)
    split = 'test'

    for model_id, mod in enumerate(com_dataset['members_model']):
        # NOTE(security): unpickling executes arbitrary code; only load
        # checkpoints you produced yourself.
        with open(mod, 'rb') as checkpoint_file:
            checkpoint = pickle.load(checkpoint_file)
        checkpoint_params = checkpoint['params']
        model_npy = checkpoint['model']
        wordtoix = checkpoint['wordtoix']

        checkpoint_params['use_theano'] = 1
        if 'image_feat_size' not in checkpoint_params:
            checkpoint_params['image_feat_size'] = 4096

        # every member is evaluated on the committee's own data file
        checkpoint_params['data_file'] = params['jsonFname'].rsplit('/')[-1]
        dp = getDataProvider(checkpoint_params)

        # build the scoring function for this member
        BatchGenerator = decodeGenerator(checkpoint_params)
        BatchGenerator.build_eval_other_sent(
            BatchGenerator.model_th, checkpoint_params, model_npy)

        print('evaluating %s performance in batches of %d'
              % (split, eval_batch_size))

        # c_id is the first free column of this member's row
        c_id = 0
        for batch in dp.iterImageSentencePairBatch(
                split=split, max_batch_size=eval_batch_size,
                max_images=eval_max_images):
            xWd, xId, maskd, _ = dp.prepare_data(batch, wordtoix)
            # xWd.shape[1] is used as the number of entries in this batch
            n_batch = xWd.shape[1]
            eval_array[model_id, c_id:c_id + n_batch] = \
                BatchGenerator.f_eval_other(xWd, xId, maskd)
            c_id += n_batch

    # Calculate oracle scores
    bleu_array = eval_bleu_all_cand(params, com_dataset)
    eval_results = {
        'logProb_feat': eval_array,
        'OracleBleu': bleu_array,
    }

    # Save the mutual evaluations
    # NOTE(review): the file is named *.json but holds a pickle; the name is
    # left unchanged because com_dataset records it for other consumers.
    params['comResFname'] = 'committee_evalSc_%s.json' % (params['fappend'])
    com_dataset['com_evaluation'] = params['comResFname']
    with open(params['comResFname'], 'wb') as results_out:
        pickle.dump(eval_results, results_out)
    with open(params['jsonFname'], 'w') as dataset_out:
        json.dump(com_dataset, dataset_out)

    return eval_array
def __init__(
    self,
    model_def_file,
    pretrained_model_file,
    mean_file,
    raw_scale,
    class_labels_file,
    bet_file,
    image_dim,
    gpu_mode,
):
    """Load the vision nets, the caption model and the lookup data.

    model_def_file, pretrained_model_file: definition and weights of the
        first classifier (labelled "vgg16" below).
    mean_file: accepted for interface compatibility; not used here, fixed
        per-channel means are passed to both classifiers instead.
    raw_scale: forwarded to both ``caffe.Classifier`` instances.
    class_labels_file: text file with one "<synset_id> <name>[, ...]" entry
        per line.
    bet_file: pickled dict with at least 'infogain' and 'preferences'.
    image_dim: side length of the square input image.
    gpu_mode: truthy to run caffe on the GPU, otherwise CPU.

    NOTE(review): self.googlenet_args, self.rnn_params and
    self.database_param are read before anything here assigns them;
    presumably class-level attributes -- verify.
    """
    logging.info("Loading net and associated files...")
    if gpu_mode:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    ## load models
    # vgg16
    self.net = caffe.Classifier(
        model_def_file,
        pretrained_model_file,
        image_dims=(image_dim, image_dim),
        raw_scale=raw_scale,
        mean=np.array([103.939, 116.779, 123.68]),
        channel_swap=(2, 1, 0),
    )
    logging.info("Load vision model, %s", model_def_file)

    # googlenet
    self.net_google = caffe.Classifier(
        self.googlenet_args["model_def_file"],
        self.googlenet_args["pretrained_model_file"],
        image_dims=(image_dim, image_dim),
        raw_scale=raw_scale,
        mean=np.float32([104.0, 116.0, 122.0]),
        channel_swap=(2, 1, 0),
    )
    logging.info("Load vision model, %s", self.googlenet_args["model_def_file"])

    # language model
    self.rnn_params["beam_size"] = 10
    # NOTE(security): unpickling executes arbitrary code; only load files
    # you produced yourself (applies to all cPickle.load calls below).
    with open(self.rnn_params["checkpoint_path"], "rb") as checkpoint_file:
        self.rnn_checkpoint = cPickle.load(checkpoint_file)
    self.rnn_checkpoint_params = self.rnn_checkpoint["params"]
    self.rnn_model = self.rnn_checkpoint["model"]
    self.rnn_ixtoword = self.rnn_checkpoint["ixtoword"]
    # pass the checkpoint's params (not the whole checkpoint dict), matching
    # every other decodeGenerator call site in this file
    self.rnn_BatchGenerator = decodeGenerator(self.rnn_checkpoint_params)
    self.rnn_kwparams = {"beam_size": self.rnn_params["beam_size"]}
    logging.info("Load LSTM model, %s", self.rnn_params["checkpoint_path"])

    # generate N bit lookup table: number of set bits for every 16-bit value
    self.lookup = np.asarray([bin(i).count("1") for i in range(1 << 16)])

    # load reference bit model
    with open(self.database_param, "rb") as database_file:
        self.database = cPickle.load(database_file)
    logging.info("Load database from {}".format(self.database_param))
    logging.info("database shape {}".format(self.database["ref"].shape))

    # class labels: first field is the synset id, the rest is a
    # comma-separated list of names of which only the first is kept
    label_rows = []
    with open(class_labels_file) as labels_file:
        for line in labels_file:
            fields = line.strip().split(" ")
            label_rows.append({
                "synset_id": fields[0],
                "name": " ".join(fields[1:]).split(",")[0],
            })
    labels_df = pd.DataFrame(label_rows)
    # DataFrame.sort was removed from pandas; prefer sort_values when the
    # installed version has it and fall back to sort on old releases
    sort_labels = getattr(labels_df, "sort_values", None) or labels_df.sort
    self.labels = sort_labels("synset_id")["name"].values

    # file mode left at the default, exactly as before
    with open(bet_file) as bet_in:
        self.bet = cPickle.load(bet_in)
    # A bias to prefer children nodes in single-chain paths
    # I am setting the value to 0.1 as a quick, simple model.
    # We could use better psychological models here...
    self.bet["infogain"] -= np.array(self.bet["preferences"]) * 0.1
def main(params):
    """Score given (image, sentence) pairs with a trained checkpoint.

    Reads ``params['imgList']``, a text file with one comma separated row per
    line: ``image_name,sentence`` or ``image_name,sentence,feature_index``.
    The layout is decided from the first row. Every sentence is evaluated
    against its image in batches and the results are written to
    ``<root_path>/result_struct_<fname_append>.json``.

    Args:
        params: dict with at least 'checkpoint_path', 'root_path', 'imgList'
            and 'fname_append'. 'eval_batch_size' defaults to 100 and
            'aux_inp_file' is required when the checkpoint enables
            'en_aux_inp'. The dict is also passed to loadArbitraryFeatures.

    Raises:
        ValueError: the checkpoint needs an auxiliary input but no
            'aux_inp_file' was given.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling executes arbitrary code; only load trusted checkpoints.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    model_npy = checkpoint['model']
    wordtoix = checkpoint['wordtoix']

    # The theano path is forced on regardless of what the checkpoint stored.
    checkpoint_params['use_theano'] = 1
    checkpoint_params.setdefault('image_feat_size', 4096)

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = checkpoint_params
    blob['imgblobs'] = []

    # load the image/sentence list
    root_path = params['root_path']
    with open(params['imgList'], 'r') as list_file:
        img_names_list = list_file.read().splitlines()

    # NOTE(review): rows are split on every comma, so a sentence that itself
    # contains a comma shifts the columns.
    rows = [line.rsplit(',') for line in img_names_list]
    n_fields = len(rows[0]) if rows else 0
    if n_fields < 2:
        print('ERROR: List should atleast contain image name and a corresponding sentence')
        return
    img_names = [row[0] for row in rows]
    sentRaw = [row[1] for row in rows]
    if n_fields > 2:
        idxes = [int(row[2]) for row in rows]
    else:
        idxes = xrange(len(img_names_list))

    if checkpoint_params.get('en_aux_inp', 0) and params.get('aux_inp_file', None) is None:
        raise ValueError('ERROR: please specify auxillary input feature using --aux_inp_file')

    # load the features for all images
    features, aux_inp = loadArbitraryFeatures(params, idxes)
    N = len(img_names)

    # build the evaluator for externally supplied sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    BatchGenerator.build_eval_other_sent(BatchGenerator.model_th, checkpoint_params, model_npy)
    eval_batch_size = params.get('eval_batch_size', 100)
    gen_fprop = BatchGenerator.f_eval_other
    use_aux_inp = checkpoint_params.get('en_aux_inp', 0)

    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint['epoch'], \
          checkpoint['perplexity']))

    for start in xrange(0, N, eval_batch_size):
        # Python 2 print statement; the trailing comma suppresses the newline.
        print('image %d/%d:\r' % (start, N)),

        # encode one batch of image/sentence pairs
        batch = []
        for n in xrange(start, min(start + eval_batch_size, N)):
            out = {}
            out['image'] = {'feat': features[:, n]}
            out['sentence'] = {'raw': sentRaw[n], 'tokens': word_tokenize(sentRaw[n])}
            out['idx'] = n
            if use_aux_inp:
                out['image']['aux_inp'] = aux_inp[:, n]
            batch.append(out)

        inp_list, lenS = prepare_data(batch, wordtoix)

        # perform the work. heavy lifting happens inside
        eval_array = gen_fprop(*inp_list)

        for ix, x in enumerate(batch):
            # build up the output
            img_blob = {}
            img_blob['img_path'] = img_names[x['idx']]
            img_blob['candidate'] = {'text': x['sentence']['raw'], 'logprob': float(eval_array[0, ix])}
            blob['imgblobs'].append(img_blob)

    # dump result struct to file
    jsonFname = 'result_struct_%s.json' % (params['fname_append'] )
    save_file = os.path.join(root_path, jsonFname)
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as out_file:
        json.dump(blob, out_file)
def run(checkpoint):
    """Caption every test image of the checkpoint's dataset and score the result.

    For each image the top beam-search prediction (beam size 1) is decoded to
    text. All captions are written to
    ``<outdir>/captions_val_<algorithm>_results.json`` and then
    ``eval_tools.metrics.run`` is called with the dataset, the algorithm name
    and the same output directory.

    Args:
        checkpoint: dict with 'params' (containing 'dataset'), 'model',
            'ixtoword', 'algorithm' and 'outdir'.

    Returns:
        Whatever ``metrics.run`` returns.
    """
    max_images = -1
    beam_size = 1

    checkpoint_params = checkpoint["params"]
    dataset = checkpoint_params["dataset"]
    model = checkpoint["model"]
    ixtoword = checkpoint["ixtoword"]

    # fetch the data provider
    dp = getDataProvider(dataset)

    # iterate over all images in test set and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {"beam_size": beam_size}
    captions_res = []
    for n, img in enumerate(dp.iterImages(split="test", max_images=max_images), 1):
        print("image %d/%d:" % (n, max_images))
        references = [" ".join(x["tokens"]) for x in img["sentences"]]
        Ys = BatchGenerator.predict([{"image": img}], model, checkpoint_params, **kwparams)

        # only the first human reference is echoed
        if references:
            print("GT: " + references[0])

        # now evaluate and encode the top prediction
        top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
        top_prediction = top_predictions[0]  # these are sorted with highest on top
        candidate = " ".join(
            [ixtoword[ix] for ix in top_prediction[1] if ix > 0]
        )  # ix 0 is the END token, skip that
        print("PRED: (%f) %s" % (top_prediction[0], candidate))

        captions_res.append({"image_id": img["id"], "caption": candidate})

    alg_name = checkpoint["algorithm"]
    res_file_name = checkpoint["outdir"] + "/captions_val_" + alg_name + "_results.json"
    # Close (and therefore flush) the results file before the metrics run.
    with open(res_file_name, "w") as res_file:
        json.dump(captions_res, res_file)

    from eval_tools import metrics

    scores = metrics.run(dataset, alg_name, checkpoint["outdir"])
    return scores
def main(video_name):
    """Caption the not-yet-processed frames of one video (top 5 beams each).

    Frames are listed from ``/mnt/frames/<video_name>``; frames that already
    appear in the key-frame caption data are skipped when building the name
    list. Features come from
    ``/mnt/tags/fei-caption-all-pickle/<video_name>.pickle`` and the combined
    result is written to ``/mnt/tags/fei-caption-all/<video_name>_5_caption.json``.

    Args:
        video_name: name of the video's frame directory.
    """
    # load the checkpoint
    checkpoint_path = '/home/t-yuche/neuraltalk/models/flickr8k_cnn_lstm_v1.p'
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    model = checkpoint['model']
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['checkpoint_params'] = checkpoint_params

    frames_dir = os.path.join('/mnt/frames', video_name)
    all_frames = [os.path.join(frames_dir, x) for x in os.listdir(frames_dir)]

    # Captions that already exist are carried over into the output as-is.
    fei_cap_data = load_video_caption('/mnt/tags/fei-caption-keyframe', video_name)
    blob['imgblobs'] = [] + fei_cap_data

    # A set gives constant-time "already processed?" checks.
    processed_frames = set(x['img_path'] for x in fei_cap_data)
    img_names = [frame for frame in all_frames if frame not in processed_frames]

    # load the features for all images
    features_path = os.path.join('/mnt/tags/fei-caption-all-pickle', video_name + '.pickle')
    with open(features_path, 'rb') as features_file:
        features = pickle.load(features_file)
    features = features.T
    D, N = features.shape

    # NOTE(review): column n of the features is paired with img_names[n], but
    # img_names comes from an os.listdir listing minus the processed frames.
    # Nothing here shows that its order or length matches the feature file --
    # confirm against whatever produces the pickle.

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': 20}
    for n in xrange(N):
        print('image %d/%d:' % (n, N))

        # encode the image
        img = {}
        img['feat'] = features[:, n]
        img['local_file_path'] = img_names[n]

        # perform the work. heavy lifting happens inside
        tic = time.time()
        Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)
        toc = time.time()
        print('image %d/%d: %f' % (n, N, toc - tic))

        # build up the output
        img_blob = {}
        img_blob['img_path'] = img['local_file_path']
        img_blob['rnn_time'] = (toc - tic)
        img_blob['candidate'] = {'text': [], 'logprob': []}

        # keep up to five predictions for the first (and only) image we passed in
        top_predictions = Ys[0]
        for i in xrange(min(5, len(top_predictions))):
            top_prediction = top_predictions[i]
            # ix 0 is the END token, skip that
            candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])
            img_blob['candidate']['text'].append(candidate)
            img_blob['candidate']['logprob'].append(top_prediction[0])

        blob['imgblobs'].append(img_blob)

    # dump result struct to file
    save_file = os.path.join('/mnt/tags/fei-caption-all', video_name + '_5_caption.json')
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as out_file:
        json.dump(blob, out_file)
def predict_sentence(self, image):
    """Extract CNN features, caption them with the RNN checkpoint, dump results.

    Reads ``self.cnn_model_def``, ``self.cnn_model_params``, ``self.rnn_model``
    (used as the checkpoint path) and ``self.gpu_mode``.

    Returns:
        ``render_template("result.html", title='Results')`` on success, or
        ``(False, <message>)`` when any exception is raised inside the body.

    NOTE(review): ``image`` is never read in this body; the file to process is
    the hard-coded entry in ``filenames`` below.
    NOTE(review): ``root_path``, ``img_names`` and ``params`` are used below
    but never assigned in this function (their assignments only appear in
    comments). Unless they exist as module-level names, the body raises
    NameError, which the broad ``except`` turns into the failure tuple --
    confirm the intended wiring before relying on this method.
    """
    try:
        ################ FEATURE EXTRACTION ##############
        cnn_model_def = self.cnn_model_def
        cnn_model_params = self.cnn_model_params
        rnn_model = self.rnn_model

        def predict(in_data, net):
            """
            Get the features for a batch of data using network

            Inputs:
            in_data: data batch

            Returns the network's first output with axes 2 and 3 squeezed out.
            """
            out = net.forward(**{net.inputs[0]: in_data})
            features = out[net.outputs[0]].squeeze(axis=(2,3))
            return features

        def batch_predict(filenames, net):
            """
            Get the features for all images from filenames using a network

            Inputs:
            filenames: a list of names of image files (relative to IMAGE_PATH)

            Returns:
            an array of feature vectors for the images in that file,
            one row per file name
            """
            IMAGE_PATH = '/tmp/captionly_demo_uploads'
            N, C, H, W = net.blobs[net.inputs[0]].data.shape
            F = net.blobs[net.outputs[0]].data.shape[1]
            Nf = len(filenames)
            # Hi/Wi are not used afterwards; this read fails early if the
            # first file is missing or not a 3-D image.
            Hi, Wi, _ = imread(IMAGE_PATH + '/' + filenames[0]).shape
            allftrs = np.zeros((Nf, F))
            # walk the file list in chunks of the network's batch size N
            for i in range(0, Nf, N):
                in_data = np.zeros((N, C, H, W), dtype=np.float32)

                batch_range = range(i, min(i+N, Nf))
                batch_filenames = [filenames[j] for j in batch_range]
                Nb = len(batch_range)

                batch_images = np.zeros((Nb, 3, H, W))
                for j,fname in enumerate(batch_filenames):
                    im = imread(IMAGE_PATH + '/' + fname)
                    # grayscale image: replicate the single channel 3 times
                    if len(im.shape) == 2:
                        im = np.tile(im[:,:,np.newaxis], (1,1,3))
                    # RGB -> BGR
                    im = im[:,:,(2,1,0)]
                    # mean subtraction
                    im = im - np.array([103.939, 116.779, 123.68])
                    # resize
                    im = imresize(im, (H, W))
                    # get channel in correct dimension
                    im = np.transpose(im, (2, 0, 1))
                    batch_images[j,:,:,:] = im

                # insert into correct place (a short last batch leaves the
                # remaining rows of in_data at zero)
                in_data[0:len(batch_range), :, :, :] = batch_images

                # predict features
                ftrs = predict(in_data, net)

                for j in range(len(batch_range)):
                    allftrs[i+j,:] = ftrs[j,:]

                print 'Done %d/%d files' % (i+len(batch_range), len(filenames))

            return allftrs

        if self.gpu_mode:
            caffe.set_mode_gpu()
        else:
            caffe.set_mode_cpu()

        net = caffe.Net(cnn_model_def, cnn_model_params)
        caffe.set_phase_test()

        # The bare string below is disabled code (reading names from a file).
        """
        filenames = []
        with open(args.files) as fp:
            for line in fp:
                filename = line.strip().split()[0]
                filenames.append(filename)
        """
        # NOTE(review): hard-coded upload name; the `image` argument is ignored.
        filenames = ['2015-05-17_17:28:44.2513807EGRMwN.jpg']

        allftrs = batch_predict(filenames, net)

        # # store the features in a pickle file
        # with open(args.out, 'w') as fp:
        #     pickle.dump(allftrs, fp)

        # save to mat file (transposed, so one column per image)
        print "Saving file to vgg_feats.mat..."
        io.savemat(UPLOAD_FOLDER + '/vgg_feats',{'feats':allftrs.T})

        #################### PREDICTION ##################
        dim = 300
        # load the checkpoint
        checkpoint_path = rnn_model

        # load glove vect dict
        glove_dict_path = '../../vecDict.pickle'
        with open(glove_dict_path, 'rb') as handle:
            vec_dict = pickle.load(handle)

        print 'loading checkpoint %s' % (checkpoint_path, )
        # NOTE(review): this file handle is never closed explicitly.
        checkpoint = pickle.load(open(checkpoint_path, 'rb'))
        checkpoint_params = checkpoint['params']
        dataset = checkpoint_params['dataset']
        model = checkpoint['model']
        misc = {}
        misc['wordtoix'] = checkpoint['wordtoix']
        ixtoword = checkpoint['ixtoword']

        # output blob which we will dump to JSON for visualizing the results
        blob = {}
        #blob['params'] = params
        blob['checkpoint_params'] = checkpoint_params
        blob['imgblobs'] = []

        # create and load the tasks.txt file
        # root_path = params['root_path']
        allImages = os.listdir(UPLOAD_FOLDER)

        # one directory entry per line, no trailing newline after the last one
        with open(os.path.join(UPLOAD_FOLDER, 'tasks.txt'), 'w') as f:
            for k, v in enumerate(allImages):
                if k==len(allImages)-1:
                    f.write(v)
                else:
                    f.write(v + '\n')

        # load the features for all images
        # NOTE(review): `root_path` is not defined in this function (see the
        # docstring). The features were saved under UPLOAD_FOLDER above, so
        # presumably root_path is meant to be the same directory -- confirm.
        features_path = os.path.join(root_path, 'vgg_feats.mat')
        features_struct = scipy.io.loadmat(features_path)
        features = features_struct['feats'] # this is a 4096 x N numpy array of features
        D,N = features.shape

        fileNameToVector = {}

        # iterate over all images and predict sentences
        BatchGenerator = decodeGenerator(checkpoint_params)
        for n in xrange(N):
            print 'image %d/%d:' % (n, N)

            # encode the image
            img = {}
            img['feat'] = features[:, n]
            # NOTE(review): `img_names` is not defined in this function.
            img['local_file_path'] =img_names[n]

            # perform the work. heavy lifting happens inside
            # NOTE(review): `params` is not defined in this function.
            kwparams = { 'beam_size' : params['beam_size'] }
            Ys = BatchGenerator.predict([{'image':img}], model, checkpoint_params, **kwparams)

            # build up the output
            img_blob = {}
            img_blob['img_path'] = img['local_file_path']

            # encode the top prediction
            top_predictions = Ys[0] # take predictions for the first (and only) image we passed in
            top_prediction = top_predictions[0] # these are sorted with highest on top
            candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0]) # ix 0 is the END token, skip that
            print 'PRED: (%f) %s' % (top_prediction[0], candidate)

            # sentence vector = mean of the word vectors found in vec_dict
            currSentenceVector = np.zeros(dim)
            numWords = 0
            for word in candidate.split():
                if word in vec_dict:
                    currSentenceVector += vec_dict[word].astype(np.float)
                    numWords += 1

            # NOTE(review): numWords is 0 when no caption word is in vec_dict,
            # which makes this a division by zero.
            currSentenceVector /= numWords
            fileNameToVector[img['local_file_path']] = currSentenceVector

            img_blob['candidate'] = {'text': candidate, 'logprob': top_prediction[0]}
            blob['imgblobs'].append(img_blob)

        # dump result struct to file
        save_file = os.path.join(root_path, 'result_struct.json')
        print 'writing predictions to %s...' % (save_file, )
        json.dump(blob, open(save_file, 'w'))

        # dump the fileNameToVector mapping to a pickle file
        # (relative path: lands in the process's current working directory)
        with open('fileNameToVector.pickle', 'wb') as handle:
            pickle.dump(fileNameToVector, handle)

        # dump output html
        html = ''
        for img in blob['imgblobs']:
            html += '<img src="%s" height="400"><br>' % (img['img_path'], )
            html += '(%f) %s <br><br>' % (img['candidate']['logprob'], img['candidate']['text'])
        html_file = os.path.join(root_path, 'result.html')
        print 'writing html result file to %s...' % (html_file, )
        open(html_file, 'w').write(html)

        return render_template("result.html", title = 'Results')
        # return (True, meta, result, '%.3f' % (endtime - starttime))

        # Everything below is disabled code kept from an earlier
        # classification-based version of this method.
        #img_names = open(os.path.join(root_path, 'tasks.txt'), 'r').read().splitlines()
        # starttime = time.time()
        # scores = self.net.predict([image], oversample=True).flatten()
        # endtime = time.time()

        # indices = (-scores).argsort()[:5]
        # predictions = self.labels[indices]

        # # In addition to the prediction text, we will also produce
        # # the length for the progress bar visualization.
        # meta = [
        #     (p, '%.5f' % scores[i])
        #     for i, p in zip(indices, predictions)
        # ]
        # logging.info('result: %s', str(meta))

        # # Compute expected information gain
        # expected_infogain = np.dot(
        #     self.bet['probmat'], scores[self.bet['idmapping']])
        # expected_infogain *= self.bet['infogain']

        # # sort the scores
        # infogain_sort = expected_infogain.argsort()[::-1]
        # bet_result = [(self.bet['words'][v], '%.5f' % expected_infogain[v])
        #               for v in infogain_sort[:5]]
        # logging.info('bet result: %s', str(bet_result))

        # return (True, meta, bet_result, '%.3f' % (endtime - starttime))

    except Exception as err:
        # Any failure (including programming errors such as NameError) is
        # logged at info level and reported to the caller as a generic message.
        logging.info('Classification error: %s', err)
        return (False, 'Something went wrong when classifying the '
                       'image. Maybe try another one?')
def main(feature):
    """Caption a single image feature vector with the first listed checkpoint.

    Only column 0 of ``feature`` is read and only the first entry of the
    hard-coded checkpoint list is loaded.

    Args:
        feature: 2-D array-like indexed as ``feature[:, 0]``.

    Returns:
        dict mapping ``'0'`` to a result blob with the keys 'params',
        'checkpoint_params' and 'imgblobs'; the single entry of 'imgblobs'
        holds 'img_path' and 'candidate' (caption text plus 'logprob').
    """
    import cPickle as pickle
    import sys

    code_root = '/home/young/Desktop/ImageCaption'
    # Avoid growing sys.path by one entry on every call.
    if code_root not in sys.path:
        sys.path.append(code_root)
    # Solver and eval_split are not used below; the imports are kept in case
    # importing those modules has side effects.
    from imagernn.solver import Solver
    from imagernn.imagernn_utils import decodeGenerator, eval_split

    N = 1  # number of images handled per call

    # Candidate checkpoints; only the first one is used (see the slice below).
    checkpoint_path_top5 = [
        r'/home/young/Desktop/ImageCaption/model/model_checkpoint_coco_Caicai-PC_baseline_18.47.p',
        r'/home/young/Desktop/ImageCaption/model/model_checkpoint_coco_Caicai-PC_baseline_24.64.p',
        r'/home/young/Desktop/ImageCaption/model/model_checkpoint_coco_Caicai-PC_baseline_24.56.p',
    ]
    blob_top3 = {}  # checkpoint index (as str) -> result blob
    for i, checkpoint_path in enumerate(checkpoint_path_top5[:1]):
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(checkpoint_path, 'rb') as checkpoint_file:
            checkpoint = pickle.load(checkpoint_file)
        checkpoint_params = checkpoint['params']
        model = checkpoint['model']
        ixtoword = checkpoint['ixtoword']

        # output blob which we will dump to JSON for visualizing the results
        blob = {}
        blob['params'] = {}
        blob['checkpoint_params'] = checkpoint_params
        blob['imgblobs'] = []

        BatchGenerator = decodeGenerator(checkpoint_params)
        kwparams = {'beam_size': 2}
        for n in xrange(N):
            print('image %d/%d:' % (n + 1, N))

            # encode the image
            img = {}
            img['feat'] = feature[:, n]
            img['local_file_path'] = 'test.jpg'

            # perform the work. heavy lifting happens inside
            Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)

            # build up the output
            img_blob = {}
            img_blob['img_path'] = img['local_file_path']

            # encode the top prediction
            top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
            top_prediction = top_predictions[0]  # these are sorted with highest on top
            # ix 0 is the END token, skip that
            candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])
            print('PRED: (%f) %s' % (top_prediction[0], candidate))
            img_blob['candidate'] = {'text': candidate, 'logprob': top_prediction[0]}
            blob['imgblobs'].append(img_blob)

        blob_top3[str(i)] = blob
    return blob_top3
def hold_comittee_discussion(params, com_dataset):
    """Score every candidate sentence with every committee member model.

    Each checkpoint listed in ``com_dataset['members_model']`` is loaded in
    turn and used to evaluate all 'test' image/sentence pairs of the dataset
    in ``params['jsonFname']``; its scores fill one row of the result.

    Side effects:
        * sets ``params['comResFname']`` and ``com_dataset['com_evaluation']``
        * pickles ``{'logProb_feat', 'OracleBleu'}`` to that file
        * rewrites ``params['jsonFname']`` with the updated ``com_dataset``

    Args:
        params: dict with 'jsonFname' and 'fappend'; 'eval_batch_size'
            (default 100) and 'eval_max_images' (default -1) are optional.
        com_dataset: dict with 'n_memb', 'n_sent', 'images' and
            'members_model'.

    Returns:
        numpy array of shape (n_memb, n_imgs * n_sent) holding the scores.
    """
    n_memb = com_dataset['n_memb']
    n_sent = com_dataset['n_sent']
    n_imgs = len(com_dataset['images'])

    eval_array = np.zeros((n_memb, n_imgs * n_sent))
    eval_batch_size = params.get('eval_batch_size', 100)
    eval_max_images = params.get('eval_max_images', -1)
    split = 'test'

    for model_id, mod in enumerate(com_dataset['members_model']):
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(mod, 'rb') as checkpoint_file:
            checkpoint = pickle.load(checkpoint_file)
        checkpoint_params = checkpoint['params']
        model_npy = checkpoint['model']
        wordtoix = checkpoint['wordtoix']

        checkpoint_params['use_theano'] = 1
        checkpoint_params.setdefault('image_feat_size', 4096)

        # Point the data provider at the committee dataset file.
        checkpoint_params['data_file'] = params['jsonFname'].rsplit('/')[-1]
        dp = getDataProvider(checkpoint_params)

        BatchGenerator = decodeGenerator(checkpoint_params)
        BatchGenerator.build_eval_other_sent(BatchGenerator.model_th, checkpoint_params, model_npy)
        gen_fprop = BatchGenerator.f_eval_other

        print('evaluating %s performance in batches of %d' % (split, eval_batch_size))

        c_id = 0  # column offset of the current batch within this model's row
        for batch in dp.iterImageSentencePairBatch(split=split, max_batch_size=eval_batch_size,
                                                   max_images=eval_max_images):
            xWd, xId, maskd, lenS = dp.prepare_data(batch, wordtoix)
            n_cols = xWd.shape[1]
            eval_array[model_id, c_id:c_id + n_cols] = gen_fprop(xWd, xId, maskd)
            c_id += n_cols

    # Calculate oracle scores
    bleu_array = eval_bleu_all_cand(params, com_dataset)
    eval_results = {}
    eval_results['logProb_feat'] = eval_array
    eval_results['OracleBleu'] = bleu_array

    # Save the mutual evaluations
    # NOTE: the file name ends in .json but the content is a pickle.
    params['comResFname'] = 'committee_evalSc_%s.json' % (params['fappend'])
    com_dataset['com_evaluation'] = params['comResFname']
    with open(params['comResFname'], "wb") as results_file:
        pickle.dump(eval_results, results_file)
    with open(params['jsonFname'], 'w') as dataset_file:
        json.dump(com_dataset, dataset_file)

    return eval_array
def main(params):
    """Caption every frame listed in a task file, keeping the top 5 beams each.

    Existing captions from ``/mnt/tags/fei-caption`` for the same video are
    placed first in the output; one new entry per feature column is appended.

    Args:
        params: dict with 'checkpoint_path', 'task_file' (one image name per
            line), 'feature_file' (pickled array with one row per image),
            'beam_size' and 'out_file' (JSON output path).
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    model = checkpoint['model']
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = checkpoint_params

    # load the task file
    task_file = params['task_file']
    with open(task_file, 'r') as task_handle:
        img_names = task_handle.read().splitlines()

    # The video name is the task file's base name minus its last 10 characters.
    # NOTE(review): presumably that strips a fixed suffix of the task file
    # name -- confirm against the naming convention.
    video_name = task_file.split('/')[-1][:-10]
    caption_data = load_video_caption('/mnt/tags/fei-caption', video_name)
    blob['imgblobs'] = [] + caption_data

    # load the features for all images
    features_path = params['feature_file']
    with open(features_path, 'rb') as features_file:
        features = pickle.load(features_file)
    features = features.T  # after the transpose: one column per image
    D, N = features.shape

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    for n in xrange(N):
        print('image %d/%d:' % (n, N))

        # encode the image
        img = {}
        img['feat'] = features[:, n]
        img['local_file_path'] = img_names[n]

        # perform the work. heavy lifting happens inside
        kwparams = {'beam_size': params['beam_size']}
        tic = time.time()
        Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)
        toc = time.time()
        print('image %d/%d: %f' % (n, N, toc - tic))

        # build up the output
        img_blob = {}
        img_blob['img_path'] = img['local_file_path']
        img_blob['rnn_time'] = (toc - tic)
        img_blob['candidate'] = {'text': [], 'logprob': []}

        # keep up to five predictions for the first (and only) image we passed in
        top_predictions = Ys[0]
        for i in xrange(min(5, len(top_predictions))):
            top_prediction = top_predictions[i]
            # ix 0 is the END token, skip that
            candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])
            img_blob['candidate']['text'].append(candidate)
            img_blob['candidate']['logprob'].append(top_prediction[0])

        blob['imgblobs'].append(img_blob)

    # dump result struct to file
    save_file = params['out_file']
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as out_file:
        json.dump(blob, out_file)
def main(params):
    """Rank ground-truth test sentences by relevance to each test image.

    The segmented Chinese ground-truth sentences of the test set are encoded
    with the checkpoint's vocabulary; sentences with fewer than two known
    words are dropped. For every test image
    ``BatchGenerator.sentence_relevance`` is called and two lines are written:
    sentence ids to ``<dataset>.top20.sentid.txt`` and ids with scores to
    ``<dataset>.top20.sentid.score.txt`` (both below
    ``<rootpath>/<collection>/SimilarityIndex/test_sent``).

    Args:
        params: dict with 'checkpoint_path', 'max_images', 'dump_folder'
            (created when non-empty) and 'beam_size'.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    max_images = params['max_images']
    dump_folder = params['dump_folder']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint['params']
    dataset = checkpoint_params['dataset']
    model = checkpoint['model']
    wordtoix = checkpoint['wordtoix']

    rootpath = '/home/lgp105b/xirong/VisualSearch'
    collection = 'flickr8k'
    version = 'baidu'
    index_dir = os.path.join(rootpath, collection, 'SimilarityIndex', 'test_sent')

    if dump_folder:
        print('creating dump folder ' + dump_folder)
        # Create the folder directly instead of building a shell command.
        if not os.path.isdir(dump_folder):
            os.makedirs(dump_folder)

    print("%s %d" % ("len(misc['wordtoix']):", len(wordtoix)))

    # get the groundtruth sentences encoded in the model-dataset's vocabulary
    vob2idx = chinese_vob_idx(rootpath, collection, version)
    testset_filename = os.path.join(rootpath, collection, 'Annotation', 'test_dataset.txt')
    with open(testset_filename) as testset_file:
        test_ids = set(line.strip() for line in testset_file)

    gt_filename = os.path.join(rootpath, collection, 'seg.Flickr8k.token.Chinese.txt')
    with open(gt_filename) as gt_file:
        input_data = [line.strip().decode('utf-8', 'ignore') for line in gt_file]

    # The first field of each line, minus its last two characters, is matched
    # against the test ids. Blank lines are skipped.
    test_lines = []
    for line in input_data:
        fields = line.split()
        if fields and fields[0][:-2] in test_ids:
            test_lines.append(line)
    input_data = test_lines
    print(len(input_data))

    # ignore a word if it is not in the vocabulary or not in wordtoix
    # (wordtoix only holds words that occur more often than the threshold)
    testid2sentences = encode_to_chvob(vob2idx, input_data)
    count_del = 0
    for sid in list(testid2sentences.keys()):
        encoded = [wordtoix[x] for x in testid2sentences[sid] if x in wordtoix]
        if len(encoded) < 2:
            del testid2sentences[sid]
            count_del += 1
        else:
            testid2sentences[sid] = encoded
    print('%d sentences cannot encoded with misx[wordtoix]'%count_del)

    # fetch the data provider
    dp = getDataProvider(dataset)

    # iterate over all images in test set and rank the sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}

    ids_path = os.path.join(index_dir, '%s.top20.sentid.txt'%dataset)
    scores_path = os.path.join(index_dir, '%s.top20.sentid.score.txt'%dataset)
    # The with-block closes both outputs even if a prediction fails midway.
    with open(ids_path, 'w') as fout, open(scores_path, 'w') as fout_s:
        for n, img in enumerate(dp.iterImages(split='test', max_images=max_images), 1):
            filename = img['filename']
            print('image %d/%d:%s' % (n, max_images, filename))

            top_sentences = BatchGenerator.sentence_relevance(
                [{'image': img}], model, checkpoint_params, testid2sentences, **kwparams)

            id_parts = ['%s '%filename]
            score_parts = ['%s '%filename]
            for x in top_sentences:
                for elem in x:
                    id_parts.append('%s '%elem[0])
                    score_parts.append('%s %s '%(elem[0], elem[1]))
            output_line = ''.join(score_parts) + '\n'

            print(output_line)
            fout.write(''.join(id_parts) + '\n')
            fout_s.write(output_line)
            fout_s.flush()
def main(params):
    """Generate captions for a list of images with a compiled theano predictor.

    ``params['imgList']`` has one row per image: ``name``, ``name,<idx>`` or
    ``name,(<idx>, <auxidx>)``; the part after the first comma is parsed with
    ``ast.literal_eval`` and the layout is decided from the first row. The
    best caption and all remaining beam candidates per image are stored in
    ``<root_path>/result_struct_<fname_append>.json``; the file is also
    written whenever ``n % 5000 == 1`` and once more at the end.

    Args:
        params: option dict. Keys read here: 'checkpoint_path', 'gen_model',
            'dobeamsearch', 'use_label_file', 'poolmethod', 'labels',
            'featfromlbl', 'uselabel', 'softmax_smooth_factor',
            'softmax_propogate', 'computelogprob', 'greedy', 'root_path',
            'imgList', 'word_freq_w', 'word_freq_sc', 'beam_size',
            'fname_append' and 'rescoreByLen'. The dict is modified in place
            and also passed to loadArbitraryFeatures.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    cp_params = checkpoint['params']

    if params['gen_model'] is None:
        model_npy = checkpoint['model'] if 'model' in checkpoint else checkpoint['modelGen']
    else:
        with open(params['gen_model'], 'rb') as gen_file:
            gen_cp = pickle.load(gen_file)
        model_npy = gen_cp.get('model', {})

    cp_params['use_theano'] = 1
    if params['dobeamsearch']:
        cp_params['advers_gen'] = 0

    if params['use_label_file'] == 1:
        # Options not given on the command line fall back to the checkpoint's.
        for key in ('poolmethod', 'labels', 'featfromlbl', 'uselabel'):
            if params[key] is None:
                params[key] = cp_params[key]
    else:
        params['uselabel'] = 0

    print('parsed parameters:')
    print(json.dumps(params, indent=2))

    if 'image_feat_size' not in cp_params:
        cp_params['image_feat_size'] = 4096

    if 'misc' in checkpoint:
        misc = checkpoint['misc']
        ixtoword = misc['ixtoword']
    else:
        misc = {}
        ixtoword = checkpoint['ixtoword']
        misc['wordtoix'] = checkpoint['wordtoix']

    cp_params['softmax_smooth_factor'] = params['softmax_smooth_factor']
    cp_params['softmax_propogate'] = params['softmax_propogate']
    cp_params['computelogprob'] = params['computelogprob']
    cp_params['greedy'] = params['greedy']
    cp_params['gen_input_noise'] = 0
    if cp_params.get('sched_sampling_mode', None) is not None:
        cp_params['sched_sampling_mode'] = None

    # load the image list and set up feature loading
    root_path = params['root_path']
    with open(params['imgList'], 'r') as list_file:
        img_names_list = list_file.read().splitlines()
    auxidxes = []
    img_names = [x.rsplit(',')[0] for x in img_names_list]

    if len(img_names_list[0].split(',', 1)) > 1:
        # Parse the annotation of every row once.
        annotations = [ast.literal_eval(x.split(',', 1)[1].strip()) for x in img_names_list]
        if isinstance(annotations[0], tuple):
            idxes = [a[0] for a in annotations]
            auxidxes = [a[1] for a in annotations]
        else:
            idxes = annotations
    else:
        idxes = xrange(len(img_names_list))

    # NOTE(review): when 'swap_aux' is missing from cp_params the comparison
    # below is False, so a list with aux indexes takes the swapped branch by
    # default -- confirm that is intended.
    if cp_params.get('swap_aux') == 0 or auxidxes == []:
        features, aux_inp, feat_idx, aux_idx = loadArbitraryFeatures(
            params, idxes, auxidxes=auxidxes)
    else:
        features, aux_inp, feat_idx, aux_idx = loadArbitraryFeatures(
            params, auxidxes, auxidxes=idxes)

    ##-------------------------------- Setup the models --------------------------###########
    # bit 0 of 'use_encoder_for' enables the image feature encoder
    if cp_params.get('use_encoder_for', 0) & 1:
        imgFeatEncoder = RecurrentFeatEncoder(cp_params['image_feat_size'],
                                              cp_params['word_encoding_size'],
                                              cp_params, mdl_prefix='img_enc_',
                                              features=features.T)
        zipp(model_npy, imgFeatEncoder.model_th)
        (imgenc_use_dropout, imgFeatEnc_inp, xI,
         updatesLSTMImgFeat) = imgFeatEncoder.build_model(imgFeatEncoder.model_th, cp_params)
    else:
        xI = None
        imgFeatEnc_inp = []

    # bit 1 of 'use_encoder_for' enables the auxiliary feature encoder
    if cp_params.get('use_encoder_for', 0) & 2:
        auxFeatEncoder = RecurrentFeatEncoder(cp_params['aux_inp_size'],
                                              cp_params['image_encoding_size'],
                                              cp_params, mdl_prefix='aux_enc_',
                                              features=aux_inp.T)
        zipp(model_npy, auxFeatEncoder.model_th)
        (auxenc_use_dropout, auxFeatEnc_inp, xAux,
         updatesLSTMAuxFeat) = auxFeatEncoder.build_model(auxFeatEncoder.model_th, cp_params)
    else:
        auxFeatEnc_inp = []
        xAux = None

    # Testing to see if diversity can be achieved by weighing words
    if params['word_freq_w'] is not None:
        with open(params['word_freq_w'], 'r') as freq_file:
            w_freq = json.load(freq_file)
        wordtoix = misc['wordtoix']
        w_logw = np.zeros(len(wordtoix), dtype=np.float32)
        for w in w_freq:
            if w in wordtoix:
                w_logw[wordtoix[w]] = w_freq[w]
        # NOTE(review): vocabulary words missing from the frequency file stay
        # at 0, which makes this minimum 0 -- confirm the file covers the
        # whole vocabulary.
        w_logw = w_logw / w_logw[1:].min()
        w_logw[0] = w_logw.max()
        w_logw = -params['word_freq_sc'] * np.log(w_logw)
    else:
        w_logw = None

    BatchGenerator = decodeGenerator(cp_params)
    # Compile and init the theano predictor
    BatchGenerator.prepPredictor(model_npy, cp_params, params['beam_size'], xI, xAux,
                                 imgFeatEnc_inp + auxFeatEnc_inp,
                                 per_word_logweight=w_logw)
    if params['greedy']:
        BatchGenerator.usegumbel.set_value(0)

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = copy(cp_params)
    if cp_params.get('class_out_factoring', 0) == 1:
        blob['checkpoint_params'].pop('ixtoclsinfo')
    blob['imgblobs'] = []

    N = len(img_names)

    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint['epoch'], \
          checkpoint['perplexity']))

    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)

    for n in xrange(N):
        print('image %d/%d:' % (n, N))

        # encode the image
        img = {}
        img['feat'] = features[:, feat_idx[n]].T
        img['img_idx'] = feat_idx[n]
        if cp_params.get('en_aux_inp', 0):
            # NOTE(review): aux_inp is *called* here although it is used like
            # an array (``aux_inp.T``) above; left unchanged -- confirm what
            # loadArbitraryFeatures returns.
            img['aux_inp'] = aux_inp(aux_idx[n]) if aux_inp != [] else np.zeros(
                cp_params['aux_inp_size'], dtype=np.float32)
            img['aux_idx'] = aux_idx[n] if aux_inp != [] else []
        img['local_file_path'] = img_names[n]

        # perform the work. heavy lifting happens inside
        enc_inp_list = prepare_seq_features(
            [{'image': img}],
            use_enc_for=cp_params.get('use_encoder_for', 0),
            use_shared_mem=cp_params.get('use_shared_mem_enc', 0))
        Ys, Ax = BatchGenerator.predict([{'image': img}], cp_params, ext_inp=enc_inp_list)

        # build up the output
        img_blob = {}
        img_blob['img_path'] = img['local_file_path']

        # take predictions for the first (and only) image we passed in
        if params['rescoreByLen'] == 0:
            top_predictions = Ys[0]
        else:
            top_predictions = rescoreProbByLen(Ys[0])
        top_predictions = sorted(top_predictions, key=lambda aa: aa[0], reverse=True)
        top_prediction = top_predictions[0]  # highest score first after sorting

        # ix 0 is the END token, skip that
        if cp_params.get('reverse_sentence', 0) == 0:
            candidate = ' '.join([ixtoword[int(ix)] for ix in top_prediction[1] if ix > 0])
        else:
            candidate = ' '.join(
                [ixtoword[int(ix)] for ix in reversed(top_prediction[1]) if ix > 0])

        if params['rescoreByLen'] == 0:
            print('PRED: (%f) %s' % (float(top_prediction[0]), candidate))
        else:
            print('PRED: (%f, %f) %s' % (float(top_prediction[0]),
                                         float(top_prediction[2]), candidate))
        img_blob['candidate'] = {'text': candidate, 'logprob': float(top_prediction[0])}

        # Save all the other candidates as well.
        # NOTE(review): 'reverse_sentence' is not applied to these -- confirm
        # whether that is intended.
        candlist = []
        for prediction in top_predictions[1:]:
            other = ' '.join([ixtoword[int(ix)] for ix in prediction[1] if ix > 0])
            candlist.append({'text': other, 'logprob': float(prediction[0])})
        img_blob['candidatelist'] = candlist
        blob['imgblobs'].append(img_blob)

        # periodic checkpoint of the results
        if (n % 5000) == 1:
            print('writing predictions to %s...' % (save_file, ))
            with open(save_file, 'w') as out_file:
                json.dump(blob, out_file)

    # dump result struct to file
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as out_file:
        json.dump(blob, out_file)
def main(params):
    """Caption every image listed in tasks.txt and write JSON and HTML reports.

    Keys read from ``params``:
      checkpoint_path -- pickled checkpoint holding 'params', 'model' and
                         'ixtoword'
      root_path       -- directory containing tasks.txt (one image path per
                         line) and vgg_feats.mat (variable 'feats'); the
                         result files are written into this directory too
      beam_size       -- forwarded to the generator's predict() call

    Writes result_struct.json and result.html into root_path.
    """
    checkpoint_path = params['checkpoint_path']
    root_path = params['root_path']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling can execute arbitrary code; only load checkpoints that
    # come from a trusted source.
    with open(checkpoint_path, 'rb') as handle:
        checkpoint = pickle.load(handle)
    checkpoint_params = checkpoint['params']
    model = checkpoint['model']
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # image paths, one per line; line n belongs to feature column n
    with open(os.path.join(root_path, 'tasks.txt'), 'r') as handle:
        img_names = handle.read().splitlines()

    # load the features for all images; columns are indexed by image below
    features_path = os.path.join(root_path, 'vgg_feats.mat')
    features = scipy.io.loadmat(features_path)['feats']
    _feat_dim, num_images = features.shape

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}
    for n in range(num_images):
        print('image %d/%d:' % (n, num_images))
        img = {'feat': features[:, n], 'local_file_path': img_names[n]}

        # perform the work. heavy lifting happens inside
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)

        # Ys[0] holds the predictions for the single image we passed in; the
        # first entry is taken as the best one (expected to be sorted
        # best-first by the generator).
        top_prediction = Ys[0][0]
        # ix 0 is the END token, skip that
        candidate = ' '.join(
            [ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        # float() keeps the value JSON-friendly in case the generator hands
        # back a numpy scalar rather than a Python float.
        logprob = float(top_prediction[0])
        print('PRED: (%f) %s' % (logprob, candidate))

        blob['imgblobs'].append({
            'img_path': img['local_file_path'],
            'candidate': {'text': candidate, 'logprob': logprob},
        })

    # dump result struct to file
    save_file = os.path.join(root_path, 'result_struct.json')
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as handle:
        json.dump(blob, handle)

    # dump output html
    html_parts = []
    for img_blob in blob['imgblobs']:
        html_parts.append(
            '<img src="%s" height="400"><br>' % (img_blob['img_path'], ))
        html_parts.append(
            '(%f) %s <br><br>' % (img_blob['candidate']['logprob'],
                                  img_blob['candidate']['text']))
    html_file = os.path.join(root_path, 'result.html')
    print('writing html result file to %s...' % (html_file, ))
    with open(html_file, 'w') as handle:
        handle.write(''.join(html_parts))
def main(params):
    """Caption the test split, score each caption with BLEU and save the results.

    Keys read from ``params``:
      checkpoint_path        -- pickled checkpoint holding 'params', 'model',
                                'wordtoix' and 'ixtoword'
      max_images             -- forwarded to the data provider and eval_split
      beam_size              -- forwarded to the generator's predict() call
      result_struct_filename -- path of the JSON file to write

    The JSON blob holds one entry per image (references, top candidate,
    log-probability, BLEU scores) plus the averaged BLEU scores and the
    perplexity returned by eval_split.
    """
    checkpoint_path = params['checkpoint_path']
    max_images = params['max_images']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling can execute arbitrary code; only load checkpoints that
    # come from a trusted source.
    with open(checkpoint_path, 'rb') as handle:
        checkpoint = pickle.load(handle)
    checkpoint_params = checkpoint['params']
    dataset = checkpoint_params['dataset']
    model = checkpoint['model']

    # fetch the data provider
    dp = getDataProvider(dataset)

    misc = {'wordtoix': checkpoint['wordtoix']}
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # iterate over all images in test set and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}
    all_bleu_scores = []
    images = dp.iterImages(split='test', max_images=max_images)
    for n, img in enumerate(images, 1):
        print('image %d/%d:' % (n, max_images))
        # references as a list of token lists
        references = [x['tokens'] for x in img['sentences']]
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)

        img_blob = {
            'img_path': img['local_file_path'],
            'imgid': img['imgid'],
            'references': [],
        }

        # encode the human-provided references
        for gtwords in references:
            gt_text = ' '.join(gtwords)
            print('GT: ' + gt_text)
            img_blob['references'].append({'text': gt_text})

        # Ys[0] holds the predictions for the single image we passed in; the
        # first entry is taken as the best one.
        top_prediction = Ys[0][0]
        candidate = [ixtoword[ix] for ix in top_prediction[1]]
        candidate_text = ' '.join(candidate)
        print('PRED: (%f) %s' % (top_prediction[0], candidate_text))
        bleu_scores = evalCandidate(candidate, references)
        print('BLEU: B-1: %f B-2: %f B-3: %f' % tuple(bleu_scores))
        img_blob['candidate'] = {
            'text': candidate_text,
            'logprob': top_prediction[0],
            'bleu': bleu_scores,
        }
        all_bleu_scores.append(bleu_scores)
        blob['imgblobs'].append(img_blob)

    print('final average bleu scores:')
    if all_bleu_scores:
        num_scored = len(all_bleu_scores)
        bleu_averages = [
            sum(scores[i] for scores in all_bleu_scores) * 1.0 / num_scored
            for i in range(3)
        ]
    else:
        # The iterator produced no images; report zeros instead of failing
        # with a division by zero.
        bleu_averages = [0.0, 0.0, 0.0]
    blob['final_result'] = {'bleu': bleu_averages}
    print('FINAL BLEU: B-1: %f B-2: %f B-3: %f' % tuple(bleu_averages))

    # now also evaluate test split perplexity
    gtppl = eval_split('test', dp, model, checkpoint_params, misc,
                       eval_max_images=max_images)
    print('perplexity of ground truth words: %f' % (gtppl, ))
    blob['gtppl'] = gtppl

    # dump result struct to file
    result_path = params['result_struct_filename']
    print('saving result struct to %s' % (result_path, ))
    with open(result_path, 'w') as handle:
        json.dump(blob, handle)
def main(scriptparams):
    """Score every image/sentence pair of a split and save per-sentence perplexities.

    Keys read from ``scriptparams``:
      checkpoint -- path of the pickled checkpoint ('misc', 'params' and
                    either 'model' or 'modelGen')
      split      -- name of the data split to iterate over
      maxlen     -- copied into the checkpoint params before building the model

    Saves the arrays ``pplx`` and ``keys`` with np.savez under
    scorelogs/<checkpoint basename>_logprob<split> and prints the corpus-level
    perplexity.
    """
    # NOTE: unpickling can execute arbitrary code; only load checkpoints that
    # come from a trusted source.
    with open(scriptparams['checkpoint'], 'rb') as handle:
        checkpoint = pickle.load(handle)

    # NOTE(review): the 'scorelogs' directory is not created here; presumably
    # it must already exist when np.savez runs -- confirm.
    checkpoint_stem = osp.basename(scriptparams['checkpoint']).split('.')[0]
    npfilename = osp.join(
        'scorelogs', checkpoint_stem + '_logprob%s' % (scriptparams['split']))

    misc = checkpoint['misc']
    params = checkpoint['params']
    params['use_gumbel_mse'] = 0
    params['maxlen'] = scriptparams['maxlen']

    # fetch the data provider
    dp = getDataProvider(params)

    # weights to copy into the freshly built model further down
    if 'model' in checkpoint:
        model_init_gen_from = checkpoint['model']
    else:
        model_init_gen_from = checkpoint['modelGen']

    lstmGenerator = decodeGenerator(params)
    model = lstmGenerator.model_th
    misc['update'] = lstmGenerator.update_list
    misc['regularize'] = lstmGenerator.regularize

    # bit 0 of 'use_encoder_for' enables the image-feature encoder
    if params.get('use_encoder_for', 0) & 1:
        if params.get('encode_gt_sentences', 0):
            # NOTE(review): batch_size is not assigned anywhere in this
            # function; unless it exists at module level this line raises
            # NameError -- confirm the intended value.
            xI = tensor.zeros((batch_size, params['image_encoding_size']))
            imgFeatEnc_inp = []
        else:
            imgFeatEncoder = RecurrentFeatEncoder(params['image_feat_size'],
                                                  params['word_encoding_size'],
                                                  params,
                                                  mdl_prefix='img_enc_',
                                                  features=dp.features.T)
            mdlLen = len(model.keys())
            model.update(imgFeatEncoder.model_th)
            # the encoder must not overwrite any existing parameter name
            assert (len(model.keys()) ==
                    (mdlLen + len(imgFeatEncoder.model_th.keys())))
            misc['update'].extend(imgFeatEncoder.update_list)
            misc['regularize'].extend(imgFeatEncoder.regularize)
            (imgenc_use_dropout, imgFeatEnc_inp, xI,
             updatesLSTMImgFeat) = imgFeatEncoder.build_model(model, params)
    else:
        xI = None
        imgFeatEnc_inp = []

    # bit 1 of 'use_encoder_for' enables the auxiliary-input encoder
    if params.get('use_encoder_for', 0) & 2:
        if params.get('encode_gt_sentences', 0):
            aux_enc_inp = model['Wemb']
        else:
            aux_enc_inp = dp.aux_inputs.T
        hid_size = params['featenc_hidden_size']
        auxFeatEncoder = RecurrentFeatEncoder(hid_size,
                                              params['image_encoding_size'],
                                              params,
                                              mdl_prefix='aux_enc_',
                                              features=aux_enc_inp)
        mdlLen = len(model.keys())
        model.update(auxFeatEncoder.model_th)
        # the encoder must not overwrite any existing parameter name
        assert (len(model.keys()) ==
                (mdlLen + len(auxFeatEncoder.model_th.keys())))
        misc['update'].extend(auxFeatEncoder.update_list)
        misc['regularize'].extend(auxFeatEncoder.regularize)
        (auxenc_use_dropout, auxFeatEnc_inp, xAux,
         updatesLSTMAuxFeat) = auxFeatEncoder.build_model(model, params)

        if params.get('encode_gt_sentences', 0):
            # Reshape it to size (batch_size, n_gt, hidden_size)
            xAux = xAux.reshape(
                (-1, params['n_encgt_sent'], params['featenc_hidden_size']))
            # Convert it to size (batch_size, n_gt*hidden_size)
            xAux = xAux.flatten(2)
    else:
        auxFeatEnc_inp = []
        xAux = None

    attn_nw_func = None

    (use_dropout, inp_list_gen, f_pred_prob, cost, predTh,
     updatesLSTM) = lstmGenerator.build_model(model, params, xI, xAux,
                                              attn_nw=attn_nw_func)
    inp_list = imgFeatEnc_inp + auxFeatEnc_inp + inp_list_gen
    f_eval = theano.function(inp_list, cost, name='f_eval')

    # copy the checkpoint weights into the model built above
    zipp(model_init_gen_from, model)

    # evaluate the split one image/sentence pair at a time
    logppl = []
    logppln = []
    imgids = []
    for batch in dp.iterImageSentencePairBatch(split=scriptparams['split'],
                                               max_batch_size=1,
                                               max_images=-1):
        enc_inp_list = prepare_seq_features(
            batch,
            use_enc_for=params.get('use_encoder_for', 0),
            maxlen=params['maxlen'],
            use_shared_mem=params.get('use_shared_mem_enc', 0),
            enc_gt_sent=params.get('encode_gt_sentences', 0),
            n_enc_sent=params.get('n_encgt_sent', 0),
            wordtoix=misc['wordtoix'])
        gen_inp_list, lenS = prepare_data(
            batch,
            misc['wordtoix'],
            rev_sents=params.get('reverse_sentence', 0),
            use_enc_for=params.get('use_encoder_for', 0),
            use_unk_token=params.get('use_unk_token', 0))

        inp_list = enc_inp_list + gen_inp_list
        cost = f_eval(*inp_list)
        # element 1 of the cost output is accumulated as the log-perplexity
        logppl.append(cost[1])
        logppln.append(lenS)
        imgids.append(
            str(batch[0]['image']['cocoid']) + '_' + str(batch[0]['sentidx']))

    # per-sentence perplexity, keyed by "<cocoid>_<sentidx>"
    perplex = 2**(np.array(logppl) / np.array(logppln))
    np.savez(npfilename, pplx=perplex, keys=np.array(imgids))

    # corpus-level perplexity over all scored sentences
    print(2**(np.array(logppl).sum() / np.array(logppln).sum()))
def main(params):
    """Build the vocabulary and model, then run a single solver step and report it.

    Keys read from ``params``:
      batch_size           -- number of image/sentence pairs sampled per step
      dataset              -- name handed to getDataProvider
      word_count_threshold -- minimum word frequency for the vocabulary
    ``params`` is also forwarded as keyword arguments to solver.step() and
    gets its 'mode' entry overwritten with 'CPU'.

    NOTE(review): the iteration count is fixed to 1 below (the original
    computed max_epochs * iterations-per-epoch and then overwrote it), and
    gradient checking, status reports, validation and checkpoint writing are
    not performed. This looks like a profiling harness -- confirm before
    using it for real training.
    """
    batch_size = params['batch_size']
    dataset = params['dataset']
    word_count_threshold = params['word_count_threshold']
    params['mode'] = 'CPU'

    # fetch the data provider
    dp = getDataProvider(dataset)

    # stores various misc items that need to be passed around the framework
    misc = {}

    # go over all training sentences and find the vocabulary we want to use,
    # i.e. the words that occur at least word_count_threshold number of times
    misc['wordtoix'], misc['ixtoword'], bias_init_vector = \
        preProBuildWordVocab(dp.iterSentences('train'), word_count_threshold)

    # delegate the initialization of the model to the Generator class
    BatchGenerator = decodeGenerator(params)
    init_struct = BatchGenerator.init(params, misc)
    model = init_struct['model']
    misc['update'] = init_struct['update']
    misc['regularize'] = init_struct['regularize']

    # force overwrite of the decoder bias here. This is a bit of a hack.
    bias_row = bias_init_vector.reshape(1, bias_init_vector.size)
    if params['mode'] == 'GPU':
        model['bd'] = gp.garray(bias_row)
    else:
        model['bd'] = bias_row

    def describe(keys):
        # "name [rows x cols]" for every listed parameter matrix
        return ', '.join(
            '%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1])
            for k in keys)

    print('model init done.')
    print('model has keys: ' + ', '.join(model.keys()))
    print('updating: ' + describe(misc['update']))
    # This line lists the regularized parameters; it used to be printed under
    # the 'updating: ' label as well.
    print('regularizing: ' + describe(misc['regularize']))
    print('number of learnable parameters total: %d' %
          (sum(model[k].shape[0] * model[k].shape[1]
               for k in misc['update']), ))

    # initialize the Solver and the cost function
    solver = Solver()

    def costfun(batch, model):
        # wrap the cost function to abstract some things away from the Solver
        return RNNGenCost(batch, model, params, misc)

    # calculate how many iterations make up one epoch
    num_sentences_total = dp.getSplitSize('train', ofwhat='sentences')
    num_iters_one_epoch = num_sentences_total / batch_size

    # initially the size of the dictionary (maximum confusion)
    smooth_train_ppl2 = len(misc['ixtoword'])

    max_iters = 1
    for it in range(max_iters):
        t0 = time.time()
        # fetch a batch of data
        batch = [dp.sampleImageSentencePair() for _ in range(batch_size)]
        # evaluate cost, gradient and perform parameter update
        step_struct = solver.step(batch, model, costfun, **params)
        cost = step_struct['cost']
        dt = time.time() - t0

        # print training statistics
        train_ppl2 = step_struct['stats']['ppl2']
        if it == 0:
            # start out where we start out
            smooth_train_ppl2 = train_ppl2
        else:
            # smooth exponentially decaying moving average
            smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2
        # guard against a training split smaller than one batch
        if num_iters_one_epoch:
            epoch = it * 1.0 / num_iters_one_epoch
        else:
            epoch = 0.0
        print('%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, reg cost = %f, ppl2 = %.2f (smooth %.2f)'
              % (it, max_iters, dt, epoch, cost['loss_cost'],
                 cost['reg_cost'], train_ppl2, smooth_train_ppl2))

    cuda.close()
def gen_from_scratch(params):
    """Generate a melody over a generated chord sequence and write it as MIDI.

    Keys read from ``params``:
      checkpoint_path -- pickled checkpoint holding 'params', 'model' and
                         'ixtoword'
      max_images      -- only used in the progress message
      output_file     -- path of the MIDI file to write
      tempo           -- initial tempo of the MIDI file
      dump_folder     -- created when non-empty
      beam_size       -- forwarded to the generator's predict() call
      quantize        -- when truthy, quantize() is applied before writing
    ``params['dataset']`` is overwritten with the checkpoint's dataset before
    ``params`` is handed to chord_sequence_generation.main().

    Each predicted word is parsed as "pitch;position;duration".

    NOTE(review): ``new_track`` and ``bass_track`` are not created in this
    function; presumably they are module-level instruments -- confirm.
    """
    checkpoint_path = params['checkpoint_path']
    max_images = params['max_images']
    fout = params['output_file']
    tempo = params['tempo']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling can execute arbitrary code; only load checkpoints that
    # come from a trusted source.
    with open(checkpoint_path, 'rb') as handle:
        checkpoint = pickle.load(handle)
    checkpoint_params = checkpoint['params']
    params['dataset'] = checkpoint_params['dataset']
    model = checkpoint['model']
    ixtoword = checkpoint['ixtoword']

    dump_folder = params['dump_folder']
    if dump_folder:
        print('creating dump folder ' + dump_folder)
        # Create the directory without going through a shell, so the folder
        # name is taken literally and cannot inject shell commands.
        if not os.path.isdir(dump_folder):
            os.makedirs(dump_folder)

    # Generate the chord sequence
    parts, chords, num_chords, num_parts = chord_sequence_generation.main(
        params)
    imgs = two_hot_encoding(parts, chords, num_chords, num_parts)

    # iterate over all encoded inputs and predict one sentence for each
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}
    candidates = []
    for n, img in enumerate(imgs, 1):
        print('image %d/%d:' % (n, max_images))
        Ys = BatchGenerator.predict([{'image': {'feat': img}}], model,
                                    checkpoint_params, **kwparams)
        # Ys[0] holds the predictions for the single input we passed in; the
        # first entry is taken as the best one.
        top_prediction = Ys[0][0]
        # ix 0 is the END token, skip that
        candidate = ' '.join(
            [ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        candidates.append(candidate)
        print('PRED: (%f) %s' % (top_prediction[0], candidate))

    # Write midi
    for idx, sentence in enumerate(candidates):
        for word in sentence.split():
            fields = word.split(';')
            pitch = int(fields[0])
            pos = convert_pos(fields[1], idx)
            dur = convert_dur(fields[2])
            new_track.notes.append(
                pretty_midi.Note(90, pitch, pos, pos + dur))
    new_midi_data = pretty_midi.PrettyMIDI(initial_tempo=tempo)
    new_midi_data.instruments.append(new_track)

    # pre-set chord progression
    chord_names = [
        'C;Em', 'A#;F', 'Dm;Em', 'Dm;G', 'Dm;C', 'Am;Em', 'F;C', 'F;G',
        'Dm;F', 'C;C', 'C;E', 'Am;G', 'F;Em', 'F;F', 'G;G', 'Am;Am',
        'Dm;Dm', 'C;A#', 'Em;F', 'C;G', 'G#;A#', 'F;Am', 'G#;Fm', 'Am;Gm',
        'F;E', 'Dm;Am', 'Em;Em', 'G#;G#', 'Em;Am', 'C;Am', 'F;Dm', 'G#;G',
        'F;A#', 'Am;G#', 'C;D', 'G;Am', 'Am;C', 'Am;A#', 'A#;G', 'Am;F',
        'A#;Am', 'E;Am', 'Dm;E', 'A;G', 'Am;Dm', 'Em;Dm', 'C;F#m', 'Am;D',
        'G#;Em', 'C;Dm', 'C;F', 'G;C', 'A#;A#', 'Am;Caug', 'Fm;G', 'A;A',
    ]
    chord_to_pitch = {
        'C': 36, 'C#': 37, 'D': 38, 'D#': 39, 'E': 40, 'F': 41,
        'F#': 42, 'G': 43, 'G#': 44, 'A': 45, 'A#': 46, 'B': 47,
    }

    def root_pitch(chord_name):
        # Keep only the root note (letter plus optional sharp). Merely
        # deleting 'm' left 'Caug' intact, which is not a key of
        # chord_to_pitch and raised KeyError.
        return chord_to_pitch[re.match(r'[A-G]#?', chord_name).group(0)]

    # Each chord pair fills a two-unit slot: first chord root, then second.
    for beat, chord in enumerate(chords):
        first, second = chord_names[chord[0]].split(';')
        bass_track.notes.append(
            pretty_midi.Note(90, root_pitch(first), 2 * beat, 2 * beat + 1))
        bass_track.notes.append(
            pretty_midi.Note(90, root_pitch(second), 2 * beat + 1,
                             2 * (beat + 1)))
    new_midi_data.instruments.append(bass_track)

    adjust_tempo(new_midi_data)
    if params['quantize']:
        quantize(new_midi_data)
    new_midi_data.write(fout)
def main(params):
    """Caption a list of images with one model or an ensemble and save the results.

    Keys read from ``params``:
      multi_model     -- 0 for a single checkpoint, anything else for several
      checkpoint_path -- one path (single model) or an iterable of paths
      nmodels         -- number of models (multi-model mode only)
      beam_size       -- beam width handed to the predictor
      softmax_smooth_factor, softmax_propogate -- copied into the checkpoint
                         params (single-model mode only)
      imgList         -- text file with one "name[,idx[,auxidx]]" entry per line
      root_path       -- directory that receives the result files
      fname_append    -- suffix used in the result file names
    ``params`` is also handed to loadArbitraryFeatures().

    Writes result_struct_<fname_append>.json (also refreshed periodically
    during the run) and result_<fname_append>.html into root_path.

    NOTE(review): in multi-model mode the original called ``.get`` on the
    list of per-model params and never assigned ``ixtoword``, so that mode
    could not run. This version takes the vocabulary and the shared settings
    ('swap_aux', 'reverse_sentence') from the FIRST checkpoint -- confirm
    that all ensemble members agree on them.
    """

    def read_vocab(ckpt):
        # The vocabulary is either bundled under 'misc' or stored at the top
        # level of the checkpoint.
        if 'misc' in ckpt:
            return ckpt['misc'], ckpt['misc']['ixtoword']
        return {'wordtoix': ckpt['wordtoix']}, ckpt['ixtoword']

    single_model = params['multi_model'] == 0

    # NOTE: unpickling can execute arbitrary code; only load checkpoints that
    # come from a trusted source.
    if single_model:
        checkpoint_path = params['checkpoint_path']
        print('loading checkpoint %s' % (checkpoint_path, ))
        with open(checkpoint_path, 'rb') as handle:
            checkpoint = pickle.load(handle)
        checkpoint_params = checkpoint['params']
        model_npy = checkpoint['model']
        checkpoint_params['use_theano'] = 1
        if 'image_feat_size' not in checkpoint_params:
            checkpoint_params['image_feat_size'] = 4096

        misc, ixtoword = read_vocab(checkpoint)

        checkpoint_params['softmax_smooth_factor'] = params['softmax_smooth_factor']
        checkpoint_params['softmax_propogate'] = params['softmax_propogate']
        if checkpoint_params.get('class_out_factoring', 0) == 1:
            # table indexed by column 0 of misc['ixtoclsinfo'], filled with
            # its remaining columns
            checkpoint_params['ixtoclsinfo'] = np.zeros(
                (checkpoint_params['nClasses'], 2), dtype=np.int32)
            ixtoclsinfo = misc['ixtoclsinfo']
            checkpoint_params['ixtoclsinfo'][ixtoclsinfo[:, 0]] = ixtoclsinfo[:, 1:3]
        if checkpoint_params.get('sched_sampling_mode', None) is not None:
            checkpoint_params['sched_sampling_mode'] = None

        BatchGenerator = decodeGenerator(checkpoint_params)
        # Compile and init the theano predictor
        BatchGenerator.prepPredictor(model_npy, checkpoint_params,
                                     params['beam_size'])
        model = BatchGenerator.model_th
        primary_params = checkpoint_params
    else:
        BatchGenerator = []
        model_npy = []
        modelTh = []
        checkpoint_params = []
        for i, checkpoint_path in enumerate(params['checkpoint_path']):
            with open(checkpoint_path, 'rb') as handle:
                checkpoint = pickle.load(handle)
            if i == 0:
                _, ixtoword = read_vocab(checkpoint)
            model_npy.append(checkpoint['model'])
            checkpoint_params.append(checkpoint['params'])
            checkpoint_params[i]['use_theano'] = 1
            BatchGenerator.append(decodeGenerator(checkpoint_params[i]))
            zipp(model_npy[i], BatchGenerator[i].model_th)
            modelTh.append(BatchGenerator[i].model_th)
            # every model contributes with the same weight
            modelTh[i]['comb_weight'] = 1.0 / params['nmodels']
        BatchGenerator[0].prepMultiPredictor(modelTh, checkpoint_params,
                                             params['beam_size'],
                                             params['nmodels'])
        primary_params = checkpoint_params[0]

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = copy(checkpoint_params)
    # 'ixtoclsinfo' is only inserted by the single-model branch above; drop
    # that array from the copy that goes into the JSON dump.
    if single_model and checkpoint_params.get('class_out_factoring', 0) == 1:
        blob['checkpoint_params'].pop('ixtoclsinfo')
    blob['imgblobs'] = []

    # load the image list file and set up feature loading
    root_path = params['root_path']
    with open(params['imgList'], 'r') as handle:
        img_names_list = handle.read().splitlines()

    # The number of comma-separated fields on the first line decides how
    # every line is interpreted: name[,feature index[,aux index]].
    rows = [line.rsplit(',') for line in img_names_list]
    num_fields = len(rows[0])
    auxidxes = []
    if num_fields > 2:
        img_names = [row[0] for row in rows]
        idxes = [int(row[1]) for row in rows]
        auxidxes = [int(row[2]) for row in rows]
    elif num_fields > 1:
        img_names = [row[0] for row in rows]
        idxes = [int(row[1]) for row in rows]
    else:
        img_names = img_names_list
        idxes = range(len(img_names_list))

    # load the features for all images
    if primary_params.get('swap_aux') == 0 or not auxidxes:
        features, aux_inp = loadArbitraryFeatures(params, idxes,
                                                  auxidxes=auxidxes)
    else:
        features, aux_inp = loadArbitraryFeatures(params, auxidxes,
                                                  auxidxes=idxes)

    N = len(img_names)

    # In multi-model mode this reports the checkpoint that was loaded last.
    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n"
          % (checkpoint['epoch'], checkpoint['perplexity']))

    kwparams = {'beam_size': params['beam_size']}
    if not single_model:
        kwparams['nmodels'] = params['nmodels']

    jsonFname = 'result_struct_%s.json' % (params['fname_append'])
    save_file = os.path.join(root_path, jsonFname)

    reverse_sentence = primary_params.get('reverse_sentence', 0) != 0

    def decode(word_ixs):
        # ix 0 is the END token, skip that. Checkpoints flagged with
        # 'reverse_sentence' get their word order flipped back.
        if reverse_sentence:
            word_ixs = reversed(word_ixs)
        return ' '.join([ixtoword[int(ix)] for ix in word_ixs if ix > 0])

    def dump_blob():
        print('writing predictions to %s...' % (save_file, ))
        with open(save_file, 'w') as handle:
            json.dump(blob, handle)

    # iterate over all images and predict sentences
    for n in range(N):
        print('image %d/%d:' % (n, N))

        # encode the image and run the predictor; heavy lifting happens inside
        if single_model:
            img = {'feat': features[:, n]}
            if checkpoint_params.get('en_aux_inp', 0):
                img['aux_inp'] = aux_inp[:, n]
            img['local_file_path'] = img_names[n]
            Ys = BatchGenerator.predict([{'image': img}], model,
                                        checkpoint_params, **kwparams)
        else:
            # one input per model, each with that model's own features
            batch = []
            for i in range(params['nmodels']):
                img = {'feat': features[i][:, n]}
                if checkpoint_params[i].get('en_aux_inp', 0):
                    img['aux_inp'] = aux_inp[i][:, n]
                img['local_file_path'] = img_names[n]
                batch.append({'image': img})
            Ys = BatchGenerator[0].predictMulti(batch, checkpoint_params,
                                                **kwparams)

        # Ys[0] holds the predictions for the single image we passed in; the
        # first entry is taken as the best one.
        top_predictions = Ys[0]
        top_prediction = top_predictions[0]
        candidate = decode(top_prediction[1])
        print('PRED: (%f) %s' % (float(top_prediction[0]), candidate))

        img_blob = {
            'img_path': img_names[n],
            'candidate': {'text': candidate,
                          'logprob': float(top_prediction[0])},
            # All remaining candidates, decoded the same way as the top one
            # (previously they were left in reversed order for
            # 'reverse_sentence' checkpoints).
            'candidatelist': [
                {'text': decode(prediction[1]),
                 'logprob': float(prediction[0])}
                for prediction in top_predictions[1:]
            ],
        }
        blob['imgblobs'].append(img_blob)

        # periodic snapshot so a long run leaves partial results behind
        if (n % 5000) == 1:
            dump_blob()

    # dump result struct to file
    dump_blob()

    # dump output html
    html_parts = []
    for img_blob in blob['imgblobs']:
        html_parts.append(
            '<img src="%s" height="400"><br>' % (img_blob['img_path'], ))
        html_parts.append(
            '(%f) %s <br><br>' % (img_blob['candidate']['logprob'],
                                  img_blob['candidate']['text']))
    html_file = 'result_%s.html' % (params['fname_append'])
    html_file = os.path.join(root_path, html_file)
    print('writing html result file to %s...' % (html_file, ))
    with open(html_file, 'w') as handle:
        handle.write(''.join(html_parts))
def main(params):
    """Adversarially train a caption generator against a caption evaluator.

    The function builds the Theano graphs for the generator (unless
    ``params['t_eval_only']`` is non-zero) and for the evaluator, compiles
    the gradient/update functions through ``Solver.build_solver_model`` and
    then runs the training loop.  Each outer iteration first updates the
    evaluator until it is accurate enough (see the ``while`` condition below)
    and then performs one generator update, optionally followed by one
    maximum-likelihood update when ``params['use_mle_train']`` is set.
    Training curves are written to an ``.npz`` file under ``logs`` and
    checkpoints are written with ``dumpCheckpoint``.

    Args:
        params: dict of run options.  It is modified in place
            (``aux_inp_size``, ``image_feat_size``, ``vocabulary_size`` and
            ``output_size`` are filled in here).

    NOTE(review): ``checkpoint_init``, ``model_init_gen_from``,
    ``model_init_eval_from``, ``host`` and ``bias_init_inter_class`` are not
    assigned anywhere in this function; presumably they are module-level
    globals set by the calling script -- confirm.
    NOTE(review): the nesting of a few statements was reconstructed from
    whitespace-collapsed text; the affected places carry a comment.
    """
    batch_size = params['batch_size']
    word_count_threshold = params['word_count_threshold']
    max_epochs = params['max_epochs']

    # fetch the data provider
    dp = getDataProvider(params)
    # Initialize the optimizer
    solver = Solver(params['solver'])

    params['aux_inp_size'] = dp.aux_inp_size
    params['image_feat_size'] = dp.img_feat_size

    print('Image feature size is %d, and aux input size is %d' % (
        params['image_feat_size'], params['aux_inp_size']))

    misc = {}  # stores various misc items that need to be passed around the framework

    if params['checkpoint_file_name'] == 'None':
        # go over all training sentences and find the vocabulary we want to use, i.e. the words that occur
        # at least word_count_threshold number of times
        misc['wordtoix'], misc['ixtoword'], bias_init_vector = preProBuildWordVocab(
            dp.iterSentences('train'), word_count_threshold)
    else:
        # Load Vocabulary from the checkpoint
        misc = checkpoint_init['misc']

    params['vocabulary_size'] = len(misc['wordtoix'])
    params['output_size'] = len(misc['ixtoword'])  # these should match though

    # This initializes the generator model parameters and does matrix initializations
    if params['t_eval_only'] == 0:
        generator = decodeGenerator(params)
        # Build the computational graph
        if params['use_encoder_for'] & 2:
            aux_enc_inp = generator.model_th['Wemb'] if params[
                'encode_gt_sentences'] else dp.aux_inputs.T
            hid_size = params['featenc_hidden_size']
            auxFeatEncoder = RecurrentFeatEncoder(
                hid_size, params['image_encoding_size'], params,
                mdl_prefix='aux_enc_', features=aux_enc_inp)
            mdlLen = len(generator.model_th.keys())
            generator.model_th.update(auxFeatEncoder.model_th)
            # The encoder parameters must not collide with generator parameter names
            assert (len(generator.model_th.keys()) == (
                mdlLen + len(auxFeatEncoder.model_th.keys())))
            (auxenc_use_dropout, auxFeatEnc_inp, xAux,
             updatesLSTMAuxFeat) = auxFeatEncoder.build_model(
                 generator.model_th, params)

            if params['encode_gt_sentences']:
                # Reshape it size(batch_size, n_gt, hidden_size)
                xAux = xAux.reshape((-1, params['n_encgt_sent'],
                                     params['featenc_hidden_size']))
                # Convert it to size (batch_size, n_gt*hidden_size
                xAux = xAux.flatten(2)
            # NOTE(review): placed at this level so that xI and imgFeatEnc_inp
            # are defined on every path that reaches build_prediction_model.
            xI = tensor.zeros((batch_size, params['image_encoding_size']))
            imgFeatEnc_inp = []
        else:
            auxFeatEnc_inp = []
            imgFeatEnc_inp = []
            xAux = None
            xI = None

        (gen_inp_list, predLogProb, predIdx, predCand, gen_out, updatesLstm,
         seq_lengths) = generator.build_prediction_model(
             generator.model_th, params, xI=xI, xAux=xAux)
        gen_inp_list = imgFeatEnc_inp + auxFeatEnc_inp + gen_inp_list
        gen_out = gen_out.reshape([
            gen_out.shape[0], -1, params['n_gen_samples'],
            params['vocabulary_size']
        ])
        #convert updates lstm to a tuple, this is to help merge it with grad updates
        updatesLstm = [(k, v) for k, v in updatesLstm.iteritems()]
        f_gen_only = theano.function(
            gen_inp_list, [predLogProb, predIdx, gen_out, seq_lengths],
            name='f_pred', updates=updatesLstm)

        modelGen = generator.model_th
        upListGen = generator.update_list

        if params['use_mle_train']:
            # Extra maximum-likelihood graph for the generator
            (use_dropout_genTF, inp_list_genTF, _, cost_genTF, _,
             updatesLSTM_genTF) = generator.build_model(
                 generator.model_th, params)
            f_eval_genTF = theano.function(inp_list_genTF, cost_genTF,
                                           name='f_eval')
            grads_genTF = tensor.grad(cost_genTF[0], wrt=modelGen.values(),
                                      add_names=True)
            lr_genTF = tensor.scalar(name='lr', dtype=config.floatX)
            f_grad_genTF, f_update_genTF, zg_genTF, rg_genTF, ud_genTF = solver.build_solver_model(
                lr_genTF, modelGen, grads_genTF, inp_list_genTF, cost_genTF,
                params)
    else:
        modelGen = []
        updatesLstm = []

    if params['met_to_track'] != []:
        trackMetargs = {'eval_metric': params['met_to_track']}
        refToks, scr_info = eval_prep_refs('val', dp, params['met_to_track'])
        trackMetargs['refToks'] = refToks
        trackMetargs['scr_info'] = scr_info

    # Initialize the evalator model
    if params['share_Wemb']:
        evaluator = decodeEvaluator(params, modelGen['Wemb'])
    else:
        evaluator = decodeEvaluator(params)
    modelEval = evaluator.model_th

    if params['t_eval_only'] == 0:
        # Build the evaluator graph to evaluate reference and generated captions
        if params.get('upd_eval_ref', 0):
            (refeval_inp_list, ref_f_pred_fns, ref_costs, ref_predTh,
             ref_modelEval) = evaluator.build_advers_eval(modelEval, params)
        (eval_inp_list, f_pred_fns, costs, predTh,
         modelEval) = evaluator.build_advers_eval(
             modelEval, params, gen_inp_list, gen_out, updatesLstm,
             seq_lengths)
    else:
        # Build the evaluator graph to evaluate only reference captions
        (eval_inp_list, f_pred_fns, costs, predTh,
         modelEval) = evaluator.build_advers_eval(modelEval, params)

    # force overwrite here. The bias to the softmax is initialized to reflect word frequencies
    if params['t_eval_only'] == 0:  # and 0:
        if params['checkpoint_file_name'] == 'None':
            modelGen['bd'].set_value(bias_init_vector.astype(config.floatX))
            if params.get('class_out_factoring', 0) == 1:
                modelGen['bdCls'].set_value(
                    bias_init_inter_class.astype(config.floatX))

    # comb_inp_list aliases eval_inp_list, so the appends below extend both
    comb_inp_list = eval_inp_list
    if params['t_eval_only'] == 0:
        for inp in gen_inp_list:
            if inp not in comb_inp_list:
                comb_inp_list.append(inp)

    # Compile an evaluation function.. Doesn't include gradients
    # To be used for validation set evaluation or debug purposes
    if params['t_eval_only'] == 0:
        f_eval = theano.function(comb_inp_list, costs[:1], name='f_eval',
                                 updates=updatesLstm)
    else:
        f_eval = theano.function(comb_inp_list, costs[:1], name='f_eval')

    if params['share_Wemb']:
        modelEval.pop('Wemb')
    if params['fix_Wemb']:
        upListGen.remove('Wemb')

    #-------------------------------------------------------------------------
    # Now let's build a gradient computation graph and update mechanism
    #-------------------------------------------------------------------------
    # First compute gradient on the evaluator params w.r.t cost
    if params.get('upd_eval_ref', 0):
        gradsEval_ref = tensor.grad(ref_costs[0], wrt=modelEval.values(),
                                    add_names=True)
    gradsEval = tensor.grad(costs[0], wrt=modelEval.values(), add_names=True)

    # Update functions for the evaluator
    lrEval = tensor.scalar(name='lrEval', dtype=config.floatX)
    if params.get('upd_eval_ref', 0):
        f_grad_comp_eval_ref, f_param_update_eval_ref, _, _, _ = solver.build_solver_model(
            lrEval, modelEval, gradsEval_ref, refeval_inp_list, ref_costs[0],
            params, w_clip=params['eval_w_clip'])
    f_grad_comp_eval, f_param_update_eval, zg_eval, rg_eval, ud_eval = solver.build_solver_model(
        lrEval, modelEval, gradsEval, comb_inp_list, costs[:1], params,
        updatesLstm, w_clip=params['eval_w_clip'])

    # Now compute gradient on the generator params w.r.t the cost
    if params['t_eval_only'] == 0:
        gradsGen = tensor.grad(costs[1], wrt=modelGen.values(),
                               add_names=True)
        lrGen = tensor.scalar(name='lrGen', dtype=config.floatX)
        # Update functions for the generator
        f_grad_comp_gen, f_param_update_gen, zg_gen, rg_gen, ud_gen = solver.build_solver_model(
            lrGen, modelGen, gradsGen,
            comb_inp_list[:(len(comb_inp_list) - 1 +
                            params['gen_feature_matching'])],
            costs[1], params, updatesLstm)

    #-------------------------------------------------------------------------
    # If we want to track some metrics during the training, initialize stuff for that now
    #-------------------------------------------------------------------------
    print('model init done.')
    if params['t_eval_only'] == 0:
        print('Gen model has keys: ' + ', '.join(modelGen.keys()))
    print('Eval model has keys: ' + ', '.join(modelEval.keys()))

    # calculate how many iterations we need, One epoch is considered once going through all the sentences and not images
    # Hence in case of coco/flickr this will 5* no of images
    num_sentences_total = dp.getSplitSize('train', ofwhat='images')
    num_iters_one_epoch = num_sentences_total / batch_size
    max_iters = max_epochs * num_iters_one_epoch
    skip_first = 20
    iters_eval = 5
    iters_gen = 1

    cost_eval_iter = []
    cost_gen_iter = []
    trackSc_array = []

    eval_period_in_epochs = params['eval_period']
    eval_period_in_iters = max(
        1, int(num_iters_one_epoch * eval_period_in_epochs))
    top_val_ppl2 = -1
    smooth_train_ppl2 = 0.5  # initially size of dictionary of confusion
    smooth_train_cost = 0.0  # initially size of dictionary of confusion
    smooth_train_cost_gen = 1.0  # initially size of dictionary of confusion
    val_ppl2 = len(misc['ixtoword'])
    last_status_write_time = 0  # for writing worker job status reports
    json_worker_status = {}
    json_worker_status['params'] = params
    json_worker_status['history'] = []
    write_checkpoint_ppl_threshold = params['write_checkpoint_ppl_threshold']
    iter_out_file = os.path.join(
        'logs', 'advmodel_checkpoint_%s_%s_%s_log.npz' %
        (params['dataset'], host, params['fappend']))
    len_hist = defaultdict(int)
    t_print_sec = 30

    ## Initialize the model parameters from the checkpoint file if we are resuming training
    if params['checkpoint_file_name'] != 'None':
        if params['t_eval_only'] != 1:
            print('\n Now initing gen Model:')
            zipp(model_init_gen_from, modelGen)
            # NOTE(review): nesting of this check under the generator branch
            # is reconstructed; it makes no difference when t_eval_only == 0.
            if 'trackers' in checkpoint_init:
                trackSc_array = checkpoint_init['trackers'].get(
                    'trackScores', [])
        print('\n Now initing Eval Model:')
        zipp(model_init_eval_from, modelEval)
        #zipp(rg_init,rgGen)
        print("\nContinuing training from previous model\n. Already run for %0.2f epochs with validation perplx at %0.3f\n" % (
            checkpoint_init['epoch'], checkpoint_init['perplexity']))

    ##############################################################
    # Define signal handler to catch ctl-c or kills so that we can save the model trained till that point
    def signal_handler(signal, frame):
        print('You pressed Ctrl+C! Saving Checkpoint Now before exiting!')
        filename = 'advmodel_checkpoint_%s_%s_%s_%.2f_INT.p' % (
            params['dataset'], host, params['fappend'], val_ppl2)
        dumpCheckpoint(filename, params, modelGen, modelEval, misc, it,
                       val_ppl2)
        sys.exit(0)

    # The handler above is defined but deliberately left unregistered:
    #signal.signal(signal.SIGINT, signal_handler)
    ##############################################################

    #In testing disable sampling and use the greedy approach!?
    generator.usegumbel.set_value(1)
    if params['met_to_track'] != []:
        tsc_max, tsc_mean, tsc_min = eval_gen_samps(
            f_gen_only, dp, params, misc, params['rev_eval'], **trackMetargs)
        trackSc_array.append((0, {
            evm + '_max': tsc_max[i]
            for i, evm in enumerate(params['met_to_track'])
        }))
        trackSc_array[-1][1].update({
            evm + '_mean': tsc_mean[i]
            for i, evm in enumerate(params['met_to_track'])
        })
        trackSc_array[-1][1].update({
            evm + '_min': tsc_min[i]
            for i, evm in enumerate(params['met_to_track'])
        })

    disp_some_gen_samps(f_gen_only, dp, params, misc, n_samp=5)
    evaluator.use_noise.set_value(1.)
    eval_acc, gen_acc = eval_discrm_gen('val', dp, params, f_pred_fns[0],
                                        misc)
    # Re-enable sampling
    generator.usegumbel.set_value(1)

    np.savez(iter_out_file,
             eval_cost=np.array(cost_eval_iter),
             gen_cost=np.array(cost_gen_iter),
             tracksc=np.array(trackSc_array))
    smooth_train_cost = 0.0

    print('###################### NOW BEGINNING TRAINING #################################')
    for it in xrange(max_iters):
        t0 = time.time()
        # Enable using dropout in training
        evaluator.use_noise.set_value(1.)
        dt = 0.
        it2 = 0
        # Keep updating the evaluator until eval_acc > 60, gen_acc < 45 and
        # at least iters_eval * skip_first evaluator steps have been taken.
        while eval_acc <= 60. or gen_acc >= 45. or it2 < iters_eval * skip_first:
            # fetch a batch of data
            t1 = time.time()
            s_probs = [
                0.6, 0.4, 0.0
            ] if params['eval_loss'] == 'contrastive' else [1.0, 0.0, 0.0]
            batch = dp.sampAdversBatch(batch_size,
                                       n_sent=params['n_gen_samples'],
                                       probs=s_probs)
            cnn_inps = prepare_adv_data(batch, misc['wordtoix'],
                                        maxlen=params['maxlen'],
                                        prep_for=params['eval_model'])
            enc_inp_list = prepare_seq_features(
                batch,
                use_enc_for=params['use_encoder_for'],
                maxlen=params['maxlen'],
                use_shared_mem=params['use_shared_mem_enc'],
                enc_gt_sent=params['encode_gt_sentences'],
                n_enc_sent=params['n_encgt_sent'],
                wordtoix=misc['wordtoix'])
            eval_cost = f_grad_comp_eval(*(cnn_inps + enc_inp_list))

            if np.isnan(eval_cost[0]):
                # Drop into the debugger when the evaluator cost diverges
                import pdb
                pdb.set_trace()
            f_param_update_eval(params['learning_rate_eval'])

            # Track training statistics
            smooth_train_cost = (0.99 * smooth_train_cost + 0.01 * eval_cost[0]
                                 ) if it > 0 else eval_cost[0]
            dt2 = time.time() - t1
            if it2 % 500 == 499:
                gb = 0.  #modelGen['gumb_temp'].get_value() if params['use_gumbel_mse'] == 1 else 0
                print('Iter %d/%d Eval Only Iter %d/%d, done. in %.3fs. Eval Cost is %.6f' % (
                    it, max_iters, it2, iters_eval * skip_first, dt2,
                    smooth_train_cost))
            # NOTE(review): reconstructed as a sibling of the check above, not
            # nested inside it -- confirm.
            if it2 % 100 == 99:
                eval_acc, gen_acc = eval_discrm_gen('val', dp, params,
                                                    f_pred_fns[0], misc,
                                                    n_eval=500)
            it2 += 1

        evaluator.use_noise.set_value(1.)

        # All three checks set the same value; skip_first is 1 after the
        # first outer iteration.
        if it >= 0:
            skip_first = 1
        if it >= 100:
            skip_first = 1
        if it % 1000 == 999:
            skip_first = 1

        s_probs = [
            1.0, 0.0, 0.0
        ] if params['eval_loss'] == 'contrastive' else [1.0, 0.0, 0.0]
        batch = dp.sampAdversBatch(batch_size,
                                   n_sent=params['n_gen_samples'],
                                   probs=s_probs)
        cnn_inps = prepare_adv_data(batch, misc['wordtoix'],
                                    maxlen=params['maxlen'],
                                    prep_for=params['eval_model'])
        enc_inp_list = prepare_seq_features(
            batch,
            use_enc_for=params['use_encoder_for'],
            maxlen=params['maxlen'],
            use_shared_mem=params['use_shared_mem_enc'],
            enc_gt_sent=params['encode_gt_sentences'],
            n_enc_sent=params['n_encgt_sent'],
            wordtoix=misc['wordtoix'])

        gen_cost = f_grad_comp_gen(
            *(cnn_inps[:(len(cnn_inps) - 1 + params['gen_feature_matching'])]
              + enc_inp_list))
        f_param_update_gen(params['learning_rate_gen'])

        if params['use_mle_train']:
            # One maximum-likelihood step with sampling switched off, at a
            # 50x smaller learning rate than the adversarial step
            generator.usegumbel.set_value(0)
            batch, l = dp.getRandBatchByLen(batch_size)
            gen_inp_list, lenS = prepare_data(batch, misc['wordtoix'],
                                              params['maxlen'])
            cost_genMLE = f_grad_genTF(*gen_inp_list)
            f_update_genTF(np.float32(params['learning_rate_gen'] / 50.0))
            generator.usegumbel.set_value(1)

        dt = time.time() - t0
        # print training statistics
        smooth_train_cost_gen = gen_cost if it == 0 else 0.99 * smooth_train_cost_gen + 0.01 * gen_cost

        tnow = time.time()
        if tnow > last_status_write_time + t_print_sec * 1:  # every now and then lets write a report
            gb = 0.  #modelGen['gumb_temp'].get_value() if params['use_gumbel_mse'] == 1 else 0
            print('Iter %d/%d done. in %.3fs. Eval Cost is %.6f, Gen Cost is %.6f, temp: %.4f' % (
                it, max_iters, dt, smooth_train_cost, smooth_train_cost_gen,
                gb))
            last_status_write_time = tnow

        cost_eval_iter.append(smooth_train_cost)
        cost_gen_iter.append(smooth_train_cost_gen)

        if it % 500 == 499:
            # Run the generator on the validation set and compute some metrics
            generator.usegumbel.set_value(1)
            if params['met_to_track'] != []:
                #In testing set the temperature to very low, so that it is equivalent to Greed samples
                tsc_max, tsc_mean, tsc_min = eval_gen_samps(
                    f_gen_only, dp, params, misc, params['rev_eval'],
                    **trackMetargs)
                trackSc_array.append((it, {
                    evm + '_max': tsc_max[i]
                    for i, evm in enumerate(params['met_to_track'])
                }))
                trackSc_array[-1][1].update({
                    evm + '_mean': tsc_mean[i]
                    for i, evm in enumerate(params['met_to_track'])
                })
                trackSc_array[-1][1].update({
                    evm + '_min': tsc_min[i]
                    for i, evm in enumerate(params['met_to_track'])
                })

            disp_some_gen_samps(f_gen_only, dp, params, misc, n_samp=5)
            generator.usegumbel.set_value(1)
            # if we beat a previous record or if this is the first time
            # AND we also beat the user-defined threshold or it doesnt exist
            top_val_ppl2 = gen_acc

        if it % 500 == 499:
            eval_acc, gen_acc = eval_discrm_gen('val', dp, params,
                                                f_pred_fns[0], misc,
                                                n_eval=500)
        if it % 1000 == 999:
            filename = 'advmodel_checkpoint_%s_%s_%s_%d_%.2f_genacc.p' % (
                params['dataset'], host, params['fappend'], it, gen_acc)
            dumpCheckpoint(filename, params, modelGen, modelEval, misc, it,
                           gen_acc)
        if it % 500 == 499:
            np.savez(iter_out_file,
                     eval_cost=np.array(cost_eval_iter),
                     gen_cost=np.array(cost_gen_iter),
                     tracksc=np.array(trackSc_array))

    # Final checkpoint once training is done.  The original referenced
    # ``g_acc`` here, a name never assigned in this function; the generator
    # accuracy tracked throughout training is ``gen_acc``.
    filename = 'advmodel_checkpoint_%s_%s_%s_%d_%.2f_GenDone.p' % (
        params['dataset'], host, params['fappend'], it, gen_acc)
    dumpCheckpoint(filename, params, modelGen, modelEval, misc, it, gen_acc)
def main(params):
    """Caption every image listed in ``tasks.txt`` and dump JSON and HTML results.

    Reads the checkpoint at ``params['checkpoint_path']``, the image names
    from ``<root_path>/tasks.txt`` and the features from
    ``<root_path>/vgg_feats.mat`` (key ``'feats'``, one column per image),
    runs the generator on each image with ``params['beam_size']`` and keeps
    the top prediction.  Results are written to
    ``<root_path>/result_struct.json`` and ``<root_path>/result.html``.

    Args:
        params: dict with the keys ``checkpoint_path``, ``root_path`` and
            ``beam_size``.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling runs arbitrary code; only load trusted checkpoints.
    with open(checkpoint_path, 'rb') as f:
        checkpoint = pickle.load(f)
    checkpoint_params = checkpoint['params']
    model = checkpoint['model']
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # load the tasks.txt file
    root_path = params['root_path']
    with open(os.path.join(root_path, 'tasks.txt'), 'r') as f:
        img_names = f.read().splitlines()

    # load the features for all images
    features_path = os.path.join(root_path, 'vgg_feats.mat')
    features_struct = scipy.io.loadmat(features_path)
    features = features_struct['feats']  # this is a 4096 x N numpy array of features
    _, N = features.shape

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}
    for n in xrange(N):
        print('image %d/%d:' % (n, N))

        # encode the image
        img = {'feat': features[:, n], 'local_file_path': img_names[n]}

        # perform the work. heavy lifting happens inside
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)

        # encode the top prediction
        top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
        top_prediction = top_predictions[0]  # these are sorted with highest on top
        candidate = ' '.join(
            [ixtoword[ix] for ix in top_prediction[1] if ix > 0])  # ix 0 is the END token, skip that
        # Cast so that a numpy scalar score cannot break json.dump below
        logprob = float(top_prediction[0])
        print('PRED: (%f) %s' % (logprob, candidate))

        # build up the output
        blob['imgblobs'].append({
            'img_path': img['local_file_path'],
            'candidate': {'text': candidate, 'logprob': logprob},
        })

    # dump result struct to file
    save_file = os.path.join(root_path, 'result_struct.json')
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as f:
        json.dump(blob, f)

    # dump output html
    html_parts = []
    for img in blob['imgblobs']:
        html_parts.append('<img src="%s" height="400"><br>' % (img['img_path'], ))
        html_parts.append('(%f) %s <br><br>' % (img['candidate']['logprob'],
                                                img['candidate']['text']))
    html_file = os.path.join(root_path, 'result.html')
    print('writing html result file to %s...' % (html_file, ))
    with open(html_file, 'w') as f:
        f.write(''.join(html_parts))
def main(params):
    """Caption images from a pickled feature matrix and dump the top candidates.

    Reads the checkpoint at ``params['checkpoint_path']``, the image names
    from ``params['task_file']`` and a pickled feature array from
    ``params['feature_file']`` (transposed after loading so that each column
    is one image).  For every image the generator is run with
    ``params['beam_size']`` and up to five predictions are stored together
    with the time the prediction took.  The result is written as JSON to
    ``params['out_file']``.

    Args:
        params: dict with the keys ``checkpoint_path``, ``task_file``,
            ``feature_file``, ``beam_size`` and ``out_file``.

    NOTE(review): the original text ended with an unterminated triple-quoted
    string after a "dump output html" comment; that dangling opener is
    dropped here and no HTML is written.
    """
    # load the checkpoint
    checkpoint_path = params["checkpoint_path"]
    print("loading checkpoint %s" % (checkpoint_path,))
    # NOTE: unpickling runs arbitrary code; only load trusted files.
    with open(checkpoint_path, "rb") as f:
        checkpoint = pickle.load(f)
    checkpoint_params = checkpoint["params"]
    model = checkpoint["model"]
    ixtoword = checkpoint["ixtoword"]

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        "params": params,
        "checkpoint_params": checkpoint_params,
        "imgblobs": [],
    }

    # load the task file (one image name per line)
    with open(params["task_file"], "r") as f:
        img_names = f.read().splitlines()

    # load the features for all images; pickles are binary data, so open the
    # file in binary mode like the checkpoint above
    with open(params["feature_file"], "rb") as f:
        features = pickle.load(f)
    features = features.T
    _, N = features.shape

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {"beam_size": params["beam_size"]}
    for n in xrange(N):
        print("image %d/%d:" % (n, N))

        # encode the image
        img = {"feat": features[:, n], "local_file_path": img_names[n]}

        # perform the work. heavy lifting happens inside
        tic = time.time()
        Ys = BatchGenerator.predict([{"image": img}], model,
                                    checkpoint_params, **kwparams)
        toc = time.time()
        print("image %d/%d: %f" % (n, N, toc - tic))

        # build up the output
        img_blob = {
            "img_path": img["local_file_path"],
            "rnn_time": toc - tic,
            "candidate": {"text": [], "logprob": []},
        }

        # encode the top predictions (at most five)
        top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
        for i in xrange(min(5, len(top_predictions))):
            top_prediction = top_predictions[i]
            candidate = " ".join(
                [ixtoword[ix] for ix in top_prediction[1] if ix > 0]
            )  # ix 0 is the END token, skip that
            img_blob["candidate"]["text"].append(candidate)
            # Cast so that a numpy scalar score cannot break json.dump below
            img_blob["candidate"]["logprob"].append(float(top_prediction[0]))

        blob["imgblobs"].append(img_blob)

    # dump result struct to file
    save_file = params["out_file"]
    print("writing predictions to %s..." % (save_file,))
    with open(save_file, "w") as f:
        json.dump(blob, f)
def _score_video(stat, video_name, gt_label, block_labels):
    """Append one video's result to ``stat`` and return whether it is correct.

    The video-level prediction is the first element returned by
    ``max_occurrences(block_labels)`` (presumably the most frequent block
    label -- confirm against that helper).
    """
    pred_label = max_occurrences(block_labels)[0]
    stat.append({
        'video_name': video_name,
        'gt_label': gt_label,
        'pred_label': int(pred_label),
    })
    return pred_label == gt_label


def main(params, splitno, model_file):
    """Compute video-level classification accuracy on the test split.

    Every item yielded by ``dp.iterImagesContext`` is one block of a video.
    Consecutive blocks that share ``img['filename']`` are grouped, the
    block-level predictions of a group are combined with ``max_occurrences``
    and compared against the ground-truth label of the group's last block.
    Per-video results are appended to
    ``./status/mmdb_stat_split_<splitno>.json`` and the accuracy to
    ``./status/mmdb_split_result_split_<splitno>.json``.

    Args:
        params: dict; ``params['max_blocks']`` limits the number of blocks.
        splitno: integer split number, used in the output file names.
        model_file: path of the pickled checkpoint to evaluate.

    Returns:
        Fraction of videos whose predicted label equals the ground truth;
        0.0 when the split yields no blocks.
    """
    checkpoint_path = model_file
    max_blocks = params['max_blocks']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling runs arbitrary code; only load trusted checkpoints.
    with open(checkpoint_path, 'rb') as f:
        checkpoint = pickle.load(f)
    checkpoint_params = checkpoint['params']
    dataset = checkpoint_params['dataset']
    feature_file = checkpoint_params['feature_file']
    json_file = checkpoint['json_file']
    model = checkpoint['model']

    # fetch the data provider
    dp = getDataProvider(dataset, feature_file, json_file)

    # iterate over all videos in test set and predict class labels
    BatchGenerator = decodeGenerator(checkpoint_params)

    n = 0
    correct = 0
    video_count = 0
    stat = []
    prev_video_name = ''
    prev_gt_video_label = 0
    pred_video_label = []  # block-level predictions of the video in progress

    for img in dp.iterImagesContext(split='test', max_images=max_blocks):
        n += 1
        print('clip %d/%d:' % (n, max_blocks))

        gt_video_label = img['sentences'][0]['tokens'][0]
        current_video_name = img['filename']

        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params)
        pred_frame_labels = np.argmax(Ys[0], axis=1)
        current_pred_video_label = max_occurrences(pred_frame_labels)[0]

        # impl based on action recog using visual attn paper - http://arxiv.org/abs/1511.04119
        if n > 1 and current_video_name != prev_video_name:
            # A new video starts: close the previous one first
            video_count += 1
            if _score_video(stat, prev_video_name, prev_gt_video_label,
                            pred_video_label):
                correct += 1
            pred_video_label = []

        # process current video block
        pred_video_label.append(current_pred_video_label)
        prev_video_name = current_video_name
        prev_gt_video_label = gt_video_label

    # Close the last video.  The original only did this when the final block
    # continued the previous video, so a last video made of a single block
    # was never counted.
    if pred_video_label:
        video_count += 1
        if _score_video(stat, prev_video_name, prev_gt_video_label,
                        pred_video_label):
            correct += 1

    with open("./status/mmdb_stat_split_%d.json" % (splitno), 'a') as f:
        json.dump(stat, f)

    accuracy = correct / float(video_count) if video_count else 0.0
    result = {'split': splitno, 'accuracy': accuracy}
    with open("./status/mmdb_split_result_split_%d.json" % (splitno),
              'a') as f:
        json.dump(result, f)

    return accuracy
def main(params):
    """Caption the test split, score it with multi-bleu and save the results.

    Loads the checkpoint at ``params['checkpoint_path']``, prepares the
    Theano predictor, captions up to ``params['max_images']`` test images
    with ``params['beam_size']`` and collects the references, the top
    candidate and the remaining candidates per image.  The candidates and
    references are written to ``eval/output`` and ``eval/reference0..4``,
    ``eval/multi-bleu.perl`` is run on them, and the result structure is
    dumped as JSON to ``params['result_struct_filename']``.  When
    ``params['dump_folder']`` is set, every captioned image is also copied
    into that folder.

    Args:
        params: dict with the keys ``checkpoint_path``, ``max_images``,
            ``dump_folder``, ``beam_size`` and ``result_struct_filename``.
    """
    # Local import: the top-of-file imports are outside the visible part of
    # the file, so the dependency is brought into scope here.
    import shutil

    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    max_images = params['max_images']

    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling runs arbitrary code; only load trusted checkpoints.
    with open(checkpoint_path, 'rb') as f:
        checkpoint = pickle.load(f)
    checkpoint_params = checkpoint['params']
    model_npy = checkpoint['model']
    dump_folder = params['dump_folder']

    # The Theano predictor is always used, whatever the checkpoint says
    checkpoint_params['use_theano'] = 1

    if 'image_feat_size' not in checkpoint_params:
        checkpoint_params['image_feat_size'] = 4096

    if dump_folder:
        print('creating dump folder ' + dump_folder)
        # Same effect as "mkdir -p" without building a shell command
        if not os.path.isdir(dump_folder):
            os.makedirs(dump_folder)

    # fetch the data provider
    dp = getDataProvider(checkpoint_params)

    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {
        'params': params,
        'checkpoint_params': checkpoint_params,
        'imgblobs': [],
    }

    # iterate over all images in test set and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    # Compile and init the theano predictor
    BatchGenerator.prepPredictor(model_npy, checkpoint_params,
                                 params['beam_size'])
    model = BatchGenerator.model_th
    print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (
        checkpoint['epoch'], checkpoint['perplexity']))

    kwparams = {'beam_size': params['beam_size']}
    all_references = []
    all_candidates = []
    for n, img in enumerate(dp.iterImages(split='test',
                                          max_images=max_images), 1):
        print('image %d/%d:' % (n, max_images))
        references = [' '.join(x['tokens']) for x in img['sentences']]  # one string per reference
        Ys = BatchGenerator.predict([{'image': img}], model,
                                    checkpoint_params, **kwparams)

        img_blob = {}  # we will build this up
        img_blob['img_path'] = img['local_file_path']
        img_blob['imgid'] = img['imgid']

        if dump_folder:
            # copy source file to some folder. This makes it easier to distribute results
            # into a webpage, because all images that were predicted on are in a single folder
            source_file = img['local_file_path']
            target_file = os.path.join(dump_folder,
                                       os.path.basename(source_file))
            try:
                shutil.copy(source_file, target_file)
            except (IOError, OSError) as err:
                # A failed copy must not abort the evaluation (the shell
                # "cp" used before also just reported the error)
                print('could not copy %s to %s: %s' % (source_file,
                                                       target_file, err))

        # encode the human-provided references
        img_blob['references'] = []
        for gtsent in references:
            print('GT: ' + gtsent)
            img_blob['references'].append({'text': gtsent})

        # now evaluate and encode the top prediction
        top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
        top_prediction = top_predictions[0]  # these are sorted with highest on top
        candidate = ' '.join(
            [ixtoword[int(ix)] for ix in top_prediction[1] if ix > 0])  # ix 0 is the END token, skip that
        print('PRED: (%f) %s' % (top_prediction[0], candidate))

        # save for later eval
        all_references.append(references)
        all_candidates.append(candidate)

        img_blob['candidate'] = {'text': candidate,
                                 'logprob': float(top_prediction[0])}

        # Code to save all the other candidates
        candlist = []
        for prediction in top_predictions[1:]:
            text = ' '.join(
                [ixtoword[int(ix)] for ix in prediction[1] if ix > 0])  # ix 0 is the END token, skip that
            candlist.append({'text': text, 'logprob': float(prediction[0])})
        img_blob['candidatelist'] = candlist
        blob['imgblobs'].append(img_blob)

    # use perl script to eval BLEU score for fair comparison to other research work
    # first write intermediate files
    print('writing intermediate files into eval/')
    with open('eval/output', 'w') as f:
        f.write('\n'.join(all_candidates))
    # Exactly five reference files are written, so every image is expected
    # to come with at least five reference sentences
    for q in xrange(5):
        with open('eval/reference%d' % q, 'w') as f:
            f.write('\n'.join([x[q] for x in all_references]))

    # invoke the perl script to get BLEU scores
    print('invoking eval/multi-bleu.perl script...')
    owd = os.getcwd()
    os.chdir('eval')
    try:
        os.system('./multi-bleu.perl reference < output')
    finally:
        # Always return to the original working directory
        os.chdir(owd)

    # The test-split perplexity evaluation was disabled (commented out) in
    # the original and is not performed here.

    # dump result struct to file
    print('saving result struct to %s' % (params['result_struct_filename'], ))
    with open(params['result_struct_filename'], 'w') as f:
        json.dump(blob, f)
def main(params): batch_size = params['batch_size'] dataset = params['dataset'] word_count_threshold = params['word_count_threshold'] do_grad_check = params['do_grad_check'] max_epochs = params['max_epochs'] # fetch the data provider dp = getDataProvider(dataset) misc = {} # stores various misc items that need to be passed around the framework # go over all training sentences and find the vocabulary we want to use, i.e. the words that occur # at least word_count_threshold number of times misc['wordtoix'], misc['ixtoword'], bias_init_vector = preProBuildWordVocab(dp.iterSentences('train'), word_count_threshold) # delegate the initialization of the model to the Generator class BatchGenerator = decodeGenerator(params) init_struct = BatchGenerator.init(params, misc) model, misc['update'], misc['regularize'] = (init_struct['model'], init_struct['update'], init_struct['regularize']) # force overwrite here. This is a bit of a hack, not happy about it model['bd'] = bias_init_vector.reshape(1, bias_init_vector.size) print 'model init done.' 
print 'model has keys: ' + ', '.join(model.keys()) print 'updating: ' + ', '.join( '%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1]) for k in misc['update']) print 'updating: ' + ', '.join( '%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1]) for k in misc['regularize']) print 'number of learnable parameters total: %d' % (sum(model[k].shape[0] * model[k].shape[1] for k in misc['update']), ) if params.get('init_model_from', ''): # load checkpoint checkpoint = pickle.load(open(params['init_model_from'], 'rb')) model = checkpoint['model'] # overwrite the model # initialize the Solver and the cost function solver = Solver() def costfun(batch, model): # wrap the cost function to abstract some things away from the Solver return RNNGenCost(batch, model, params, misc) # calculate how many iterations we need num_sentences_total = dp.getSplitSize('train', ofwhat = 'sentences') num_iters_one_epoch = num_sentences_total / batch_size max_iters = max_epochs * num_iters_one_epoch eval_period_in_epochs = params['eval_period'] eval_period_in_iters = max(1, int(num_iters_one_epoch * eval_period_in_epochs)) abort = False top_val_ppl2 = -1 smooth_train_ppl2 = len(misc['ixtoword']) # initially size of dictionary of confusion val_ppl2 = len(misc['ixtoword']) last_status_write_time = 0 # for writing worker job status reports json_worker_status = {} json_worker_status['params'] = params json_worker_status['history'] = [] import csv csvfile = open(os.path.join(params['outdir'],params['generator']+'.csv'),'wb') csvout = csv.writer(csvfile,delimiter=',',quotechar='"') csv_val_file = open(os.path.join(params['outdir'],params['generator']+'_val.csv'),'wb') csv_val_out = csv.writer(csv_val_file,delimiter=',',quotechar='"') for it in xrange(max_iters): if abort: break t0 = time.time() # fetch a batch of data batch = [dp.sampleImageSentencePair() for i in xrange(batch_size)] # evaluate cost, gradient and perform parameter update step_struct = solver.step(batch, model, costfun, 
**params) cost = step_struct['cost'] dt = time.time() - t0 # print training statistics train_ppl2 = step_struct['stats']['ppl2'] smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2 # smooth exponentially decaying moving average if it == 0: smooth_train_ppl2 = train_ppl2 # start out where we start out epoch = it * 1.0 / num_iters_one_epoch print '%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, reg cost = %f, ppl2 = %.2f (smooth %.2f)' \ % (it, max_iters, dt, epoch, cost['loss_cost'], cost['reg_cost'], \ train_ppl2, smooth_train_ppl2) csvout.writerow([it, max_iters, dt, epoch, cost['loss_cost'], cost['reg_cost'],train_ppl2, smooth_train_ppl2]) csvfile.flush() if not host=='oliver-Aurora-R4': sys.stdout.flush() # os.system('./update_plots.sh') # perform gradient check if desired, with a bit of a burnin time (10 iterations) if it == 10 and do_grad_check: print 'disabling dropout for gradient check...' params['drop_prob_encoder'] = 0 params['drop_prob_decoder'] = 0 solver.gradCheck(batch, model, costfun) print 'done gradcheck, exitting.' sys.exit() # hmmm. probably should exit here # detect if loss is exploding and kill the job if so total_cost = cost['total_cost'] if it == 0: total_cost0 = total_cost # store this initial cost if total_cost > total_cost0 * 2: print 'Aboring, cost seems to be exploding. Run gradcheck? Lower the learning rate?' 
abort = True # set the abort flag, we'll break out # logging: write JSON files for visual inspection of the training tnow = time.time() if tnow > last_status_write_time + 60*1: # every now and then lets write a report last_status_write_time = tnow jstatus = {} jstatus['time'] = datetime.datetime.now().isoformat() jstatus['iter'] = (it, max_iters) jstatus['epoch'] = (epoch, max_epochs) jstatus['time_per_batch'] = dt jstatus['smooth_train_ppl2'] = smooth_train_ppl2 jstatus['val_ppl2'] = val_ppl2 # just write the last available one jstatus['train_ppl2'] = train_ppl2 json_worker_status['history'].append(jstatus) status_file = os.path.join(params['worker_status_output_directory'], host + '_status.json') try: json.dump(json_worker_status, open(status_file, 'w')) except Exception, e: # todo be more clever here print 'tried to write worker status into %s but got error:' % (status_file, ) print e # perform perplexity evaluation on the validation set and save a model checkpoint if it's good is_last_iter = (it+1) == max_iters if (((it+1) % eval_period_in_iters) == 0 and it < max_iters - 5) or is_last_iter: val_ppl2 = eval_split('val', dp, model, params, misc) # perform the evaluation on VAL set print 'validation perplexity = %f' % (val_ppl2, ) cp_pred = {} cp_pred['it'] = it cp_pred['epoch'] = epoch cp_pred['model'] = model cp_pred['params'] = params cp_pred['perplexity'] = val_ppl2 cp_pred['wordtoix'] = misc['wordtoix'] cp_pred['ixtoword'] = misc['ixtoword'] cp_pred['algorithm'] = params['generator'] cp_pred['outdir'] = params['outdir'] if is_last_iter: scores = eval_sentence_predictions.run(cp_pred) csv_val_out.writerow([it, max_iters, dt, epoch, val_ppl2, scores[0],scores[1],scores[2],scores[3],scores[4],scores[5],scores[6]]) csv_val_file.flush() omail.send('job finished'+params['generator'],'done') # abort training if the perplexity is no good min_ppl_or_abort = params['min_ppl_or_abort'] if val_ppl2 > min_ppl_or_abort and min_ppl_or_abort > 0: print 'aborting job because 
validation perplexity %f < %f' % (val_ppl2, min_ppl_or_abort) abort = True # abort the job write_checkpoint_ppl_threshold = params['write_checkpoint_ppl_threshold'] if val_ppl2 < top_val_ppl2 or top_val_ppl2 < 0: if val_ppl2 < write_checkpoint_ppl_threshold or write_checkpoint_ppl_threshold < 0: # if we beat a previous record or if this is the first time # AND we also beat the user-defined threshold or it doesnt exist top_val_ppl2 = val_ppl2 filename = 'model_%s_checkpoint_%s_%s_%s_%.2f.p' % (params['generator'],dataset, host, params['fappend'], val_ppl2) filepath = os.path.join(params['outdir'], filename) checkpoint = {} checkpoint['it'] = it checkpoint['epoch'] = epoch checkpoint['model'] = model checkpoint['params'] = params checkpoint['perplexity'] = val_ppl2 checkpoint['wordtoix'] = misc['wordtoix'] checkpoint['ixtoword'] = misc['ixtoword'] checkpoint['algorithm'] = params['generator'] checkpoint['outdir'] = params['outdir'] try: pickle.dump(checkpoint, open(filepath, "wb")) print 'saved checkpoint in %s' % (filepath, ) except Exception, e: # todo be more clever here print 'tried to write checkpoint into %s but got error: ' % (filepat, ) print e scores = eval_sentence_predictions.run(checkpoint) csv_val_out.writerow([it, max_iters, dt, epoch, val_ppl2, scores[0],scores[1],scores[2],scores[3],scores[4],scores[5],scores[6]]) csv_val_file.flush()
def main(params): # load the checkpoint checkpoint_path = params['checkpoint_path'] max_images = params['max_images'] print 'loading checkpoint %s' % (checkpoint_path, ) checkpoint = pickle.load(open(checkpoint_path, 'rb')) checkpoint_params = checkpoint['params'] dataset = checkpoint_params['dataset'] model_npy = checkpoint['model'] dump_folder = params['dump_folder'] if 'use_theano' not in checkpoint_params: checkpoint_params['use_theano'] = 1 checkpoint_params['use_theano'] = 1 if 'image_feat_size' not in checkpoint_params: checkpoint_params['image_feat_size'] = 4096 if dump_folder: print 'creating dump folder ' + dump_folder os.system('mkdir -p ' + dump_folder) # fetch the data provider dp = getDataProvider(checkpoint_params) misc = {} misc['wordtoix'] = checkpoint['wordtoix'] ixtoword = checkpoint['ixtoword'] blob = { } # output blob which we will dump to JSON for visualizing the results blob['params'] = params blob['checkpoint_params'] = checkpoint_params blob['imgblobs'] = [] # iterate over all images in test set and predict sentences BatchGenerator = decodeGenerator(checkpoint_params) if checkpoint_params['use_theano'] == 1: # Compile and init the theano predictor BatchGenerator.prepPredictor(model_npy, checkpoint_params, params['beam_size']) model = BatchGenerator.model_th print("\nUsing model run for %0.2f epochs with validation perplx at %0.3f\n" % (checkpoint['epoch'], \ checkpoint['perplexity'])) n = 0 all_references = [] all_candidates = [] for img in dp.iterImages(split='test', max_images=max_images): n += 1 print 'image %d/%d:' % (n, max_images) references = [' '.join(x['tokens']) for x in img['sentences']] # as list of lists of tokens kwparams = {'beam_size': params['beam_size']} img['feat'] = np.random.rand(*img['feat'].shape) Ys = BatchGenerator.predict([{ 'image': img }], model, checkpoint_params, **kwparams) img_blob = {} # we will build this up img_blob['img_path'] = img['local_file_path'] img_blob['imgid'] = img['imgid'] if dump_folder: # 
copy source file to some folder. This makes it easier to distribute results # into a webpage, because all images that were predicted on are in a single folder source_file = img['local_file_path'] target_file = os.path.join( dump_folder, os.path.basename(img['local_file_path'])) os.system('cp %s %s' % (source_file, target_file)) # encode the human-provided references img_blob['references'] = [] for gtsent in references: print 'GT: ' + gtsent img_blob['references'].append({'text': gtsent}) # now evaluate and encode the top prediction top_predictions = Ys[ 0] # take predictions for the first (and only) image we passed in top_prediction = top_predictions[ 0] # these are sorted with highest on top #import pdb; pdb.set_trace() candidate = ' '.join([ ixtoword[ix] for ix in top_prediction[1] if ix > 0 ]) # ix 0 is the END token, skip that print 'PRED: (%f) %s' % (top_prediction[0], candidate) # save for later eval all_references.append(references) all_candidates.append(candidate) img_blob['candidate'] = { 'text': candidate, 'logprob': float(top_prediction[0]) } # Code to save all the other candidates candlist = [] for ci in xrange(len(top_predictions) - 1): prediction = top_predictions[ ci + 1] # these are sorted with highest on top candidate = ' '.join([ ixtoword[int(ix)] for ix in prediction[1] if ix > 0 ]) # ix 0 is the END token, skip that candlist.append({ 'text': candidate, 'logprob': float(prediction[0]) }) img_blob['candidatelist'] = candlist blob['imgblobs'].append(img_blob) # use perl script to eval BLEU score for fair comparison to other research work # first write intermediate files print 'writing intermediate files into eval/' open('eval/output', 'w').write('\n'.join(all_candidates)) for q in xrange(5): open('eval/reference' + ` q `, 'w').write('\n'.join([x[q] for x in all_references])) # invoke the perl script to get BLEU scores print 'invoking eval/multi-bleu.perl script...' 
owd = os.getcwd() os.chdir('eval') os.system('./multi-bleu.perl reference < output') os.chdir(owd) # now also evaluate test split perplexity # if checkpoint_params['use_theano'] == 0: # gtppl = eval_split('test', dp, model, checkpoint_params, misc, eval_max_images = max_images) # else: # gtppl = eval_split_theano('test', dp, model, checkpoint_params, misc, BatchGenerator.f_eval, eval_max_images = max_images) # perform the evaluation on VAL set # print 'perplexity of ground truth words based on dictionary of %d words: %f' % (len(ixtoword), gtppl) # blob['gtppl'] = gtppl # # dump result struct to file print 'saving result struct to %s' % (params['result_struct_filename'], ) json.dump(blob, open(params['result_struct_filename'], 'w'))
def main(params): # load the checkpoint checkpoint_path = params['checkpoint_path'] max_images = params['max_images'] print 'loading checkpoint %s' % (checkpoint_path, ) checkpoint = pickle.load(open(checkpoint_path, 'rb')) checkpoint_params = checkpoint['params'] dataset = checkpoint_params['dataset'] model = checkpoint['model'] # fetch the data provider dp = getDataProvider(dataset) misc = {} misc['wordtoix'] = checkpoint['wordtoix'] ixtoword = checkpoint['ixtoword'] blob = {} # output blob which we will dump to JSON for visualizing the results blob['params'] = params blob['checkpoint_params'] = checkpoint_params blob['imgblobs'] = [] # iterate over all images in test set and predict sentences BatchGenerator = decodeGenerator(checkpoint_params) all_bleu_scores = [] n = 0 #for img in dp.iterImages(split = 'test', shuffle = True, max_images = max_images): for img in dp.iterImages(split = 'test', max_images = max_images): n+=1 print 'image %d/%d:' % (n, max_images) references = [x['tokens'] for x in img['sentences']] # as list of lists of tokens kwparams = { 'tanhC_version' : checkpoint_params.get('tanhC_version', 0) ,\ 'beam_size' : params['beam_size'],\ 'generator' : checkpoint_params['generator']} Ys = BatchGenerator.predict([{'image':img}], model, **kwparams) img_blob = {} # we will build this up img_blob['img_path'] = img['local_file_path'] img_blob['imgid'] = img['imgid'] # encode the human-provided references img_blob['references'] = [] for gtwords in references: print 'GT: ' + ' '.join(gtwords) img_blob['references'].append({'text': ' '.join(gtwords)}) # now evaluate and encode the top prediction top_predictions = Ys[0] # take predictions for the first (and only) image we passed in top_prediction = top_predictions[0] # these are sorted with highest on top candidate = [ixtoword[ix] for ix in top_prediction[1]] print 'PRED: (%f) %s' % (top_prediction[0], ' '.join(candidate)) bleu_scores = evalCandidate(candidate, references) print 'BLEU: B-1: %f B-2: %f B-3: 
%f' % tuple(bleu_scores) img_blob['candidate'] = {'text': ' '.join(candidate), 'logprob': top_prediction[0], 'bleu': bleu_scores} all_bleu_scores.append(bleu_scores) blob['imgblobs'].append(img_blob) print 'final average bleu scores:' bleu_averages = [sum(x[i] for x in all_bleu_scores)*1.0/len(all_bleu_scores) for i in xrange(3)] blob['final_result'] = { 'bleu' : bleu_averages } print 'FINAL BLEU: B-1: %f B-2: %f B-3: %f' % tuple(bleu_averages) # now also evaluate test split perplexity gtppl = eval_split('test', dp, model, checkpoint_params, misc, eval_max_images = max_images) print 'perplexity of ground truth words: %f' % (gtppl, ) blob['gtppl'] = gtppl # dump result struct to file print 'saving result struct to %s' % (params['result_struct_filename'], ) json.dump(blob, open(params['result_struct_filename'], 'w'))
def main(params):
    """Caption every image listed under `root_path` and write JSON and HTML reports.

    Keys read from `params`: checkpoint_path, root_path and beam_size.

    Inputs under `root_path`: `img/tasks.txt` (one image name per line) and
    `self_img_vgg_feats.npy` (loaded and transposed so that column n is the
    feature vector paired with line n of tasks.txt).
    Outputs under `root_path`: `result_struct.json` and `result.html`.
    """
    # load the checkpoint
    checkpoint_path = params['checkpoint_path']
    print('loading checkpoint %s' % (checkpoint_path, ))
    # NOTE: unpickling executes arbitrary code; only load trusted checkpoints
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file, encoding='latin1')
    checkpoint_params = checkpoint['params']
    model = checkpoint['model']
    ixtoword = checkpoint['ixtoword']

    # output blob which we will dump to JSON for visualizing the results
    blob = {}
    blob['params'] = params
    blob['checkpoint_params'] = checkpoint_params
    blob['imgblobs'] = []

    # load the tasks.txt file
    root_path = params['root_path']
    with open(os.path.join(root_path, 'img', 'tasks.txt'), 'r') as tasks_file:
        img_names = tasks_file.read().splitlines()

    # load the features for all images
    features_path = os.path.join(root_path, 'self_img_vgg_feats.npy')
    # features_struct = scipy.io.loadmat(features_path)
    features = np.load(features_path)
    features = features.T  # this is a 4096 x N numpy array of features
    _, N = features.shape

    # iterate over all images and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {'beam_size': params['beam_size']}
    for n in range(N):
        print('image %d/%d:' % (n, N))

        # encode the image
        img = {}
        img['feat'] = features[:, n]
        img['local_file_path'] = img_names[n]

        # perform the work. heavy lifting happens inside
        Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)

        # build up the output
        img_blob = {}
        img_blob['img_path'] = img['local_file_path']

        # encode the top prediction
        top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
        top_prediction = top_predictions[0]  # these are sorted with highest on top
        # ix 0 is the END token, skip that
        candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        print('PRED: (%f) %s' % (top_prediction[0], candidate))
        img_blob['candidate'] = {'text': candidate, 'logprob': top_prediction[0]}
        blob['imgblobs'].append(img_blob)

    # dump result struct to file
    save_file = os.path.join(root_path, 'result_struct.json')
    print('writing predictions to %s...' % (save_file, ))
    with open(save_file, 'w') as result_file:
        json.dump(blob, result_file)

    # dump output html; collect the fragments and join once instead of
    # growing a string in the loop
    html_parts = []
    for img in blob['imgblobs']:
        html_parts.append('<img src="%s" height="400"><br>' % ('img/' + img['img_path'], ))
        html_parts.append('(%f) %s <br><br>' % (img['candidate']['logprob'],
                                                img['candidate']['text']))
    html = ''.join(html_parts)
    html_file = os.path.join(root_path, 'result.html')
    print('writing html result file to %s...' % (html_file, ))
    with open(html_file, 'w') as html_out:
        html_out.write(html)
def main(params): # load the checkpoint checkpoint_path = params['checkpoint_path'] max_images = params['max_images'] print 'loading checkpoint %s' % (checkpoint_path, ) checkpoint = pickle.load(open(checkpoint_path, 'rb')) checkpoint_params = checkpoint['params'] dataset = checkpoint_params['dataset'] model = checkpoint['model'] # fetch the data provider dp = getDataProvider(dataset) misc = {} misc['wordtoix'] = checkpoint['wordtoix'] ixtoword = checkpoint['ixtoword'] blob = {} # output blob which we will dump to JSON for visualizing the results blob['params'] = params blob['checkpoint_params'] = checkpoint_params blob['imgblobs'] = [] # iterate over all images in test set and predict sentences BatchGenerator = decodeGenerator(checkpoint_params) n = 0 all_references = [] all_candidates = [] for img in dp.iterImages(split = 'test', max_images = max_images): n+=1 print 'image %d/%d:' % (n, max_images) references = [' '.join(x['tokens']) for x in img['sentences']] # as list of lists of tokens kwparams = { 'beam_size' : params['beam_size'] } Ys = BatchGenerator.predict([{'image':img}], model, checkpoint_params, **kwparams) img_blob = {} # we will build this up img_blob['img_path'] = img['local_file_path'] img_blob['imgid'] = img['imgid'] # encode the human-provided references img_blob['references'] = [] for gtsent in references: print 'GT: ' + gtsent img_blob['references'].append({'text': gtsent}) # now evaluate and encode the top prediction top_predictions = Ys[0] # take predictions for the first (and only) image we passed in top_prediction = top_predictions[0] # these are sorted with highest on top candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0]) # ix 0 is the END token, skip that print 'PRED: (%f) %s' % (top_prediction[0], candidate) # save for later eval all_references.append(references) all_candidates.append(candidate) img_blob['candidate'] = {'text': candidate, 'logprob': top_prediction[0]} blob['imgblobs'].append(img_blob) # use perl 
script to eval BLEU score for fair comparison to other research work # first write intermediate files print 'writing intermediate files into eval/' open('eval/output', 'w').write('\n'.join(all_candidates)) for q in xrange(5): open('eval/reference'+`q`, 'w').write('\n'.join([x[q] for x in all_references])) # invoke the perl script to get BLEU scores print 'invoking eval/multi-bleu.perl script...' owd = os.getcwd() os.chdir('eval') os.system('./multi-bleu.perl reference < output') os.chdir(owd) # now also evaluate test split perplexity gtppl = eval_split('test', dp, model, checkpoint_params, misc, eval_max_images = max_images) print 'perplexity of ground truth words based on dictionary of %d words: %f' % (len(ixtoword), gtppl) blob['gtppl'] = gtppl # dump result struct to file print 'saving result struct to %s' % (params['result_struct_filename'], ) json.dump(blob, open(params['result_struct_filename'], 'w'))
def main(params): # load the checkpoint checkpoint_path = params['checkpoint_path'] print 'loading checkpoint %s' % (checkpoint_path, ) checkpoint = pickle.load(open(checkpoint_path, 'rb')) checkpoint_params = checkpoint['params'] dataset = checkpoint_params['dataset'] model = checkpoint['model'] misc = {} misc['wordtoix'] = checkpoint['wordtoix'] ixtoword = checkpoint['ixtoword'] # output blob which we will dump to JSON for visualizing the results blob = {} blob['params'] = params blob['checkpoint_params'] = checkpoint_params blob['imgblobs'] = [] # load the tasks.txt file # TODO FIND EASY WAY TO CALL FILE WITH PROPER root root_path = params['root_path'] img_names = open(os.path.join(root_path, 'tasks.txt'), 'r').read().splitlines() # load the features for all images features_path = os.path.join(root_path, 'vgg_feats.mat') features_struct = scipy.io.loadmat(features_path) features = features_struct['feats'] # this is a 4096 x N numpy array of features D,N = features.shape # iterate over all images and predict sentences BatchGenerator = decodeGenerator(checkpoint_params) for n in xrange(N): print 'image %d/%d:' % (n, N) # encode the image img = {} img['feat'] = features[:, n] img['local_file_path'] = img_names[n] print img['local_file_path'] # perform the work. 
heavy lifting happens inside kwparams = { 'beam_size' : params['beam_size'] } Ys = BatchGenerator.predict([{'image':img}], model, checkpoint_params, **kwparams) print Ys # build up the output img_blob = {} img_blob['img_path'] = img['local_file_path'] # encode the top prediction # top_predictions = Ys[0] # take predictions for the first (and only) image we passed in # top_prediction = top_predictions[0] # these are sorted with highest on top # candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0]) # ix 0 is the END token, skip that # print 'PRED: (%f) %s' % (top_prediction[0], candidate) # img_blob['candidate'] = {'text': candidate, 'logprob': top_prediction[0]} # blob['imgblobs'].append(img_blob) # encode the top prediction my attempt at showing all candidates img_blob['candidates'] = [] top_predictions = Ys[0] # take predictions for the first (and only) image we passed in print 'Number of name candidates', top_predictions # TODO TIME IT. SEEMS PRETTY FAST THOUGH for i in range(0, len(top_predictions)): top_prediction = top_predictions[i] # these are sorted with highest on top candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0]) # ix 0 is the END token, skip that print [ixtoword[ix] for ix in top_prediction[1] if ix > 0] print 'PRED: (%f) %s' % (top_prediction[0], candidate) img_blob['candidates'].append({'text': candidate, 'logprob': top_prediction[0]}) # VERY IMPORTANT LINE blob['imgblobs'].append(img_blob) # dump result struct to file save_file = os.path.join(root_path, 'result_struct.json') print 'writing predictions to %s...' 
% (save_file, ) json.dump(blob, open(save_file, 'w')) # dump output html html = '' for img in blob['imgblobs']: html += '<img src="%s" height="400"><br>' % (img['img_path'], ) #print 'Number of name candidates', len(img['candidates']) for i in range(0, len(img['candidates'])): html += '(%f) %s <br><br>' % (img['candidates'][i]['logprob'], img['candidates'][i]['text']) #print html html_file = os.path.join(root_path, 'result.html') print 'writing html result file to %s...' % (html_file, ) open(html_file, 'w').write(html) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('checkpoint_path', type=str, help='the input checkpoint') parser.add_argument('-r', '--root_path', default='example_images', type=str, help='folder with the images, tasks.txt file, and corresponding vgg_feats.mat file') parser.add_argument('-b', '--beam_size', type=int, default=1, help='beam size in inference. 1 indicates greedy per-word max procedure. Good value is approx 20 or so, and more = better.') args = parser.parse_args() params = vars(args) # convert to ordinary dict print 'parsed parameters:' print json.dumps(params, indent = 2) main(params)
def main(video_name): # load the checkpoint checkpoint_path = '/home/t-yuche/neuraltalk/models/flickr8k_cnn_lstm_v1.p' print 'loading checkpoint %s' % (checkpoint_path, ) checkpoint = pickle.load(open(checkpoint_path, 'rb')) checkpoint_params = checkpoint['params'] dataset = checkpoint_params['dataset'] model = checkpoint['model'] misc = {} misc['wordtoix'] = checkpoint['wordtoix'] ixtoword = checkpoint['ixtoword'] # output blob which we will dump to JSON for visualizing the results blob = {} blob['checkpoint_params'] = checkpoint_params blob['imgblobs'] = [] # load the tasks.txt file root_path = os.path.join('/mnt/frames', video_name) all_frames = [os.path.join('/mnt/frames/', video_name, x) for x in os.listdir(os.path.join('/mnt/frames', video_name))] # Load unprocessed frames to filenames fei_cap_data = load_video_caption('/mnt/tags/fei-caption-keyframe', video_name) processed_frames = [x['img_path'] for x in fei_cap_data] blob['imgblobs'] = blob['imgblobs'] + fei_cap_data img_names = [] for frame in all_frames: if frame not in processed_frames: img_names += [frame] # load the features for all images ''' features_path = os.path.join(root_path, 'vgg_feats.mat') features_struct = scipy.io.loadmat(features_path) features = features_struct['feats'] # this is a 4096 x N numpy array of features print features_struct['feats'] # this is a 4096 x N numpy array of features D,N = features.shape ''' features_path = os.path.join('/mnt/tags/fei-caption-all-pickle', video_name + '.pickle') features = pickle.load(open(features_path)) features = features.T #features = features_struct['feats'] # this is a 4096 x N numpy array of features D,N = features.shape # iterate over all images and predict sentences BatchGenerator = decodeGenerator(checkpoint_params) for n in xrange(N): print 'image %d/%d:' % (n, N) # encode the image img = {} img['feat'] = features[:, n] img['local_file_path'] = img_names[n] # perform the work. 
heavy lifting happens inside kwparams = { 'beam_size' : 20 } tic = time.time() Ys = BatchGenerator.predict([{'image':img}], model, checkpoint_params, **kwparams) toc = time.time() print 'image %d/%d: %f' % (n, N, toc-tic) # build up the output img_blob = {} img_blob['img_path'] = img['local_file_path'] img_blob['rnn_time'] = (toc-tic) img_blob['candidate'] = {'text': [], 'logprob': []} # encode the top prediction top_predictions = Ys[0] # take predictions for the first (and only) image we passed in for i in xrange(min(5, len(top_predictions))): top_prediction = top_predictions[i] candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0]) # ix 0 is the END token, skip that #print '%f PRED: (%f) %s' % (img_blob['rnn_time'], top_prediction[0], candidate) img_blob['candidate']['text'] += [candidate] img_blob['candidate']['logprob'] += [top_prediction[0]] ''' top_prediction = top_predictions[0] # these are sorted with highest on top candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0]) # ix 0 is the END token, skip that print 'PRED: (%f) %s' % (top_prediction[0], candidate) ''' #img_blob['candidate'] = {'text': candidate, 'logprob': top_prediction[0]} blob['imgblobs'].append(img_blob) # dump result struct to file #save_file = os.path.join(root_path, 'result_struct.json') save_file = os.path.join('/mnt/tags/fei-caption-all', video_name + '_5_caption.json') print 'writing predictions to %s...' % (save_file, ) json.dump(blob, open(save_file, 'w')) # dump output html '''