def main(params):
  batch_size = params['batch_size']
  dataset = params['dataset']
  word_count_threshold = params['word_count_threshold']
  do_grad_check = params['do_grad_check']
  max_epochs = params['max_epochs']
  host = socket.gethostname() # get computer hostname

  # fetch the data provider
  dp = getDataProvider(dataset)

  misc = {} # stores various misc items that need to be passed around the framework

  # go over all training sentences and find the vocabulary we want to use, i.e. the words that occur
  # at least word_count_threshold number of times
  misc['wordtoix'], misc['ixtoword'], bias_init_vector = preProBuildWordVocab(dp.iterSentences('train'), word_count_threshold)

  # delegate the initialization of the model to the Generator class
  BatchGenerator = batchDecodeGenerator(params)
  init_struct = BatchGenerator.init(params, misc)
  model, misc['update'], misc['regularize'] = (init_struct['model'], init_struct['update'], init_struct['regularize'])

  # force overwrite here. This is a bit of a hack, not happy about it
  model['bd'] = bias_init_vector.reshape(1, bias_init_vector.size)

  print 'model init done.'
  print 'model has keys: ' + ', '.join(model.keys())
  print 'updating: ' + ', '.join('%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1]) for k in misc['update'])
  print 'regularizing: ' + ', '.join('%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1]) for k in misc['regularize'])
  print 'number of learnable parameters total: %d' % (sum(model[k].shape[0] * model[k].shape[1] for k in misc['update']), )

  if params.get('init_model_from', ''):
    # load checkpoint
    checkpoint = pickle.load(open(params['init_model_from'], 'rb'))
    model = checkpoint['model'] # overwrite the model

  # initialize the Solver and the cost function
  solver = Solver()
  def costfun(batch, model):
    # wrap the cost function to abstract some things away from the Solver
    return RNNGenCost(batch, model, params, misc)

  # calculate how many iterations we need
  num_sentences_total = dp.getSplitSize('train', ofwhat='sentences')
  num_iters_one_epoch = num_sentences_total / batch_size
  max_iters = max_epochs * num_iters_one_epoch
  eval_period_in_epochs = params['eval_period']
  eval_period_in_iters = max(1, int(num_iters_one_epoch * eval_period_in_epochs))
  abort = False
  top_val_ppl2 = -1
  smooth_train_ppl2 = len(misc['ixtoword']) # initially size of dictionary of confusion
  val_ppl2 = len(misc['ixtoword'])
  last_status_write_time = 0 # for writing worker job status reports
  json_worker_status = {}
  json_worker_status['params'] = params
  json_worker_status['history'] = []
  for it in xrange(max_iters):
    if abort: break
    t0 = time.time()
    # fetch a batch of data
    batch = [dp.sampleImageSentencePair() for i in xrange(batch_size)]
    # evaluate cost, gradient and perform parameter update
    step_struct = solver.step(batch, model, costfun, **params)
    cost = step_struct['cost']
    dt = time.time() - t0

    # print training statistics
    train_ppl2 = step_struct['stats']['ppl2']
    smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2 # exponentially decaying moving average
    if it == 0: smooth_train_ppl2 = train_ppl2 # start out where we start out
    epoch = it * 1.0 / num_iters_one_epoch
    print '%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, reg cost = %f, ppl2 = %.2f (smooth %.2f)' \
          % (it, max_iters, dt, epoch, cost['loss_cost'], cost['reg_cost'], \
             train_ppl2, smooth_train_ppl2)

    # perform gradient check if desired, with a bit of a burnin time (10 iterations)
    if it == 10 and do_grad_check:
      print 'disabling dropout for gradient check...'
      params['drop_prob_encoder'] = 0
      params['drop_prob_decoder'] = 0
      solver.gradCheck(batch, model, costfun)
      print 'done gradcheck, exiting.'
      sys.exit() # hmmm. probably should exit here

    # detect if loss is exploding and kill the job if so
    total_cost = cost['total_cost']
    if it == 0:
      total_cost0 = total_cost # store this initial cost
    if total_cost > total_cost0 * 2:
      print 'Aborting, cost seems to be exploding. Run gradcheck? Lower the learning rate?'
      abort = True # set the abort flag, we'll break out

    # logging: write JSON files for visual inspection of the training
    tnow = time.time()
    if tnow > last_status_write_time + 60 * 1: # every now and then lets write a report
      last_status_write_time = tnow
      jstatus = {}
      jstatus['time'] = datetime.datetime.now().isoformat()
      jstatus['iter'] = (it, max_iters)
      jstatus['epoch'] = (epoch, max_epochs)
      jstatus['time_per_batch'] = dt
      jstatus['smooth_train_ppl2'] = smooth_train_ppl2
      jstatus['val_ppl2'] = val_ppl2 # just write the last available one
      jstatus['train_ppl2'] = train_ppl2
      json_worker_status['history'].append(jstatus)
      status_file = os.path.join(params['worker_status_output_directory'], host + '_status.json')
      try:
        json.dump(json_worker_status, open(status_file, 'w'))
      except Exception as e: # todo: be more clever here
        print 'tried to write worker status into %s but got error:' % (status_file, )
        print e

    # perform perplexity evaluation on the validation set and save a model checkpoint if it's good
    is_last_iter = (it + 1) == max_iters
    if (((it + 1) % eval_period_in_iters) == 0 and it < max_iters - 5) or is_last_iter:
      val_ppl2 = eval_split('val', dp, model, params, misc) # perform the evaluation on VAL set
      print 'validation perplexity = %f' % (val_ppl2, )

      # abort training if the perplexity is no good
      min_ppl_or_abort = params['min_ppl_or_abort']
      if val_ppl2 > min_ppl_or_abort and min_ppl_or_abort > 0:
        print 'aborting job because validation perplexity %f > %f' % (val_ppl2, min_ppl_or_abort)
        abort = True # abort the job

      write_checkpoint_ppl_threshold = params['write_checkpoint_ppl_threshold']
      if val_ppl2 < top_val_ppl2 or top_val_ppl2 < 0:
        if val_ppl2 < write_checkpoint_ppl_threshold or write_checkpoint_ppl_threshold < 0:
          # if we beat a previous record or if this is the first time
          # AND we also beat the user-defined threshold or it doesnt exist
          top_val_ppl2 = val_ppl2
          filename = 'model_checkpoint_%s_%s_%s_%.2f.p' % (dataset, host, params['fappend'], val_ppl2)
          filepath = os.path.join(params['checkpoint_output_directory'], filename)
          checkpoint = {}
          checkpoint['it'] = it
          checkpoint['epoch'] = epoch
          checkpoint['model'] = model
          checkpoint['params'] = params
          checkpoint['perplexity'] = val_ppl2
          checkpoint['wordtoix'] = misc['wordtoix']
          checkpoint['ixtoword'] = misc['ixtoword']
          try:
            pickle.dump(checkpoint, open(filepath, 'wb'))
            print 'saved checkpoint in %s' % (filepath, )
          except Exception as e: # todo: be more clever here
            print 'tried to write checkpoint into %s but got error: ' % (filepath, )
            print e
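# --------------------------------------------------------------------------
# Usage sketch (not from the original source): the driver above reads its
# configuration from a plain params dict. Below is a minimal, hypothetical
# way to assemble that dict with argparse and launch training. The flag names
# mirror the keys main() reads, but the defaults are illustrative assumptions,
# and solver hyperparameters that flow through **params are omitted.
if __name__ == '__main__':
  import argparse
  parser = argparse.ArgumentParser()
  parser.add_argument('--dataset', default='flickr8k')
  parser.add_argument('--batch_size', type=int, default=100)
  parser.add_argument('--max_epochs', type=int, default=50)
  parser.add_argument('--word_count_threshold', type=int, default=5)
  parser.add_argument('--do_grad_check', type=int, default=0)
  parser.add_argument('--eval_period', type=float, default=1.0) # in epochs
  parser.add_argument('--min_ppl_or_abort', type=float, default=-1) # -1 disables the abort check
  parser.add_argument('--write_checkpoint_ppl_threshold', type=float, default=-1)
  parser.add_argument('--fappend', default='baseline')
  parser.add_argument('--init_model_from', default='')
  parser.add_argument('--checkpoint_output_directory', default='cv/')
  parser.add_argument('--worker_status_output_directory', default='status/')
  params = vars(parser.parse_args()) # convert the Namespace to the dict main() expects
  main(params)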
def main(params):
  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  max_images = params['max_images']

  print 'loading checkpoint %s' % (checkpoint_path, )
  checkpoint = pickle.load(open(checkpoint_path, 'rb'))
  checkpoint_params = checkpoint['params']
  dataset = checkpoint_params['dataset']
  model = checkpoint['model']

  # fetch the data provider
  dp = getDataProvider(dataset)

  misc = {}
  misc['wordtoix'] = checkpoint['wordtoix']
  ixtoword = checkpoint['ixtoword']

  blob = {} # output blob which we will dump to JSON for visualizing the results
  blob['params'] = params
  blob['checkpoint_params'] = checkpoint_params
  blob['imgblobs'] = []

  # iterate over all images in test set and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  all_bleu_scores = []
  n = 0
  #for img in dp.iterImages(split='test', shuffle=True, max_images=max_images):
  for img in dp.iterImages(split='test', max_images=max_images):
    n += 1
    print 'image %d/%d:' % (n, max_images)
    references = [x['tokens'] for x in img['sentences']] # as list of lists of tokens
    kwparams = {'beam_size': params['beam_size']}
    Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)

    img_blob = {} # we will build this up
    img_blob['img_path'] = img['local_file_path']
    img_blob['imgid'] = img['imgid']

    # encode the human-provided references
    img_blob['references'] = []
    for gtwords in references:
      print 'GT: ' + ' '.join(gtwords)
      img_blob['references'].append({'text': ' '.join(gtwords)})

    # now evaluate and encode the top prediction
    top_predictions = Ys[0] # take predictions for the first (and only) image we passed in
    top_prediction = top_predictions[0] # these are sorted with highest on top
    candidate = [ixtoword[ix] for ix in top_prediction[1]]
    print 'PRED: (%f) %s' % (top_prediction[0], ' '.join(candidate))

    bleu_scores = evalCandidate(candidate, references)
    print 'BLEU: B-1: %f B-2: %f B-3: %f' % tuple(bleu_scores)
    img_blob['candidate'] = {'text': ' '.join(candidate), 'logprob': top_prediction[0], 'bleu': bleu_scores}

    all_bleu_scores.append(bleu_scores)
    blob['imgblobs'].append(img_blob)

  print 'final average bleu scores:'
  bleu_averages = [sum(x[i] for x in all_bleu_scores) * 1.0 / len(all_bleu_scores) for i in xrange(3)]
  blob['final_result'] = {'bleu': bleu_averages}
  print 'FINAL BLEU: B-1: %f B-2: %f B-3: %f' % tuple(bleu_averages)

  # now also evaluate test split perplexity
  gtppl = eval_split('test', dp, model, checkpoint_params, misc, eval_max_images=max_images)
  print 'perplexity of ground truth words: %f' % (gtppl, )
  blob['gtppl'] = gtppl

  # dump result struct to file
  print 'saving result struct to %s' % (params['result_struct_filename'], )
  json.dump(blob, open(params['result_struct_filename'], 'w'))
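# --------------------------------------------------------------------------
# evalCandidate() is defined elsewhere in the codebase. As a reference point
# only, here is a simplified sketch of the clipped n-gram precisions
# (B-1..B-3) a BLEU-style scorer computes over a candidate token list and a
# list of reference token lists. This is the textbook formulation, not
# necessarily the repo's exact implementation (e.g. no brevity penalty).
from collections import Counter

def evalCandidateSketch(candidate, references, max_n=3):
  scores = []
  for n in xrange(1, max_n + 1):
    cand_counts = Counter(tuple(candidate[i:i + n]) for i in xrange(len(candidate) - n + 1))
    # clip each candidate n-gram by its maximum count in any single reference
    max_ref_counts = Counter()
    for ref in references:
      ref_counts = Counter(tuple(ref[i:i + n]) for i in xrange(len(ref) - n + 1))
      for g, c in ref_counts.iteritems():
        max_ref_counts[g] = max(max_ref_counts[g], c)
    clipped = sum(min(c, max_ref_counts[g]) for g, c in cand_counts.iteritems())
    total = max(1, sum(cand_counts.itervalues()))
    scores.append(clipped * 1.0 / total)
  return scores # [B-1, B-2, B-3]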
def main(params):
  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  max_images = params['max_images']

  print 'loading checkpoint %s' % (checkpoint_path, )
  checkpoint = pickle.load(open(checkpoint_path, 'rb'))
  checkpoint_params = checkpoint['params']
  dataset = checkpoint_params['dataset']
  model = checkpoint['model']

  # fetch the data provider
  dp = getDataProvider(dataset)

  misc = {}
  misc['wordtoix'] = checkpoint['wordtoix']
  ixtoword = checkpoint['ixtoword']

  blob = {} # output blob which we will dump to JSON for visualizing the results
  blob['params'] = params
  blob['checkpoint_params'] = checkpoint_params
  blob['imgblobs'] = []

  # iterate over all images in test set and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  n = 0
  all_references = []
  all_candidates = []
  for img in dp.iterImages(split='test', max_images=max_images):
    n += 1
    print 'image %d/%d:' % (n, max_images)
    references = [' '.join(x['tokens']) for x in img['sentences']] # as list of reference sentences
    kwparams = {'beam_size': params['beam_size']}
    Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)

    img_blob = {} # we will build this up
    img_blob['img_path'] = img['local_file_path']
    img_blob['imgid'] = img['imgid']

    # encode the human-provided references
    img_blob['references'] = []
    for gtsent in references:
      print 'GT: ' + gtsent
      img_blob['references'].append({'text': gtsent})

    # now evaluate and encode the top prediction
    top_predictions = Ys[0] # take predictions for the first (and only) image we passed in
    top_prediction = top_predictions[0] # these are sorted with highest on top
    candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0]) # ix 0 is the END token, skip that
    print 'PRED: (%f) %s' % (top_prediction[0], candidate)

    # save for later eval
    all_references.append(references)
    all_candidates.append(candidate)

    img_blob['candidate'] = {'text': candidate, 'logprob': top_prediction[0]}
    blob['imgblobs'].append(img_blob)

  # use perl script to eval BLEU score for fair comparison to other research work
  # first write intermediate files
  print 'writing intermediate files into eval/'
  open('eval/output', 'w').write('\n'.join(all_candidates))
  for q in xrange(5):
    open('eval/reference' + str(q), 'w').write('\n'.join([x[q] for x in all_references]))
  # invoke the perl script to get BLEU scores
  print 'invoking eval/multi-bleu.perl script...'
  owd = os.getcwd()
  os.chdir('eval')
  os.system('./multi-bleu.perl reference < output')
  os.chdir(owd)

  # now also evaluate test split perplexity
  gtppl = eval_split('test', dp, model, checkpoint_params, misc, eval_max_images=max_images)
  print 'perplexity of ground truth words based on dictionary of %d words: %f' % (len(ixtoword), gtppl)
  blob['gtppl'] = gtppl

  # dump result struct to file
  print 'saving result struct to %s' % (params['result_struct_filename'], )
  json.dump(blob, open(params['result_struct_filename'], 'w'))
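# --------------------------------------------------------------------------
# Note on the multi-bleu.perl convention used above: the candidate file is
# piped on stdin, and the references are separate files sharing a prefix plus
# an integer suffix (reference0..reference4), aligned line-by-line with the
# candidates. os.system() discards the script's stdout; a sketch (assuming
# the same eval/ layout) that captures the BLEU summary line back in Python:
import subprocess

def run_multi_bleu(eval_dir='eval'):
  with open(os.path.join(eval_dir, 'output')) as f:
    out = subprocess.check_output(['./multi-bleu.perl', 'reference'], stdin=f, cwd=eval_dir)
  print out # e.g. a line starting with 'BLEU = ...'
  return out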
def gen_from_test(params):
  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  max_images = params['max_images']
  fout = params['output_file']
  tempo = params['tempo']

  print 'loading checkpoint %s' % (checkpoint_path, )
  checkpoint = pickle.load(open(checkpoint_path, 'rb'))
  checkpoint_params = checkpoint['params']
  dataset = checkpoint_params['dataset']
  model = checkpoint['model']

  dump_folder = params['dump_folder']
  if dump_folder:
    print 'creating dump folder ' + dump_folder
    os.system('mkdir -p ' + dump_folder)

  # fetch the data provider
  dp = getDataProvider(dataset)

  misc = {}
  misc['wordtoix'] = checkpoint['wordtoix']
  ixtoword = checkpoint['ixtoword']

  blob = {} # output blob which we will dump to JSON for visualizing the results
  blob['params'] = params
  blob['checkpoint_params'] = checkpoint_params
  blob['imgblobs'] = []

  # iterate over all images in test set and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  n = 0
  all_references = []
  all_candidates = []
  candidates = []
  for img in dp.iterImages(split='test', max_images=max_images):
    n += 1
    print 'image %d/%d:' % (n, max_images)
    references = [' '.join(x['tokens']) for x in img['sentences']] # as list of reference sentences
    kwparams = {'beam_size': params['beam_size']}
    Ys = BatchGenerator.predict([{'image': img}], model, checkpoint_params, **kwparams)

    img_blob = {} # we will build this up
    img_blob['img_path'] = img['local_file_path']
    img_blob['imgid'] = img['imgid']

    if dump_folder:
      # copy source file to some folder. This makes it easier to distribute results
      # into a webpage, because all images that were predicted on are in a single folder
      source_file = img['local_file_path']
      target_file = os.path.join(dump_folder, os.path.basename(img['local_file_path']))
      os.system('cp %s %s' % (source_file, target_file))

    # encode the human-provided references
    img_blob['references'] = []
    for gtsent in references:
      print 'GT: ' + gtsent
      img_blob['references'].append({'text': gtsent})

    # now evaluate and encode the top prediction
    top_predictions = Ys[0] # take predictions for the first (and only) image we passed in
    top_prediction = top_predictions[0] # these are sorted with highest on top
    candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0]) # ix 0 is the END token, skip that
    candidates.append(candidate)
    print 'PRED: (%f) %s' % (top_prediction[0], candidate)

    # save for later eval
    all_references.append(references)
    all_candidates.append(candidate)

    img_blob['candidate'] = {'text': candidate, 'logprob': top_prediction[0]}
    blob['imgblobs'].append(img_blob)

  # use perl script to eval BLEU score for fair comparison to other research work
  # first write intermediate files
  print 'writing intermediate files into eval/'
  open('eval/output', 'w').write('\n'.join(all_candidates))
  for q in xrange(1):
    open('eval/reference' + str(q), 'w').write('\n'.join([x[q] for x in all_references]))
  # invoke the perl script to get BLEU scores
  print 'invoking eval/multi-bleu.perl script...'
  owd = os.getcwd()
  os.chdir('eval')
  os.system('./multi-bleu.perl reference < output')
  os.chdir(owd)

  # now also evaluate test split perplexity
  gtppl = eval_split('test', dp, model, checkpoint_params, misc, eval_max_images=max_images)
  print 'perplexity of ground truth words based on dictionary of %d words: %f' % (len(ixtoword), gtppl)
  blob['gtppl'] = gtppl

  # dump result struct to file
  # print 'saving result struct to %s' % (params['result_struct_filename'], )
  # json.dump(blob, open(params['result_struct_filename'], 'w'))

  # build MIDI tracks from the generated note tokens
  # NOTE: new_track and bass_track were never defined in the original source;
  # assuming a melody track (program 0, piano) and an acoustic bass track
  # (program 32) here
  new_track = pretty_midi.Instrument(program=0)
  bass_track = pretty_midi.Instrument(program=32)

  # decode each generated token of the form 'pitch;pos;dur' into a MIDI note
  for idx, c in enumerate(candidates):
    for e in c.split():
      es = e.split(';')
      pitch = int(es[0])
      pos = convert_pos(es[1], idx) # position token -> absolute start time, offset by sequence index
      dur = convert_dur(es[2])      # duration token -> note length
      new_track.notes.append(pretty_midi.Note(90, pitch, pos, pos + dur))

  new_midi_data = pretty_midi.PrettyMIDI(initial_tempo=tempo)
  new_midi_data.instruments.append(new_track)

  # pre-set chord progression: one bass note per time unit
  bass_pitches = ([36, 47, 45, 43, 41, 40, 38, 43] * 2 +
                  [45, 41, 36, 43, 45, 41, 43, 43] +
                  [36, 47, 45, 43, 41, 40, 38, 43] * 2)
  for i, p in enumerate(bass_pitches):
    bass_track.notes.append(pretty_midi.Note(90, p, i, i + 1))
  new_midi_data.instruments.append(bass_track)

  adjust_tempo(new_midi_data)
  if params['quantize']:
    quantize(new_midi_data)
  new_midi_data.write(fout)
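# --------------------------------------------------------------------------
# convert_pos() and convert_dur() live elsewhere in this codebase, and the
# exact 'pos'/'dur' token encoding is not shown above. Purely to illustrate
# their expected shape -- map a pos token to an absolute start time (offset
# by the index of the generated sequence) and a dur token to a note length --
# here is a hypothetical version that treats both tokens as sixteenth-note
# counts within fixed-length bars. All constants below are assumptions.
BAR_SECONDS = 4.0 # assumed duration of one generated sequence

def convert_pos_sketch(pos_token, idx):
  # hypothetical: pos counts sixteenths within the bar; idx offsets whole bars
  return idx * BAR_SECONDS + int(pos_token) * (BAR_SECONDS / 16.0)

def convert_dur_sketch(dur_token):
  # hypothetical: dur counts sixteenths
  return int(dur_token) * (BAR_SECONDS / 16.0)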
def main(params):
  # load the checkpoint
  checkpoint_path = params["checkpoint_path"]
  max_images = params["max_images"]

  print "loading checkpoint %s" % (checkpoint_path,)
  checkpoint = pickle.load(open(checkpoint_path, "rb"))
  checkpoint_params = checkpoint["params"]
  dataset = checkpoint_params["dataset"]
  model = checkpoint["model"]
  dump_folder = params["dump_folder"]

  if dump_folder:
    print "creating dump folder " + dump_folder
    os.system("mkdir -p " + dump_folder)

  ## ANAND - CHANGE TEST PATH
  # fetch the data provider
  # dp = getDataProvider(dataset)
  # pdb.set_trace()
  dp = getDataProvider("example_images")

  misc = {}
  misc["wordtoix"] = checkpoint["wordtoix"]
  ixtoword = checkpoint["ixtoword"]

  blob = {} # output blob which we will dump to JSON for visualizing the results
  blob["params"] = params
  blob["checkpoint_params"] = checkpoint_params
  blob["imgblobs"] = []

  # iterate over all images in test set and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  n = 0
  all_references = []
  all_candidates = []
  for img in dp.iterImages(split="test", max_images=max_images):
    n += 1
    print "image %d/%d:" % (n, max_images)
    # pdb.set_trace()
    references = [" ".join(x["tokens"]) for x in img["sentences"]] # as list of reference sentences
    kwparams = {"beam_size": params["beam_size"]}
    Ys = BatchGenerator.predict([{"image": img}], model, checkpoint_params, **kwparams)

    img_blob = {} # we will build this up
    img_blob["img_path"] = img["local_file_path"]
    img_blob["imgid"] = img["imgid"]

    if dump_folder:
      # copy source file to some folder. This makes it easier to distribute results
      # into a webpage, because all images that were predicted on are in a single folder
      source_file = img["local_file_path"]
      target_file = os.path.join(dump_folder, os.path.basename(img["local_file_path"]))
      os.system("cp %s %s" % (source_file, target_file))

    # encode the human-provided references
    img_blob["references"] = []
    for gtsent in references:
      print "GT: " + gtsent
      img_blob["references"].append({"text": gtsent})

    # now evaluate and encode the top prediction
    top_predictions = Ys[0] # take predictions for the first (and only) image we passed in
    top_prediction = top_predictions[0] # these are sorted with highest on top
    candidate = " ".join([ixtoword[ix] for ix in top_prediction[1] if ix > 0]) # ix 0 is the END token, skip that
    print "PRED: (%f) %s" % (top_prediction[0], candidate)

    # save for later eval
    all_references.append(references)
    all_candidates.append(candidate)

    img_blob["candidate"] = {"text": candidate, "logprob": top_prediction[0]}
    blob["imgblobs"].append(img_blob)

  # use perl script to eval BLEU score for fair comparison to other research work
  # first write intermediate files
  print "writing intermediate files into eval/"
  open("eval/output", "w").write("\n".join(all_candidates))
  for q in xrange(5):
    open("eval/reference" + str(q), "w").write("\n".join([x[q] for x in all_references]))
  # invoke the perl script to get BLEU scores
  print "invoking eval/multi-bleu.perl script..."
  owd = os.getcwd()
  os.chdir("eval")
  os.system("./multi-bleu.perl reference < output")
  os.chdir(owd)

  # now also evaluate test split perplexity
  gtppl = eval_split("test", dp, model, checkpoint_params, misc, eval_max_images=max_images)
  print "perplexity of ground truth words based on dictionary of %d words: %f" % (len(ixtoword), gtppl)
  blob["gtppl"] = gtppl

  # dump result struct to file
  print "saving result struct to %s" % (params["result_struct_filename"],)
  json.dump(blob, open(params["result_struct_filename"], "w"))
def main(params):
  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  max_images = params['max_images']

  print 'loading checkpoint %s' % (checkpoint_path, )
  checkpoint = pickle.load(open(checkpoint_path, 'rb'))
  checkpoint_params = checkpoint['params']
  dataset = checkpoint_params['dataset']
  model = checkpoint['model']

  # fetch the data provider
  dp = getDataProvider(dataset)

  misc = {}
  misc['wordtoix'] = checkpoint['wordtoix']
  ixtoword = checkpoint['ixtoword']

  blob = {} # output blob which we will dump to JSON for visualizing the results
  blob['params'] = params
  blob['checkpoint_params'] = checkpoint_params
  blob['imgblobs'] = []

  # iterate over all images in test set and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  all_bleu_scores = []
  n = 0
  #for img in dp.iterImages(split='test', shuffle=True, max_images=max_images):
  for img in dp.iterImages(split='test', max_images=max_images):
    n += 1
    print 'image %d/%d:' % (n, max_images)
    references = [x['tokens'] for x in img['sentences']] # as list of lists of tokens
    kwparams = {'tanhC_version': checkpoint_params.get('tanhC_version', 0),
                'beam_size': params['beam_size'],
                'generator': checkpoint_params['generator']}
    Ys = BatchGenerator.predict([{'image': img}], model, **kwparams)

    img_blob = {} # we will build this up
    img_blob['img_path'] = img['local_file_path']
    img_blob['imgid'] = img['imgid']

    # encode the human-provided references
    img_blob['references'] = []
    for gtwords in references:
      print 'GT: ' + ' '.join(gtwords)
      img_blob['references'].append({'text': ' '.join(gtwords)})

    # now evaluate and encode the top prediction
    top_predictions = Ys[0] # take predictions for the first (and only) image we passed in
    top_prediction = top_predictions[0] # these are sorted with highest on top
    candidate = [ixtoword[ix] for ix in top_prediction[1]]
    print 'PRED: (%f) %s' % (top_prediction[0], ' '.join(candidate))

    bleu_scores = evalCandidate(candidate, references)
    print 'BLEU: B-1: %f B-2: %f B-3: %f' % tuple(bleu_scores)
    img_blob['candidate'] = {'text': ' '.join(candidate), 'logprob': top_prediction[0], 'bleu': bleu_scores}

    all_bleu_scores.append(bleu_scores)
    blob['imgblobs'].append(img_blob)

  print 'final average bleu scores:'
  bleu_averages = [sum(x[i] for x in all_bleu_scores) * 1.0 / len(all_bleu_scores) for i in xrange(3)]
  blob['final_result'] = {'bleu': bleu_averages}
  print 'FINAL BLEU: B-1: %f B-2: %f B-3: %f' % tuple(bleu_averages)

  # now also evaluate test split perplexity
  gtppl = eval_split('test', dp, model, checkpoint_params, misc, eval_max_images=max_images)
  print 'perplexity of ground truth words: %f' % (gtppl, )
  blob['gtppl'] = gtppl

  # dump result struct to file
  print 'saving result struct to %s' % (params['result_struct_filename'], )
  json.dump(blob, open(params['result_struct_filename'], 'w'))
def main(params):
  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  max_images = params['max_images']

  print 'loading checkpoint %s' % (checkpoint_path, )
  checkpoint = pickle.load(open(checkpoint_path, 'rb'))
  checkpoint_params = checkpoint['params']
  dataset = checkpoint_params['dataset']
  model = checkpoint['model']
  dump_folder = params['dump_folder']

  if dump_folder:
    print 'creating dump folder ' + dump_folder
    os.system('mkdir -p ' + dump_folder)

  # fetch the data provider
  dp = getDataProvider(dataset, params['pert'])
  dp.load_topic_models(dataset, params['lda'])

  misc = {}
  misc['wordtoix'] = checkpoint['wordtoix']
  ixtoword = checkpoint['ixtoword']

  blob = {} # output blob which we will dump to JSON for visualizing the results
  blob['params'] = params
  blob['checkpoint_params'] = checkpoint_params
  blob['imgblobs'] = []

  # iterate over all images in test set and predict sentences
  BatchGenerator = decodeGenerator(checkpoint_params)
  n = 0
  all_references = []
  all_candidates = []

  # Added for CCA and perturbed dataset
  if params['cca']:
    pert_str = ''
    if params['pert']:
      pert_str = '_pert'
    ccaweights = np.loadtxt('cca/imageprojection_' + str(params['cca']) + pert_str + '.txt', delimiter=',')
    misc['ccaweights'] = ccaweights
  else:
    ccaweights = None

  for img in dp.iterImages(split='test', max_images=max_images):
    n += 1
    print 'image %d/%d:' % (n, max_images)
    references = [' '.join(x['tokens']) for x in img['sentences']] # as list of reference sentences
    kwparams = {'beam_size': params['beam_size'],
                'normalization': params['normalization'],
                'ccaweights': ccaweights}

    # Added for idf normalization
    if params['normalization'] in ('idf', 'combined'):
      kwparams['idf'] = load_idf()
      kwparams['words'] = ixtoword
    else:
      kwparams['idf'] = None
      kwparams['words'] = None

    # Added for LDA (note: both branches of the original if/else on params['lda']
    # issued the same call, so it is collapsed here)
    Ys = BatchGenerator.predict_test([{'image': img}], model, checkpoint_params, **kwparams)

    img_blob = {} # we will build this up
    img_blob['img_path'] = img['local_file_path']
    img_blob['imgid'] = img['imgid']

    if dump_folder:
      # copy source file to some folder. This makes it easier to distribute results
      # into a webpage, because all images that were predicted on are in a single folder
      source_file = img['local_file_path']
      target_file = os.path.join(dump_folder, os.path.basename(img['local_file_path']))
      os.system('cp %s %s' % (source_file, target_file))

    # encode the human-provided references
    img_blob['references'] = []
    for gtsent in references:
      print 'GT: ' + gtsent
      img_blob['references'].append({'text': gtsent})

    # now evaluate and encode the top prediction
    top_predictions = Ys[0] # take predictions for the first (and only) image we passed in
    top_prediction = top_predictions[0] # these are sorted with highest on top
    candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0]) # ix 0 is the END token, skip that
    print 'PRED: (%f) %s' % (top_prediction[0], candidate)

    # save for later eval
    all_references.append(references)
    all_candidates.append(candidate)

    img_blob['candidate'] = {'text': candidate, 'logprob': top_prediction[0]}
    blob['imgblobs'].append(img_blob)

  # use perl script to eval BLEU score for fair comparison to other research work
  # first write intermediate files
  print 'writing intermediate files into eval/'
  open('eval/output', 'w').write('\n'.join(all_candidates))
  for q in xrange(5):
    open('eval/reference' + str(q), 'w').write('\n'.join([x[q] for x in all_references]))
  # invoke the perl script to get BLEU scores
  print 'invoking eval/multi-bleu.perl script...'
  owd = os.getcwd()
  os.chdir('eval')
  os.system('./multi-bleu.perl reference < output')
  os.chdir(owd)

  # now also evaluate test split perplexity
  gtppl = eval_split('test', dp, model, checkpoint_params, misc, eval_max_images=max_images)
  print 'perplexity of ground truth words based on dictionary of %d words: %f' % (len(ixtoword), gtppl)
  blob['gtppl'] = gtppl

  # dump result struct to file
  print 'saving result struct to %s' % (params['result_struct_filename'], )
  json.dump(blob, open(params['result_struct_filename'], 'w'))
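# --------------------------------------------------------------------------
# The CCA weights loaded above are handed to predict_test() via kwparams; how
# they are applied is internal to the generator and not shown in this file.
# For intuition only, the usual CCA-style use is to project raw image
# features into the shared image-sentence space before decoding. A minimal
# sketch (the function name, variable names and shapes are assumptions):
import numpy as np

def project_image_features(feats, ccaweights):
  # feats: (N, D) CNN image features; ccaweights: (D, K) projection matrix
  # learned by CCA against the sentence view
  return np.dot(feats, ccaweights)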