from capgen import train


def main(params):
    # see documentation in capgen.py for more details on hyperparams
    _, validerr, _ = train(saveto=params["model"],
                           attn_type=params["attn-type"],
                           reload_=params["reload"],
                           dim_word=params["dim-word"],
                           ctx_dim=params["ctx-dim"],
                           dim=params["dim"],
                           n_layers_att=params["n-layers-att"],
                           n_layers_out=params["n-layers-out"],
                           n_layers_lstm=params["n-layers-lstm"],
                           n_layers_init=params["n-layers-init"],
                           n_words=params["n-words"],
                           lstm_encoder=params["lstm-encoder"],
                           decay_c=params["decay-c"],
                           alpha_c=params["alpha-c"],
                           prev2out=params["prev2out"],
                           ctx2out=params["ctx2out"],
                           lrate=params["learning-rate"],
                           optimizer=params["optimizer"],
                           selector=params["selector"],
                           max_epochs=params["max-epochs"],
                           patience=10,
                           maxlen=100,
                           batch_size=64,
                           valid_batch_size=64,
                           validFreq=params["valid-freq"],
                           dispFreq=1,
                           saveFreq=1000,
                           sampleFreq=250,
                           dataset=params["dataset"],
                           use_dropout=params["use-dropout"],
                           use_dropout_lstm=params["use-dropout-lstm"],
                           save_per_epoch=params["save-per-epoch"])
    print "Final cost: {:.2f}".format(validerr.mean())
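# A minimal driver sketch, not part of the original script: the dict keys
# mirror exactly what main() above reads from params; the values are
# illustrative assumptions, borrowed where possible from the sibling
# configurations in this section rather than from the authors' settings.
if __name__ == "__main__":
    defaults = {"model": "caption_model.npz",  # hypothetical save path
                "attn-type": "deterministic",
                "reload": False,
                "dim-word": 512,
                "ctx-dim": 512,
                "dim": 1800,
                "n-layers-att": 2,
                "n-layers-out": 1,
                "n-layers-lstm": 1,
                "n-layers-init": 2,
                "n-words": 10000,
                "lstm-encoder": False,
                "decay-c": 0.,
                "alpha-c": 1.,
                "prev2out": True,
                "ctx2out": True,
                "learning-rate": 0.01,
                "optimizer": "adam",
                "selector": True,
                "max-epochs": 5000,
                "valid-freq": 2000,
                "dataset": "coco",
                "use-dropout": True,
                "use-dropout-lstm": False,
                "save-per-epoch": False}
    main(defaults)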
from capgen import train


def main(params):
    # see documentation in capgen.py for more details on hyperparams
    _, validerr, _ = train(saveto=params["model"],
                           attn_type=params["attn-type"],
                           reload_=params["reload"],
                           dim_word=params["dim-word"],
                           ctx_dim=params["ctx-dim"],
                           dim=params["dim"],
                           n_layers_att=params["n-layers-att"],
                           n_layers_out=params["n-layers-out"],
                           n_layers_lstm=params["n-layers-lstm"],
                           n_layers_init=params["n-layers-init"],
                           n_words=params["n-words"],
                           lstm_encoder=params["lstm-encoder"],
                           decay_c=params["decay-c"],
                           alpha_c=params["alpha-c"],
                           prev2out=params["prev2out"],
                           ctx2out=params["ctx2out"],
                           lrate=params["learning-rate"],
                           optimizer=params["optimizer"],
                           selector=params["selector"],
                           patience=10,
                           maxlen=100,
                           batch_size=64,
                           valid_batch_size=64,
                           validFreq=2000,
                           dispFreq=1,
                           saveFreq=1000,
                           sampleFreq=250,
                           dataset="coco",
                           use_dropout=params["use-dropout"],
                           use_dropout_lstm=params["use-dropout-lstm"],
                           save_per_epoch=params["save-per-epoch"])
    print "Final cost: {:.2f}".format(validerr.mean())
from capgen import train  # assumption: this fork's train() also lives in capgen.py


def main():
    m_score = train(dim_word=512,  # word vector dimensionality
                    ctx_dim=512,  # context vector dimensionality
                    dim=1000,  # the number of LSTM units
                    attn_type="deterministic",  # soft attention
                    n_layers_att=2,  # number of layers used to compute the attention weights
                    n_layers_out=1,  # number of layers used to compute logit
                    n_layers_lstm=1,  # number of lstm layers
                    n_layers_init=1,  # number of layers to initialize LSTM at time 0
                    lstm_encoder=False,  # if True, run bidirectional LSTM on input units
                    prev2out=True,  # feed previous word into logit
                    ctx2out=True,  # feed attention-weighted ctx into logit
                    alpha_entropy_c=0.002,  # hard attention param
                    RL_sumCost=True,  # hard attention param
                    semi_sampling_p=0.5,  # hard attention param
                    temperature=1.,  # hard attention param
                    patience=10,
                    max_epochs=5000,
                    dispFreq=1,
                    decay_c=0.,  # weight decay coeff
                    alpha_c=0.,  # doubly stochastic coeff
                    lrate=0.01,  # used only for SGD
                    selector=True,  # selector (see paper)
                    n_words=7632,  # vocab size
                    maxlen=100,  # maximum length of the description
                    optimizer='adam',
                    batch_size=64,
                    valid_batch_size=64,
                    saveto='model2.npz',  # relative path of saved model file
                    validFreq=1000,
                    saveFreq=100,  # save the parameters after every saveFreq updates
                    sampleFreq=500,  # generate some samples after every sampleFreq updates
                    dataset='flickr8k',
                    dictionary=None,  # word dictionary
                    use_dropout=False,  # setting this true turns on dropout at various points
                    use_dropout_lstm=False,  # dropout on lstm gates
                    reload_=False,
                    save_per_epoch=False,  # this saves down the model every epoch
                    dev_references='./Flickr8k/cap_features/ref/dev',
                    test_references='./Flickr8k/cap_features/ref/test',
                    use_metrics=True,
                    metric='Bleu_4')
    print "Bleu_4: %.4f" % m_score
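# Assumed entry point for the variant above (not shown in the original):
# main() takes no arguments, so a bare call launches training with the
# hard-coded settings.
if __name__ == '__main__':
    main()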
import cPickle as pkl

from monitor import Monitor  # assumption: the original does not show where Monitor is defined


def main(args):
    monitor = Monitor('{}/{}_status.json'.format(args['out_dir'].rstrip('/'),
                                                 args['model']))
    try:
        if args['type'] == 'normal':
            from capgen import train
            _, validerr, _ = train(out_dir=args['out_dir'].rstrip('/'),
                                   data_dir=args['data_dir'].rstrip('/'),
                                   saveto=args['model'],
                                   attn_type='deterministic',
                                   reload_=args['reload'],
                                   dim_word=512,
                                   ctx_dim=512,
                                   dim=1800,
                                   n_layers_att=2,
                                   n_layers_out=1,
                                   n_layers_lstm=1,
                                   n_layers_init=2,
                                   n_words=10000,
                                   lstm_encoder=False,
                                   decay_c=0.,
                                   alpha_c=1.,
                                   prev2out=True,
                                   ctx2out=True,
                                   lrate=0.01,
                                   optimizer='adam',
                                   selector=True,
                                   patience=10,
                                   maxlen=100,
                                   batch_size=64,
                                   valid_batch_size=64,
                                   validFreq=2000,
                                   dispFreq=1,
                                   saveFreq=1000,
                                   sampleFreq=250,
                                   dataset='coco',
                                   use_dropout=True,
                                   use_dropout_lstm=False,
                                   save_per_epoch=False,
                                   monitor=monitor)
            print "Final cost: {:.2f}".format(validerr.mean())
        elif args['type'] == 't_attn':
            from capgen_text import train
            out_dir = args['out_dir'].rstrip('/')
            saveto = args['model']
            _, validerr, _ = train(out_dir=out_dir,
                                   data_dir=args['data_dir'].rstrip('/'),
                                   saveto=saveto,
                                   attn_type='deterministic',
                                   reload_=args['reload'],
                                   dim_word=512,
                                   ctx_dim=512,
                                   tex_dim=args['tex_dim'],
                                   dim=1800,
                                   n_layers_att=2,
                                   n_layers_out=1,
                                   n_layers_lstm=1,
                                   n_layers_init=2,
                                   n_words=10000,
                                   lstm_encoder=False,
                                   lstm_encoder_context=args['lenc'],
                                   decay_c=0.,
                                   alpha_c=1.,
                                   prev2out=True,
                                   ctx2out=True,
                                   tex2out=True,
                                   lrate=0.01,
                                   optimizer='adam',
                                   selector=True,
                                   patience=10,
                                   maxlen=100,
                                   batch_size=32,
                                   valid_batch_size=32,
                                   validFreq=2000,
                                   dispFreq=1,
                                   saveFreq=1000,
                                   sampleFreq=250,
                                   dataset='coco',
                                   use_dropout=True,
                                   use_dropout_lstm=False,
                                   save_per_epoch=False,
                                   monitor=monitor)
            print "Final cost: {:.2f}".format(validerr.mean())

            # Store the data preprocessing settings in the options file:
            # load the saved options, update them, then write them back.
            opts_path = '{}/{}.pkl'.format(out_dir, saveto)
            with open(opts_path, 'rb') as f_opts:
                opts = pkl.load(f_opts)
            opts['preproc_type'] = args['preproc_type']
            preproc_params = {}
            for param in args['preproc_params'].split(','):
                if param:
                    key, value = param.split('=')
                    if value.isdigit():
                        value = int(value)
                    preproc_params[key] = value
            opts['preproc_params'] = preproc_params
            with open(opts_path, 'wb') as f_opts:
                pkl.dump(opts, f_opts)
    except (KeyboardInterrupt, SystemExit):
        print 'Interrupted!'
        monitor.error_message = 'Interrupted!'
        monitor.status = 12
    except Exception, e:
        print 'Unexpected error!'
        monitor.error_message = str(e)
        monitor.status = 12
        raise  # re-raise, keeping the original traceback
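# Illustrative invocation sketch for the variant above: the keys are exactly
# those that main() reads from args, while every value here is a placeholder
# assumption, not a setting taken from the original.
if __name__ == '__main__':
    example_args = {'type': 't_attn',              # 'normal' or 't_attn'
                    'out_dir': './out',            # hypothetical output directory
                    'data_dir': './data',          # hypothetical data directory
                    'model': 'caption_model',      # hypothetical model name
                    'reload': False,
                    'tex_dim': 512,                # read only on the 't_attn' path
                    'lenc': False,                 # read only on the 't_attn' path
                    'preproc_type': 'bow',         # hypothetical preprocessing tag
                    'preproc_params': 'ngrams=2'}  # 'key=value,...' string parsed by main()
    main(example_args)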
import sys
sys.path.insert(0, '../../')

from capgen import train

# training
train(dataset='flickr30k')