import os
import time
import cPickle as pkl
from collections import defaultdict

import numpy
import theano
import theano.tensor as tensor

import homogeneous_data
# Project-local helpers (gen_sample, load_dictionary, norm_weight,
# init_params, init_tparams, load_params, build_model, build_sampler,
# itemlist, unzip, adam) are assumed to be importable from the
# accompanying modules of this repository.


def run_sampler(dec, c, beam_width=1, stochastic=False, use_unk=False):
    """
    Generate text conditioned on c
    """
    if stochastic and beam_width > 1:
        print ("Beam search does not support stochastic sampling. " +
               "Setting beam_width to 1\n")
        beam_width = 1

    sample, score = gen_sample(dec['tparams'], dec['f_init'], dec['f_next'],
                               c.reshape(1, dec['options']['dimctx']),
                               dec['options'], trng=dec['trng'],
                               k=beam_width, maxlen=1000,
                               stochastic=stochastic, use_unk=use_unk)

    # Stochastic sampling returns a single sequence and a scalar score;
    # wrap both so they can be handled like a list of beams below.
    if stochastic:
        sample = [sample]
        score = [score]

    # Map word indices back to strings, skipping any index missing from
    # the inverse dictionary. The final index of each sequence is <eos>.
    text = []
    for seq in sample:
        words = [dec['word_idict'][w] for w in seq[:-1]
                 if w in dec['word_idict']]
        text.append(' '.join(words))

    # Sort beams by their NLL and return the best result
    lengths = numpy.array([len(s.split()) for s in text])
    if lengths[0] == 0:  # in case the model only predicts <eos>
        lengths = lengths[1:]
        score = score[1:]
        text = text[1:]
    sidx = numpy.argmin(score)
    text = text[sidx]
    score = score[sidx]
    return text
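# Usage sketch (illustrative, not from the original code): build_decoder
# and encode_passage are hypothetical helpers. 'dec' is the dictionary
# run_sampler() expects, with keys 'tparams', 'f_init', 'f_next',
# 'options', 'trng' and 'word_idict'; c is a 1D numpy context vector of
# length dec['options']['dimctx'] (4800 for skip-thought vectors).
#
#   dec = build_decoder('adventuremodel.npz')
#   c = encode_passage(stmodel, 'once upon a time')
#   print run_sampler(dec, c, beam_width=5)       # lowest-NLL of 5 beams
#   print run_sampler(dec, c, stochastic=True)    # one random sample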
def trainer(X, C, stmodel,
            dimctx=4800,   # context vector dimensionality
            dim_word=620,  # word vector dimensionality
            dim=1600,      # the number of GRU units
            encoder='gru',
            decoder='gru',
            doutput=False,
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=40000,
            maxlen_w=100,
            optimizer='adam',
            batch_size=16,
            saveto='adventuremodel.npz',
            dictionary='/home/jm7432/tell-tall-tales/decoding/adventure_dict_final.pkl',
            embeddings=None,
            saveFreq=1000,
            sampleFreq=100,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dimctx'] = dimctx
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['doutput'] = doutput
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['embeddings'] = embeddings
    model_options['saveFreq'] = saveFreq
    model_options['sampleFreq'] = sampleFreq
    model_options['reload_'] = reload_
    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Load pre-trained embeddings, if applicable
    if embeddings is not None:
        print 'Loading embeddings...'
        with open(embeddings, 'rb') as f:
            embed_map = pkl.load(f)
        dim_word = len(embed_map.values()[0])
        model_options['dim_word'] = dim_word
        preemb = norm_weight(n_words, dim_word)
        pz = defaultdict(lambda: 0)
        for w in embed_map.keys():
            pz[w] = 1
        for w in worddict.keys()[:n_words-2]:
            if pz[w] > 0:
                preemb[worddict[w]] = embed_map[w]
    else:
        preemb = None

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options, preemb=preemb)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)
    trng, inps, cost = build_model(tparams, model_options)

    print 'Building sampler'
    f_init, f_next = build_sampler(tparams, model_options, trng)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k, t in tparams.iteritems()],
                                    profile=False)
    print 'Done'

    # Clip gradients by their global norm: if the squared global norm
    # exceeds grad_clip**2, rescale every gradient by grad_clip / norm.
    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch has the same length (for the encoder)
    train_iter = homogeneous_data.HomogeneousData([X, C],
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, c in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, ctx = homogeneous_data.prepare_data(x, c, worddict,
                                                         stmodel,
                                                         maxlen=maxlen_w,
                                                         n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, mask, ctx)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',
                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                print 'Done'

            if numpy.mod(uidx, sampleFreq) == 0:
                x_s = x
                mask_s = mask
                ctx_s = ctx
                for jj in xrange(numpy.minimum(10, len(ctx_s))):
                    sample, score = gen_sample(tparams, f_init, f_next,
                                               ctx_s[jj].reshape(1, model_options['dimctx']),
                                               model_options,
                                               trng=trng, k=1, maxlen=100,
                                               stochastic=False, use_unk=False)
                    print 'Truth ', jj, ': ',
                    for vv in x_s[:, jj]:
                        if vv == 0:
                            break
                        if vv in word_idict:
                            print word_idict[vv],
                        else:
                            print 'UNK',
                    print
                    for kk, ss in enumerate([sample[0]]):
                        print 'Sample (', kk, ') ', jj, ': ',
                        for vv in ss:
                            if vv == 0:
                                break
                            if vv in word_idict:
                                print word_idict[vv],
                            else:
                                print 'UNK',
                        print

        print 'Seen %d samples' % n_samples
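# Usage sketch (illustrative, not from the original code): X is a list of
# target sentences, C a parallel list of conditioning passages, and stmodel
# a loaded skip-thoughts encoder that prepare_data uses to compute the
# 4800-dimensional context vectors. load_corpus is a hypothetical loader;
# the skipthoughts import assumes the skip-thoughts package is available.
#
#   import skipthoughts
#   stmodel = skipthoughts.load_model()
#   X, C = load_corpus('adventure_corpus.txt')
#   trainer(X, C, stmodel, max_epochs=5, batch_size=16,
#           saveto='adventuremodel.npz')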
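# The gradient-clipping rule used in trainer(), restated in plain numpy
# as a self-contained sketch (not called by the trainer): when the squared
# global norm of the gradients exceeds grad_clip**2, every gradient is
# rescaled by grad_clip / global_norm, so the clipped set has global norm
# exactly grad_clip; otherwise the gradients pass through unchanged.
def clip_by_global_norm(grads, grad_clip):
    g2 = sum((g**2).sum() for g in grads)  # squared global norm
    if g2 > grad_clip**2:
        scale = grad_clip / numpy.sqrt(g2)
        return [g * scale for g in grads]
    return grads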