def main(argv=None):
    args = commandLineParser.parse_args()
    if os.path.isdir('CMDs'):
        with open('CMDs/step_test_rnnlm.txt', 'a') as f:
            f.write(' '.join(sys.argv) + '\n')
    else:
        os.mkdir('CMDs')
        with open('CMDs/step_test_rnnlm.txt', 'a') as f:
            f.write(' '.join(sys.argv) + '\n')

    valid_data = process_data_lm("valid.dat",
                                 path="data",
                                 spId=False,
                                 input_index='input.wlist.index',
                                 output_index='input.wlist.index',
                                 bptt=None)

    network_architecture = parse_params('./config')
    rnnlm = RNNLM(network_architecture=network_architecture,
                  seed=args.seed,
                  name=args.name,
                  dir='./',
                  load_path=args.load_path,
                  debug_mode=args.debug)

    print 'Training Completed. Now predicting on validation data'
    rnnlm.predict(valid_data)
def rnnlm_generate_sequence():
    from rnnlm import RNNLM
    np.random.seed(10)
    L = np.random.randn(20, 10)
    model = RNNLM(L0=L)
    model.H = np.random.randn(20, 20)
    s, J = model.generate_sequence(0, 1, maxlen=15)
    print "dummy J: %g" % J
    print "dummy seq: len(s) = %d" % len(s)
    assert len(s) <= 15 + 1
    assert s[0] == 0
    assert J > 0
def train(lr):
    with open(vocab_freq_file, 'r') as f:
        vocab_freq = pickle.load(f)
    vocab_p = Q_w(vocab_freq, alpha)
    J, q = alias_setup(vocab_p)

    # Load data
    print 'loading dataset...'
    train_data = TextIterator(train_datafile, n_batch=n_batch, maxlen=maxlen)
    valid_data = TextIterator(valid_datafile, n_batch=n_batch, maxlen=maxlen)
    test_data = TextIterator(test_datafile, n_batch=n_batch, maxlen=maxlen)

    print 'building model...'
    model = RNNLM(n_input, n_hidden, vocabulary_size,
                  cell=rnn_cell, optimizer=optimizer, p=p, q_w=vocab_p, k=k)
    if os.path.isfile(model_dir):
        print 'loading checkpoint parameters....', model_dir
        model = load_model(model_dir, model)
    if goto_line > 0:
        train_data.goto_line(goto_line)
        print 'goto line:', goto_line

    print 'training start...'
    start = time.time()
    idx = 0
    for epoch in xrange(NEPOCH):
        error = 0
        for x, x_mask, y, y_mask in train_data:
            idx += 1
            negy = negative_sample(y, y_mask, k, J, q)
            cost = model.train(x, x_mask, y, negy, y_mask, lr)
            # print cost
            error += cost
            if np.isnan(cost) or np.isinf(cost):
                print 'NaN Or Inf detected!'
                return -1
            if idx % disp_freq == 0:
                logger.info('epoch: %d idx: %d cost: %f ppl: %f'
                            % (epoch, idx, error / disp_freq,
                               np.exp(error / (1.0 * disp_freq))))
                error = 0
            if idx % save_freq == 0:
                logger.info('dumping...')
                save_model('./model/parameters_%.2f.pkl' % (time.time() - start), model)
            if idx % valid_freq == 0:
                logger.info('validing...')
                valid_cost, wer = evaluate(valid_data, model)
                logger.info('validation cost: %f perplexity: %f, word_error_rate: %f'
                            % (valid_cost, np.exp(valid_cost), wer))
            if idx % test_freq == 0:
                logger.info('testing...')
                test_cost, wer = evaluate(test_data, model)
                logger.info('test cost: %f perplexity: %f, word_error_rate: %f'
                            % (test_cost, np.exp(test_cost), wer))
    print "Finished. Time = " + str(time.time() - start)
def test():
    valid_data = TextIterator(valid_datafile,
                              filepath,
                              n_batch=n_batch,
                              brown_or_huffman=brown_or_huffman,
                              mode=matrix_or_vector,
                              word2idx_path=word2idx_path)
    test_data = TextIterator(test_datafile,
                             filepath,
                             n_batch=n_batch,
                             brown_or_huffman=brown_or_huffman,
                             mode=matrix_or_vector,
                             word2idx_path=word2idx_path)
    model = RNNLM(n_input, n_hidden, vocabulary_size, cell, optimizer, p,
                  mode=matrix_or_vector)
    if os.path.isfile(args.model_dir):
        print 'loading pretrained model:', args.model_dir
        model = load_model(args.model_dir, model)
    else:
        print args.model_dir, 'not found'
    mean_cost = evaluate(valid_data, model)
    print 'valid cost:', mean_cost, 'perplexity:', np.exp(mean_cost)  # , "word_error_rate:", mean_wer
    mean_cost = evaluate(test_data, model)
    print 'test cost:', mean_cost, 'perplexity:', np.exp(mean_cost)
def rnnlm_load():
    from rnnlm import RNNLM
    L = np.load('rnnlm.L.npy')
    print " loaded L: %s" % str(L.shape)
    H = np.load('rnnlm.H.npy')
    print " loaded H: %s" % str(H.shape)
    U = np.load('rnnlm.U.npy')
    print " loaded U: %s" % str(U.shape)
    assert L.shape[0] == U.shape[0]
    assert L.shape[1] == H.shape[1]
    assert H.shape[0] == U.shape[1]
    model = RNNLM(L0=L, U0=U)
    model.params.H[:] = H
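# A minimal sketch (not in the original sources) of exercising the weights loaded
# above: it assumes the same RNNLM API used in rnnlm_generate_sequence() and the
# .npy files written by the training snippet later in this collection. The function
# name rnnlm_load_and_sample is hypothetical.
def rnnlm_load_and_sample():
    from rnnlm import RNNLM
    L = np.load('rnnlm.L.npy')
    U = np.load('rnnlm.U.npy')
    H = np.load('rnnlm.H.npy')
    model = RNNLM(L0=L, U0=U)
    model.params.H[:] = H
    # generate_sequence(start_token, end_token, maxlen), as in the dummy test above
    s, J = model.generate_sequence(0, 1, maxlen=15)
    print "sampled %d tokens, total cost %g" % (len(s), J)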
def create_model(sess, save_folder, FLAGS, embed_fn):
    # load vocab & embeddings
    with open(save_folder + "vocab.pkl", "rb") as handle:
        vocab = pickle.load(handle)
    with open(save_folder + "tsf_vocab_inv.pkl", "rb") as handle:
        tsf_vocab_inv = pickle.load(handle)
    with open(save_folder + "init_embed.pkl", "rb") as handle:
        init_embed = pickle.load(handle)
    with open(save_folder + "tsf_init_embed.pkl", "rb") as handle:
        tsf_init_embed = pickle.load(handle)
    vocab_size = len(vocab)
    tsf_vocab_size = len(tsf_vocab_inv)
    print("Vocab size: {}, transfer vocab size: {}".format(vocab_size, tsf_vocab_size))

    # generator
    config_list = [(k, FLAGS[k].value) for k in FLAGS]
    generator_config = OrderedDict(
        sorted(config_list) + [("encoder_vocab_size", vocab_size),
                               ("decoder_vocab_size", tsf_vocab_size)])
    # print("Generator config: {}, cell_type: {}".format(generator_config, "gru"))
    generator = Generator(generator_config, init_embed, tsf_init_embed)

    # language model
    lm_config_list = [(k, FLAGS[k].value) for k in FLAGS if k.startswith("lm_")] \
        + [("batch_size", FLAGS.batch_size)]
    lm_config = OrderedDict(sorted(lm_config_list) + [("lm_vocab_size", vocab_size)])
    rnnlm = RNNLM(lm_config, init_embed)

    # style discriminator
    # (embedding_size, init_embed, hidden_size, attention_size, max_sent_len, keep_prob)
    style_discriminator = StyleDiscriminator(FLAGS.style_num_classes, FLAGS.embedding_dim,
                                             init_embed, FLAGS.style_hidden_size,
                                             FLAGS.style_attention_size, FLAGS.max_sent_len,
                                             FLAGS.style_keep_prob)

    # siamese discriminator
    siamese_discrim = SiameseDiscriminator(FLAGS.embedding_dim,
                                           init_embed, FLAGS.style_hidden_size,
                                           FLAGS.style_attention_size, FLAGS.max_sent_len,
                                           FLAGS.style_keep_prob)

    # semantic discriminator
    semantic_discriminator = SemanticDiscriminator(embed_fn)

    # rollout
    rollout = ROLLOUT(vocab, tsf_vocab_inv)

    return generator, rnnlm, style_discriminator, siamese_discrim, semantic_discriminator, rollout, vocab, tsf_vocab_inv
def train_rnnlm(train, vocab, hidden_size, epoch_num, batch_size):
    # Initialize the model
    train_data, vocab = load_data("./RNNLM_Chainer/ptb.test.txt")
    eos_id = vocab['<eos>']
    model = RNNLM(len(vocab), hidden_size)
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    # TODO: support minibatch training
    # Convert train_data into a list of sentences
    sents = []
    sent = []
    for word_id in train_data:
        sent.append(word_id)
        if word_id == eos_id:
            sents.append(sent)
            sent = []

    # Train and save the model
    for epoch_i in range(epoch_num):
        loss_sum = 0.0
        random.shuffle(sents)
        for i, s in enumerate(sents):
            loss = model(s, train=True)
            loss_sum += loss
            model.zerograds()
            loss.backward()
            optimizer.update()
            if i % 100 == 0:
                print i, "/", len(sents), " finished"
        print "epoch " + str(epoch_i) + " finished"
        print "average loss is " + str(loss_sum / len(sents))
        outfile = "rnnlm-" + str(epoch_i) + ".model"
        serializers.save_npz(outfile, model)
        loss_sum = 0.0
def setup_and_sample(args):
    if os.path.isdir(args.init_from):
        assert os.path.exists(args.init_from), "{} is not a directory".format(args.init_from)
        parent_dir = args.init_from
    else:
        assert os.path.exists("{}.index".format(args.init_from)), \
            "{} is not a checkpoint".format(args.init_from)
        parent_dir = os.path.dirname(args.init_from)

    config_file = os.path.join(parent_dir, "config.pkl")
    vocab_file = os.path.join(parent_dir, "vocab.pkl")
    assert os.path.isfile(config_file), \
        "config.pkl does not exist in directory {}".format(parent_dir)
    assert os.path.isfile(vocab_file), \
        "vocab.pkl does not exist in directory {}".format(parent_dir)

    with open(config_file, 'rb') as f:
        saved_args = pickle.load(f)
    with open(vocab_file, 'rb') as f:
        saved_vocab = pickle.load(f)

    if os.path.isdir(args.init_from):
        checkpoint = tf.train.latest_checkpoint(parent_dir)
        assert checkpoint, "no checkpoint in directory {}".format(args.init_from)
    else:
        checkpoint = args.init_from

    saved_args.batch_size = 1
    saved_args.seq_length = 1
    model = RNNLM(saved_args)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        try:
            saver.restore(sess, checkpoint)
        except ValueError:
            print("{} is not a valid checkpoint".format(checkpoint))
        ret = sample(model, sess, saved_vocab, args.length, args.temperature, args.prime)
    return ret
def test():
    with open(vocab_freq_file, 'r') as f:
        vocab_freq = pickle.load(f)
    vocab_p = Q_w(vocab_freq, alpha)
    J, q = alias_setup(vocab_p)

    valid_data = TextIterator(valid_datafile, n_batch=n_batch, maxlen=maxlen)
    test_data = TextIterator(test_datafile, n_batch=n_batch, maxlen=maxlen)
    model = RNNLM(n_input, n_hidden, vocabulary_size,
                  cell=rnn_cell, optimizer=optimizer, p=p, q_w=vocab_p, k=k)
    if os.path.isfile(args.model_dir):
        print 'loading pretrained model:', args.model_dir
        model = load_model(args.model_dir, model)
    else:
        print args.model_dir, 'not found'

    valid_cost, wer = evaluate(valid_data, model, 'wer')
    logger.info('validation cost: %f perplexity: %f, word_error_rate: %f'
                % (valid_cost, np.exp(valid_cost), wer))
    test_cost, wer = evaluate(test_data, model, 'wer')
    logger.info('test cost: %f perplexity: %f, word_error_rate: %f'
                % (test_cost, np.exp(test_cost), wer))
def __init__(self, scramble_name='two_dig_scramble', bptt=1):
    # for now, sort of cheat and assume fixed-size inputs and outputs
    self.x_len = 3
    self.y_len = 4
    self.alpha = .1
    self.n_epochs = 40
    self.hdim = 50
    self.vocab = list('0123456789+ ')  # list of all possible characters we might see
    self.vdim = len(self.vocab)
    self.vocabmap = {char: i for i, char in enumerate(self.vocab)}  # map char to idx number
    # one RNNLM per output digit
    self.rnns = [
        RNNLM(np.zeros((self.vdim, self.hdim)), bptt=bptt)
        for _ in range(self.y_len)
    ]
    self.scramble = getattr(self, scramble_name)
def main(argv=None):
    args = commandLineParser.parse_args()
    train_data = process_data_lm('train.dat',
                                 'data',
                                 spId=False,
                                 input_index='input.wlist.index',
                                 output_index='input.wlist.index',
                                 bptt=20)
    # train_data = process_data_lm('train.txt', 'data', spId=True, input_index='input.wlist.index')
    valid_data = process_data_lm("valid.dat",
                                 path="data",
                                 spId=False,
                                 input_index='input.wlist.index',
                                 output_index='input.wlist.index',
                                 bptt=None)

    network_architecture = parse_params('./config')
    rnnlm = RNNLM(network_architecture=network_architecture,
                  seed=args.seed,
                  name=args.name,
                  dir='./',
                  load_path=args.load_path,
                  debug_mode=args.debug)
    rnnlm.fit(valid_data,
              train_data,
              learning_rate=1e-2,
              lr_decay=0.94,
              batch_size=64,
              dropout=args.dropout,
              optimizer=tf.train.AdamOptimizer,
              n_epochs=10)
    rnnlm.save()
    sys.exit()
import sys, os
from numpy import *
from matplotlib.pyplot import *
%matplotlib inline
matplotlib.rcParams['savefig.dpi'] = 100
%load_ext autoreload
%autoreload 2

from rnnlm import RNNLM

# Gradient check on toy data, for speed
random.seed(10)
wv_dummy = random.randn(10, 50)
model = RNNLM(L0=wv_dummy, U0=wv_dummy, alpha=0.005, rseed=10, bptt=4)
model.grad_check(array([1, 2, 3]), array([2, 3, 4]))

from data_utils import utils as du
import pandas as pd

# Load the vocabulary
# vocab = pd.read_table("data/lm/vocab.ptb.txt", header=None, sep="\s+",
#                       index_col=0, names=['count', 'freq'], )
vocab2 = pd.read_table("worddic.txt", header=None, sep="\s+", index_col=0)

# Choose how many top words to keep
# vocabsize = 2000
vocabsize2 = 58868  # remove for implementation
# num_to_word = dict(enumerate(vocab.index[:vocabsize]))
class TestRNNLM(unittest.TestCase):
    def setUp(self):
        text = 'You said good-bye and I said hello.'
        cbm = CountBasedMethod()
        word_list = cbm.text_to_word_list(text)
        word_to_id, *_ = cbm.preprocess(word_list)
        vocab_size = len(word_to_id)
        wordvec_size = 100
        hidden_size = 100
        self.rnnlm = RNNLM(vocab_size, wordvec_size, hidden_size)
        self.xs = np.array([
            [0, 4, 4, 1],
            [4, 0, 2, 1]
        ])
        self.ts = np.array([
            [0, 1, 0, 0],
            [0, 0, 0, 1]
        ])

    def test_predict(self):
        score = self.rnnlm._predict(self.xs)
        self.assertEqual((2, 4, 7), score.shape)

    def test_forward(self):
        loss = self.rnnlm.forward(self.xs, self.ts)
        self.assertEqual(1.94, round(loss, 2))

    def test_backward(self):
        self.rnnlm.forward(self.xs, self.ts)
        dout = self.rnnlm.backward()
        self.assertEqual(None, dout)

    def test_reset_state(self):
        self.rnnlm.forward(self.xs, self.ts)
        self.rnnlm.backward()
        self.assertEqual((2, 100), self.rnnlm.lstm_layer.h.shape)
        self.rnnlm.reset_state()
        self.assertEqual(None, self.rnnlm.lstm_layer.h)

    def test_save_params(self):
        self.rnnlm.forward(self.xs, self.ts)
        self.rnnlm.backward()
        self.rnnlm.save_params()
        self.assertEqual(True, path.exists('../pkl/rnnlm.pkl'))

    def test_load_params(self):
        self.rnnlm.load_params()
        a, b, c, d, e, f = self.rnnlm.params
        self.assertEqual((7, 100), a.shape)
        self.assertEqual((100, 400), b.shape)
        self.assertEqual((100, 400), c.shape)
        self.assertEqual((400,), d.shape)
        self.assertEqual((100, 7), e.shape)
        self.assertEqual((7,), f.shape)
def train(train_path,
          validation_path,
          dictionary_path,
          model_path,
          reload_state=False,
          dim_word=100,  # word vector dimensionality
          dim=1000,  # the number of LSTM units
          encoder='lstm',
          patience=10,
          max_epochs=5000,
          dispFreq=100,
          decay_c=0.,
          alpha_c=0.,
          diag_c=0.,
          lrate=0.01,
          n_words=100000,
          maxlen=100,  # maximum length of the description
          optimizer='rmsprop',
          batch_size=16,
          valid_batch_size=16,
          validFreq=1000,
          saveFreq=1000,  # save the parameters after every saveFreq updates
          sampleFreq=100,  # generate some text samples after every sampleFreq updates
          profile=False):

    # Model options
    model_options = locals().copy()

    worddicts = dict()
    worddicts_r = dict()
    with open(dictionary_path, 'rb') as f:
        for (i, line) in enumerate(f):
            word = line.strip()
            code = i + 2
            worddicts_r[code] = word
            worddicts[word] = code

    # reload options
    if reload_state and os.path.exists(model_path):
        with open('%s.pkl' % model_path, 'rb') as f:
            model_options = pkl.load(f)

    print '### Loading data.'
    train = TextIterator(train_path,
                         worddicts,
                         n_words_source=n_words,
                         batch_size=batch_size,
                         maxlen=maxlen)
    valid = TextIterator(validation_path,
                         worddicts,
                         n_words_source=n_words,
                         batch_size=valid_batch_size,
                         maxlen=maxlen)

    print '### Building neural network.'
    rnnlm = RNNLM(model_options)
    trainer = ModelTrainer(rnnlm, optimizer, model_options)
    sampler = TextSampler(rnnlm, model_options)

    print '### Training neural network.'
    best_params = None
    bad_counter = 0
    if validFreq == -1:
        validFreq = len(train[0]) / batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size
    if sampleFreq == -1:
        sampleFreq = len(train[0]) / batch_size

    uidx = 0
    estop = False
    for eidx in xrange(max_epochs):
        n_samples = 0
        for x in train:
            n_samples += len(x)
            uidx += 1

            x, x_mask = prepare_data(x, maxlen=maxlen, n_words=n_words)
            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                uidx -= 1
                continue

            ud_start = time.time()
            cost = trainer.f_grad_shared(x, x_mask)
            trainer.f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                # Save the best parameters, or the current state if best_params
                # is None.
                rnnlm.save_params(best_params)
                # Save the training options.
                pkl.dump(model_options, open('%s.pkl' % model_path, 'wb'))

            if numpy.mod(uidx, sampleFreq) == 0:
                # FIXME: random selection?
                for jj in xrange(5):
                    sample, score = sampler.generate()
                    print 'Sample ', jj, ': ',
                    ss = sample
                    for vv in ss:
                        if vv == 0:
                            break
                        if vv in worddicts_r:
                            print worddicts_r[vv],
                        else:
                            print 'UNK',
                    print

            if numpy.mod(uidx, validFreq) == 0:
                valid_errs = pred_probs(f_log_probs, prepare_data,
                                        model_options, valid)
                valid_err = valid_errs.mean()
                rnnlm.error_history.append(valid_err)

                if uidx == 0 or valid_err <= numpy.array(rnnlm.error_history).min():
                    best_params = rnnlm.get_param_values()
                    bad_counter = 0
                if len(rnnlm.error_history) > patience and \
                        valid_err >= numpy.array(rnnlm.error_history)[:-patience].min():
                    bad_counter += 1
                    if bad_counter > patience:
                        print 'Early Stop!'
                        estop = True
                        break

                if numpy.isnan(valid_err):
                    import ipdb
                    ipdb.set_trace()

                print 'Valid ', valid_err

        print 'Seen %d samples' % n_samples
        if estop:
            break

    if best_params is not None:
        rnnlm.set_param_values(best_params)

    valid_err = pred_probs(f_log_probs, prepare_data, model_options, valid).mean()
    print 'Valid ', valid_err

    params = copy.copy(best_params)
    numpy.savez(model_path,
                zipped_params=best_params,
                error_history=rnnlm.error_history,
                **params)

    return valid_err
def train(config, sw):
    # Initialize the device which to run the model on
    device = torch.device(config.device)

    vocab = torchtext.vocab.FastText()
    # vocab = torchtext.vocab.GloVe()

    # get data iterators
    lm_iters, s_iters = load_data(embeddings=vocab,
                                  device=device,
                                  batch_size=config.batch_size,
                                  bptt_len=config.seq_len)
    _, valid_iter, test_iter, field = s_iters
    vocab = field.vocab
    if config.use_bptt:
        train_iter, _, _, _ = lm_iters
    else:
        train_iter, _, _, _ = s_iters
    print("Vocab size: {}".format(vocab.vectors.shape))

    # create embedding layer
    embedding = nn.Embedding.from_pretrained(vocab.vectors).to(device)
    EMBED_DIM = 300
    num_classes = vocab.vectors.shape[0]

    # Initialize the model that we are going to use
    if config.model == "rnnlm":
        model = RNNLM(EMBED_DIM, config.hidden_dim, num_classes)
    elif config.model == "s-vae":
        model = SentenceVAE(EMBED_DIM,
                            config.hidden_dim,
                            num_classes,
                            fb_lambda=config.freebits_lambda,
                            wd_keep_prob=config.wdropout_prob,
                            wd_unk=embedding(torch.LongTensor([vocab.stoi["<unk>"]]).to(device)),
                            mu_f_beta=config.mu_forcing_beta)
    else:
        raise ValueError("Invalid model parameter.")
    model = model.to(device)

    # Setup the loss, optimizer, lr-scheduler
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    criterion = torch.nn.NLLLoss(reduction="sum").to(config.device)
    scheduler = optim.lr_scheduler.StepLR(optimizer, 1, gamma=config.learning_rate_decay)
    lr = config.learning_rate

    global_step = 0
    best_nll = sys.maxsize
    best_pp = sys.maxsize
    best_kl = None
    for epoch in itertools.count():
        for batch in train_iter:
            # [1] Get data
            if config.use_bptt:
                batch_text = batch.text
                batch_target = batch.target
                txt_len = torch.full((batch_text.shape[1],),
                                     batch_text.shape[0],
                                     device=device)
                tgt_len = txt_len
            else:
                batch_text, txt_len = batch.text
                batch_target, tgt_len = batch.target
            batch_text = embedding(batch_text.to(device))
            batch_target = batch_target.to(device)

            # [2] Forward & Loss
            batch_output = model(batch_text, txt_len)

            # merge batch and sequence dimension for evaluation
            batch_output = batch_output.view(-1, batch_output.shape[2])
            batch_target = batch_target.view(-1)

            B = batch_text.shape[1]
            nll = criterion(batch_output, batch_target) / B
            sw.add_scalar('Train/NLL', nll.item(), global_step)

            loss = nll.clone()
            for loss_name, additional_loss in model.get_additional_losses().items():
                loss += additional_loss
                sw.add_scalar('Train/' + loss_name, additional_loss, global_step)

            # [3] Optimize
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
            optimizer.step()
            sw.add_scalar('Train/Loss', loss.item(), global_step)

            if global_step % config.print_every == 0:
                print("[{}] Train Step {:04d}/{:04d}, "
                      "NLL = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"),
                          global_step, config.train_steps,
                          nll.item(), loss.item()),
                      flush=True)
            global_step += 1

        epoch_nll, epoch_pp, epoch_kl, additional_losses = test_model(
            model, embedding, criterion, valid_iter, device)
        model.train()
        print("Valid NLL: {}".format(epoch_nll))
        print("Valid Perplexity: {}".format(epoch_pp))
        print("Valid KL: {}".format(epoch_kl))
        sw.add_scalar('Valid/NLL', epoch_nll, global_step)
        sw.add_scalar('Valid/Perplexity', epoch_pp, global_step)
        sw.add_scalar('Valid/KL', epoch_kl, global_step)
        # the additional_loss below will also have kl but not multisample
        for loss_name, additional_loss in additional_losses.items():
            sw.add_scalar('Valid/' + loss_name, additional_loss, global_step)

        # sample some sentences
        MAX_LEN = 50
        for _ in range(5):
            text = model.temperature_sample(embedding, MAX_LEN)
            text = ' '.join(vocab.itos[w] for w in text)
            print(text)
            sw.add_text('Valid/Sample-text', text, global_step)

        if epoch_nll < best_nll:
            best_nll = epoch_nll
            save_model("best", model, config)
        if epoch_pp < best_pp:
            best_pp = epoch_pp

        if global_step >= config.train_steps:
            break

        scheduler.step()
        print("Learning Rate: {}".format(
            [group['lr'] for group in optimizer.param_groups]))

    print('Done training.')
    best_model = load_model("best", config)
    test_nll, test_pp, test_kl, test_additional_losses = test_model(
        best_model, embedding, criterion, test_iter, device)
    print("Test NLL: {}".format(test_nll))
    print("Test PP: {}".format(test_pp))
    print("Test KL: {}".format(test_kl))
    print("{}".format(test_additional_losses))
    return best_model, model, {'hparam/nll': best_nll, 'hparam/pp': best_pp}
def train(lr):
    # Load data
    logger.info('loading dataset...')
    train_data = TextIterator(train_datafile,
                              filepath,
                              n_batch=n_batch,
                              brown_or_huffman=brown_or_huffman,
                              mode=matrix_or_vector,
                              word2idx_path=word2idx_path)
    valid_data = TextIterator(valid_datafile,
                              filepath,
                              n_batch=n_batch,
                              brown_or_huffman=brown_or_huffman,
                              mode=matrix_or_vector,
                              word2idx_path=word2idx_path)
    test_data = TextIterator(test_datafile,
                             filepath,
                             n_batch=n_batch,
                             brown_or_huffman=brown_or_huffman,
                             mode=matrix_or_vector,
                             word2idx_path=word2idx_path)

    logger.info('building model...')
    model = RNNLM(n_input, n_hidden, vocabulary_size, cell, optimizer,
                  p=p, mode=matrix_or_vector)
    if os.path.exists(model_dir) and reload_dumps == 1:
        logger.info('loading parameters from: %s' % model_dir)
        model = load_model(model_dir, model)
    else:
        logger.info("init parameters....")

    logger.info('training start...')
    start = time.time()
    idx = 0
    for epoch in xrange(NEPOCH):
        error = 0
        for x, x_mask, (y_node, y_choice, y_bit_mask), y_mask in train_data:
            idx += 1
            cost = model.train(x, x_mask, y_node, y_choice, y_bit_mask, y_mask, lr)
            error += cost
            if np.isnan(cost) or np.isinf(cost):
                print 'NaN Or Inf detected!'
                return -1
            if idx % disp_freq == 0:
                logger.info('epoch: %d idx: %d cost: %f ppl: %f'
                            % (epoch, idx, error / disp_freq,
                               np.exp(error / (1.0 * disp_freq))))  # ,'lr:',lr
                error = 0
            if idx % save_freq == 0:
                logger.info('dumping...')
                save_model('./model/parameters_%.2f.pkl' % (time.time() - start), model)
            if idx % valid_freq == 0:
                logger.info('validing....')
                valid_cost = evaluate(valid_data, model)
                logger.info('valid_cost: %f perplexity: %f'
                            % (valid_cost, np.exp(valid_cost)))
            if idx % test_freq == 0:
                logger.info('testing...')
                test_cost = evaluate(test_data, model)
                logger.info('test cost: %f perplexity: %f'
                            % (test_cost, np.exp(test_cost)))
            # if idx % clip_freq == 0 and lr >= 0.01:
            #     print 'cliping learning rate:',
            #     lr = lr * 0.9
            #     print lr
        sys.stdout.flush()
    print "Finished. Time = " + str(time.time() - start)
wordvec_size = 100
hidden_size = 100
time_size = 35
learning_rate = 20.0
max_epoch = 4
max_grad = 0.25

# Load training data
corpus, word_to_id, id_to_word = load_data('train')
corpus_test, *_ = load_data('test')
vocab_size = len(word_to_id)
xs = corpus[:-1]
ts = corpus[1:]

# Generate a model, optimiser and trainer
model = RNNLM(vocab_size, wordvec_size, hidden_size)
optimiser = SGD(learning_rate)
trainer = RNNLMTrainer(model, optimiser)

# 1. Train applying gradient clipping
training_process = trainer.fit(xs, ts, max_epoch, batch_size, time_size,
                               max_grad, eval_interval=20)
for iter in training_process:
    print(iter)

file_path = '../img/train_rnnlm.png'
trainer.save_plot_image(file_path, ylim=(0, 500))
method = "RNNPTONE" hdim = 40 # dimension of hidden layer = dimension of word vectors #random.seed(10) nepoch = 1 N = nepoch * len(Y_train) k = 5 # minibatch size fraction_lost = 0.07923163705 #idx=[] #print X_train.size #for i in range(N/k): # idx.append(random.choice(len(Y_train),k)) if method == "RNNLM": L0 = zeros((vocabsize, hdim)) # replace with random init, # or do in RNNLM.__init__() model = RNNLM(L0, U0 = L0, alpha=0.1, bptt=3) idx = epochiter(len(Y_train), nepoch) model.train_sgd(X = X_train, y = Y_train, idxiter = idx, printevery = 500, costevery = 500) dev_loss = model.compute_mean_loss(X_dev, Y_dev) if not os.path.exists("model/" + method): os.makedirs("model/" + method) print "Unadjusted: %.03f" % exp(dev_loss) print "Adjusted for missing vocab: %.03f" % exp(adjust_loss(dev_loss, fraction_lost)) save("model/" + method + "/rnnlm.L.npy", model.sparams.L) save("model/" + method + "/rnnlm.U.npy", model.params.U) save("model/" + method + "/rnnlm.H.npy", model.params.H) print "RNNLM"
pass

parser = argparse.ArgumentParser(description="A program for testing RNNLM.")
parser.add_argument("sentence", action="store", type=str,
                    help="a sentence that you want to test.")
parser.add_argument("model_file_path", action="store", type=str,
                    help="a model file path that you want to test.")
parser.add_argument("vocab_file_path", action="store", type=str,
                    help="a vocab file used to train the model.")
parser.add_argument("hidden_size", action="store", type=int,
                    help="a hidden size of RNN.")
args = parser.parse_args()

# Load the vocab file (word -> id) pickled at training time
with open(args.vocab_file_path, "rb") as f:
    vocab = pickle.load(f)

# Load the model
model = RNNLM(len(vocab), args.hidden_size)
serializers.load_npz(args.model_file_path, model)

test_rnnlm(args.sentence, model, vocab, args.hidden_size)
def rnnlm_init():
    from rnnlm import RNNLM
    np.random.seed(10)
    L = np.random.randn(50, 10)
    model = RNNLM(L0=L)
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    tf.random.set_seed(SEED)
    assert START_TOKEN == 0

    physical_devices = tf.config.experimental.list_physical_devices("GPU")
    if len(physical_devices) > 0:
        for dev in physical_devices:
            tf.config.experimental.set_memory_growth(dev, True)

    generator = Generator(VOCAB_SIZE, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    target_lstm = RNNLM(VOCAB_SIZE, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                        SEQ_LENGTH, START_TOKEN)
    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=VOCAB_SIZE,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  dropout_keep_prob=dis_dropout_keep_prob,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    gen_dataset = dataset_for_generator(positive_file, BATCH_SIZE)
    log = open('save/experiment-log.txt', 'w')

    # pre-train generator
    if not os.path.exists("save/generator_pretrained.h5"):
        print('Start pre-training...')
        log.write('pre-training...\n')
        generator.pretrain(gen_dataset, target_lstm, PRE_EPOCH_NUM,
                           generated_num // BATCH_SIZE, eval_file)
        generator.save("save/generator_pretrained.h5")
    else:
        generator.load("save/generator_pretrained.h5")

    if not os.path.exists("discriminator_pretrained.h5"):
        print('Start pre-training discriminator...')
        # Train 3 epoch on the generated data and do this for 50 times
        for _ in range(50):
            print("Dataset", _)
            generator.generate_samples(generated_num // BATCH_SIZE, negative_file)
            dis_dataset = dataset_for_discriminator(positive_file, negative_file, BATCH_SIZE)
            discriminator.train(dis_dataset, 3, (generated_num // BATCH_SIZE) * 2)
        discriminator.save("save/discriminator_pretrained.h5")
    else:
        discriminator.load("save/discriminator_pretrained.h5")

    rollout = ROLLOUT(generator, 0.8)

    print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        print("Generator", total_batch, 'of ', TOTAL_BATCH)
        # Train the generator for one step
        for it in range(1):
            samples = generator.generate_one_batch()
            rewards = rollout.get_reward(samples, 16, discriminator)
            generator.train_step(samples, rewards)

        # Test
        if total_batch % 10 == 0 or total_batch == TOTAL_BATCH - 1:
            generator.generate_samples(generated_num // BATCH_SIZE, eval_file)
            likelihood_dataset = dataset_for_generator(eval_file, BATCH_SIZE)
            test_loss = target_lstm.target_loss(likelihood_dataset)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(test_loss) + '\n'
            print('total_batch: ', total_batch, 'of: ', TOTAL_BATCH, 'test_loss: ', test_loss)
            generator.save(f"save/generator_{total_batch}.h5")
            discriminator.save(f"save/discriminator_{total_batch}.h5")
            log.write(buffer)

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator
        print("Discriminator", total_batch, 'of ', TOTAL_BATCH)
        # There will be 5 x 3 = 15 epochs in this loop
        for _ in range(5):
            generator.generate_samples(generated_num // BATCH_SIZE, negative_file)
            dis_dataset = dataset_for_discriminator(positive_file, negative_file, BATCH_SIZE)
            discriminator.train(dis_dataset, 3, (generated_num // BATCH_SIZE) * 2)

    generator.save(f"save/generator_{TOTAL_BATCH}.h5")
    discriminator.save(f"save/discriminator_{TOTAL_BATCH}.h5")
    log.close()
class NaiveRnnlmDiscr:
    def __init__(self, scramble_name='noscramble', bptt=1):
        self.alpha = .1
        self.n_epochs = 100
        self.hdim = 10
        self.vocab = list('0123456789+ =')  # list of all possible characters we might see
        self.vdim = len(self.vocab)
        self.vocabmap = {char: i for i, char in enumerate(self.vocab)}  # map char to idx number
        self.rnn = RNNLM(np.zeros((self.vdim, self.hdim)),
                         U0=np.zeros((2, self.hdim)),
                         bptt=bptt)
        self.scramble = getattr(self, scramble_name)

    def encode_expr(self, expr):
        return [self.vocabmap[c] for c in expr]

    def decode(self, indices):
        return ''.join([self.vocab[idx] for idx in indices])

    def lengthen_double(self, x_string):
        # format from '21 + 12' -> '021 + 012'
        return ' + '.join([lengthen(s, self.x_len) for s in x_string.split(' + ')])

    def scramble_double(self, x_string):
        # format 'abc + 123' to 'a1b2c3'
        lengthened = self.lengthen_double(x_string)
        nums = lengthened.split(' + ')
        return ''.join([x1 + x2 for x1, x2 in zip(nums[0], nums[1])])

    def noscramble(self, x_string):
        return x_string

    # def unscrambled_simple(self, x_string, i):
    #     return ''.join(c for c in self.lengthen_double(x_string) if c != ' ' and c != '+')

    # def scramble_simple(self, x_string, i):
    #     return self.scramble_double(x_string)

    # def two_dig_scramble(self, x_string, i):
    #     # where i is the output digit we're computing
    #     # in my opinion, this function knows a little too much about how to pick our digits
    #     x_slice = slice(0, 2) if i == 0 else slice(2*(i-1), 2*i)
    #     return self.scramble_double(x_string)[x_slice]

    # def rot_scramble(self, x_string, i):
    #     six_digs = self.scramble_double(x_string)
    #     start_dig = 0 if i == 0 else i - 1
    #     return [c for c in reversed(six_digs[start_dig:] + six_digs[:start_dig])]

    # def rot_scramble_half(self, x_string, i):
    #     return self.rot_scramble(x_string, i)[3:]

    def train(self, xy_data, rnn=None):
        # This function trains one RNN
        self.rnn = rnn if rnn is not None else self.rnn
        xs = [np.array(self.encode_expr(self.scramble(x))) for x, y in xy_data]
        ys = [y for x, y in xy_data]

        # for printing purposes only
        dev_data = get_data('data/neg_dev.txt')
        dev_xs = [np.array(self.encode_expr(self.scramble(x))) for x, y in dev_data]
        dev_ys = [y for x, y in dev_data]

        self.rnn.grad_check(dev_xs[0], dev_ys[0])

        for j in xrange(self.n_epochs):
            for x, y in zip(xs, ys):
                self.rnn.train_point_sgd(x, y, self.alpha)
            # print 'train loss', rnn_i.compute_loss(xs_i, ys_i)
            if j % 10 == 0:
                print 'dev loss', self.rnn.compute_loss(dev_xs[:100], dev_ys[:100]), \
                    'train loss', self.rnn.compute_loss(xs[:100], ys[:100])
                # # extra stuff to print
                # for x, y in zip(xs_i, ys)[:5]:
                #     yhat = rnn_i.predict(x)
                #     print x, yhat, np.argmax(yhat)
        return self.rnn

    def predict_one(self, x, rnn=None):
        rnn = rnn if rnn is not None else self.rnn
        if rnn is None:
            raise Exception('Model not trained!')
        x_encoded = self.encode_expr(self.scramble(x))
        return np.argmax(rnn.predict(x_encoded))
def train(args):
    print(vars(args))
    loader = SequenceLoader(args)

    if args.init_from is not None:
        if os.path.isdir(args.init_from):
            assert os.path.exists(args.init_from), "{} is not a directory".format(args.init_from)
            parent_dir = args.init_from
        else:
            assert os.path.exists("{}.index".format(args.init_from)), \
                "{} is not a checkpoint".format(args.init_from)
            parent_dir = os.path.dirname(args.init_from)
        config_file = os.path.join(parent_dir, "config.pkl")
        vocab_file = os.path.join(parent_dir, "vocab.pkl")
        assert os.path.isfile(config_file), "config.pkl does not exist in directory {}".format(parent_dir)
        assert os.path.isfile(vocab_file), "vocab.pkl does not exist in directory {}".format(parent_dir)
        if os.path.isdir(args.init_from):
            checkpoint = tf.train.latest_checkpoint(parent_dir)
            assert checkpoint, "no checkpoint in directory {}".format(args.init_from)
        else:
            checkpoint = args.init_from
        with open(os.path.join(parent_dir, 'config.pkl'), 'rb') as f:
            saved_args = pickle.load(f)
        assert saved_args.hidden_size == args.hidden_size, \
            "hidden size argument ({}) differs from save ({})".format(saved_args.hidden_size, args.hidden_size)
        assert saved_args.num_layers == args.num_layers, \
            "number of layers argument ({}) differs from save ({})".format(saved_args.num_layers, args.num_layers)
        with open(os.path.join(parent_dir, 'vocab.pkl'), 'rb') as f:
            saved_vocab = pickle.load(f)
        assert saved_vocab == loader.vocab, "vocab in data directory differs from save"

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    new_config_file = os.path.join(args.save_dir, 'config.pkl')
    new_vocab_file = os.path.join(args.save_dir, 'vocab.pkl')
    if not os.path.exists(new_config_file):
        with open(new_config_file, 'wb') as f:
            pickle.dump(args, f)
    if not os.path.exists(new_vocab_file):
        with open(new_vocab_file, 'wb') as f:
            pickle.dump(loader.vocab, f)

    model = RNNLM(args)

    with tf.Session() as sess:
        # tf.summary.merge_all()
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        if args.init_from is not None:
            try:
                saver.restore(sess, checkpoint)
            except ValueError:
                print("{} is not a valid checkpoint".format(checkpoint))
            print("initializing from {}".format(checkpoint))

        start_datetime = datetime.datetime.now().isoformat()
        # if args.tensorboard:
        train_writer = tf.summary.FileWriter(os.path.join(args.save_dir, start_datetime))
        train_writer.add_graph(sess.graph)

        for e in range(args.num_epochs):
            if e % args.decay_every == 0:
                lr = args.learning_rate * (args.decay_factor ** e)
            state = sess.run(model.zero_state)
            for b, (x, y) in enumerate(loader.train):
                global_step = e * loader.train.num_batches + b
                start = time.time()
                feed = {model.x: x, model.y: y, model.dropout: args.dropout, model.lr: lr}
                state_feed = {pl: s for pl, s in zip(sum(model.start_state, ()), sum(state, ()))}
                feed.update(state_feed)
                train_loss, state, _ = sess.run([model.loss, model.end_state, model.train_op], feed)
                end = time.time()
                # if args.verbose:
                print("{}/{} (epoch {}), train_loss = {:.3f}, perplexity = {:.3f}, time/batch = {:.3f}".format(
                    global_step, args.num_epochs * loader.train.num_batches, e,
                    train_loss, np.exp(train_loss), end - start))
                # if args.tensorboard:
                summary = tf.Summary(
                    value=[tf.Summary.Value(tag="RNNLM Train Loss", simple_value=float(train_loss))])
                train_writer.add_summary(summary, global_step)
                summary1 = tf.Summary(
                    value=[tf.Summary.Value(tag="RNNLM Train Perplexity", simple_value=float(np.exp(train_loss)))])
                train_writer.add_summary(summary1, global_step)

                if global_step % args.save_every == 0 \
                        or (e == args.num_epochs - 1 and b == loader.train.num_batches - 1):
                    all_loss = 0
                    val_state = sess.run(model.zero_state)
                    start = time.time()
                    for b, (x, y) in enumerate(loader.val):
                        feed = {model.x: x, model.y: y}
                        state_feed = {pl: s for pl, s in zip(sum(model.start_state, ()), sum(val_state, ()))}
                        feed.update(state_feed)
                        batch_loss, val_state = sess.run([model.loss, model.end_state], feed)
                        all_loss += batch_loss
                    end = time.time()
                    val_loss = all_loss / loader.val.num_batches
                    # if args.verbose:
                    print("val_loss = {:.3f}, perplexity = {:.3f}, time/val = {:.3f}".format(
                        val_loss, np.exp(val_loss), end - start))
                    checkpoint_path = os.path.join(
                        args.save_dir,
                        '{}-iter_{}-val_{:.3f}.ckpt'.format(start_datetime, global_step, val_loss))
                    saver.save(sess, checkpoint_path)
                    # if args.verbose:
                    print("model saved to {}".format(checkpoint_path))
                    # if args.tensorboard:
                    summary = tf.Summary(
                        value=[tf.Summary.Value(tag="RNNLM Val Loss", simple_value=float(val_loss))])
                    tf.summary.histogram('Val_Perplexity', val_loss)
                    # tf.histogram_summary('Val_Perplexity', val_loss)
                    train_writer.add_summary(summary, global_step)
                    summary1 = tf.Summary(
                        value=[tf.Summary.Value(tag="RNNLM Val Perplexity", simple_value=float(np.exp(val_loss)))])
                    # tf.summary.histogram('Val_Perplexity', np.exp(val_loss))
                    train_writer.add_summary(summary1, global_step)
        tf.summary.merge_all()
# Gradient check is going to take a *long* time here
# since it's quadratic-time in the number of parameters.
# run at your own risk... (but do check this!)
# model.grad_check(array([1,2,3]), array([2,3,4]))

#### YOUR CODE HERE ####

##
# Pare down to a smaller dataset, for speed
# (optional - recommended to not do this for your final model)
hdim = 100  # dimension of hidden layer = dimension of word vectors
random.seed(10)
L0 = zeros((vocabsize, hdim))  # replace with random init,
L0 = 0.1 * random.randn(*L0.shape)  # or do in RNNLM.__init__()

# test parameters; you probably want to change these
model = RNNLM(L0, U0=L0, alpha=0.1, rseed=10, bptt=3)

ntrain = len(Y_train)
X = X_train[:ntrain]
Y = Y_train[:ntrain]
k = 5
indices = range(ntrain)

def idxiter_batches():
    num_batches = ntrain / k
    for i in xrange(num_batches):
        yield random.choice(indices, k)

model_output = model.train_sgd(X=X, y=Y,
                               idxiter=idxiter_batches(),
                               printevery=100, costevery=10000)

dev_loss = model.compute_mean_loss(X_dev, Y_dev)

## DO NOT CHANGE THIS CELL ##
# Report your numbers, after computing dev_loss above.
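# A minimal sketch (not part of the original notebook) of the reporting step that
# follows dev_loss, mirroring the "RNNLM" block earlier in this collection. It
# assumes adjust_loss and fraction_lost are defined as they are there, and reuses
# the same "model/RNNLM/" save-path convention consumed by rnnlm_load() above.
print "Unadjusted: %.03f" % exp(dev_loss)
print "Adjusted for missing vocab: %.03f" % exp(adjust_loss(dev_loss, fraction_lost))
if not os.path.exists("model/RNNLM"):
    os.makedirs("model/RNNLM")
save("model/RNNLM/rnnlm.L.npy", model.sparams.L)
save("model/RNNLM/rnnlm.U.npy", model.params.U)
save("model/RNNLM/rnnlm.H.npy", model.params.H)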
from rnnlm import RNNLM
from better_rnnlm import BetterRNNLM
from datasets import ptb
from commons.util import eval_perplexity

if __name__ == '__main__':
    # select model for evaluation
    model = RNNLM()
    # model = BetterRNNLM()

    # read tuned params
    model.load_params()

    corpus, _, _ = ptb.load_data('test')

    model.reset_state()
    ppl_test = eval_perplexity(model, corpus)
    print('Test Perplexity:', ppl_test)