def validate(args, model: CharRNN, criterion, char_to_id, pbar=False):
    model.eval()
    valid_corpus = Path(args.valid_corpus).read_text(encoding='utf8')
    batch_size = 1
    window_size = 4096
    hidden = model.init_hidden(batch_size)
    total_loss = n_chars = 0
    total_word_loss = n_words = 0
    r = tqdm.trange if pbar else range
    # slide a non-overlapping window over the validation corpus
    for idx in r(0,
                 min(args.valid_chars or len(valid_corpus),
                     len(valid_corpus) - 1),
                 window_size):
        chunk = valid_corpus[idx:idx + window_size + 1]
        inputs = variable(
            char_tensor(chunk[:-1], char_to_id).unsqueeze(0), volatile=True)
        targets = variable(char_tensor(chunk[1:], char_to_id).unsqueeze(0))
        losses = []
        for c in range(inputs.size(1)):
            output, hidden = model(inputs[:, c], hidden)
            loss = criterion(output.view(batch_size, -1), targets[:, c])
            losses.append(loss.data[0])
            n_chars += 1
        total_loss += np.sum(losses)
        # aggregate per-character losses into per-word losses for perplexity
        word_losses = word_loss(chunk, losses)
        total_word_loss += np.sum(word_losses)
        n_words += len(word_losses)
    mean_loss = total_loss / n_chars
    mean_word_perplexity = np.exp(total_word_loss / n_words)
    print('Validation loss: {:.3}, word perplexity: {:.1f}'.format(
        mean_loss, mean_word_perplexity))
    return {
        'valid_loss': mean_loss,
        'valid_word_perplexity': mean_word_perplexity,
    }
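# A minimal invocation sketch for validate(). Only the attribute names that
# validate() actually reads (valid_corpus, valid_chars) are set here; the file
# path is a placeholder, and `model`, `criterion` and `char_to_id` are assumed
# to come from the training code above.
from argparse import Namespace

args = Namespace(valid_corpus='data/valid.txt', valid_chars=None)
metrics = validate(args, model, criterion, char_to_id, pbar=True)
print(metrics['valid_loss'], metrics['valid_word_perplexity'])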
def sample(model: CharRNN, char2int: dict, prime='The', num_chars=1000,
           top_k=5):
    """Given a network and a char2int map, predict the next num_chars
    characters."""
    device = next(model.parameters()).device.type
    int2char = {ii: ch for ch, ii in char2int.items()}

    # set our model to evaluation mode, we use dropout after all
    model.eval()

    # First off, run through the prime characters
    chars = [char2int[ch] for ch in prime]
    h = model.init_hidden(1, device)
    for ch in chars:
        char, h = predict(model, ch, h, top_k, device)
    chars.append(char)

    # Now pass in the previous character and get a new one
    for ii in range(num_chars):
        char, h = predict(model, chars[-1], h, top_k, device)
        chars.append(char)

    return ''.join(int2char[c] for c in chars)
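# Hypothetical usage of sample(): assumes `model` is an already trained CharRNN
# and `char2int` is the character-to-index map built during training. The prime
# string and sampling settings below are illustrative only.
generated = sample(model, char2int, prime='The quick', num_chars=500, top_k=3)
print(generated)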
def train_model(model: CharRNN, criterion, optimizer,
                inputs: Variable, targets: Variable) -> float:
    batch_size = inputs.size(0)
    window_size = inputs.size(1)
    hidden = cuda(model.init_hidden(batch_size))
    model.zero_grad()
    loss = 0
    # feed the window one character at a time, accumulating the loss
    for c in range(window_size):
        output, hidden = model(inputs[:, c], hidden)
        loss += criterion(output.view(batch_size, -1), targets[:, c])
    loss.backward()
    optimizer.step()
    # return the mean per-character loss
    return loss.data[0] / window_size
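# Sketch of a single optimisation step with train_model(). random_batch and
# char_to_id come from the same training script (see train() below); the batch
# and window sizes are illustrative, not the project's settings.
inputs, targets = random_batch(corpus, batch_size=32, window_size=128,
                               char_to_id=char_to_id)
step_loss = train_model(model, criterion, optimizer, inputs, targets)
print('per-character loss: {:.3f}'.format(step_loss))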
def main(_):
    # Set the model's save path
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # Load the training text
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        text = f.read()

    # Build the text-conversion instance
    converter = TextCoverter(text, FLAGS.max_vocab)
    # Serialize the converter so the later sampling script can reuse it
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))

    # Map each character to its index in the vocabulary. For example, for
    # "寒随穷律变,春逐鸟声开。初风飘带柳,晚雪间花梅。", since ',' and '。' come
    # first in the vocabulary their indices are 0 and 1, so arr becomes
    # [15 17 12 22 6 0 5 8 18 19 16 1 4 7 2 21 3 9 0 10 11 20 13 14 1]
    arr = converter.text_to_arr(text)

    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)
    # sanity-check one batch
    for x, y in g:
        print(x)
        print(y)
        break
    print("Vocabulary size: {}".format(converter.vocab_size))

    # Build the model
    model = CharRNN(converter.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
def train(args, model: CharRNN, step, epoch, corpus, char_to_id, criterion,
          model_file):
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    batch_chars = args.window_size * args.batch_size

    def save(ep):
        torch.save({
            'state': model.state_dict(),
            'epoch': ep,
            'step': step,
        }, str(model_file))

    log = Path(args.root).joinpath('train.log').open('at', encoding='utf8')
    for epoch in range(epoch, args.n_epochs + 1):
        try:
            losses = []
            n_iter = args.epoch_batches or (len(corpus) // batch_chars)
            report_each = min(10, n_iter - 1)
            tr = tqdm.tqdm(total=n_iter * batch_chars)
            tr.set_description('Epoch {}'.format(epoch))
            model.train()
            for i in range(n_iter):
                inputs, targets = random_batch(
                    corpus,
                    batch_size=args.batch_size,
                    window_size=args.window_size,
                    char_to_id=char_to_id,
                )
                loss = train_model(model, criterion, optimizer, inputs, targets)
                step += 1
                losses.append(loss)
                tr.update(batch_chars)
                mean_loss = np.mean(losses[-report_each:])
                tr.set_postfix(loss=mean_loss)
                if i and i % report_each == 0:
                    write_event(log, step, loss=mean_loss)
            tr.close()
            save(ep=epoch + 1)
        except KeyboardInterrupt:
            print('\nGot Ctrl+C, saving checkpoint...')
            save(ep=epoch)
            print('done.')
            return
        if args.valid_corpus:
            valid_result = validate(args, model, criterion, char_to_id)
            write_event(log, step, **valid_result)
    print('Done training for {} epochs'.format(args.n_epochs))
def main(unused_args):
    # pickles must be opened in binary mode
    with open(os.path.join(FLAGS.session_dir, 'labels.pkl'), 'rb') as f:
        char_to_id = pickle.load(f)
    with open(os.path.join(FLAGS.session_dir, 'config.pkl'), 'rb') as f:
        config = pickle.load(f)

    with tf.variable_scope('model'):
        m = CharRNN('infer', config)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(FLAGS.session_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print(ckpt.model_checkpoint_path, 'restored')
        # interactive sampling loop: read a seed string, print the sample and
        # the characters-per-second throughput
        while True:
            seed = input('seed:')
            start_time = time.time()
            print(m.sample(sess, char_to_id, FLAGS.num_steps, seed))
            print(FLAGS.num_steps / (time.time() - start_time), 'cps')
def main(_):
    converter = TextCoverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(
            FLAGS.checkpoint_path)

    model = CharRNN(converter.vocab_size,
                    sampling=True,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)

    start = converter.text_to_arr(FLAGS.start_string)
    arr = model.sample(FLAGS.max_length, start, converter.vocab_size)
    print(converter.arr_to_text(arr))
def count_parameters(model: CharRNN):
    """Count the total number of trainable parameters in a model."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
                             seq_length=seq_length, device=device)
validation_data = CharacterDataset(validation_text, vocabulary,
                                   batch_size=batch_size,
                                   seq_length=seq_length, device=device)

# and make our data loaders
# batch size is exactly 1 by default, which is exactly what we need
train_loader = DataLoader(train_data)
validation_loader = DataLoader(validation_data)

# Part 3: modelling
# we create our model
model = CharRNN(num_chars).to(device)
# and the initial hidden state (a tensor of zeros)
initial_state = model.init_hidden(batch_size, device)

# we evaluate the capability of our model
# a character to parameter ratio approaching 1 is optimal
# too many parameters and the model may overfit
# too few and the model may underfit
char_param_ratio = len(text) / count_parameters(model)
print("Character to model parameter ratio: %f\n" % char_param_ratio)

# Part 4: training
train(model, initial_state,
      train_loader=train_loader,
      validation_loader=validation_loader,
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="We",
           subject=[0, 0, 0, 0, 0, 0, 0]):
    """Sample new text from a trained model.

    checkpoint: path to the saved checkpoint
    n_samples: length of the sample in characters
    lstm_size: number of hidden nodes
    vocab_size: size of the character vocabulary
    prime: start text
    subject: subject/feature vector fed alongside each input character
    """
    # convert input word to chars
    samples = [c for c in prime]
    # sampling=True means a batch of size 1 x 1
    model = CharRNN.CharRNN(len(CharRNN.vocab), lstm_size=lstm_size,
                            sampling=True, feature_size=8)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Restore session
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)

        # run through the prime characters to warm up the hidden state
        for c in prime:
            x = np.zeros((1, 1))  # input single char
            x[0, 0] = CharRNN.vocab_to_int[c]
            subject_reshape = np.reshape(subject, (1, 1, -1)).astype(dtype=int)
            feed = {
                model.inputs: x,
                model.keep_prob: 1.,
                model.subject: subject_reshape,
                model.initial_state: new_state,
            }
            preds, new_state = sess.run([model.prediction, model.final_state],
                                        feed_dict=feed)
        c = pick_top_n(preds, len(CharRNN.vocab))
        # add the new prediction to the sample
        samples.append(CharRNN.int_to_vocab[c])

        # generate new chars till the limit
        for i in range(n_samples):
            x[0, 0] = c
            subject_reshape = np.reshape(subject, (1, 1, -1)).astype(dtype=int)
            feed = {
                model.inputs: x,
                model.keep_prob: 1.,
                model.subject: subject_reshape,
                model.initial_state: new_state,
            }
            preds, new_state = sess.run([model.prediction, model.final_state],
                                        feed_dict=feed)
            c = pick_top_n(preds, len(CharRNN.vocab))
            samples.append(CharRNN.int_to_vocab[c])

    return ''.join(samples)
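# Hypothetical call to the TensorFlow sample() above: the checkpoint directory
# and the subject vector are placeholders for illustration only.
checkpoint = tf.train.latest_checkpoint('checkpoints')
text = sample(checkpoint, n_samples=500, lstm_size=512,
              vocab_size=len(CharRNN.vocab), prime="We",
              subject=[1, 0, 0, 0, 0, 0, 0])
print(text)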
batch_size = 10
num_steps = 100
lstm_size = 512
num_layers = 2
learning_rate = 0.001
keep_prob = 0.5
feature_size = 8
epochs = 100
save_every_n = 500

model = CharRNN.CharRNN(len(CharRNN.vocab), batch_size=batch_size,
                        num_steps=num_steps, lstm_size=lstm_size,
                        num_layers=num_layers, learning_rate=learning_rate,
                        feature_size=feature_size)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    counter = 0
    for e in range(epochs):
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in CharRNN.get_batches(CharRNN.merged_data_matrix,
                                        batch_size, num_steps, feature_size):
def main(
        representation,
        train=None,
        generate=None,
        temperature=DEFAULT_TEMPERATURE,
        max_generate_len=DEFAULT_MAX_GEN_LEN,
        generator_prime_str=FILE_START,
        window_size=DEFAULT_WINDOW_SIZE,
        batch_size=DEFAULT_BATCH_SIZE,
        disable_cuda=DEFAULT_DISABLE_CUDA,
        learning_rate=DEFAULT_LEARNING_RATE,
        num_epochs=DEFAULT_NUM_EPOCHS,
        patience=DEFAULT_PATIENCE,
        recurrent_type=DEFAULT_RECURRENT_TYPE,
        hidden_size=DEFAULT_RECURRENT_HIDDEN_SIZE,
        recurrent_layers=DEFAULT_RECURRENT_LAYERS,
        recurrent_dropout=DEFAULT_RECURRENT_DROPOUT,
        print_every_iter=DEFAULT_PRINT_EVERY_ITER,
        log_level=DEFAULT_LOG_LEVEL,
):
    # https://github.com/pytorch/pytorch/issues/13775
    torch.multiprocessing.set_start_method("spawn")

    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(log_level)

    use_cuda = torch.cuda.is_available()
    if disable_cuda:
        use_cuda = False

    if representation == "char":
        # Create the neural network structure
        logger.info("Constructing the neural network architecture...")
        n_chars = len(CHARACTERS)
        nn = CharRNN(n_chars, n_chars,
                     hidden_size=hidden_size,
                     recurrent_type=recurrent_type,
                     recurrent_layers=recurrent_layers,
                     recurrent_dropout=recurrent_dropout,
                     use_cuda=use_cuda)
        if use_cuda:
            nn.cuda()

        if train:
            # Warn that when window_size is None, batch_size should be 1
            if window_size is None and batch_size != 1:
                logger.warning("~" * 40)
                logger.warning(
                    "WARN: Undefined window_size with batch_size: {}".format(
                        batch_size))
                logger.warning(
                    "\tBatches may not have equal sequence lengths!")
                logger.warning(
                    "\tWindow size should be defined when batch_size > 1.")
                logger.warning("~" * 40)

            # Train our model
            train_full(nn,
                       max_window_size=window_size,
                       learning_rate=learning_rate,
                       n_epochs=num_epochs,
                       patience_threshold=patience,
                       batch_size=batch_size,
                       print_every=print_every_iter,
                       use_cuda=use_cuda)
        elif generate:
            progress_path = nn.get_progress_path()

            # Load our model
            logger.info("Loading the model weights...")
            path = nn.get_state_dict_path()
            if not os.path.isfile(path):
                raise FileNotFoundError(
                    ("Model does not exist at {}. " +
                     "Manual model renaming required.").format(path))
            nn.load_state_dict(torch.load(path))
            nn = nn.eval()

            generate_charseq(nn,
                             prime_str=generator_prime_str,
                             max_window_size=window_size,
                             max_generate_len=max_generate_len,
                             temperature=temperature)
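# Illustrative entry-point calls for main(); how the real script wires up its
# command-line arguments is not shown here, and the keyword values below are
# placeholders rather than the project's settings.
if __name__ == '__main__':
    # train a character-level model
    main('char', train=True, window_size=128, batch_size=32, num_epochs=20)
    # or, to sample from a previously trained model:
    # main('char', generate=True, max_generate_len=1000, temperature=0.8)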