def main():
    """Generate text from an RNN language model trained by train_ptb.py."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', '-m', type=str, required=True,
                        help='model data, saved by train_ptb.py')
    # NOTE: the argument is required, so the former `default=''` could
    # never take effect; it has been dropped.
    parser.add_argument('--primetext', '-p', type=str, required=True,
                        help='base text data, used for text generation')
    parser.add_argument('--seed', '-s', type=int, default=123,
                        help='random seeds for text generation')
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='number of units')
    parser.add_argument('--sample', type=int, default=1,
                        help='negative value indicates NOT use random choice')
    parser.add_argument('--length', type=int, default=20,
                        help='length of the generated text')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    np.random.seed(args.seed)

    xp = cuda.cupy if args.gpu >= 0 else np

    # Load the vocabulary and build the inverse mapping (word id -> word).
    vocab = chainer.datasets.get_ptb_words_vocabulary()
    ivocab = {}
    for c, i in vocab.items():
        ivocab[i] = c

    # Should be the same as n_units, described in train_ptb.py.
    n_units = args.unit

    lm = train_ptb.RNNForLM(len(vocab), n_units, train=False)
    model = L.Classifier(lm)

    serializers.load_npz(args.model, model)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    model.predictor.reset_state()

    primetext = args.primetext
    if isinstance(primetext, six.binary_type):
        primetext = primetext.decode('utf-8')

    if primetext in vocab:
        prev_word = chainer.Variable(xp.array([vocab[primetext]], xp.int32))
    else:
        print('ERROR: Unfortunately ' + primetext + ' is unknown.')
        # BUG FIX: the original called exit() and so terminated with
        # status 0 on this error path; report failure to the shell.
        sys.exit(1)

    sys.stdout.write(primetext + ' ')

    for i in six.moves.range(args.length):
        # BUG FIX: the original code also ran the predictor once *before*
        # this loop and discarded the softmax result.  That extra forward
        # pass advanced the LSTM state, so the first generated word was
        # conditioned on the prime word twice.  One call per step suffices.
        prob = F.softmax(model.predictor(prev_word))
        if args.sample > 0:
            # Sample the next word from the predicted distribution.
            probability = cuda.to_cpu(prob.data)[0].astype(np.float64)
            # Renormalize after the float64 cast so probabilities sum to 1.
            probability /= np.sum(probability)
            index = np.random.choice(range(len(probability)), p=probability)
        else:
            # Greedy decoding: always emit the most likely word.
            index = np.argmax(cuda.to_cpu(prob.data))

        if ivocab[index] == '<eos>':
            sys.stdout.write('.')
        else:
            sys.stdout.write(ivocab[index] + ' ')

        prev_word = chainer.Variable(xp.array([index], dtype=xp.int32))

    sys.stdout.write('\n')
def main():
    """Train an RNNLM on Penn Tree Bank with a hand-written training loop."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    # NOTE(review): `--resume` is parsed but never acted on in this
    # variant of the script; resuming is implemented in the device-based
    # variant only.  Confirm whether it should be wired up here too.
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    def evaluate(model, data_iter):
        # Evaluation routine used for both validation and test.
        # (Parameter renamed from `iter`, which shadowed the builtin.)
        evaluator = model.copy()  # copy so evaluation uses a separate state
        evaluator.predictor.reset_state()  # initialize state
        sum_perp = 0
        data_count = 0
        # Enable evaluation mode (disables e.g. dropout in the predictor).
        with configuration.using_config('train', False):
            # This is optional but can reduce computational overhead.
            with chainer.using_config('enable_backprop', False):
                data_iter.reset()
                for batch in data_iter:
                    x, t = convert.concat_examples(batch, args.gpu)
                    loss = evaluator(x, t)
                    sum_perp += loss.array
                    data_count += 1
        return np.exp(float(sum_perp) / data_count)

    # Load the Penn Tree Bank long word sequence dataset.
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab = {}'.format(n_vocab))

    if args.test:
        train = train[:100]
        val = val[:100]
        test = test[:100]

    # Create the dataset iterators.
    train_iter = train_ptb.ParallelSequentialIterator(train, args.batchsize)
    val_iter = train_ptb.ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = train_ptb.ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model.
    rnn = train_ptb.RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        # Make the specified GPU current.
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Set up an optimizer with gradient-norm clipping.
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    sum_perp = 0
    count = 0
    iteration = 0
    while train_iter.epoch < args.epoch:
        loss = 0
        iteration += 1
        # Progress the dataset iterator for bprop_len words at each iteration.
        for i in range(args.bproplen):
            # Get the next batch (a list of tuples of two word IDs).
            batch = next(train_iter)
            # Concatenate the word IDs to matrices and send them to the
            # device (chainer.dataset.concat_examples by default).
            x, t = convert.concat_examples(batch, args.gpu)
            # Compute the loss at this time step and accumulate it.
            loss += optimizer.target(chainer.Variable(x), chainer.Variable(t))
            count += 1

        sum_perp += loss.array
        optimizer.target.cleargrads()  # Clear the parameter gradients
        loss.backward()  # Backprop
        loss.unchain_backward()  # Truncate the graph
        optimizer.update()  # Update the parameters

        if iteration % 20 == 0:
            print('iteration: {}'.format(iteration))
            print('training perplexity: {}'.format(
                np.exp(float(sum_perp) / count)))
            sum_perp = 0
            count = 0

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('validation perplexity: {}'.format(
                evaluate(model, val_iter)))

    # Evaluate on the test dataset.
    print('test')
    test_perp = evaluate(model, test_iter)
    print('test perplexity: {}'.format(test_perp))

    # Save the model and the optimizer.
    # BUG FIX: `--out` was parsed but ignored — the snapshots were written
    # to the current working directory.  Honor the requested directory.
    if not os.path.exists(args.out):
        os.makedirs(args.out)
    print('save the model')
    serializers.save_npz(os.path.join(args.out, 'rnnlm.model'), model)
    print('save the optimizer')
    serializers.save_npz(os.path.join(args.out, 'rnnlm.state'), optimizer)
def main():
    """Train an RNNLM on Penn Tree Bank using a device specifier."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                             'specifier or an integer. If non-negative integer, '
                             'CuPy arrays with specified device id are used. If '
                             'negative integer, NumPy arrays are used')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    # BUG FIX: the help text said `rnnln.model`; the file actually loaded
    # below is `rnnlm.model`.
    parser.add_argument('--resume', '-r', type=str,
                        help='Directory that has `rnnlm.model`'
                             ' and `rnnlm.state`')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', dest='device',
                       type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = chainer.get_device(args.device)
    if device.xp is chainerx:
        sys.stderr.write('This example does not support ChainerX devices.\n')
        sys.exit(1)

    device.use()

    def evaluate(model, data_iter):
        # Evaluation routine used for both validation and test.
        # (Parameter renamed from `iter`, which shadowed the builtin.)
        evaluator = model.copy()  # copy so evaluation uses a separate state
        evaluator.predictor.reset_state()  # initialize state
        sum_perp = 0
        data_count = 0
        # Enable evaluation mode (disables e.g. dropout in the predictor).
        with configuration.using_config('train', False):
            # This is optional but can reduce computational overhead.
            with chainer.using_config('enable_backprop', False):
                data_iter.reset()
                for batch in data_iter:
                    x, t = convert.concat_examples(batch, device)
                    loss = evaluator(x, t)
                    sum_perp += loss.array
                    data_count += 1
        return np.exp(float(sum_perp) / data_count)

    # Load the Penn Tree Bank long word sequence dataset.
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab = {}'.format(n_vocab))

    if args.test:
        train = train[:100]
        val = val[:100]
        test = test[:100]

    # Create the dataset iterators.
    train_iter = train_ptb.ParallelSequentialIterator(train, args.batchsize)
    val_iter = train_ptb.ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = train_ptb.ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model.
    rnn = train_ptb.RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    model.to_device(device)

    # Set up an optimizer with gradient-norm clipping.
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    # Load model and optimizer snapshots when resuming.
    if args.resume is not None:
        resume = args.resume
        if os.path.exists(resume):
            serializers.load_npz(os.path.join(resume, 'rnnlm.model'), model)
            serializers.load_npz(
                os.path.join(resume, 'rnnlm.state'), optimizer)
        else:
            raise ValueError(
                '`args.resume` ("{}") is specified,'
                ' but it does not exist'.format(resume)
            )

    sum_perp = 0
    count = 0
    iteration = 0
    while train_iter.epoch < args.epoch:
        loss = 0
        iteration += 1
        # Progress the dataset iterator for bprop_len words at each iteration.
        for i in range(args.bproplen):
            # Get the next batch (a list of tuples of two word IDs).
            batch = next(train_iter)
            # Concatenate the word IDs to matrices and send them to the
            # device (chainer.dataset.concat_examples by default).
            x, t = convert.concat_examples(batch, device)
            # Compute the loss at this time step and accumulate it.
            loss += optimizer.target(x, t)
            count += 1

        sum_perp += loss.array
        optimizer.target.cleargrads()  # Clear the parameter gradients
        loss.backward()  # Backprop
        loss.unchain_backward()  # Truncate the graph
        optimizer.update()  # Update the parameters

        if iteration % 20 == 0:
            print('iteration: {}'.format(iteration))
            print('training perplexity: {}'.format(
                np.exp(float(sum_perp) / count)))
            sum_perp = 0
            count = 0

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('validation perplexity: {}'.format(
                evaluate(model, val_iter)))

    # Evaluate on the test dataset.
    print('test')
    test_perp = evaluate(model, test_iter)
    print('test perplexity: {}'.format(test_perp))

    # Save the model and the optimizer under the output directory.
    out = args.out
    if not os.path.exists(out):
        os.makedirs(out)
    print('save the model')
    serializers.save_npz(os.path.join(out, 'rnnlm.model'), model)
    print('save the optimizer')
    serializers.save_npz(os.path.join(out, 'rnnlm.state'), optimizer)