# Module-level imports assumed by the methods below; make_initial_state and
# BaseRNN are expected to come from this project's model code (their exact
# import path is not shown in this excerpt).
import time

import numpy as np
from chainer import Variable, cuda, optimizers


def sample(self, length):
    """ Sample length tokens from the Predictor's model. """
    result = []
    if self.gpu >= 0:
        cuda.get_device(self.gpu).use()
        self.model.to_gpu()
    # Re-seeding here makes repeated sample() calls deterministic for a fixed seed.
    np.random.seed(self.settings.seed)

    # Initialize the generator state.
    state = make_initial_state(self.units, batchsize=1, train=False)
    if self.gpu >= 0:
        for _, value in state.items():
            value.data = cuda.to_gpu(value.data)
    prev_item = np.array([0], dtype=np.int32)
    if self.gpu >= 0:
        prev_item = cuda.to_gpu(prev_item)

    # Invert the vocab for easy index-to-token retrieval.
    ivocab = {}
    for c, i in self.vocab.items():
        ivocab[i] = c

    # Perform the actual sampling: feed the previous token in, then draw the
    # next token from the normalized output distribution.
    for i in xrange(length):
        state, prob = self.model.predict(prev_item, state)
        probability = cuda.to_cpu(prob.data)[0].astype(np.float64)
        probability /= np.sum(probability)
        index = np.random.choice(range(len(probability)), p=probability)
        result.append(ivocab[index])
        prev_item = np.array([index], dtype=np.int32)
        if self.gpu >= 0:
            prev_item = cuda.to_gpu(prev_item)
    return result
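# Usage sketch for sample(): assumes a Predictor instance whose vocab and
# model have already been populated (e.g. by train() below); the class
# constructor is not part of this excerpt, so its shape is an assumption.
#
#     tokens = predictor.sample(50)   # list of 50 generated tokens
#     print ' '.join(tokens)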
def train(self, words, steps, batchsize=100, sequence_length=10):
    """ Train the Predictor's model on words for the given number of steps. """
    whole_len = len(words)
    train_data = np.ndarray(whole_len, dtype=np.int32)
    jumps = steps * sequence_length

    # Encode the corpus as vocab indices, building the vocab first if needed.
    vocab_initializing = False
    if self.vocab is None:
        vocab_initializing = True
        self.vocab = {}
    for i, word in enumerate(words):
        if vocab_initializing and word not in self.vocab:
            self.vocab[word] = len(self.vocab)
        train_data[i] = self.vocab[word]
    print 'corpus length:', len(words)
    print 'vocab size:', len(self.vocab)

    # Initialize the base model (if we need to).
    if self.model is None:
        self.model = BaseRNN(len(self.vocab), self.units)
    if self.gpu >= 0:
        cuda.get_device(self.gpu).use()
        self.model.to_gpu()

    optimizer = optimizers.RMSprop(lr=self.settings.learning_rate,
                                   alpha=self.settings.decay_rate,
                                   eps=1e-8)
    optimizer.setup(self.model)

    jumps_per_epoch = whole_len / batchsize
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = make_initial_state(self.units, batchsize=batchsize)
    if self.gpu >= 0:
        accum_loss = Variable(cuda.zeros(()))
        for _, value in state.items():
            value.data = cuda.to_gpu(value.data)
    else:
        accum_loss = Variable(np.zeros((), dtype=np.float32))

    print 'going to train for {} steps'.format(steps)
    for i in xrange(jumps):
        # Each of the batchsize streams reads the corpus from its own offset.
        x_batch = np.array([train_data[(jumps_per_epoch * j + i) % whole_len]
                            for j in xrange(batchsize)])
        y_batch = np.array([train_data[(jumps_per_epoch * j + i + 1) % whole_len]
                            for j in xrange(batchsize)])
        if self.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)

        state, loss_i = self.model.forward_one_step(
            x_batch, y_batch, state, dropout_ratio=self.settings.dropout)
        accum_loss += loss_i

        # Backpropagate through truncated sequences of sequence_length tokens.
        if (i + 1) % sequence_length == 0:
            now = time.time()
            print '{}/{}, train_loss = {}, time = {:.2f}'.format(
                (i + 1) / sequence_length, steps,
                accum_loss.data / sequence_length, now - cur_at)
            cur_at = now

            optimizer.zero_grads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate the backprop history
            if self.gpu >= 0:
                accum_loss = Variable(cuda.zeros(()))
            else:
                accum_loss = Variable(np.zeros((), dtype=np.float32))

            optimizer.clip_grads(self.settings.grad_clip)
            optimizer.update()

        # Decay the learning rate once per pass over the corpus.
        if (i + 1) % jumps_per_epoch == 0:
            epoch += 1
            if epoch >= self.settings.learning_rate_decay_after:
                optimizer.lr *= self.settings.learning_rate_decay
                print 'decayed learning rate by a factor of {} to {}'.format(
                    self.settings.learning_rate_decay, optimizer.lr)
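# Usage sketch for train(): assumes a Predictor constructor that sets up
# `units`, `gpu`, and `settings` (not shown in this excerpt) and a corpus
# already split into tokens:
#
#     words = open('input.txt').read().split()   # whitespace-tokenized corpus
#     predictor.train(words, steps=1000)         # first call also builds the vocab and model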