def run_epoch(session, m, data, eval_op, verbose=False): """Lancement du modèle""" epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps start_time = time.time() costs = 0.0 iters = 0 state = m.initial_state.eval() # Sélection d'un batch de données d'apprentissage et de validation # Les données de validation sont les mots suivant ceux d'apprentissage for step, (x, y) in enumerate( reader.ptb_iterator(data, m.batch_size, m.num_steps)): cost, state, _ = session.run([m.cost, m.final_state, eval_op], { m.input_data: x, m.targets: y, m.initial_state: state }) costs += cost iters += m.num_steps if verbose and step % (epoch_size // 10) == 10: print("%.3f perplexité: %.3f vitesse de traitement: %.0f wps" % (step * 1.0 / epoch_size, np.exp(costs / iters), iters * m.batch_size / (time.time() - start_time))) return np.exp(costs / iters)
def run_epoch(session, m, data, eval_op, verbose=False): """Runs the model on the given data.""" epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps start_time = time.time() costs = 0.0 iters = 0 state = m.initial_state.eval() for step, (x, y) in enumerate(reader.ptb_iterator(data, m.batch_size, m.num_steps)): # run number of batches / epoch # after each run, get modified state + loss from that batch cost, state, _ = session.run([m.cost, m.final_state, eval_op], {m.input_data: x, m.targets: y, m.initial_state: state}) costs += cost iters += m.num_steps # need to divide cost by nbatches and num_steps (seq_len) # already divided by batch_size in cost if verbose and step % (epoch_size // 10) == 10: print("%.3f perplexity: %.3f speed: %.0f wps" % (step * 1.0 / epoch_size, np.exp(costs / iters), iters * m.batch_size / (time.time() - start_time))) return np.exp(costs / iters)
def run_epoch(session, m, data, eval_op, ITERS, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                                     {m.input_data: x,
                                      m.targets: y,
                                      m.initial_state: state})
        costs += cost
        iters += m.num_steps

        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))

        # Run only a few iterations for profiling; remove this break if
        # complete training is needed.
        if step > ITERS - 1:
            break

    print("Time for %d iterations %.4f seconds" %
          (ITERS, time.time() - start_time))
    return np.exp(costs / iters)
def run_epoch(session, model, data, train_op, output_log):
    # Auxiliary variables for computing perplexity.
    total_costs = 0.0
    iters = 0
    state = session.run(model.initial_state)
    # Train or evaluate the model on the current data.
    # Note: on each pass of this loop, x and y are 2-D lists of shape
    # batch_size * num_steps, i.e. batch_size input/output pairs where each
    # input and each output has length num_steps. When an input/output pair
    # is sampled from a sentence, the output sequence is the input sequence
    # shifted one word to the right: for the sentence [4, 5, 8, 1, 9, 3, 5, 0],
    # if [5, 8, 1, 9] is sampled as an input sequence, the corresponding
    # output sequence is [8, 1, 9, 3]. step is the index of the current step.
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        # Run train_op on the current batch and compute the loss; the
        # cross-entropy loss measures the probability of the next word
        # given the current one.
        cost, state, _ = session.run(
            [model.cost, model.final_state, train_op], {
                model.input_data: x,
                model.targets: y,
                model.initial_state: state
            })
        # Summing the costs over different time steps and batches gives the
        # right-hand side of the second perplexity formula; exponentiating
        # that sum yields the perplexity value.
        total_costs += cost
        iters += model.num_steps

        # Output a log only during training.
        if output_log and step % 100 == 0:
            print("After %d steps, perplexity is %.3f" %
                  (step, np.exp(total_costs / iters)))

    # Return the perplexity of the given model on the given data.
    return np.exp(total_costs / iters)
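# The perplexity formula the comments above refer to, stated here for
# reference:
#   perplexity = exp(-(1/m) * sum_{i=1..m} log p(w_i | w_1, ..., w_{i-1}))
# model.cost sums the per-step cross-entropy (already averaged over the
# batch), so dividing total_costs by the accumulated num_steps gives the
# average per-word cross-entropy, and exponentiating it yields perplexity.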
def run_epoch_eval(session, m, data, eval_op, use_log_probs=False):
    """Runs the model on the given data."""
    costs = 0.0
    iters = 0
    logp = 0.0
    wordcn = 0
    state = m.initial_state.eval()
    # This feeds one word at a time when batch_size and num_steps are both 1.
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps), start=1):
        if use_log_probs:
            log_probs, state = session.run([m.log_probs, m.final_state],
                                           {m.input_data: x,
                                            m.initial_state: state})
            logp += (log_probs[0][y[0]])[0]
            wordcn += 1
        else:
            cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                                         {m.input_data: x,
                                          m.targets: y,
                                          m.initial_state: state})
            costs += cost
            iters += m.num_steps
    if use_log_probs:
        logging.info("Test log probability={}".format(logp))
        logging.info("Test PPL: %f", np.exp(-logp / wordcn))
        return logp
    else:
        logging.info("Test PPL: %f", np.exp(costs / iters))
        return np.exp(costs / iters)
def run_epoch(session, model, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    # epoch_size is the total number of batches, i.e. how many times data
    # must be fed to the session. // is integer division.
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(model.initial_state)
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        # Operations to run; note that eval_op differs between training
        # and evaluation.
        fetches = [model.cost, model.final_state, eval_op]
        feed_dict = {}  # Set the input and target values.
        feed_dict[model.input_data] = x
        feed_dict[model.targets] = y
        # Feed each layer's LSTM state (cell state c and hidden state h)
        # from the previous batch as this batch's initial state.
        for i, (c, h) in enumerate(model.initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h
        # Run the session to obtain the cost and the new state.
        cost, state, _ = session.run(fetches, feed_dict)
        costs += cost  # Accumulate the cost.
        iters += model.num_steps

        # That is, print ten perplexity values per epoch.
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * model.batch_size / (time.time() - start_time)))

    return np.exp(costs / iters)
def run_epoch(session, m, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        # Debugging: inspect the first (x, y) batch and stop. With this
        # break in place the loop body below never runs; remove these
        # three lines to actually train.
        print('this is x', len(x), x)
        print('this is y', len(y), y)
        break
        cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                                     {m.input_data: x,
                                      m.targets: y,
                                      m.initial_state: state})
        costs += cost
        iters += m.num_steps

        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))

    return np.exp(costs / iters)
def run_epoch(session, m, data, eval_op, writer, verbose=False, epoch=None):
    """Runs the model on the given data."""
    dataLength = len(data)
    epoch_size = ((dataLength // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    summary_op = tf.merge_all_summaries()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        if verbose:
            cost, state, _, summary_str = session.run(
                [m.cost, m.final_state, eval_op, summary_op],
                {m.input_data: x,
                 m.targets: y,
                 m.initial_state: state})
            writer.add_summary(summary_str, epoch_size * epoch + step)
        else:
            cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                                         {m.input_data: x,
                                          m.targets: y,
                                          m.initial_state: state})
        costs += cost
        iters += m.num_steps

        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))

    return np.exp(costs / iters)
def run_epoch(session, m, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                                     {m.input_data: x,
                                      m.targets: y,
                                      m.initial_state: state})
        costs += cost
        iters += m.num_steps

        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))

    if verbose:
        checkpoint_path = os.path.join(
            "/Users/tenylong/Documents/RNN/ptb_model.ckpt")
        m.saver.save(session, checkpoint_path, global_step=m.global_step)
    return np.exp(costs / iters)
def run_epoch(session, m, data, eval_op, verbose=False,
              summary_op=None, summary_writer=None):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    merged = tf.no_op()
    if summary_op is not None:
        merged = summary_op
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        summaries, cost, state, _ = session.run(
            [merged, m.cost, m.final_state, eval_op],
            {m.input_data: x,
             m.targets: y,
             m.initial_state: state})
        costs += cost
        iters += m.num_steps

        if step % (epoch_size // 10) == 10:
            perplexity = np.exp(costs / iters)
            if verbose:
                print("%.3f perplexity: %.3f speed: %.0f wps" %
                      (step * 1.0 / epoch_size, perplexity,
                       iters * m.batch_size / (time.time() - start_time)))
            if summary_writer is not None:
                # Note: building a new scalar_summary op on every report
                # grows the graph; creating it once outside the loop and
                # feeding the value would be cheaper.
                summary_writer.add_summary(
                    tf.scalar_summary("perplexity",
                                      tf.constant(perplexity)).eval(), step)
            if summary_writer is not None and summaries is not None:
                summary_writer.add_summary(summaries, step)

    return np.exp(costs / iters)
def run_epoch(session, model, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(model.initial_state)
    for step, (x, y) in enumerate(
            ptb_iterator(data, model.batch_size, model.num_steps)):
        fetches = [model.cost, model.final_state, eval_op]
        feed_dict = {
            model.input_data: x,
            model.targets: y,
        }
        for i, (c, h) in enumerate(model.initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h
        cost, state, _ = session.run(fetches, feed_dict)
        costs += cost
        iters += model.num_steps

        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * model.batch_size / (time.time() - start_time)))

    return np.exp(costs / iters)
def testPtbIterator(self):
    raw_data = [4, 3, 2, 1, 0, 5, 6, 1, 1, 1, 1, 0, 3, 4, 1]
    batch_size = 3
    num_steps = 2
    output = list(reader.ptb_iterator(raw_data, batch_size, num_steps))
    self.assertEqual(len(output), 2)
    o1, o2 = (output[0], output[1])
    self.assertEqual(o1[0].shape, (batch_size, num_steps))
    self.assertEqual(o1[1].shape, (batch_size, num_steps))
    self.assertEqual(o2[0].shape, (batch_size, num_steps))
    self.assertEqual(o2[1].shape, (batch_size, num_steps))
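# A worked check of why the test above expects exactly two batches,
# assuming the iterator semantics sketched earlier: 15 tokens split across
# batch_size = 3 rows give batch_len = 15 // 3 = 5, and with num_steps = 2
# there are epoch_size = (5 - 1) // 2 = 2 windows, each of shape (3, 2).
raw_data_len = 15
batch_len = raw_data_len // 3      # 5 tokens per row
epoch_size = (batch_len - 1) // 2  # 2 (x, y) batches per epoch
assert epoch_size == 2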
def fit(self, train_data, dev_data):
    dev_iterator = reader.ptb_iterator(dev_data, self.batch_size,
                                       self.seq_length)
    x_dev_batch, y_dev_batch = next(dev_iterator)
    with tf.Session(graph=self.graph) as sess:
        sw = tf.train.SummaryWriter(self.log_dir, sess.graph)
        print('Initializing all variables')
        sess.run(tf.initialize_all_variables())
        # Restore the embedding if requested.
        if self.restore_embedding is True:
            print('restoring embedding: %s' %
                  self.embedding_chkpt.model_checkpoint_path)
            self.embedding_saver.restore(
                sess, save_path=self.embedding_chkpt.model_checkpoint_path)
        for i in range(self.num_epochs):
            train_iterator = reader.ptb_iterator(train_data, self.batch_size,
                                                 self.seq_length)
            for x_batch, y_batch in train_iterator:
                _, train_summaries, total_loss, current_step = self.train_step(
                    sess, x_batch, y_batch)
                sw.add_summary(train_summaries, current_step)
                if current_step % self.eval_freq == 0:
                    dev_summaries = self.dev_step(sess, x_dev_batch,
                                                  y_dev_batch)
                    sw.add_summary(dev_summaries, current_step)
                if current_step % self.save_freq == 0:
                    self.saver.save(sess, self.log_dir + '/rnn.chkp',
                                    global_step=current_step)
            epoch_acc = self.eval(sess, dev_data)
            print('Epoch: %d, Accuracy: %f' % (i + 1, epoch_acc))
        self.save(sess)
def main():
    data_directory = "data"
    word_to_id = reader._build_vocab(
        os.path.join(data_directory, "ptb.train.txt"))
    train, cv, test, _ = reader.ptb_raw_data(data_directory)
    train_batch_size = 128
    # The computed value is immediately overridden with a fixed 10 steps.
    train_num_steps = len(train) // train_batch_size - 1
    train_num_steps = 10
    ptb_iterator = reader.ptb_iterator(train, train_batch_size,
                                       train_num_steps)
    learner = Learner(word_to_id)
    learner.Train(ptb_iterator, train_batch_size, train_num_steps)
def eval(self, sess, test_data):
    test_iterator = reader.ptb_iterator(test_data, self.batch_size,
                                        self.seq_length)
    nb_step = 0
    avg_acc = 0
    for x_batch, y_batch in test_iterator:
        nb_step += 1
        avg_acc += sess.run(self.accuracy, feed_dict={
            self.x_plh: x_batch,
            self.y_plh: y_batch
        })
    avg_acc /= nb_step
    return avg_acc
def run_epoch(session, m, data, eval_op, Pstate=None, verbose=False,
              prev_state=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    if not prev_state:
        state = m.initial_state.eval()
    else:
        state = Pstate  # previous state
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, proba, _ = session.run(
            [m.cost, m.final_state, m.proba, eval_op],
            {m.input_data: x,
             m.targets: y,
             m.initial_state: state})
        costs += cost
        iters += m.num_steps
        if prev_state:
            # If a previous state exists, only calculate for one step.
            return (proba, state)
        #if verbose:  # and step % (epoch_size // 10) == 10:
        #    print("%.3f perplexity: %.3f speed: %.0f wps" %
        #          (step * 1.0 / epoch_size, np.exp(costs / iters),
        #           iters * m.batch_size / (time.time() - start_time)))
        #    print("probability:%.18f " % np.amax(proba))
        #    index = np.argmax(proba)
        #    print("word: ", index)
        #    print("cost: ", cost)
    return np.exp(costs / iters)
def run_epoch(session, m, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (left_batch, right_batch, relatedness_scores) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                                     {m.l_inputs: left_batch,
                                      m.r_inputs: right_batch,
                                      m.targets: relatedness_scores,
                                      m.initial_state: state})
        costs += cost
        iters += m.num_steps

        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))

    return np.exp(costs / iters)
def run_epoch(session, model, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(model.initial_state)
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        fetches = [model.cost, model.final_state, eval_op]
        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.targets] = y
        # Feed the previous state; its structure depends on the cell type.
        if FLAGS.model != "memory":
            if FLAGS.rnn == "lstm":
                for i, (c, h) in enumerate(model.initial_state):
                    feed_dict[c] = state[i].c
                    feed_dict[h] = state[i].h
            else:
                for i, h in enumerate(model.initial_state):
                    feed_dict[h] = state[i]
        else:
            for i, init_s in enumerate(model.initial_state):
                for s_i, s in enumerate(init_s):
                    feed_dict[s] = state[i][s_i]
        cost, state, _ = session.run(fetches, feed_dict)
        costs += cost
        iters += model.num_steps

        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * model.batch_size / (time.time() - start_time)))

    return np.exp(costs / iters)
def gen_epochs(n, num_steps, batch_size):
    for i in range(n):
        yield reader.ptb_iterator(data, batch_size, num_steps)
def _gen_epochs(self, data, num_epochs, batch_size, truncate_at):
    for _ in range(num_epochs):
        yield reader.ptb_iterator(data, batch_size, truncate_at)
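# A typical way to consume the epoch generators above (a hypothetical
# driver, not taken from any of these snippets): the outer loop walks the
# epochs, the inner loop walks the (x, y) batches of one epoch, carrying
# the RNN state across batches and resetting it between epochs. gen_epochs
# reads `data` as a module-level name, as in the snippet defining it.
def train_epochs_sketch(session, model, num_epochs, batch_size, num_steps):
    for epoch in gen_epochs(num_epochs, num_steps, batch_size):
        state = session.run(model.initial_state)  # reset once per epoch
        for x, y in epoch:
            state, _ = session.run([model.final_state, model.train_op],
                                   {model.input_data: x,
                                    model.targets: y,
                                    model.initial_state: state})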
def train_model(self, reload_checkpoint=False):
    # Set up the loss function and cost.
    loss = tf.models.rnn.seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self._targets, [-1])],
        [tf.ones([self.batch_size * self.num_steps])],
        self.vocab_size)
    _cost = cost = tf.reduce_sum(loss) / self.batch_size

    # The learning rate shall be a variable, not fixed.
    _lr = tf.Variable(self.learning_rate, trainable=False)

    # Get the clipped TensorFlow gradients.
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      self.max_grad_norm)

    # Instantiate the optimizer and the TensorFlow training operation.
    optimizer = tf.train.GradientDescentOptimizer(_lr)
    _train_op = optimizer.apply_gradients(zip(grads, tvars))

    print "---- Training model ----"

    # We use the PTB reader.
    from tensorflow.models.rnn.ptb import reader

    self.saver = tf.train.Saver()

    # Open a TensorFlow session and start training.
    with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=0.95))) as session:
        init = tf.initialize_all_variables()
        session.run(init)

        # Enables the model to continue from a previously saved checkpoint.
        if not reload_checkpoint:
            i_start = 0
            step_start = 0
        else:
            print "restoring model"
            self.saver.restore(session, self.model_path + "/lstm-model.ckpt")
            state = self._initial_state.eval()
            print "restoring training state"
            i_start, step_start, costs_start, iters_start, training_time = \
                pickle.load(open(self.model_path + "/lstm-model.info.pkl"))
            # (Training time is no longer needed; kept for compatibility.)
            print "restored model, will start at epoch", i_start, "at step", step_start

        writer = tf.train.SummaryWriter("/tmp/lstm_log", session.graph_def)

        stats_interval = 1000  # higher means more often

        # Start the training loop.
        for i in range(i_start, self.max_epochs):
            print "epoch " + str(i)
            epoch_size = ((len(self.data) // self.batch_size) - 1) \
                // self.num_steps
            print "Epoch size: ", epoch_size
            session.run(tf.assign(_lr, self.learning_rate * np.power(0.75, i)))
            print "Learning Rate:", self.learning_rate * np.power(0.75, i)
            start_time = time.time()
            costs = 0.0
            iters = 0
            state = self._initial_state.eval()
            for step, (x, y) in enumerate(
                    reader.ptb_iterator(self.data, self.batch_size,
                                        self.num_steps)):
                # When resuming, fast-forward through already-trained steps.
                if i == i_start and (step < step_start or
                                     (step == step_start and step > 0)):
                    if step == 0:
                        print "fast-forwarding...",
                        costs = costs_start
                        iters = iters_start
                        #start_time -= training_time
                        iters_start = 0
                    if step % 100 == 0:
                        print ".",
                    continue
                cost, state, _ = session.run(
                    [_cost, self._final_state, _train_op],
                    {self._input_data: x,
                     self._targets: y,
                     self._initial_state: state})
                costs += cost
                iters += self.num_steps

                stats_or_saved = False
                if step % (epoch_size // stats_interval) == 10:
                    print ("%.3f perplexity: %.3f speed: %.0f wps" %
                           (step * 1.0 / epoch_size, np.exp(costs / iters),
                            (self.num_steps * (epoch_size // stats_interval) *
                             self.batch_size) / (time.time() - start_time))),
                    start_time = time.time()
                    stats_or_saved = True

                # Save the model and meta info from time to time.
                if step % (epoch_size // 200) == 10:
                    tmp_start = time.time()
                    print "(saving...",
                    sys.stdout.flush()
                    save_path = self.saver.save(
                        session, self.model_path + "/lstm-model.ckpt")
                    f = open(self.model_path + "/lstm-model.info.pkl", "w")
                    pickle.dump([i, step, costs, iters,
                                 time.time() - tmp_start], f)
                    f.close()
                    print "ok)",
                    stats_or_saved = True

                if stats_or_saved:
                    print ""
def run_epoch(session, m, data, eval_op, train_dir, steps_per_ckpt,
              train=False, start_idx=0, start_state=None, tmpfile=None,
              m_valid=None, valid_data=None, epoch=None):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    if train:
        logging.info("Training data_size=%s batch_size=%s epoch_size=%s "
                     "start_idx=%i global_step=%s" %
                     (len(data), m.batch_size, epoch_size, start_idx,
                      m.global_step.eval()))
    else:
        logging.info("Val/Test data_size=%s batch_size=%s epoch_size=%s "
                     "start_idx=%i" %
                     (len(data), m.batch_size, epoch_size, start_idx))
    start_time = time.time()
    costs = 0.0
    iters = 0
    if start_idx == 0:
        state = m.initial_state.eval()
    else:
        state = start_state
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps, start_idx),
            start=1 + start_idx):
        if train:
            logging.debug("Epoch=%i start_idx=%i step=%i global_step=%i" %
                          (epoch, start_idx, step, m.global_step.eval()))
        cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                                     {m.input_data: x,
                                      m.targets: y,
                                      m.initial_state: state})
        costs += cost
        iters += m.num_steps
        if train and step % 100 == 0:
            logging.info("Global step = %i" % m.global_step.eval())
        #if train and step % (epoch_size // 10) == 10:
        #    logging.info("%.3f perplexity: %.3f speed: %.0f wps" %
        #                 (step * 1.0 / epoch_size, np.exp(costs / iters),
        #                  iters * m.batch_size / (time.time() - start_time)))
        if train and step % steps_per_ckpt == 0:
            logging.info("Time: {}".format(
                datetime.datetime.strftime(datetime.datetime.now(),
                                           '%Y-%m-%d %H:%M:%S')))
            logging.info("%.3f perplexity: %.3f speed: %.0f wps" %
                         (step * 1.0 / epoch_size, np.exp(costs / iters),
                          iters * m.batch_size / (time.time() - start_time)))
            checkpoint_path = os.path.join(train_dir, "rnn.ckpt")
            finished_idx = step - 1
            logging.info("Save model to path=%s after training_idx=%s "
                         "and global_step=%s" %
                         (checkpoint_path, finished_idx,
                          m.global_step.eval()))
            m.saver.save(session, checkpoint_path, global_step=m.global_step)
            # Save train variables.
            with open(tmpfile, "wb") as f:
                # Training idx = step - 1, so we want to resume from
                # idx = step. If we had already restarted from start_idx,
                # this gives the offset.
                training_idx = step
                logging.info("Save epoch=%i and training_idx=%i and state "
                             "to resume from" % (epoch, training_idx))
                pickle.dump((epoch, training_idx, state), f,
                            pickle.HIGHEST_PROTOCOL)
            # Get a random validation batch and evaluate.
            data_len = len(valid_data)
            batch_len = data_len // m_valid.batch_size
            epoch_size = (batch_len - 1) // m_valid.num_steps
            from random import randint
            rand_idx = randint(0, epoch_size - 1)
            (x_valid, y_valid) = reader.ptb_iterator(
                valid_data, m_valid.batch_size, m_valid.num_steps,
                rand_idx).next()
            cost_valid, _, _ = session.run(
                [m_valid.cost, m_valid.final_state, tf.no_op()],
                {m_valid.input_data: x_valid,
                 m_valid.targets: y_valid,
                 m_valid.initial_state: m_valid.initial_state.eval()})
            valid_perplexity = np.exp(cost_valid / m_valid.num_steps)
            logging.info("Perplexity for random validation index=%i: %.3f" %
                         (rand_idx, valid_perplexity))
    return np.exp(costs / iters)
def gen_epoch_data(num_epochs, batch_size, seq_length):
    for i in range(num_epochs):
        yield reader.ptb_iterator(data, batch_size, seq_length)
def fetch_epochs(corpus, num_epochs, num_unrolls, batch_size):
    for t in range(num_epochs):
        yield reader.ptb_iterator(corpus, batch_size, num_unrolls)
# Since we did not define a graph before creating our operations,
# they all went into the default graph that TF creates for us beforehand.
graph = tf.get_default_graph()

# We are going to use a helper function to create our inputs and labels.
# The ptb_iterator is just a helper that shifts all the words by one and
# returns seq_length words for you to predict the next output --
# very handy for RNNs, or for our case:
seq_length = 1  # We predict the next word from its previous word alone.
nb_epochs = 200
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for i in range(nb_epochs):
        input_gen = reader.ptb_iterator(id_corpus, corpus_length // 6,
                                        seq_length)
        for x_batch, y_true_batch in input_gen:
            to_compute = [train_op, loss_op, global_step_tensor]
            feed_dict = {x: x_batch, y_true: y_true_batch}
            _, loss, global_step = sess.run(to_compute, feed_dict=feed_dict)
        if i % 10 == 0:
            print('Epoch %d/%d - loss:%f' % (i, nb_epochs, loss))

    # We compute the final accuracy.
    x_batch = []
    y_batch = []
    for i, word in enumerate(id_corpus):
        if i == len(id_corpus) - 1:
            continue
        x_batch.append([id_corpus[i]])
def run_epoch(session, m, data, eval_op, train_dir, steps_per_ckpt,
              train=False, start_idx=0, tmpfile=None, m_valid=None,
              valid_data=None):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    logging.info("Data_size=%s batch_size=%s epoch_size=%s" %
                 (len(data), m.batch_size, epoch_size))
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps, start_idx),
            start=1 + start_idx):
        cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                                     {m.input_data: x,
                                      m.targets: y,
                                      m.initial_state: state})
        costs += cost
        iters += m.num_steps
        if train and step % 100 == 0:
            logging.info("Global step = %i" % m.global_step.eval())
        #if train and step % (epoch_size // 10) == 10:
        #    logging.info("%.3f perplexity: %.3f speed: %.0f wps" %
        #                 (step * 1.0 / epoch_size, np.exp(costs / iters),
        #                  iters * m.batch_size / (time.time() - start_time)))
        if train and step % steps_per_ckpt == 0:
            logging.info("Time: {}".format(
                datetime.datetime.strftime(datetime.datetime.now(),
                                           '%Y-%m-%d %H:%M:%S')))
            logging.info("%.3f perplexity: %.3f speed: %.0f wps" %
                         (step * 1.0 / epoch_size, np.exp(costs / iters),
                          iters * m.batch_size / (time.time() - start_time)))
            # Save train variable.
            with open(tmpfile, "wb") as f:
                # Training idx = step - 1, so we want to resume from
                # idx = step. If we had already restarted from start_idx,
                # this gives the offset.
                resume_from = step
                pickle.dump(resume_from, f, pickle.HIGHEST_PROTOCOL)
            checkpoint_path = os.path.join(train_dir, "rnn.ckpt")
            finished_idx = step - 1
            logging.info("Save model to path=%s after training_idx=%s "
                         "and global_step=%s" %
                         (checkpoint_path, finished_idx,
                          m.global_step.eval()))
            m.saver.save(session, checkpoint_path, global_step=m.global_step)
            # Get a random validation batch and evaluate.
            data_len = len(valid_data)
            batch_len = data_len // m_valid.batch_size
            epoch_size = (batch_len - 1) // m_valid.num_steps
            from random import randint
            rand_idx = randint(0, epoch_size - 1)
            (x_valid, y_valid) = reader.ptb_iterator(
                valid_data, m_valid.batch_size, m_valid.num_steps,
                rand_idx).next()
            cost_valid, _, _ = session.run(
                [m_valid.cost, m_valid.final_state, tf.no_op()],
                {m_valid.input_data: x_valid,
                 m_valid.targets: y_valid,
                 m_valid.initial_state: m_valid.initial_state.eval()})
            valid_perplexity = np.exp(cost_valid / m_valid.num_steps)
            logging.info("Perplexity for random validation index=%i: %.3f" %
                         (rand_idx, valid_perplexity))
    return np.exp(costs / iters)
###########################
# Use the ptb_raw_data function to read the raw PTB data and convert the
# words in it to word IDs.
###########################
from tensorflow.models.rnn.ptb import reader

# Path to the raw data.
DATA_PATH = "/path/to/ptb/data"
train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH)

# Inspect the raw data.
print(len(train_data))
print(train_data[:100])

###########################
# Truncate the data and organize it into batches; TensorFlow provides the
# ptb_iterator function for this.
###########################
from tensorflow.models.rnn.ptb import reader

# Read the raw data as above.
DATA_PATH = "/path/to/ptb/data"
train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH)

# Organize the training data into batches of size 4 with truncation
# length 5.
result = reader.ptb_iterator(train_data, 4, 5)

# Read the data in the first batch, which includes the input at each time
# step and the corresponding correct output.
x, y = result.next()
print("X:", x)
print("y:", y)
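# What the two print statements above should show (an expectation derived
# from the iterator semantics sketched earlier, not captured program
# output): X and y are both int32 arrays of shape (4, 5), and y is X
# shifted one word to the right in the corpus. In miniature, with a toy
# corpus of 12 word IDs:
toy = list(range(12))
x0, y0 = next(ptb_iterator_sketch(toy, 2, 3))
# x0 == [[0, 1, 2], [6, 7, 8]]
# y0 == [[1, 2, 3], [7, 8, 9]]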
    return new_char


if __name__ == "__main__":
    file_name = 'tinyshakespeare.txt'
    save_path = "saves/Shakespeare_epoch10_state100.ckpt"
    data = adjust_data(file_name)
    num_classes = chars_size

    # Generate the epochs.
    epochs = []
    num_epoch = 5
    for i in range(num_epoch):
        epochs.append(reader.ptb_iterator(data, batch_size=50, num_steps=80))

    # Create the graphs.
    train_graph_object = create_graph_input(num_steps=80, cell_size=256,
                                            batch_size=50,
                                            num_classes=num_classes)
    chargen_graph_object = create_graph_input(num_steps=1, cell_size=256,
                                              batch_size=1,
                                              num_classes=num_classes)
    train_graph, chargen_graph = build_multiple_graphs(
        [train_graph_object, chargen_graph_object])
    #chargen_graph = build_multiple_graphs([chargen_graph_object])[0]
    train(train_graph, epochs, save_path)