def run_one_epoch(self, sess, train_data, test_data, tgt_vocab, epoch, saver):
    """Train the model for one epoch over `train_data`.

    :param sess: active TensorFlow session
    :param train_data: training examples
    :param test_data: held-out data (unused here; kept for interface parity)
    :param tgt_vocab: target vocabulary (unused here; kept for interface parity)
    :param epoch: zero-based epoch index
    :param saver: tf.train.Saver used to checkpoint on the last batch
    """
    # Ceiling division so a final partial batch is counted.
    num_batches = (len(train_data) + self.batch_size - 1) // self.batch_size
    start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    batches = batch_yield(train_data, self.batch_size)
    for step, (seqs, labels) in enumerate(batches):
        sys.stdout.write('processing: {} batch / {} batches.'.format(
            step + 1, num_batches) + '\r')
        # Global step across all epochs, 1-based.
        step_num = epoch * num_batches + step + 1
        feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                          self.dropout_keep_prob)
        _, loss_train, summary, step_num_ = sess.run(
            [self.train_op, self.loss, self.merged, self.global_step],
            feed_dict=feed_dict)
        # Log on the first, every 300th, and the final batch.
        if step + 1 == 1 or (step + 1) % 300 == 0 or step + 1 == num_batches:
            self.logger.info(
                '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(
                    start_time, epoch + 1, step + 1, loss_train, step_num))
        self.file_writer.add_summary(summary, step_num)
        if step + 1 == num_batches:
            saver.save(sess, self.model_path, global_step=step_num)
    # NOTE(review): the original ended with an unterminated ''' literal that
    # began a commented-out validation/test section; it was a syntax error
    # and has been removed.
def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
    """Run a single training epoch over `train`, then validate on `dev`."""
    total_batches = (len(train) + self.batch_size - 1) // self.batch_size
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    # batch_yield produces (seqs, labels) mini-batches.
    batch_iter = batch_yield(train, self.batch_size, self.vocab,
                             self.tag2label, shuffle=self.shuffle)
    for idx, (seqs, labels) in enumerate(batch_iter):
        batch_no = idx + 1
        sys.stdout.write(' processing: {} batch / {} batches.'.format(
            batch_no, total_batches) + '\r')
        step_num = epoch * total_batches + batch_no
        feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                          self.dropout_keep_prob)
        fetches = [self.train_op, self.loss, self.merged, self.global_step]
        _, loss_train, summary, step_num_ = sess.run(fetches, feed_dict=feed_dict)
        should_log = batch_no == 1 or batch_no % 300 == 0 or batch_no == total_batches
        if should_log:
            self.logger.info(
                '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(
                    stamp, epoch + 1, batch_no, loss_train, step_num))
        # Record the loss summary for TensorBoard.
        self.file_writer.add_summary(summary, step_num)
        if batch_no == total_batches:
            saver.save(sess, self.model_path, global_step=step_num)
    self.logger.info('===========validation / test===========')
    label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
    self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
def test_one_epoch(self, sess, test_data):
    """Predict labels for every batch of `test_data`.

    :return: (label_list, seq_len_list) accumulated over all batches
    """
    predictions, lengths = [], []
    for seqs, labels in batch_yield(test_data, self.batch_size):
        batch_preds, batch_lens = self.predict_one_batch(sess, seqs)
        predictions.extend(batch_preds)
        lengths.extend(batch_lens)
    return predictions, lengths
def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
    """Train for one epoch, checkpoint at the last batch, then validate.

    :param sess: TensorFlow session
    :param train: training set
    :param dev: validation set
    :param tag2label: tag mapping (self.tag2label is what is actually used)
    :param epoch: zero-based epoch index
    :param saver: checkpoint saver
    """
    total = (len(train) + self.batch_size - 1) // self.batch_size
    started = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    for idx, (seqs, labels) in enumerate(
            batch_yield(train, self.batch_size, self.vocab, self.tag2label,
                        shuffle=self.shuffle)):
        sys.stdout.write(' processing: {} batch / {} batches. \n'.format(
            idx + 1, total) + '\r')
        # Stored on the instance so other methods can read the step count.
        self.step_num = epoch * total + idx + 1
        feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                          self.dropout_keep_prob)
        _, loss_train, summary, step_num_ = sess.run(
            [self.train_op, self.loss, self.merged, self.global_step],
            feed_dict=feed_dict)
        if idx == 0 or (idx + 1) % 300 == 0 or idx + 1 == total:
            self.logger.info(
                '{} epoch {}, step {}, loss: {:.4}, global_step: {} '.format(
                    started, epoch + 1, idx + 1, loss_train, self.step_num))
        self.file_writer.add_summary(summary, self.step_num)
        if idx + 1 == total:
            saver.save(sess, self.model_path, global_step=self.step_num)
    self.logger.info('===========validation / test===========')
    label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
    self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
def run_one_epoch(self, sess, train_data_source, train_data_len, tag2label, epoch):
    """One training pass over a data source whose length is given explicitly.

    :param sess: TensorFlow session
    :param train_data_source: iterable of training samples
    :param train_data_len: number of samples (source may be a generator)
    :param tag2label: tag mapping (self.tag2label is what is actually used)
    :param epoch: zero-based epoch index
    """
    total = (train_data_len + self.batch_size - 1) // self.batch_size
    begun = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    # Note: unlike sibling variants, this one does not shuffle.
    gen = batch_yield(train_data_source, self.batch_size, self.word2id,
                      self.tag2label)
    for idx, (seqs, labels) in enumerate(gen):
        sys.stdout.write('processing: {} batch / {} batches.'.format(
            idx + 1, total) + '\r')
        global_step_num = epoch * total + idx + 1
        feed_dict, _ = self.get_feed_dict(seqs, labels, self.dropout_keep_prob)
        _, loss_train, _ = sess.run(
            [self.train_op, self.loss, self.global_step], feed_dict=feed_dict)
        if idx == 0 or (idx + 1) % 300 == 0 or idx + 1 == total:
            self.logger.info(
                '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(
                    begun, epoch + 1, idx + 1, loss_train, global_step_num))
        if idx + 1 == total:
            self.saver.save(sess, self.model_file_prefix,
                            global_step=global_step_num)
def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
    """Train one epoch; logs periodically and checkpoints on the last batch.

    Note: this variant collects dev predictions at the end but does not
    call evaluate (preserved as-is).
    """
    # Ceiling division so a partial final batch is still counted.
    num_batches = (len(train) + self.batch_size - 1) // self.batch_size
    start_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    data_iter = batch_yield(train, self.batch_size, self.vocab, self.tag2label,
                            shuffle=self.shuffle)
    for step, (seqs, labels) in enumerate(data_iter):
        sys.stdout.write(f'precessing {step + 1} batch / {num_batches} batches\r')
        # Running global step across epochs.
        step_num = epoch * num_batches + step + 1
        feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                          self.dropout_keep_prob)
        _, loss_train, train_summary, _ = sess.run(
            [self.train_op, self.loss, self.merged, self.glob_step],
            feed_dict=feed_dict)
        is_first = (step + 1) == 1
        is_periodic = (step + 1) % 300 == 0
        is_last = (step + 1) == num_batches
        if is_first or is_periodic or is_last:
            self.logger.info(
                f'{start_time}: epoch {epoch+1}, '
                f'step {step+1}, loss:{loss_train:.4}, global step:{step_num}'
            )
        self.file_writer.add_summary(summary=train_summary, global_step=step_num)
        if is_last:
            # Checkpoint on the final batch of the epoch.
            saver.save(sess, self.model_path, global_step=step_num)
    self.logger.info('===========validation / test===========')
    label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
def run_one_epoch(self, sess, train, dev, epoch, saver):
    """One training epoch with periodic prints; checkpoints every 5th epoch."""
    n_batches = (len(train) + self.batch_size - 1) // self.batch_size
    begun_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    for step, (seqs, labels, lengths) in enumerate(
            batch_yield(train, self.batch_size, shuffle=True)):
        batch_no = step + 1
        if batch_no % 10 == 0:
            print(' processing: {} batch / {} batches.'.format(
                batch_no, n_batches) + '\r')
        step_num = epoch * n_batches + batch_no
        # This variant's get_feed_dict takes lengths first and returns only
        # the feed dict.
        feed_dict = self.get_feed_dict(lengths, seqs, labels, self.lr,
                                       self.dropout_keep_prob)
        _, loss_train, step_num_ = sess.run(
            [self.train_op, self.loss, self.global_step], feed_dict=feed_dict)
        if batch_no == 1 or batch_no % 10 == 0 or batch_no == n_batches:
            print('{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(
                begun_at, epoch + 1, batch_no, loss_train, step_num))
        # Save only at the end of every 5th epoch.
        if batch_no == n_batches and (epoch + 1) % 5 == 0:
            saver.save(sess, self.model_path, global_step=step_num)
    print('===========validation epoch{}==========='.format(epoch + 1))
    label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
    self.evaluate(label_list_dev, seq_len_list_dev, dev, file=self.valid_result)
def demo_one(self, sess, sent):
    """Tag a single demo sentence and return its textual tag sequence.

    :param sess: TensorFlow session
    :param sent: demo input in batch_yield's expected format
    :return: list of tags for the first (only) sentence
    """
    predicted = []
    for seqs, labels in batch_yield(sent, self.batch_size, self.vocab,
                                    self.tag2label, shuffle=False):
        batch_labels, _ = self.predict_one_batch(sess, seqs)
        predicted.extend(batch_labels)
    # Invert tag2label; label 0 deliberately stays numeric.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in self.tag2label.items()}
    tag = [label2tag[label] for label in predicted[0]]
    print('===mode.demo_one:', 'label_list=', predicted, ',label2tag=',
          label2tag, ',tag=', tag)
    return tag
def demo_one(self, sess, sent):
    """Predict the tag sequence for one demo sentence (label 0 stays 0)."""
    collected = []
    for seqs, _labels in batch_yield(sent, self.batch_size, self.vocab,
                                     self.tag2label, shuffle=False):
        preds, _ = self.predict_one_batch(sess, seqs)
        collected.extend(preds)
    # Reverse mapping from numeric label to tag string.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in self.tag2label.items()}
    return [label2tag[label] for label in collected[0]]
def predict_sentence(self, sess, demo_data):
    """Predict the tag sequence for one demo sentence.

    :return: list of tag strings for the first sentence
    """
    preds = []
    for seqs, labels in batch_yield(demo_data, self.batch_size, is_train=False):
        batch_preds, _ = self.predict_one_batch(sess, seqs, labels)
        preds.extend(batch_preds)
    # Every numeric label maps back to its tag (including 0 here).
    label2tag = {label: tag for tag, label in self.tag2label.items()}
    return [label2tag[label] for label in preds[0]]
def dev_one_epoch(self, sess, dev):
    """Run inference over the whole dev set.

    :return: (label_list, seq_len_list) accumulated over all batches
    """
    all_labels, all_lens = [], []
    for seqs, _ in batch_yield(dev, self.config.batch_size, self.vocab,
                               shuffle=False):
        batch_labels, batch_lens = self.predict_one_batch(sess, seqs)
        all_labels.extend(batch_labels)
        all_lens.extend(batch_lens)
    return all_labels, all_lens
def run_epoches(self, sess, train, dev, tag2label, saver, args):
    """Train for self.epoch_num epochs, validating each epoch and keeping
    only the checkpoint with the best dev F1.

    :param sess: TensorFlow session
    :param train: training data
    :param dev: validation data
    :param tag2label: tag mapping (self.tag2label is what is actually used)
    :param saver: checkpoint saver
    :param args: extra arguments forwarded to dev_one_epoch
    """
    best_f1 = 0  # best dev F1 seen so far
    for epoch in range(self.epoch_num):
        num_batches = (len(train) + self.batch_size - 1) // self.batch_size
        start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        batches = batch_yield(train, self.batch_size, self.vocab,
                              self.tag2label, shuffle=self.shuffle)
        for step, (seqs, labels) in enumerate(batches):
            sys.stdout.write(' processing: {} batch / {} batches.'.format(
                step + 1, num_batches) + '\r')
            step_num = epoch * num_batches + step + 1
            feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                              self.dropout_keep_prob)
            _, loss_train, summary, step_num_ = sess.run(
                [self.train_op, self.loss, self.merged, self.global_step],
                feed_dict=feed_dict)
            if step + 1 == 1 or (step + 1) % 300 == 0 or step + 1 == num_batches:
                self.logger.info(
                    '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(
                        start_time, epoch + 1, step + 1, loss_train, step_num))
            self.file_writer.add_summary(summary, step_num)
        self.logger.info('-----------验证集测试结果------------')
        label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev, args)
        _, _, f1 = self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
        # Only checkpoint when dev F1 improves; step_num holds the value
        # from the last batch of this epoch.
        if f1 > best_f1:
            best_f1 = f1
            saver.save(sess, self.model_path, global_step=step_num)
        print("BEST_F1: {}".format(best_f1))  # fixed message typo: was "BET_F1"
def dev_one_epoch(self, sess, dev):
    """Infer labels over the full dev/test set.

    :param sess: Session
    :param dev: testing data
    :return: (label_list, seq_len_list)
    """
    results, lens = [], []
    for seqs, _unused in batch_yield(dev, self.batch_size, self.vocab,
                                     self.tag2label, shuffle=False):
        batch_labels, batch_lens = self.predict_one_batch(sess, seqs)
        results.extend(batch_labels)
        lens.extend(batch_lens)
    return results, lens
def demo_one(self, sess, sent):
    """Return raw predicted label sequences for a demo sentence.

    :param sess: TensorFlow session
    :param sent: demo input in batch_yield's expected format
    :return: list of numeric label sequences (no tag conversion)
    """
    out = []
    for seqs, _ in batch_yield(sent, self.batch_size, shuffle=False):
        preds, _ = self.predict_one_batch(sess, seqs)
        out.extend(preds)
    return out
def dev_one_epoch(self, sess, dev):
    """Predict over the dev set.

    :return: (label_list, seq_len_list); lengths come from batch_yield,
             not from the predictor, in this variant.
    """
    preds, lens = [], []
    for seqs, labels, lengths in batch_yield(dev, self.batch_size):
        batch_preds = self.predict_one_batch(sess, seqs, lengths)
        preds.extend(batch_preds)
        lens.extend(lengths)
    return preds, lens
def dev_one_epoch(self, sess, dev):
    """Run prediction over every dev batch.

    :param sess: TensorFlow session
    :param dev: dev data
    :return: (label_list, seq_len_list)
    """
    labels_out, lens_out = [], []
    # Each batch yields word-id sequences plus gold labels (gold is unused).
    for seqs, _gold in data.batch_yield(dev, self.batch_size, self.vocab,
                                        self.tag2label, shuffle=False):
        batch_labels, batch_lens = self.predict_one_batch(sess, seqs)
        labels_out.extend(batch_labels)
        lens_out.extend(batch_lens)
    return labels_out, lens_out
def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
    """Train one epoch, save the model on the last batch, then evaluate on dev.

    :param sess: TensorFlow session
    :param train: training set
    :param dev: validation set
    :param tag2label: tag-to-label dict (self.tag2label is what is used)
    :param epoch: current epoch index (zero-based)
    :param saver: tf.train.Saver for checkpointing
    """
    batches_per_epoch = (len(train) + self.batch_size - 1) // self.batch_size
    launched = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    # Shuffled (per self.shuffle) mini-batch generator.
    shuffled = batch_yield(train, self.batch_size, self.vocab, self.tag2label,
                           shuffle=self.shuffle)
    for i, (seqs, labels) in enumerate(shuffled):
        sys.stdout.write(' processing: {} batch / {} batches.'.format(
            i + 1, batches_per_epoch) + '\r')
        step_num = epoch * batches_per_epoch + i + 1
        feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                          self.dropout_keep_prob)
        _, loss_train, summary, _ = sess.run(
            [self.train_op, self.loss, self.merged, self.global_step],
            feed_dict=feed_dict)
        if i == 0 or (i + 1) % 300 == 0 or i + 1 == batches_per_epoch:
            self.logger.info(
                '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(
                    launched, epoch + 1, i + 1, loss_train, step_num))
        self.file_writer.add_summary(summary, step_num)
        if i + 1 == batches_per_epoch:
            # Checkpoint with the global step appended to the filename.
            saver.save(sess, self.model_path, global_step=step_num)
    self.logger.info('===========validation / test===========')
    label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
    self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
    """Train one epoch (logging every 100 batches) and evaluate on dev.

    :param sess: TensorFlow session
    :param train: training set
    :param dev: validation set
    :param tag2label: tag mapping (self.tag2label is what is actually used)
    :param epoch: zero-based epoch index
    :param saver: checkpoint saver
    """
    num_batches = (len(train) + self.batch_size - 1) // self.batch_size
    # NOTE(review): in the mangled source, start_time, get_feed_dict and
    # sess.run were swallowed by stray '#' markers, which would raise
    # NameError at the log statement below; restored as live code.
    start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    batches = batch_yield(train, self.batch_size, self.vocab, self.tag2label,
                          shuffle=self.shuffle)
    for step, (seqs, labels) in enumerate(batches):
        sys.stdout.write(' processing: {} batch / {} batches.'.format(
            step + 1, num_batches) + '\r')
        step_num = epoch * num_batches + step + 1
        feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                          self.dropout_keep_prob)
        _, loss_train, step_num_ = sess.run(
            [self.train_op, self.loss, self.global_step], feed_dict=feed_dict)
        if step + 1 == 1 or (step + 1) % 100 == 0 or step + 1 == num_batches:
            self.logger.info(
                '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(
                    start_time, epoch + 1, step + 1, loss_train, step_num))
        if step + 1 == num_batches:
            saver.save(sess, self.model_path, global_step=step_num)
    self.logger.info('===========validation / test===========')
    label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
    self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
    print("\n")
def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
    """One epoch of training followed by dev-set evaluation.

    :param sess: session
    :param train: training set (fed self.batch_size samples at a time)
    :param dev: dev set
    :param tag2label: tag mapping (self.tag2label is used)
    :param epoch: zero-based epoch number
    :param saver: checkpoint saver
    """
    # Ceiling division: the batch count includes a final partial batch.
    num_batches = (len(train) + self.batch_size - 1) // self.batch_size
    start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())  # timestamp
    # batch_yield returns a generator of (seqs, labels) batches.
    batches = batch_yield(train, self.batch_size, self.vocab, self.tag2label,
                          shuffle=self.shuffle)
    for step, (seqs, labels) in enumerate(batches):
        # This variant prints progress with a newline rather than '\r'.
        sys.stdout.write(' processing: {} batch / {} batches.'.format(
            step + 1, num_batches) + '\n')
        step_num = epoch * num_batches + step + 1  # running global step
        feed_dict, seq_len_list = self.get_feed_dict(seqs, labels, self.lr,
                                                     self.dropout_keep_prob)
        op, loss_train, summary, step_num_ = sess.run(
            [self.train_op, self.loss, self.merged, self.global_step],
            feed_dict=feed_dict)
        first, last = step == 0, step + 1 == num_batches
        # Log on the first, every 300th, and the last batch.
        if first or (step + 1) % 300 == 0 or last:
            self.logger.info(
                '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(
                    start_time, epoch + 1, step + 1, loss_train, step_num))
        self.file_writer.add_summary(summary, step_num)
        if last:
            saver.save(sess, self.model_path, global_step=step_num)
    self.logger.info('===========validation / test===========')
    label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
    self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
    """Train one epoch, checkpoint at the end, then evaluate on the dev set.

    :param sess: TensorFlow session
    :param train: training data
    :param dev: dev data
    :param tag2label: label lookup table (self.tag2label is what is used)
    :param epoch: epoch index, zero-based
    :param saver: saver for model parameters
    """
    batch_count = (len(train) + self.batch_size - 1) // self.batch_size
    clock = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    # vocab maps word->id; the generator yields (seqs, labels) batches.
    feed_stream = batch_yield(train, self.batch_size, self.vocab,
                              self.tag2label, shuffle=self.shuffle)
    for pos, (seqs, labels) in enumerate(feed_stream):
        sys.stdout.write(' processing: {} batch / {} batches.'.format(
            pos + 1, batch_count) + '\r')
        step_num = epoch * batch_count + pos + 1
        feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                          self.dropout_keep_prob)
        run_result = sess.run(
            [self.train_op, self.loss, self.merged, self.global_step],
            feed_dict=feed_dict)
        loss_train, summary = run_result[1], run_result[2]
        if pos + 1 in (1, batch_count) or (pos + 1) % 300 == 0:
            self.logger.info(
                '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(
                    clock, epoch + 1, pos + 1, loss_train, step_num))
        self.file_writer.add_summary(summary, step_num)
        if pos + 1 == batch_count:
            saver.save(sess, self.model_path, global_step=step_num)
    self.logger.info('===========validation / test===========')
    label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
    self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
    # Train one epoch; on the final batch, write a checkpoint and also export
    # the graph via the SavedModel builder. Then run dev validation.
    num_batches = (len(train) + self.batch_size - 1) // self.batch_size
    start_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    batches = batch_yield(train, self.batch_size, self.vocab, self.tag2label, shuffle=self.shuffle)
    for step, (seqs, labels) in enumerate(batches):
        sys.stdout.write(' preprocessing: {} batch / {} batches.'.format(step+1, num_batches) + '\r')
        # Global step counter across epochs, 1-based.
        step_num = epoch * num_batches + step + 1
        feed_dict, _ = self.get_feed_dict(seqs, labels)
        _ , loss_train, summary, step_num_ = sess.run([self.train_op, self.loss, self.merged, self.global_step], feed_dict=feed_dict)
        # Log on the first, every 300th, and the last batch of the epoch.
        if step + 1 == 1 or (step + 1) % 300 == 0 or step + 1 == num_batches:
            self.logger.info(
                '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(start_time, epoch + 1, step + 1, loss_train, step_num)
            )
        self.file_writer.add_summary(summary, step_num)
        if step + 1 == num_batches:
            saver.save(sess, self.model_path, global_step=step_num)
            # # Save the model as a single frozen .pb file
            # graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, output_node_names=['proj/logits'])
            # tf.train.write_graph(graph, '.', 'graph.pb', as_text=False)
            # Use saved_model to export the model instead
            # NOTE(review): export path is hard-coded to a Windows drive.
            builder = tf.saved_model.builder.SavedModelBuilder('E:\\NER_LSTM\\model\\%s' % str(int(time.time())))
            inputs = {'input_x': tf.saved_model.utils.build_tensor_info(self.word_ids),
                      'sequence_length': tf.saved_model.utils.build_tensor_info(self.sequence_lengths)}
            outputs = {'output': tf.saved_model.utils.build_tensor_info(self.logits),
                       'transition_param': tf.saved_model.utils.build_tensor_info(self.transition_params)}
            signature = tf.saved_model.signature_def_utils.build_signature_def(inputs, outputs, 'test_sig_name')
            builder.add_meta_graph_and_variables(sess, ['test_saved_model'], {'test_signature': signature})
            builder.save()
    self.logger.info('======== validation ========')
    label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
    self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
def dev_one_epoch(self, sess, dev):
    """Predict over each dev batch; collect all labels and lengths.

    :param sess: session
    :param dev: dev data
    :return: (label_list, seq_len_list) for later evaluation
    """
    labels_acc, lens_acc = [], []
    for seqs, _ in batch_yield(dev, self.batch_size, self.vocab,
                               self.tag2label, shuffle=False):
        # Run prediction on a single batch.
        got_labels, got_lens = self.predict_one_batch(sess, seqs)
        labels_acc.extend(got_labels)
        lens_acc.extend(got_lens)
    return labels_acc, lens_acc
def demo_one(self, sess, sent):
    """Tag one demo sentence.

    batch_yield maps each character to its id and each tag to its
    tag2label value; predictions come back as numeric label sequences
    which are converted back to tags (label 0 stays numeric).
    """
    gathered = []
    for seqs, _ in data.batch_yield(sent, self.batch_size, self.vocab,
                                    self.tag2label, shuffle=False):
        preds, _ = self.predict_one_batch(sess, seqs)
        gathered.extend(preds)
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in self.tag2label.items()}
    return [label2tag[label] for label in gathered[0]]
def run_one_epoch(self, sess, train, epoch, saver, writer):
    """One training epoch with console-only progress logging.

    `saver` and `writer` are accepted for interface parity; summary
    writing is currently disabled.
    """
    n = (len(train) + self.batch_size - 1) // self.batch_size
    stream = batch_yield(train, self.batch_size, self.vocab, self.tag2label,
                         shuffle=self.shuffle)
    for k, (seqs, labels) in enumerate(stream):
        step_num = epoch * n + k + 1
        feed_dict, _ = self.get_feed_dict(seqs, labels)
        _, loss_train, _ = sess.run(
            [self.train_op, self.loss, self.global_step], feed_dict=feed_dict)
        # writer.add_summary(summary, global_step=step_num)
        if k == 0 or (k + 1) % 10 == 0 or k + 1 == n:
            print('epoch {}, step {}, loss: {:.4}, global_step: {}'.format(
                epoch + 1, k + 1, loss_train, step_num))
def dev_one_epoch(self, sess, dev, args):
    """Run inference over the full dev set.

    :param sess: TensorFlow session
    :param dev: dev data
    :param args: unused here; kept for interface compatibility with callers
    :return: (label_list, seq_len_list)
    """
    label_list, seq_len_list = [], []
    for seqs, labels in batch_yield(dev, self.batch_size, self.vocab,
                                    self.tag2label, shuffle=False):
        label_list_, seq_len_list_ = self.predict_one_batch(sess, seqs)
        label_list.extend(label_list_)
        seq_len_list.extend(seq_len_list_)
    # Removed dead local `count_batch`: it was incremented but never read.
    return label_list, seq_len_list
def demo_one(self, server, sent, verbose=None):
    """Tag one sentence, optionally printing debug info about each batch.

    :param server: session/server handle passed through to predict_one_batch
    :param sent: demo input in batch_yield's expected format
    :param verbose: truthy to print batch diagnostics
    """
    acc = []
    for seqs, _ in batch_yield(sent, self.batch_size, self.vocab,
                               self.tag2label, shuffle=False):
        if verbose:
            print('seqs', type(seqs), len(seqs), len(seqs[0]), seqs)
            print('batch_size', self.batch_size)
        preds, _ = self.predict_one_batch(server, seqs, verbose=verbose)
        acc.extend(preds)
    # Reverse map numeric labels to tags; label 0 stays numeric.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in self.tag2label.items()}
    return [label2tag[label] for label in acc[0]]
def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
    """Train one epoch with verbose per-batch logging, then validate on dev."""
    total = (len(train) + self.batch_size - 1) // self.batch_size
    self.logger.info("train lenght={} number_batches={}".format(
        len(train), total))
    batch_src = batch_yield(train, self.batch_size, self.vocab, self.tag2label,
                            shuffle=self.shuffle)
    kickoff = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    self.logger.info("=========={} epoch begin train, time is {}".format(
        epoch + 1, kickoff))
    for i, (seq, labels) in enumerate(batch_src):
        self.logger.info("======seq length======{}".format(len(seq)))
        sys.stdout.write(' processing: {} batch / {} batches.'.format(
            i + 1, total) + '\r')
        step_num = epoch * total + i + 1
        feed_dict, _ = self.get_feed_dict(seq, labels, self.lr,
                                          self.dropout_keep_prob)
        _, loss_train, summary, _ = sess.run(
            [self.train_op, self.loss, self.merged, self.global_step],
            feed_dict=feed_dict)
        # This variant re-reads the clock per batch for the log line.
        now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        if i == 0 or (i + 1) % 2 == 0 or i + 1 == total:
            self.logger.info(
                '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(
                    now, epoch + 1, i + 1, loss_train, step_num))
        self.file_writer.add_summary(summary, step_num)
        if i + 1 == total:
            self.logger.info("========save session========{}".format(
                self.model_path))
            saver.save(sess, self.model_path, global_step=step_num)
    self.logger.info("=============validation==========")
    label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
    self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
def demo_one(self, sess, sent):
    """Tag one sentence; every numeric label is mapped to its tag string.

    :param sess: TensorFlow session
    :param sent: demo input in batch_yield's expected format
    """
    preds = []
    for seqs, _ in batch_yield(sent, self.batch_size, self.vocab,
                               self.tag2label, shuffle=False):
        batch_preds, _ = self.predict_one_batch(sess, seqs)
        preds.extend(batch_preds)
    # Unlike sibling demo_one variants, label 0 is converted to its tag too.
    label2tag = {label: tag for tag, label in self.tag2label.items()}
    return [label2tag[label] for label in preds[0]]
def demo_many(self, sess, sent):
    """Tag many sentences, printing a running progress fraction.

    :return: numeric label sequences, one list per sentence (no tag
             conversion — the label2tag mapping is disabled here).
    """
    raw = []
    done = 0
    for seqs, _ in batch_yield(sent, self.batch_size, self.vocab,
                               self.tag2label, shuffle=False):
        done += self.batch_size
        batch_preds, _ = self.predict_one_batch(sess, seqs)
        raw.extend(batch_preds)
        print(done / len(sent))
    return [[label for label in labels] for labels in raw]
def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
    """Run a full training epoch, checkpoint on the final batch, evaluate.

    :param sess: session
    :param train: training data (e.g. 50658 samples -> 792 batches at 64)
    :param dev: dev data
    :param tag2label: tag mapping (self.tag2label is what is actually used)
    :param epoch: zero-based epoch counter
    :param saver: checkpoint saver
    """
    batch_total = (len(train) + self.batch_size - 1) // self.batch_size
    epoch_start = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    for j, (seqs, labels) in enumerate(
            batch_yield(train, self.batch_size, self.vocab, self.tag2label,
                        shuffle=self.shuffle)):
        # sys.stdout.write == raw print to the standard output stream.
        sys.stdout.write(' processing: {} batch / {} batches.'.format(
            j + 1, batch_total) + '\r')
        step_num = epoch * batch_total + j + 1
        feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                          self.dropout_keep_prob)
        _, loss_train, summary, _ = sess.run(
            [self.train_op, self.loss, self.merged, self.global_step],
            feed_dict=feed_dict)
        if j == 0 or (j + 1) % 300 == 0 or j + 1 == batch_total:
            self.logger.info(
                '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(
                    epoch_start, epoch + 1, j + 1, loss_train, step_num))
        self.file_writer.add_summary(summary, step_num)
        if j + 1 == batch_total:
            # Save a checkpoint on the last batch of the epoch.
            saver.save(sess, self.model_path, global_step=step_num)
    self.logger.info('===========validation / test===========')
    label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
    self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)