def predict(self, sess, input, length):
    """Run the network on one batch and Viterbi-decode the tag path.

    NOTE(review): `return path` exits on the first decoded sequence, so
    only one path is ever returned even if `scores` holds a whole batch —
    confirm callers always pass a single sequence, otherwise this should
    accumulate paths.
    """
    feed_dict = self.create_feed_dict(input, None, length, is_training=False)
    fetch = [self.scores, self.trans_form]
    scores, trans_form = sess.run(fetch, feed_dict)
    for score_, length_ in zip(scores, length):
        # keep only the valid (unpadded) timesteps
        score = score_[:length_]
        path, _ = crf.viterbi_decode(score, trans_form)
        return path
def decode(logits, trans, sequence_lengths, tag_num):
    """Viterbi-decode a batch of emission scores with a synthetic start tag.

    Each score matrix gets an extra low-score column (for the start tag)
    and a prepended start row before decoding; the decoded start position
    is stripped so each output has the sequence's true length.
    """
    very_small = -1000.0
    start_row = np.asarray([[very_small] * tag_num + [0]])
    results = []
    for emission, true_len in zip(logits, sequence_lengths):
        valid = emission[:true_len]
        # extra column for the artificial start tag
        frame = np.concatenate([valid, very_small * np.ones([true_len, 1])], axis=1)
        frame = np.concatenate([start_row, frame], axis=0)
        best_seq, _ = viterbi_decode(frame, trans)
        results.append(best_seq[1:])  # drop the artificial start position
    return results
def decode(logits, trans, sequence_lengths, tag_num):
    """Viterbi-decode a batch of emission scores with a synthetic start tag.

    :param logits: per-sequence [num_steps, num_tags] emission scores
    :param trans: transition matrix used for inference
    :param sequence_lengths: true (unpadded) length of each sequence
    :param tag_num: number of real tags (excluding the synthetic start tag)
    :return: list of decoded tag-id sequences, each of its true length
    """
    viterbi_sequences = []
    small = -1000.0
    start = np.asarray([[small] * tag_num + [0]])
    for logit, length in zip(logits, sequence_lengths):
        score = logit[:length]
        pad = small * np.ones([length, 1])  # column for the start tag
        score = np.concatenate([score, pad], axis=1)
        score = np.concatenate([start, score], axis=0)
        viterbi_seq, viterbi_score = viterbi_decode(score, trans)
        # BUG FIX: the first decoded position corresponds to the artificial
        # start row prepended above; strip it (as the sibling decoders do)
        # so each output has exactly `length` labels.
        viterbi_sequences.append(viterbi_seq[1:])
    return viterbi_sequences
def __decode(self, logits, trans, sequence_lengths, tag_num):
    """Viterbi-decode a batch, padding each score matrix with a start tag.

    Returns one decoded tag-id sequence per input, with the synthetic
    start position removed.
    """
    very_small = -1000.0
    start_row = np.asarray([[very_small] * tag_num + [0]])
    decoded = []
    for emission, true_len in zip(logits, sequence_lengths):
        frame = np.concatenate(
            [emission[:true_len], very_small * np.ones([true_len, 1])], axis=1)
        frame = np.concatenate([start_row, frame], axis=0)
        best_seq, _ = viterbi_decode(frame, trans)
        decoded.append(best_seq[1:])  # strip the artificial start position
    return decoded
def predict_step(self, sess, x_batch):
    """Run the model on one batch and Viterbi-decode each sequence."""
    feed = {self.x: x_batch, self.dropout_keep_prob: 1.0}
    lengths, unary_scores, transition_param = sess.run(
        [self.seq_length, self.output, self.transition_params], feed)
    # decode only the valid (unpadded) timesteps of every sequence
    return [
        crf.viterbi_decode(scores[:n], transition_param)[0]
        for scores, n in zip(unary_scores, lengths)
    ]
def _predict_one_batch(self, sess, bat_sens, bat_seqs_len):
    """Decode one batch of sentences into lists of label ids."""
    feed = {
        self.sentences: bat_sens,
        self.sequences_len: bat_seqs_len,
        self.dropout_keep_prob: 1.0,
    }
    emissions, trans = sess.run(
        [self.hidden_scores, self.transition_params], feed_dict=feed)
    batch_labels = []
    for emission, true_len in zip(emissions, bat_seqs_len):
        decoded, _ = viterbi_decode(emission[:true_len], trans)
        batch_labels.append(list(decoded))
    return batch_labels
def test_accuraty(self, lengths, scores, trans_matrix, labels): total_labels = [] predict_labels = [] for score_, length_, label_ in zip(scores, lengths, labels): if length_ == 0: continue score = score_[:length_] path, _ = crf.viterbi_decode(score, trans_matrix) label_path = label_[:length_] predict_labels.extend(path) total_labels.extend(label_path) return total_labels, predict_labels
def decode(self, logits, lengths, matrix):
    """Viterbi-decode each sequence's emission scores into a tag-id path.

    :param logits: [batch_size, num_steps, num_tags] float32 emission scores
    :param lengths: [batch_size] int32 true length of each sequence (padding excluded)
    :param matrix: transition matrix used for inference
    :return: list of decoded tag-id paths
    """
    return [
        viterbi_decode(score[:true_len], matrix)[0]
        for score, true_len in zip(logits, lengths)
    ]
def decode(self, logits, lengths, matrix):
    """Viterbi-decode a batch with artificial start AND end tags.

    Two extra low-score columns are appended for the start/end tags, and
    a start row / end row are wrapped around each score matrix; both
    artificial positions are stripped from the decoded path.
    """
    very_small = -1000.0
    start_row = np.asarray([[very_small] * self._num_targets + [very_small] + [0]])
    end_row = np.asarray([[very_small] * self._num_targets + [0] + [very_small]])
    decoded_paths = []
    for emission, true_len in zip(logits, lengths):
        frame = np.concatenate(
            [emission[:true_len], very_small * np.ones([true_len, 2])], axis=1)
        frame = np.concatenate([start_row, frame, end_row], axis=0)
        best_path, _ = viterbi_decode(frame, matrix)
        decoded_paths.append(best_path[1:-1])  # drop start/end positions
    return decoded_paths
def decode(self, logits, lengths, matrix):
    """Viterbi-decode a batch after prepending an artificial start tag.

    Each emission matrix gains a low-score column for the start tag and
    a start row; the decoded start position is stripped from each path.
    """
    very_small = -1000.0
    start_row = np.asarray([[very_small] * self.num_tags + [0]])
    decoded = []
    for emission, true_len in zip(logits, lengths):
        frame = np.concatenate(
            [emission[:true_len], very_small * np.ones([true_len, 1])], axis=1)
        frame = np.concatenate([start_row, frame], axis=0)
        best_path, _ = viterbi_decode(frame, matrix)
        decoded.append(best_path[1:])  # drop the artificial start position
    return decoded
def prediction_one_batch(self, sess, seqs):
    """Predict label sequences for one batch.

    Uses CRF Viterbi decoding when `self.crf_` is set, otherwise the
    softmax argmax labels.

    NOTE(review): `dropout=0.1` at prediction time looks suspicious —
    sibling predictors pass 1.0 (no dropout) for inference; confirm the
    semantics of `get_feed_dict`'s dropout argument.
    """
    feed_dict, seq_len_list = self.get_feed_dict(seqs, dropout=0.1)
    if not self.crf_:
        return sess.run(self.labels_softmax_, feed_dict=feed_dict), seq_len_list
    logits, transition_params = sess.run(
        [self.logits_, self.transition_params_], feed_dict=feed_dict)
    label_list = [
        crf.viterbi_decode(logit[:seq_len], transition_params)[0]
        for logit, seq_len in zip(logits, seq_len_list)
    ]
    return label_list, seq_len_list
def out(self, sentences, out_file=None):
    """Tag `sentences` and return per-sentence predictions.

    :param sentences: iterable of sentence strings (each is split into
        characters); original docstring said two input formats are
        supported (a txt file or a list) — only the iterable-of-strings
        path is visible here.
    :param out_file: optional path; when a str, results are also written
        via self._out_file.
    :return: list of per-sentence prediction objects from get_prediction
    """
    sentences_list = [[char for char in sen] for sen in sentences]
    sentences_list, sequences_len = pad_sequences(sentences_list, self.maxLen)
    sentences_idx = sequences2idx(sentences_list, self.char2idx)
    # clamp true lengths to the padded maximum
    sequences_len = [
        seq if seq <= self.maxLen else self.maxLen for seq in sequences_len
    ]
    # BUG FIX: the original opened `fw = open(out_file, ...)` here and never
    # used or closed it — a leaked file handle; _out_file below does the
    # actual writing, so the dead open is removed.
    with tf.Session() as sess:
        self.saver.restore(sess, self.model_path + self.model_name)
        labels = [0] * len(sentences_idx)  # placeholder; batch_yield needs labels
        pred_labels = []
        for (bat_sens, _, bat_seqs_len) in batch_yield(sentences_idx, labels,
                                                       sequences_len, bs=500):
            feed_dict = {
                self.sentences: bat_sens,
                self.sequences_len: bat_seqs_len,
                self.dropout_keep_prob: 1.0
            }
            hidden_scores, transition_params = sess.run(
                [self.hidden_scores, self.transition_params],
                feed_dict=feed_dict)
            bat_labels = []
            for scocre, seq_len in zip(hidden_scores, bat_seqs_len):
                labs, _ = viterbi_decode(scocre[:seq_len], transition_params)
                bat_labels.append(list(labs))
            pred_labels += [[self.idx2tag[idx] for idx in labs]
                            for labs in bat_labels]
    result = []
    for one_lab, one_sen_str in zip(pred_labels, sentences):
        result.append(self.get_prediction(one_lab, one_sen_str))
    if type(out_file) == str:
        self._out_file(result, out_file)
    return result
def decode(self, logits, lengths, matrix):
    """Viterbi-decode each sequence, truncated to its true length.

    :param logits: [batch_size, num_steps, num_tags] float32 emission scores
    :param lengths: [batch_size] int32, real length of each sequence
    :param matrix: transition matrix used for inference
    :return: list of decoded tag-id paths
    """
    best_paths = []
    for emission, true_len in zip(logits, lengths):
        best, _ = viterbi_decode(emission[:true_len], matrix)
        best_paths.append(best)
    return best_paths
def predict_one_batch(self, sess, seqs):
    """Predict label sequences for one batch.

    decode_method == 1: softmax argmax labels; decode_method == 0: CRF
    Viterbi decoding truncated to each true length.

    :return: (label_list, seq_len_list)
    :raises ValueError: for any other decode_method value (the original
        silently fell off the end and returned None).
    """
    feed_dict, seq_len_list = self.update(seqs, dropout=1.0)
    if self.decode_method == 1:
        label_list = sess.run(self.labels_softmax_, feed_dict=feed_dict)
        return label_list, seq_len_list
    if self.decode_method == 0:
        logits, transition_params = sess.run(
            [self.logits, self.transition_params], feed_dict=feed_dict)
        label_list = []
        for logit, seq_len in zip(logits, seq_len_list):
            viterbi_seq, _ = viterbi_decode(logit[:seq_len], transition_params)
            label_list.append(viterbi_seq)
        return label_list, seq_len_list
    raise ValueError("unsupported decode_method: %r" % (self.decode_method,))
def predict_one_batch(model, sess, seqs):
    """Predict label sequences for one batch, CRF-decoding when model.CRF."""
    feed_dict, seq_len_list = get_feed_dict(model, seqs, dropout=1.0)
    if not model.CRF:
        return sess.run(model.labels_softmax_, feed_dict=feed_dict), seq_len_list
    logits, transition_params = sess.run(
        [model.logits, model.transition_params], feed_dict=feed_dict)
    labels = [
        viterbi_decode(logit[:seq_len], transition_params)[0]
        for logit, seq_len in zip(logits, seq_len_list)
    ]
    return labels, seq_len_list
def make_mask_test(logits_, sentence_legth, is_CRF=False, transition_params_=None):
    """Flatten per-sequence predictions, truncated to each true length.

    With is_CRF the scores are Viterbi-decoded first; otherwise the values
    are assumed to be labels already and are taken as-is.
    """
    flattened = []
    for scores, true_len in zip(logits_, sentence_legth):
        if is_CRF:
            decoded, _ = viterbi_decode(scores[:true_len], transition_params_)
        else:
            decoded = scores[:true_len]
        flattened.extend(decoded)
    return flattened
def make_mask(logits_, labels_, sentence_legth, is_CRF=False, transition_params_=None):
    """Flatten gold labels and predictions, truncated to each true length.

    Returns (flat_gold_labels, flat_predictions). With is_CRF the logits
    are Viterbi-decoded; otherwise they are used as labels directly.
    """
    flat_pred, flat_gold = [], []
    for scores, gold, true_len in zip(logits_, labels_, sentence_legth):
        if is_CRF:
            decoded, _ = viterbi_decode(scores[:true_len], transition_params_)
        else:
            decoded = scores[:true_len]
        flat_pred.extend(decoded)
        flat_gold.extend(gold[:true_len])
    return flat_gold, flat_pred
def inference_tgt(self, scores, sequence_lengths=None):
    """Infer target-task tag paths from emission scores.

    Without CRF: per-step argmax over the tag axis. With CRF: Viterbi
    decoding per sequence using the learned transition matrix; empty
    sequences keep all-zero paths.

    :param scores: [batch, num_steps, num_tags] emission scores
    :param sequence_lengths: [batch] true lengths (required when use_crf)
    :return: [batch, num_steps] int32 tag ids (padding left as 0)
    """
    if not self.use_crf:
        return np.argmax(scores, 2)
    else:
        with tf.variable_scope(self.scope_tgt_crf, reuse=True):
            transitions = tf.get_variable('transitions').eval(
                session=self.sess)
        paths = np.zeros(scores.shape[:2], dtype=np.int32)
        # BUG FIX: `xrange` is Python-2-only; `range` works on both.
        for i in range(scores.shape[0]):
            tag_score, length = scores[i], sequence_lengths[i]
            if length == 0:
                continue  # nothing to decode for an empty sequence
            path, _ = crf.viterbi_decode(tag_score[:length], transitions)
            paths[i, :length] = path
        return paths
def predict_labels(self, sess, inputs, max_sentence_num, max_sentence_length, sentslenlist, placelist, docslenlist):
    """Viterbi-decode document-level logits into label sequences.

    NOTE(review): each logit matrix is decoded at full length — no
    truncation by true sequence length happens here; confirm that is
    intended (siblings in this codebase truncate before decoding).
    """
    feed_dict = self.get_feed_dict(inputs, sentslenlist, placelist, docslenlist)
    feed_dict[self.doc2vecmodel.max_sentence_num] = max_sentence_num
    feed_dict[self.doc2vecmodel.max_sentence_length] = max_sentence_length
    logits = sess.run(self.classificalmodel.logits, feed_dict=feed_dict)
    return [
        viterbi_decode(logit, self.transition_params)[0]
        for logit in logits
    ]
def decode(self, logits, lengths, matrix):
    """Viterbi-decode a batch of emission scores into tag-id paths.

    :param logits: per-sequence [num_steps, num_tags] emission scores
    :param lengths: true (unpadded) length of each sequence
    :param matrix: transition matrix used for inference
    :return: list of decoded tag-id paths (artificial start stripped)
    """
    paths = []
    small = -1000.0
    # BUG FIX: original was `np.asarray([[small]*self.num_tags,+[0]])` —
    # the stray comma made `+[0]` a unary plus applied to a list, which
    # raises TypeError at runtime. Intended: one row of `num_tags` small
    # scores plus a 0 for the artificial start tag.
    start = np.asarray([[small] * self.num_tags + [0]])
    for score, length in zip(logits, lengths):
        score = score[:length]  # keep only the valid timesteps
        pad = small * np.ones([length, 1])  # column for the start tag
        padded = np.concatenate([score, pad], axis=-1)
        padded = np.concatenate([start, padded], axis=0)
        path, _ = viterbi_decode(padded, matrix)
        paths.append(path[1:])  # drop the artificial start position
    return paths
def predict_one_batch(self, sess, seqs):
    """Predict label sequences for one batch (CRF Viterbi when self.CRF)."""
    feed_dict, seq_len_list = self.get_feed_dict(seqs, dropout=1.0)
    if not self.CRF:
        return sess.run(self.label_pred, feed_dict=feed_dict), seq_len_list
    logits, transition_params = sess.run(
        [self.logits, self.transition_params], feed_dict=feed_dict)
    label_list = [
        viterbi_decode(logit[:seq_len], transition_params)[0]
        for logit, seq_len in zip(logits, seq_len_list)
    ]
    return label_list, seq_len_list
def predict(self, inputs, sequence_lengths):
    """Viterbi-decode label sequences for `inputs` (wrapped as one batch)."""
    feed = {
        self.inputs: [inputs],
        self.sequence_lengths: sequence_lengths,
        self.keep_prob: 1
    }
    logits, transition_params = self.session.run(
        [self.logits, self.transition_params], feed_dict=feed)
    return [
        viterbi_decode(score=logit[:true_len],
                       transition_params=transition_params)[0]
        for logit, true_len in zip(logits, sequence_lengths)
    ]
def predict_one_batch(self, sess, seqs):
    """Predict labels for one batch using the Viterbi algorithm.

    :param sess: TensorFlow session
    :param seqs: batch of input sequences
    :return: (label_list, seq_len_list)
    """
    feed_dict, seq_len_list = self.get_feed_dict(seqs, dropout=1.0)
    logits, transition_params = sess.run(
        [self.logits, self.transition_params], feed_dict=feed_dict)
    label_list = [
        viterbi_decode(logit[:seq_len], transition_params)[0]
        for logit, seq_len in zip(logits, seq_len_list)
    ]
    return label_list, seq_len_list
def predict(self, sess, seqs):
    """Pad a batch of sequences, run the model, and Viterbi-decode labels."""
    seq_pad, seq_length = process_seq(seqs)
    logits, transition_params = sess.run(
        [self.logits, self.transition_params],
        feed_dict={
            self.input_x: seq_pad,
            self.seq_length: seq_length,
            self.keep_pro: 1.0
        })
    # logit[:n] keeps only each clause's valid timesteps; Viterbi finds
    # the best-scoring label sequence under the transition matrix.
    return [
        viterbi_decode(logit[:n], transition_params)[0]
        for logit, n in zip(logits, seq_length)
    ]
def evaluate_step(self, sess, data):
    """Decode one evaluation batch.

    Returns (strings, predictions, tags), each truncated to the true
    length of its sequence.
    """
    strings, chars, segs, tags = data
    feed = {
        self.char_inputs: chars,
        self.seg_inputs: segs,
        self.dropout_keep: 1.0
    }
    logits, lengths, trans = sess.run(
        [self.logits, self.lengths, self.transition_params], feed)
    kept_strings, decoded, kept_tags = [], [], []
    for true_len, logit, string, tag in zip(lengths, logits, strings, tags):
        best, _ = crf.viterbi_decode(logit[:true_len], trans)
        decoded.append(best)
        kept_strings.append(string[:true_len])
        kept_tags.append(tag[:true_len])
    return kept_strings, decoded, kept_tags
def dev_step(self, sess, x_batch, x_dict, y_batch):
    """Run one dev batch; returns (loss, decoded label sequences)."""
    feed = {
        self.x: x_batch,
        self.dict: x_dict,
        self.y: y_batch,
        self.dropout_keep_prob: 1.0
    }
    loss, lengths, unary_scores, trans = sess.run(
        [self.loss, self.seq_length, self.output, self.transition_params],
        feed)
    decoded = []
    for scores, true_len in zip(unary_scores, lengths):
        if true_len == 0:
            continue  # skip empty sequences
        best, _ = crf.viterbi_decode(scores[:true_len], trans)
        decoded.append(best)
    return loss, decoded
def decode(self, logits, lengths, matrix):
    """Infer final label paths, via Viterbi when self.is_crf else argmax.

    :param logits: [batch_size, num_steps, num_tags] float32 emission scores
    :param lengths: [batch_size] int32, real length of each sequence
    :param matrix: transition matrix used for inference
    :return: list of decoded tag-id paths, each of its true length
    """
    paths = []
    for score, length in zip(logits, lengths):
        # BUG FIX: `length` was unpacked but never used, so padded
        # timesteps were decoded too; truncate to the real length first.
        score = score[:length]
        if self.is_crf:
            path, _ = viterbi_decode(score, matrix)
        else:
            path = tf.argmax(score, axis=-1).eval()
        paths.append(path)
    return paths
def pred_labels(self, x, y, seqs):
    """Viterbi-decode emission scores for a batch into label sequences.

    NOTE(review): each score matrix is decoded at its full (padded)
    length — `seq_lengths` is fed to the graph but never used to truncate
    here; confirm the logits are already length-masked upstream.
    """
    scores, transition_matrix = self.sess.run(
        [self.logit, self.transition],
        feed_dict={
            self.bilstm.input_x: x,
            self.bilstm.input_y: y,
            self.bilstm.seq_lengths: seqs,
            self.bilstm.dropout: 1.0
        })
    return [
        crf.viterbi_decode(score, transition_params=transition_matrix)[0]
        for score in scores
    ]
def test_accuraty(self, sess, inputs, labels):
    """Token-level accuracy (%) over a batch, via Viterbi decoding.

    Empty sequences are skipped; accuracy is correct tokens over total
    valid tokens, as a percentage.
    """
    trans = self.trans.eval()
    lengths, scores = self.run_step(sess, inputs, None, False)
    correct = 0
    total = 0
    for score_row, true_len, gold in zip(scores, lengths, labels):
        if true_len == 0:
            continue  # nothing to score
        best, _ = crf.viterbi_decode(score_row[:true_len], trans)
        correct += np.sum(np.equal(best, gold[:true_len]))
        total += true_len
    return 100.0 * correct / float(total)
def predict(self, sess, inputs, inputs_y=None):
    """Viterbi-decode tag paths for a batch.

    :param inputs: batch of input sequences
    :param inputs_y: optional gold labels; when given (non-empty), they
        are truncated to each true length and returned alongside paths.
    :return: paths, or (paths, paths_y) when inputs_y is supplied
    """
    # BUG FIX: the default was the mutable `inputs_y=[]` (shared across
    # calls); a None sentinel is backward compatible and safe.
    inputs_y = [] if inputs_y is None else inputs_y
    crf_trans_matrix = self.trans.eval()
    lengths, scores = self.run_step(sess, inputs, None, False)
    paths = []
    for score, length in zip(scores, lengths):
        score = score[:length]
        path, _ = crf.viterbi_decode(score, crf_trans_matrix)
        paths.append(path[:length])
    if len(inputs_y) != 0:
        paths_y = [y[:length] for y, length in zip(inputs_y, lengths)]
        return paths, paths_y
    return paths
def test(self, test_set):
    """Evaluate the model on test_set.

    test_set is (word_id_seqs, label_id_seqs, intents). Converts ids back
    to words/labels, restores the model, predicts slots (CRF Viterbi or
    softmax) and intents, then calls evaluate().

    NOTE(review): intent_predict is never assigned (stays None) even when
    intent_detection runs — the intent comparison loop is a TODO stub.
    """
    # map word ids back to word strings
    real_seq = list()
    for seq in test_set[0]:
        real_seq.append(list(map(lambda x: self.idx2words[x], seq)))
    # map label ids back to label strings
    real_label = list()
    for seq in test_set[1]:
        real_label.append(list(map(lambda x: self.idx2labels[x], seq)))
    real_test_set = (real_seq, real_label)
    slot_predict = None
    intent_predict = None
    saver = tf.train.Saver()
    with tf.Session(config=self.config) as sess:
        saver.restore(sess, self.model_path)
        feed_dict, seq_len_list = self.get_feed_dict(test_set[0])
        if self.slot_filling:
            if self.CRF:
                logits, transition_params = sess.run(
                    [self.logits_slot, self.transition_params],
                    feed_dict=feed_dict)
                slot_predicts = list()
                # Viterbi-decode each sequence at its true length
                for logit, seq_len in zip(logits, seq_len_list):
                    viterbi_seq, _ = viterbi_decode(
                        logit[:seq_len], transition_params)
                    slot_predicts.append(viterbi_seq)
            else:
                slot_predicts = sess.run(self.labels_softmax,
                                         feed_dict=feed_dict)
            # convert predicted label ids to label strings, truncated to
            # each original (unpadded) sequence length
            slot_predict = list()
            for i in range(len(test_set[0])):
                seq_len = len(test_set[0][i])
                predicted_seq = list(
                    map(lambda x: self.idx2labels[x],
                        slot_predicts[i][:seq_len]))
                slot_predict.append(predicted_seq)
        if self.intent_detection:
            intent_predicts = sess.run(self.intents_softmax,
                                       feed_dict=feed_dict)
            for i in range(len(test_set[0])):
                if test_set[2][i] == intent_predicts[i]:
                    # TODO intent eval
                    pass
        evaluate(real_test_set, slot_predict, intent_predict,
                 self.error_example_output, self.true_example_output,
                 self.slot_distinct)
def decode(self, logits, lengths, matrix):
    """Viterbi-decode a batch, with an artificial start tag prepended.

    :param logits: [batch_size, num_steps, num_tags] float32 emission scores
    :param lengths: [batch_size] int32, real length of each sequence
    :param matrix: transition matrix for inference
    :return: list of decoded tag-id paths (start position stripped)
    """
    small = -1000.0
    start = np.asarray([[small] * self.num_tags + [0]])
    paths = []
    for emission, n in zip(logits, lengths):
        # extra low-score column for the start tag, then the start row
        padded = np.concatenate([emission[:n], small * np.ones([n, 1])], axis=1)
        padded = np.concatenate([start, padded], axis=0)
        best, _ = viterbi_decode(padded, matrix)
        paths.append(best[1:])
    return paths