def predict(name, command):
    """Tag every token of *command* using the intent model named *name*.

    Loads the label list, word/char vocabularies and trained weights for
    the given model name, runs the BiLSTM-CRF over the tokenized command,
    and prints one "word: label" line per token.
    """
    command = command.lower()
    label_path = path.join(
        path.dirname(path.realpath(__file__)),
        "intents", "config", "labels", "%s_labels.json" % name)
    with open(label_path, encoding="utf8") as f:
        labels = json.load(f)
    # Word-level vocabulary
    word_vocab = Vocabulary()
    word_vocab.load("%s_word_vocab.json" % name)
    # Character-level vocabulary (char embedding)
    char_vocab = Vocabulary()
    char_vocab.load("%s_char_vocab.json" % name)
    # Map model output indices back to label strings
    idx2label = dict(enumerate(labels))
    preprocessor = Preprocessor(word_vocab, None, char_vocab)
    model = BiLSTMCRF(labels, len(word_vocab), len(char_vocab))
    model.load_weights('intents/config/weights/%s.hdf5' % name)
    sentence = tokenize(command)
    features = preprocessor.transform([sentence])
    # NOTE(review): assumes model.predict yields one label index per token
    # for the single input sentence — confirm against BiLSTMCRF.predict.
    raw_preds = model.predict(features)
    predicted_labels = [idx2label[pred] for pred in raw_preds]
    for word, label in zip(sentence, predicted_labels):
        print('%s: %s' % (word, label))
def single_predict():
    """Segment one interactively-entered text with the checkpointed BiLSTM-CRF.

    Restores the latest checkpoint, Viterbi-decodes the best tag path for
    the input characters, post-processes the path with tag_finetune, reports
    how much the path was altered, and prints the segmented text.
    """
    # Fixed hyper-parameters matching the trained checkpoint
    vocab_size = 4688
    embed_size = 128
    units = 64
    num_tags = 4
    _, _, char_index_dict, index_char_dict = open_file("./data/data.txt")
    optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.1)
    my_model = BiLSTMCRF(vocab_size, embed_size, units, num_tags)
    ckpt = tf.train.Checkpoint(optimizer=optimizer, my_model=my_model)
    ckpt.restore(tf.train.latest_checkpoint("./save_checkpoint/"))
    text = input_text()
    # Unknown characters fall back to index 0
    char_index_list = [char_index_dict.get(char, 0) for char in text]
    text_list = [char for char in text]
    # BMES tagging scheme: begin / middle / end / single
    tag_list = ['b', 'm', 'e', 's']
    inputs = tf.keras.preprocessing.sequence.pad_sequences(
        [char_index_list], padding='post')
    # predict() returns numpy logits plus the sequence lengths
    logits, inputs_length = my_model.predict(inputs)
    # Viterbi decoding gives the highest-scoring tag path
    best_path, _ = tfa_crf.viterbi_decode(logits[0], my_model.transition_params)
    path_list = [tag_list[idx] for idx in best_path]
    new_path_list = tag_finetune(path_list)
    # Measure how much the fine-tuning changed the raw path
    unchanged = sum(a == b for a, b in zip(path_list, new_path_list))
    print("标签正常率%.2f%%" % (100 * unchanged / len(path_list)))
    seg_text(text, new_path_list)
def predict(text, config, params, is_export=False):
    """Run NER prediction on *text*, print the decoded tags and entities.

    Args:
        text: Raw input string; each character is looked up in the vocab
            (unknown characters map to id 0).
        config: Dict providing "vocab_file", "tag_file", "ckpt_path" and,
            when is_export is true, "export_dir".
        params: Dict providing "hidden_num", "embedding_size" and "maxlen".
        is_export: When True, additionally save the model as a SavedModel
            under config["export_dir"].
    """
    # Load char->id and tag-id->tag dictionaries
    vocab2id, id2vocab = read_vocab(config["vocab_file"])
    tag2id, id2tag = read_vocab(config["tag_file"])
    # Rebuild the network and restore trained weights
    model = BiLSTMCRF(hidden_num=params["hidden_num"],
                      vocab_size=len(vocab2id),
                      label_size=len(tag2id),
                      embedding_size=params["embedding_size"])
    model.load_weights(config["ckpt_path"])
    # Encode as a single batch, padded/truncated to maxlen
    dataset = tf.keras.preprocessing.sequence.pad_sequences(
        [[vocab2id.get(char, 0) for char in text]],
        padding='post',
        maxlen=params["maxlen"])
    # Forward pass; pick the highest-scoring tag per position
    result = model.predict(dataset)[0]
    result = np.argmax(result, axis=-1)
    # BUG FIX: pad_sequences(maxlen=...) makes the tag sequence exactly
    # maxlen long, while format_result pairs tags with the raw characters
    # of *text*. Trim the tags to the actual text length so the two stay
    # aligned (consistent with the sibling predict() that pads without
    # maxlen). Texts longer than maxlen are truncated by pad_sequences, so
    # only the first maxlen characters can be tagged in that case.
    result = [id2tag[i] for i in result[:len(text)]]
    print(result)
    # Group consecutive tags into entity spans and pretty-print them
    entities_result = format_result(list(text), result)
    print(json.dumps(entities_result, indent=4, ensure_ascii=False))
    if is_export:
        # Export a serving copy of the model
        tf.keras.models.save_model(model,
                                   config["export_dir"],
                                   overwrite=True,
                                   include_optimizer=True,
                                   save_format=None,
                                   options=None)
def predict(text, config, params):
    """Predict NER tags for *text* and print the formatted entities.

    Restores a BiLSTM-CRF from config["ckpt_path"], tags each character of
    *text*, prints the raw tag list, then prints the entities extracted by
    format_result as indented JSON.
    """
    # Dictionaries: char -> id and tag id -> tag
    vocab2id, id2vocab = read_vocab(config["vocab_file"])
    tag2id, id2tag = read_vocab(config["tag_file"])
    # Rebuild the network and restore the trained weights
    model = BiLSTMCRF(
        hidden_num=params["hidden_num"],
        vocab_size=len(vocab2id),
        label_size=len(tag2id),
        embedding_size=params["embedding_size"])
    model.load_weights(config["ckpt_path"])
    # Encode the text as a single padded batch (unknown chars -> id 0)
    encoded = [vocab2id.get(ch, 0) for ch in text]
    dataset = tf.keras.preprocessing.sequence.pad_sequences(
        [encoded], padding='post')
    # Forward pass, then take the highest-scoring tag per position
    scores = model.predict(dataset)[0]
    tag_ids = np.argmax(scores, axis=-1)
    result = [id2tag[i] for i in tag_ids]
    print(result)
    # Group consecutive tags into entity spans and pretty-print them
    entities_result = format_result(list(text), result)
    print(json.dumps(entities_result, indent=4, ensure_ascii=False))