def prediction(path, mode="bert_bilstm", is_eval=False):
    '''
    Load the model selected by `mode` and run NER inference on the file at `path`.
    :param path: path to the text file to tag
    :param mode: one of "lstm", "bilstm", "bert_bilstm", "rnn"; anything else
                 falls through to the word2vec BiLSTM-CRF model
    :param is_eval: passed through to NERInference.predict_all
    :return: the predicted tag sequences
    '''
    labels_to_ix, ix_to_label = NER_pre_data.build_label(normal_param.labels)
    vocab = process_data_for_keras.read_vocab(normal_param.lstm_vocab)
    if mode == "lstm":
        save_path = normal_param.save_path_lstm
        model = keras_LSTM_CRF.load_embedding_bilstm2_crf_model(
            save_path, len(vocab), len(labels_to_ix), normal_param.max_length)
    elif mode == "bilstm":
        save_path = normal_param.save_path_bilstm
        model = keras_BILSTM_CEF.load_embedding_bilstm2_crf_model(
            save_path, len(vocab), len(labels_to_ix), normal_param.max_length)
    elif mode == "bert_bilstm":
        save_path = normal_param.save_path_bert_bilstm
        model = keras_Bert_bilstm_crf.load_embedding_bilstm2_crf_model(
            save_path, len(labels_to_ix))
    elif mode == "rnn":
        save_path = normal_param.save_path_gru
        # The input length is passed as 0 for the GRU/RNN model.
        model = keras_RNN_CRF.load_embedding_bilstm2_crf_model(
            save_path, len(vocab), len(labels_to_ix), 0)
    else:
        # Fallback: word2vec BiLSTM-CRF. The pretrained embedding matrix
        # replaces the plain vocabulary; the loader's argument order is
        # (save_path, NUM_CLASS, embeddings_matrix, input_length).
        save_path = normal_param.save_path_wordVEC_bilstm
        embeddings_matrix, vocab = process_data_for_keras.txtpad_use_word2vec()
        model = keras_word2vec_bilstm_crf.load_embedding_bilstm2_crf_model(
            save_path, len(labels_to_ix), embeddings_matrix, normal_param.max_length)
    myNerInfer = NERInference.NERInference(model, vocab, ix_to_label, len(vocab),
                                           path, mode=mode)
    new_string4_pred, _ = myNerInfer.predict_all(is_eval)
    return new_string4_pred
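# Usage sketch (illustrative, not part of the original module): tags a
# hypothetical input file with the BiLSTM model. The file path is made up,
# and it assumes the checkpoint at normal_param.save_path_bilstm exists.
def _demo_prediction():
    preds = prediction("data/test_sentences.txt", mode="bilstm", is_eval=False)
    for tagged_sentence in preds[:3]:
        print(tagged_sentence)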
def process_data(embeding=None, is_train=True, vocab2=None):
    '''
    Prepare the data according to the chosen embedding method.
    :param embeding: embedding method: "bert", "wordvec", or None (no embedding)
    :param is_train: if True, build a train/test split; otherwise only test data
    :param vocab2: alternative vocabulary, used when embeding == "wordvec"
    :return: padded arrays plus the vocabulary and label-set sizes (train mode),
             or x_test, y_test (test mode)
    '''
    labels_to_ix, _ = NER_pre_data.build_label(normal_param.labels)
    vocab = read_vocab(normal_param.lstm_vocab)
    if is_train:
        x, y = read_data(normal_param.head_path, vocab, labels_to_ix)
        x_train, y_train, x_test, y_test = split_tst_trn(x, y, 50)
        length = gain_max_length(x_train, x_test)
        # The word2vec branch indexes with the word2vec vocabulary (vocab2);
        # every other branch uses the plain LSTM vocabulary.
        if embeding == "wordvec":
            x_train, y_train, x_test, y_test = list_to_array(
                x_train, y_train, x_test, y_test, vocab2, labels_to_ix, length,
                wordembeding=embeding)
        else:
            x_train, y_train, x_test, y_test = list_to_array(
                x_train, y_train, x_test, y_test, vocab, labels_to_ix, length,
                wordembeding=embeding)
        # Give the labels a trailing singleton dimension: (samples, timesteps, 1).
        y_test = np.expand_dims(y_test, 2)
        y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], 1))
        return x_train, y_train, x_test, y_test, len(vocab), len(labels_to_ix)
    else:
        x, y = read_data(normal_param.head_test_path, vocab, labels_to_ix)
        length = gain_max_length(x, [])
        y_test, x_test = deal_txt_label_to_array(x, y, vocab, labels_to_ix,
                                                 length, mode=embeding)
        return x_test, y_test
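# Sketch of wiring process_data() into training (illustrative only). It reuses
# the loader shown in prediction() above and assumes it returns a compiled
# Keras model and that gain_max_length() agrees with normal_param.max_length;
# the batch size and epoch count are placeholders.
def _demo_train_bilstm():
    x_train, y_train, x_test, y_test, vocab_size, n_labels = process_data(
        embeding=None, is_train=True)
    model = keras_BILSTM_CEF.load_embedding_bilstm2_crf_model(
        normal_param.save_path_bilstm, vocab_size, n_labels,
        normal_param.max_length)
    model.fit(x_train, y_train, validation_data=(x_test, y_test),
              batch_size=32, epochs=5)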
def process_test_data():
    '''
    Prepare the test set, padded to normal_param.max_length, in "bert" mode.
    :return: x_test, y_test
    '''
    labels_to_ix, _ = NER_pre_data.build_label(normal_param.labels)
    vocab = read_vocab(normal_param.lstm_vocab)
    x, y = read_data(normal_param.head_test_path, vocab, labels_to_ix)
    y_test, x_test = deal_txt_label_to_array(x, y, vocab, labels_to_ix,
                                             normal_param.max_length, mode="bert")
    return x_test, y_test
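# Quick shape check for process_test_data() (illustrative only); np.shape is
# used in case the "bert" branch returns nested lists rather than ndarrays.
def _demo_test_shapes():
    x_test, y_test = process_test_data()
    print("x_test:", np.shape(x_test), "y_test:", np.shape(y_test))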
def process_data_gen(data, label, embeding=None):
    '''
    Prepare one chunk of data for generator-based training, according to the
    chosen embedding method.
    :param data: raw token sequences
    :param label: raw label sequences
    :param embeding: embedding method: "bert", "wordvec", or None (no embedding)
    :return: x_train, y_train arrays plus the vocabulary and label-set sizes
    '''
    labels_to_ix, _ = NER_pre_data.build_label(normal_param.labels)
    vocab = read_vocab(normal_param.lstm_vocab)
    data, label = normal_util.shuffle(data, label)
    length = normal_param.max_length
    # Forward the embedding method so the `embeding` argument takes effect.
    x_train, y_train = deal_txt_label_to_array(data, label, vocab, labels_to_ix,
                                               length, mode=embeding)
    # Give the labels a trailing singleton dimension: (samples, timesteps, 1).
    y_train = np.expand_dims(y_train, 2)
    return x_train, y_train, len(vocab), len(labels_to_ix)
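# Sketch of a Keras-style batch generator built on process_data_gen()
# (illustrative only), suitable for fit_generator with steps_per_epoch =
# len(data) // batch_size. Only the call pattern of process_data_gen() is
# taken from above; the slicing scheme and batch size are assumptions.
def _demo_batch_generator(data, label, batch_size=32):
    while True:
        for start in range(0, len(data), batch_size):
            x_batch, y_batch, _, _ = process_data_gen(
                data[start:start + batch_size],
                label[start:start + batch_size],
                embeding=None)
            yield x_batch, y_batch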