def test(self):
    predictor = Predictor()
    model = Linear(2, 1)
    data = prepare_fake_dataset()
    data.set_input("x")
    ans = predictor.predict(model, data)
    self.assertEqual(len(ans), 2000)
    self.assertTrue(isinstance(ans[0], torch.Tensor))
def test_simple(self):
    model = LinearModel()
    predictor = Predictor(model)
    data = prepare_fake_dataset()
    data.set_input("x")
    ans = predictor.predict(data)
    self.assertTrue(isinstance(ans, defaultdict))
    self.assertTrue("predict" in ans)
    self.assertTrue(isinstance(ans["predict"], list))
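# A minimal sketch of a model the Predictor above can wrap; the real
# LinearModel fixture is not shown here, so the layer shape is an assumption.
# fastNLP's Predictor calls the network's `predict` method when one exists
# (falling back to `forward`) and gathers each key of the returned dict into
# a defaultdict of lists, which is why the test can read ans["predict"].
import torch.nn as nn

class LinearModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(2, 1)

    def predict(self, x):
        # The "predict" key here becomes ans["predict"] in the test above.
        return {"predict": self.linear(x)}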
def test_seq_label(self):
    model_args = {
        "vocab_size": 10,
        "word_emb_dim": 100,
        "rnn_hidden_units": 100,
        "num_classes": 5
    }
    infer_data = [['a', 'b', 'c', 'd', 'e'],
                  ['a', '@', 'c', 'd', 'e'],
                  ['a', 'b', '#', 'd', 'e'],
                  ['a', 'b', 'c', '?', 'e'],
                  ['a', 'b', 'c', 'd', '$'],
                  ['!', 'b', 'c', 'd', 'e']]
    vocab = Vocabulary()
    vocab.word2idx = {'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4,
                      '!': 5, '@': 6, '#': 7, '$': 8, '?': 9}
    class_vocab = Vocabulary()
    class_vocab.word2idx = {"0": 0, "1": 1, "2": 2, "3": 3, "4": 4}

    os.system("mkdir save")
    save_pickle(class_vocab, "./save/", "class2id.pkl")
    save_pickle(vocab, "./save/", "word2id.pkl")

    model = SeqLabeling(model_args)
    predictor = Predictor("./save/", task="seq_label")

    results = predictor.predict(network=model, data=infer_data)
    self.assertTrue(isinstance(results, list))
    self.assertGreater(len(results), 0)
    for res in results:
        self.assertTrue(isinstance(res, list))
        self.assertEqual(len(res), 5)
        self.assertTrue(isinstance(res[0], str))

    os.system("rm -rf save")
    print("pickle path deleted")
def predict(model, subset_for_prediction, targets, filename):
    predictor = Predictor(model)
    predictions = predictor.predict(subset_for_prediction)['pred']
    words = list(subset_for_prediction.get_field('raw_words'))

    lines = []
    words_sequence_index = 1
    labels_sequence_index = 0
    for sentence in zip(predictions, words):
        # Skip sentences whose prediction came back as a bare int
        # instead of a label sequence.
        if isinstance(sentence[labels_sequence_index][0], int):
            continue
        sentence_words = sentence[words_sequence_index]
        # Strip the BIO prefix ("B-", "I-") and keep only the entity type.
        labels = (targets.to_word(label).split("-")[-1]
                  for label in sentence[labels_sequence_index][0])
        for pair in zip(sentence_words, labels):
            lines.append('\t'.join(pair))
        lines.append('')
    write_lines(filename, lines)
def _predict(self, subset_for_prediction, targets, filename):
    predictor = Predictor(self)
    predictions = predictor.predict(subset_for_prediction)['pred']
    words = list(subset_for_prediction.get_field('raw_words'))

    lines = []
    words_sequence_index = 1
    labels_sequence_index = 0
    for sentence in zip(predictions, words):
        # Skip sentences whose prediction came back as a bare int
        # instead of a label sequence.
        if isinstance(sentence[labels_sequence_index][0], int):
            continue
        sentence_words = sentence[words_sequence_index]
        # Keep the full label; use .split("-")[-1] here instead to strip
        # BIO prefixes.
        labels = (targets.to_word(label)
                  for label in sentence[labels_sequence_index][0])
        for pair in zip(sentence_words, labels):
            lines.append(' '.join(pair))
        lines.append('')
    if filename is not None:
        write_lines(filename, lines)
    return lines
def infer():
    # Load the inference configuration, the same as for testing
    test_args = ConfigSection()
    ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS_test": test_args})

    # Fetch the vocabulary size and number of labels from the pickle files
    word2index = load_pickle(pickle_path, "word2id.pkl")
    test_args["vocab_size"] = len(word2index)
    index2label = load_pickle(pickle_path, "id2class.pkl")
    test_args["num_classes"] = len(index2label)

    # Define the same model
    model = SeqLabeling(test_args)

    # Load the trained parameters into the model
    ModelLoader.load_pytorch(model, "./data_for_tests/saved_model.pkl")
    print("model loaded!")

    # Data loader: load_lines() transforms the raw text into a list of
    # lists of strings:
    #     [
    #         [word_11, word_12, ...],
    #         [word_21, word_22, ...],
    #         ...
    #     ]
    # Each line in "people_infer.txt" is already a sentence, so load_lines()
    # only has to split it.
    raw_data_loader = BaseLoader(data_infer_path)
    infer_data = raw_data_loader.load_lines()

    # Inference interface
    predictor = Predictor(pickle_path)
    results = predictor.predict(model, infer_data)
    print(results)
    print("Inference finished!")
def __init__(self, modelFile, vocabFile, addTarget2Vocab=False):
    # CHAR_INPUT="chars"; characters will be converted to word indices
    self._vocabFile = vocabFile
    self._addTarget2Vocab = addTarget2Vocab
    self._CONST_CHAR = Const.CHAR_INPUT
    self._CONST_WORDS = Const.INPUT
    self._CONST_TARGET = Const.TARGET
    self._input_fields = [self._CONST_WORDS, Const.INPUT_LEN]
    self._word_counter, self._word_vocab, self._target_counter, \
        self._target_vocab, self._target = self._get_vocabs()
    self._vocab4word = Vocabulary()
    self._update_word()
    if self._addTarget2Vocab:
        self._vocab4target = Vocabulary(unknown=None, padding=None)
        self._input_fields.append(self._CONST_TARGET)
        self._update_target()
    self._model = Predictor(ModelLoader().load_pytorch_model(modelFile))
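# A hedged sketch, not part of the original class: one way such a wrapper
# might run inference with the fields prepared in __init__, assuming
# fastNLP's DataSet/Vocabulary API. This method and its `sentences`
# parameter (an iterable of raw strings) are hypothetical.
def predict(self, sentences):
    from fastNLP import DataSet
    # Build a dataset of character sequences under the CHAR_INPUT field.
    dataset = DataSet({self._CONST_CHAR: [list(s) for s in sentences]})
    # Index characters into word ids with the vocabulary built in __init__.
    self._vocab4word.index_dataset(dataset, field_name=self._CONST_CHAR,
                                   new_field_name=self._CONST_WORDS)
    # Add sequence lengths and mark the model's input fields.
    dataset.add_seq_len(self._CONST_WORDS, new_field_name=Const.INPUT_LEN)
    dataset.set_input(self._CONST_WORDS, Const.INPUT_LEN)
    return self._model.predict(dataset)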
label_link_dict = dict()
for row_json in json_file_iter:
    label_link_dict[row_json['label_desc']] = row_json['label']
logger.info(label_link_dict)

logger.warning('Loading the model')
model = torch.load(model_name)
model.eval()
logger.info('Model loaded:\n{}'.format(model))

logger.warning('Fetching the vocabularies')
char_vocab = load_serialize_obj(char_vocab_pkl_file)
logger.info('char_vocab:{}'.format(char_vocab))
target_vocab = load_serialize_obj(target_vocab_pkl_file)
logger.info('target_vocab:{}'.format(target_vocab))

logger.warning('Loading the test data')
json_file_iter = read_json_file_iter(test_data_json_file_name)
predictor = Predictor(model)
with codecs.open(predict_output_json_file_name, mode='w', encoding='utf8') as fw_json, \
        codecs.open(predict_output_file_name, mode='w', encoding='utf8') as fw:
    for i, row_json in enumerate(json_file_iter):
        if i % 100 == 0:
            logger.info('predict row:{}'.format(i))
        sentence = row_json.get('sentence', '')
        keywords = row_json.get('keywords', '')
        text = remove_blank('{}{}'.format(sentence, keywords))
        input_data = []
        # Each test sample is the character sequence of the concatenated text
        test_data = [list(text)]
    optimizer, loss, args.batch,
    n_epochs=args.epoch,
    dev_data=datasets['dev'],
    metrics=metrics,
    device=device,
    callbacks=create_cb(),
    dev_batch_size=args.test_batch,
    test_use_tqdm=False,
    check_code_level=-1,
    update_every=args.update_every)
trainer.train()

print('Evaluating...')
with torch.no_grad():
    # Wrap the trained model in a Predictor rather than rebinding `model`,
    # so the underlying network stays accessible afterwards.
    predictor = Predictor(model)
    pred = predictor.predict(
        datasets['dev'],
        seq_len_field_name='seq_len',
    )['pred']
pred = [[vocabs['label'].to_word(ele) for ele in arr] for arr in pred]
target = list(datasets['dev']['target'])
target = [[vocabs['label'].to_word(ele) for ele in arr] for arr in target]
cls_res = classification_report(target, pred)
print(cls_res)
print('=============================')
visualize_error(datasets['dev'], target, pred)

# Prediction on the aicup data
if args.do_pred:
    print('predicting...')
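# `classification_report` above is presumably seqeval's, which scores
# BIO-tagged sequences given as lists of label lists; sklearn's function of
# the same name expects flat 1-D arrays instead. A minimal, self-contained
# check of that assumption:
from seqeval.metrics import classification_report

y_true = [['B-PER', 'I-PER', 'O'], ['B-LOC', 'O']]
y_pred = [['B-PER', 'I-PER', 'O'], ['O', 'O']]
print(classification_report(y_true, y_pred))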
def test_seq_label(self):
    model_args = {
        "vocab_size": 10,
        "word_emb_dim": 100,
        "rnn_hidden_units": 100,
        "num_classes": 5
    }
    infer_data = [['a', 'b', 'c', 'd', 'e'],
                  ['a', '@', 'c', 'd', 'e'],
                  ['a', 'b', '#', 'd', 'e'],
                  ['a', 'b', 'c', '?', 'e'],
                  ['a', 'b', 'c', 'd', '$'],
                  ['!', 'b', 'c', 'd', 'e']]
    vocab = Vocabulary()
    vocab.word2idx = {'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4,
                      '!': 5, '@': 6, '#': 7, '$': 8, '?': 9}
    class_vocab = Vocabulary()
    class_vocab.word2idx = {"0": 0, "1": 1, "2": 2, "3": 3, "4": 4}

    os.system("mkdir save")
    save_pickle(class_vocab, "./save/", "label2id.pkl")
    save_pickle(vocab, "./save/", "word2id.pkl")

    model = CNNText(model_args)
    import fastNLP.core.predictor as pre
    predictor = Predictor("./save/", pre.text_classify_post_processor)

    # Load the inference data
    infer_data_set = convert_seq_dataset(infer_data)
    infer_data_set.index_field("word_seq", vocab)

    results = predictor.predict(network=model, data=infer_data_set)
    self.assertTrue(isinstance(results, list))
    self.assertGreater(len(results), 0)
    self.assertEqual(len(results), len(infer_data))
    for res in results:
        self.assertTrue(isinstance(res, str))
        self.assertTrue(res in class_vocab.word2idx)

    del model, predictor
    infer_data_set.set_origin_len("word_seq")

    model = SeqLabeling(model_args)
    predictor = Predictor("./save/", pre.seq_label_post_processor)

    results = predictor.predict(network=model, data=infer_data_set)
    self.assertTrue(isinstance(results, list))
    self.assertEqual(len(results), len(infer_data))
    for i in range(len(infer_data)):
        res = results[i]
        self.assertTrue(isinstance(res, list))
        self.assertEqual(len(res), len(infer_data[i]))

    os.system("rm -rf save")
    print("pickle path deleted")