Пример #1
0
    def predict(self):
        """Run the model on the test set, save its outputs and print scores.

        Everything is written to ``<model_folder>/test_result``: the raw test
        utterances (only when that file does not exist yet) and, for each
        enabled task (``test_intent_flag`` / ``test_tag_flag``), the
        compressed probabilities, the decoded predictions and the targets.
        Per-model output names are tagged with the part of the weights file's
        basename that precedes its first underscore.

        Raises:
            AssertionError: if intent testing is enabled while
                ``self.threshold`` is None.
        """
        print('Predicting ...')
        out_dir = '{}/test_result'.format(self.model_folder)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        # user utterances: written once, reused by later runs
        utter_path = '{}/utter.txt'.format(out_dir)
        if not os.path.exists(utter_path):
            writeTxt(self.test_data.userUtter_txt, utter_path, prefix='', delimiter=None)
        print('\ttest_utter={}'.format(utter_path))

        # posterior probabilities; the model returns (slot tags, intents)
        encoded_utters = self.test_data.userUtter_encodePad
        tag_probs, intent_probs = self.model.predict(encoded_utters)

        if self.test_intent_flag:
            assert self.threshold is not None, 'Argument required: --threshold'
            weights_id = os.path.basename(self.weights_fname).split('_')[0]
            # raw intent probabilities
            probs_path = '{}/intentProb_{}.npz'.format(out_dir, weights_id)
            np.savez_compressed(probs_path, probs=intent_probs)
            print('\tintent_probs={}'.format(probs_path))
            # thresholded intent predictions
            pred_path = '{}/intent_{}.pred'.format(out_dir, weights_id)
            decoded_intents = getActPred(intent_probs, self.threshold, self.id2userIntent)
            writeTxt(decoded_intents, pred_path, prefix='intent-', delimiter=';')
            print('\tintent_pred={}'.format(pred_path))
            # reference intents
            target_path = '{}/intent_test.target'.format(out_dir)
            writeTxt(self.test_data.userIntent_txt, target_path, prefix='intent-', delimiter=';')
            print('\tintent_target={}'.format(target_path))
            # scores; the first returned item (prediction indicators) is not needed here
            _, precision, recall, fscore, accuracy_frame = eval_actPred(
                intent_probs, self.test_data.userIntent_vecBin, self.threshold)
            print('IntentPred: precision={:.4f}, recall={:.4f}, fscore={:.4f}, accuracy_frame={:.4f}'.format(precision, recall, fscore, accuracy_frame))

        if self.test_tag_flag:
            weights_id = os.path.basename(self.weights_fname).split('_')[0]
            # raw tag probabilities
            probs_path = '{}/tagProb_{}.npz'.format(out_dir, weights_id)
            np.savez_compressed(probs_path, probs=tag_probs)
            print('\ttag_probs={}'.format(probs_path))
            # decoded tags; the mask is 1 wherever the encoded input is non-zero
            pred_path = '{}/tag_{}.pred'.format(out_dir, weights_id)
            token_mask = np.zeros_like(encoded_utters)
            token_mask[encoded_utters != 0] = 1
            decoded_tags = getTagPred(tag_probs, token_mask, self.id2userTag)
            writeTxt(decoded_tags, pred_path, prefix='tag-', delimiter=None)
            print('\ttag_pred={}'.format(pred_path))
            # reference tags
            target_path = '{}/tag_test.target'.format(out_dir)
            writeTxt(self.test_data.userTag_txt, target_path, prefix='tag-', delimiter=None)
            print('\ttag_target={}'.format(target_path))
            # scores
            precision, recall, fscore, accuracy_frame = eval_slotTagging(
                tag_probs, token_mask, self.test_data.userTag_1hotPad,
                self.userTag2id['tag-O'])
            print('SlotTagging: precision={:.4f}, recall={:.4f}, fscore={:.4f}, accuracy_frame={:.4f}'.format(precision, recall, fscore, accuracy_frame))
Пример #2
0
 def predict(self, X, y_vecBin, X_utter_txt, y_txt):
     """Score the fitted model on test data and write the result files.

     Args:
         X: test features passed to ``self.model.predict_proba``.
         y_vecBin: reference labels passed to ``eval_actPred``.
         X_utter_txt: utterance text written to ``utter_test.txt``.
         y_txt: reference label text written to the ``.target`` file.

     Returns:
         The first item returned by ``eval_actPred`` (prediction indicators).
     """
     print('\tpredicting ...')
     posteriors = self.model.predict_proba(X)
     scores = eval_actPred(posteriors, y_vecBin, self.threshold)
     preds_indicator, precision, recall, fscore, accuracy_frame = scores
     summary = '\tprecision={:.4f}, recall={:.4f}, fscore={:.4f}, accuracy_frame={:.4f}'
     print(summary.format(precision, recall, fscore, accuracy_frame))

     # predictions decoded with the current threshold
     decoded = getActPred(posteriors, self.threshold, self.id2token)
     name_parts = (self.model_folder, self.task_name, self.prefix)
     pred_path = '{}/{}_{}test.pred'.format(*name_parts)
     writeTxt(decoded, pred_path, prefix=self.prefix, delimiter=';')
     print('\ttest_pred={}'.format(pred_path))

     # reference labels
     target_path = '{}/{}_{}test.target'.format(*name_parts)
     writeTxt(y_txt, target_path, prefix=self.prefix, delimiter=';')
     print('\ttest_target={}'.format(target_path))

     # utterances the predictions belong to
     utter_path = '{}/utter_test.txt'.format(self.model_folder)
     writeTxt(X_utter_txt, utter_path, prefix='', delimiter=None)
     print('\ttest_utter={}'.format(utter_path))

     return preds_indicator
Пример #3
0
 def train(self, verbose=True):
     """Fit a one-vs-rest linear SVM, score it on dev and save the model.

     The threshold returned by ``eval_intentPredict`` on the dev
     probabilities is stored in ``self.threshold`` and reused to decode the
     dev predictions. Dev predictions, targets and utterances are written
     to ``self.model_folder``; the fitted model and threshold are saved
     together in a compressed ``.npz`` whose name embeds the dev scores.

     Args:
         verbose: forwarded to ``SVC(verbose=...)``.

     Raises:
         AssertionError: if any train/dev feature or label matrix is None.
     """
     assert not (self.train_X is None or self.train_y_vecBin is None), 'train_X and train_y_vecBin are required.'
     assert not (self.dev_X is None or self.dev_y_vecBin is None), 'dev_X and dev_y_vecBin are required.'
     print('\ttraining ...')

     # probability=True is required so predict_proba is available below
     base_svm = SVC(kernel='linear', probability=True, verbose=verbose)
     self.model = OneVsRestClassifier(base_svm)
     self.model.fit(self.train_X, self.train_y_vecBin)
     dev_probs = self.model.predict_proba(self.dev_X)

     # dev evaluation; the returned threshold is kept for later decoding
     dev_scores = eval_intentPredict(dev_probs, self.dev_y_vecBin)
     precision, recall, fscore, accuracy_frame, self.threshold = dev_scores
     report = '\teval_dev: precision={:.4f}, recall={:.4f}, fscore={:.4f}, accuracy_frame={:.4f}, threshold={:.4f}'
     print(report.format(precision, recall, fscore, accuracy_frame, self.threshold))

     # dev predictions
     decoded_dev = getActPred(dev_probs, self.threshold, self.id2token)
     name_parts = (self.model_folder, self.task_name, self.prefix)
     pred_path = '{}/{}_{}dev.pred'.format(*name_parts)
     writeTxt(decoded_dev, pred_path, prefix=self.prefix, delimiter=';')
     print('\tdev_pred={}'.format(pred_path))

     # dev targets
     target_path = '{}/{}_{}dev.target'.format(*name_parts)
     writeTxt(self.dev_y_txt, target_path, prefix=self.prefix, delimiter=';')
     print('\tdev_target={}'.format(target_path))

     # dev utterances
     utter_path = '{}/utter_dev.txt'.format(self.model_folder)
     writeTxt(self.dev_utter_txt, utter_path, prefix='', delimiter=None)
     print('\tdev_utter={}'.format(utter_path))

     # persist the model together with its threshold
     self.model_fname = '{}/{}_{}model_F1={:.4f}_FrameAcc={:.4f}_th={:.4f}.npz'.format(
         self.model_folder, self.task_name, self.prefix,
         fscore, accuracy_frame, self.threshold)
     np.savez_compressed(self.model_fname, model=self.model, threshold=self.threshold)
     print('\tsaving model: {}'.format(self.model_fname))
 def predict(self):
     """Run the windowed model on the test set, save outputs, print scores.

     Only the last position of each window (index ``-1`` on the second
     axis) is decoded and scored for user intents and slot tags; agent-act
     outputs are used as returned by the model. Files are written to
     ``<model_folder>/test_result`` and tagged with the part of the weights
     file's basename that precedes its first underscore.

     Raises:
         AssertionError: if agent-act or intent testing is enabled while
             ``self.threshold`` is None.
     """
     print('Predicting ...')
     out_dir = '{}/test_result'.format(self.model_folder)
     if not os.path.exists(out_dir):
         os.makedirs(out_dir)

     # user utterances: written once, reused by later runs
     utter_path = '{}/utter.txt'.format(out_dir)
     if not os.path.exists(utter_path):
         writeTxt(self.test_data.userUtter_txt, utter_path, prefix='', delimiter=None)
     print('\ttest_utter={}'.format(utter_path))

     # posterior probabilities for the three outputs
     windows = self.test_data.userUtter_encodePad_window
     tag_probs, intent_probs, act_probs = self.model.predict(windows)

     if self.test_act_flag:
         assert self.threshold is not None, 'Threshold for agentAct is required.'
         weights_id = os.path.basename(self.weights_fname).split('_')[0]
         probs_path = '{}/actProb_{}.npz'.format(out_dir, weights_id)
         np.savez_compressed(probs_path, probs=act_probs)
         print('\tact_probs={}'.format(probs_path))

         pred_path = '{}/act_{}.pred'.format(out_dir, weights_id)
         decoded_acts = getActPred(act_probs, self.threshold, self.id2agentAct)
         writeTxt(decoded_acts, pred_path, prefix='act-', delimiter=';')
         print('\tact_pred={}'.format(pred_path))

         target_path = '{}/act_test.target'.format(out_dir)
         writeTxt(self.test_data.agentAct_txt, target_path, prefix='act-', delimiter=';')
         print('\tact_target={}'.format(target_path))

         # scores; the first returned item is not needed here
         act_scores = eval_actPred(
             act_probs, self.test_data.agentAct_vecBin, self.threshold)
         _, precision, recall, fscore, accuracy_frame = act_scores
         report = 'AgentActPred: precision={:.4f}, recall={:.4f}, fscore={:.4f}, accuracy_frame={:.4f}'
         print(report.format(precision, recall, fscore, accuracy_frame))

     if self.test_intent_flag:
         assert self.threshold is not None, 'Threshold for userIntent is required.'
         weights_id = os.path.basename(self.weights_fname).split('_')[0]
         probs_path = '{}/intentProb_{}.npz'.format(out_dir, weights_id)
         np.savez_compressed(probs_path, probs=intent_probs)
         print('\tintent_probs={}'.format(probs_path))

         pred_path = '{}/intent_{}.pred'.format(out_dir, weights_id)
         # decode the last window position only
         decoded_intents = getActPred(intent_probs[:, -1], self.threshold, self.id2userIntent)
         writeTxt(decoded_intents, pred_path, prefix='intent-', delimiter=';')
         print('\tintent_pred={}'.format(pred_path))

         target_path = '{}/intent_test.target'.format(out_dir)
         writeTxt(self.test_data.userIntent_txt, target_path, prefix='intent-', delimiter=';')
         print('\tintent_target={}'.format(target_path))

         # scores on the last window position
         intent_scores = eval_actPred(
             intent_probs[:, -1],
             self.test_data.userIntent_vecBin_window[:, -1],
             self.threshold)
         _, precision, recall, fscore, accuracy_frame = intent_scores
         report = 'IntentPred: precision={:.4f}, recall={:.4f}, fscore={:.4f}, accuracy_frame={:.4f}'
         print(report.format(precision, recall, fscore, accuracy_frame))

     if self.test_tag_flag:
         weights_id = os.path.basename(self.weights_fname).split('_')[0]
         probs_path = '{}/tagProb_{}.npz'.format(out_dir, weights_id)
         np.savez_compressed(probs_path, probs=tag_probs)
         print('\ttag_probs={}'.format(probs_path))

         pred_path = '{}/tag_{}.pred'.format(out_dir, weights_id)
         # mask is 1 wherever the last window position has a non-zero id
         token_mask = np.zeros_like(windows[:, -1])
         token_mask[windows[:, -1] != 0] = 1
         decoded_tags = getTagPred(tag_probs[:, -1], token_mask, self.id2userTag)
         writeTxt(decoded_tags, pred_path, prefix='tag-', delimiter=None)
         print('\ttag_pred={}'.format(pred_path))

         target_path = '{}/tag_test.target'.format(out_dir)
         writeTxt(self.test_data.userTag_txt, target_path, prefix='tag-', delimiter=None)
         print('\ttag_target={}'.format(target_path))

         # scores on the last window position
         precision, recall, fscore, accuracy_frame = eval_slotTagging(
             tag_probs[:, -1], token_mask,
             self.test_data.userTag_1hotPad_window[:, -1],
             self.userTag2id['tag-O'])
         report = 'SlotTagging: precision={:.4f}, recall={:.4f}, fscore={:.4f}, accuracy_frame={:.4f}'
         print(report.format(precision, recall, fscore, accuracy_frame))
 def train(self):
     """Jointly train the slot-tagging, intent and agent-act outputs.

     Steps, in order:
       1. copy vocabulary sizes and id maps from ``self.train_data`` onto
          the instance, save them to ``<model_folder>/other_vars.npz`` and
          mirror the sizes into ``self.params``;
       2. build the model (``self._build``), plot it and dump its YAML
          architecture to ``<model_folder>/graph-arch.yaml``;
       3. write the train/dev target files (utterances, intents, tags,
          agent acts) into ``self.model_folder``;
       4. for each of ``self.epoch_nb`` epochs: fit one epoch, evaluate on
          the dev set, write dev predictions under
          ``<model_folder>/dev_results`` and save the weights under
          ``<model_folder>/weights`` with the scores in the file name.

     NOTE(review): ``dev_results/`` and ``weights/`` are not created here;
     presumably they are created elsewhere -- confirm.
     """
     print('Training model ...')
     self.maxlen_userUtter = self.train_data.maxlen_userUtter
     self.window_size = self.train_data.window_size
     self.word_vocab_size = self.train_data.word_vocab_size
     self.agentAct_vocab_size = self.train_data.agentAct_vocab_size
     self.userTag_vocab_size = self.train_data.userTag_vocab_size
     self.userIntent_vocab_size = self.train_data.userIntent_vocab_size
     self.id2agentAct = self.train_data.id2agentAct
     self.id2userIntent = self.train_data.id2userIntent
     self.id2userTag = self.train_data.id2userTag
     self.id2word = self.train_data.id2word
     self.userTag2id = self.train_data.userTag2id
     # fall back to the data-derived size when no context size was given
     if self.context_size is None:
         self.context_size = self.train_data.userTagIntent_vocab_size
     # persist the data-dependent variables next to the model
     other_npz = '{}/other_vars.npz'.format(self.model_folder)
     train_vars = {
         'id2agentAct': self.id2agentAct,
         'id2userIntent': self.id2userIntent,
         'id2word': self.id2word,
         'id2userTag': self.id2userTag,
         'agentAct_vocab_size': self.agentAct_vocab_size,
         'userIntent_vocab_size': self.userIntent_vocab_size,
         'userTag_vocab_size': self.userTag_vocab_size,
         'word_vocab_size': self.word_vocab_size,
         'maxlen_userUtter': self.maxlen_userUtter,
         'window_size': self.window_size,
         'userTag2id': self.userTag2id
     }
     np.savez_compressed(other_npz, **train_vars)
     self.params['maxlen_userUtter'] = self.maxlen_userUtter
     self.params['window_size'] = self.window_size
     self.params['word_vocab_size'] = self.word_vocab_size
     self.params['agentAct_vocab_size'] = self.agentAct_vocab_size
     self.params['userTag_vocab_size'] = self.userTag_vocab_size
     self.params['userIntent_vocab_size'] = self.userIntent_vocab_size
     print_params(self.params)
     # build model graph, save graph and plot graph
     self._build()
     self._plot_graph()
     graph_yaml = '{}/graph-arch.yaml'.format(self.model_folder)
     with open(graph_yaml, 'w') as fyaml:
         fyaml.write(self.model.to_yaml())
     # load training data
     X_train = self.train_data.userUtter_encodePad_window
     tag_train = self.train_data.userTag_1hotPad_window
     intent_train = self.train_data.userIntent_vecBin_window
     act_train = self.train_data.agentAct_vecBin
     train_utter_txt = self.train_data.userUtter_txt
     train_intent_txt = self.train_data.userIntent_txt
     train_tag_txt = self.train_data.userTag_txt
     train_act_txt = self.train_data.agentAct_txt
     train_utter_fname = '{}/utter_train.target'.format(self.model_folder)
     writeTxt(train_utter_txt, train_utter_fname, prefix='', delimiter=None)
     train_intent_fname = '{}/intent_train.target'.format(self.model_folder)
     writeTxt(train_intent_txt,
              train_intent_fname,
              prefix='intent-',
              delimiter=';')
     train_tag_fname = '{}/tag_train.target'.format(self.model_folder)
     writeTxt(train_tag_txt, train_tag_fname, prefix='tag-', delimiter=None)
     train_act_fname = '{}/act_train.target'.format(self.model_folder)
     writeTxt(train_act_txt, train_act_fname, prefix='act-', delimiter=';')
     # load dev data
     X_dev = self.dev_data.userUtter_encodePad_window
     tag_dev = self.dev_data.userTag_1hotPad_window
     intent_dev = self.dev_data.userIntent_vecBin_window
     act_dev = self.dev_data.agentAct_vecBin
     dev_utter_txt = self.dev_data.userUtter_txt
     dev_intent_txt = self.dev_data.userIntent_txt
     dev_tag_txt = self.dev_data.userTag_txt
     dev_act_txt = self.dev_data.agentAct_txt
     dev_utter_fname = '{}/utter_dev.target'.format(self.model_folder)
     writeTxt(dev_utter_txt, dev_utter_fname, prefix='', delimiter=None)
     dev_intent_fname = '{}/intent_dev.target'.format(self.model_folder)
     writeTxt(dev_intent_txt,
              dev_intent_fname,
              prefix='intent-',
              delimiter=';')
     dev_tag_fname = '{}/tag_dev.target'.format(self.model_folder)
     writeTxt(dev_tag_txt, dev_tag_fname, prefix='tag-', delimiter=None)
     dev_act_fname = '{}/act_dev.target'.format(self.model_folder)
     writeTxt(dev_act_txt, dev_act_fname, prefix='act-', delimiter=';')
     # Training sample weights: one weight per (sample, window position),
     # set to 1 where that position holds at least one non-zero id.
     # (assumes X_train is 3-D: sample x window x token -- TODO confirm)
     mask_train = np.zeros((X_train.shape[0], X_train.shape[1]))
     mask_train[np.any(X_train != 0, axis=-1)] = 1
     # Dev mask over the last window position only, which is the only
     # position evaluated below. The unused full-window dev mask that the
     # previous version also built has been removed.
     mask_dev_maxlen = np.zeros_like(X_dev[:, -1])
     mask_dev_maxlen[X_dev[:, -1] != 0] = 1
     # joint training, one Keras epoch per loop iteration so that the dev
     # set can be scored and the weights saved after every epoch
     # (``range`` instead of ``xrange`` keeps this runnable on Python 3)
     for ep in range(self.epoch_nb):
         print('<Epoch {}>'.format(ep))
         self.model.fit(x=X_train,
                        y={
                            'slot_output': tag_train,
                            'intent_output': intent_train,
                            'act_output': act_train
                        },
                        sample_weight={
                            'slot_output': mask_train,
                            'intent_output': mask_train,
                            'act_output': None
                        },
                        batch_size=self.batch_size,
                        nb_epoch=1,
                        verbose=2)
         tag_probs, intent_probs, act_probs = self.model.predict(X_dev)
         # evaluation for agent act
         precision_act, recall_act, fscore_act, accuracy_frame_act, threshold_act = eval_intentPredict(
             act_probs, act_dev)
         print(
             'Agent Act Prediction: ep={}, precision={:.4f}, recall={:.4f}, fscore={:.4f}, accuracy_frame={:.4f}, threshold={:.4f}'
             .format(ep, precision_act, recall_act, fscore_act,
                     accuracy_frame_act, threshold_act))
         # evaluation for slot tags (last window position only)
         precision_tag, recall_tag, fscore_tag, accuracy_frame_tag = eval_slotTagging(
             tag_probs[:, -1], mask_dev_maxlen, tag_dev[:, -1],
             self.userTag2id['tag-O'])
         print(
             'SlotTagging: ep={}, precision={:.4f}, recall={:.4f}, fscore={:.4f}, accuracy_frame={:.4f}'
             .format(ep, precision_tag, recall_tag, fscore_tag,
                     accuracy_frame_tag))
         # evaluation for user intent (last window position only)
         precision_intent, recall_intent, fscore_intent, accuracy_frame_intent, threshold_intent = eval_intentPredict(
             intent_probs[:, -1], intent_dev[:, -1])
         print(
             'Intent Prediction: ep={}, precision={:.4f}, recall={:.4f}, fscore={:.4f}, accuracy_frame={:.4f}, threshold={:.4f}'
             .format(ep, precision_intent, recall_intent, fscore_intent,
                     accuracy_frame_intent, threshold_intent))
         # frame-level accuracy of NLU
         accuracy_frame_both = getNLUframeAccuracy(
             tag_probs[:, -1], mask_dev_maxlen, tag_dev[:, -1],
             intent_probs[:, -1], intent_dev[:, -1], threshold_intent)
         print('NLU Frame: ep={}, accuracy={:.4f}'.format(
             ep, accuracy_frame_both))
         # save predicted results
         dev_tag_pred_txt, dev_intent_pred_txt = getNLUpred(
             tag_probs[:, -1], mask_dev_maxlen, self.id2userTag,
             intent_probs[:, -1], threshold_intent, self.id2userIntent)
         dev_act_pred_txt = getActPred(act_probs, threshold_act,
                                       self.id2agentAct)
         dev_tag_pred_fname = '{}/dev_results/tag_ep={}.pred'.format(
             self.model_folder, ep)
         writeTxt(dev_tag_pred_txt,
                  dev_tag_pred_fname,
                  prefix='tag-',
                  delimiter=None)
         dev_intent_pred_fname = '{}/dev_results/intent_ep={}.pred'.format(
             self.model_folder, ep)
         writeTxt(dev_intent_pred_txt,
                  dev_intent_pred_fname,
                  prefix='intent-',
                  delimiter=';')
         dev_act_pred_fname = '{}/dev_results/act_ep={}.pred'.format(
             self.model_folder, ep)
         writeTxt(dev_act_pred_txt,
                  dev_act_pred_fname,
                  prefix='act-',
                  delimiter=';')
         dev_utter_pred_fname = '{}/dev_results/utter.txt'.format(
             self.model_folder)
         writeTxt(dev_utter_txt,
                  dev_utter_pred_fname,
                  prefix='',
                  delimiter=None)
         # Four files are written, so the message needs four placeholders;
         # with only three the intent file name was silently dropped.
         print('Write dev results: {}, {}, {}, {}'.format(
             dev_utter_pred_fname, dev_act_pred_fname, dev_tag_pred_fname,
             dev_intent_pred_fname))
         weights_fname = '{}/weights/ep={}_tagF1={:.4f}_intentF1={:.4f}th={:.4f}_NLUframeAcc={:.4f}_actF1={:.4f}frameAcc={:.4f}th={:.4f}.h5'.format(
             self.model_folder, ep, fscore_tag, fscore_intent,
             threshold_intent, accuracy_frame_both, fscore_act,
             accuracy_frame_act, threshold_act)
         print('Saving Model: {}'.format(weights_fname))
         self.model.save_weights(weights_fname, overwrite=True)