Example no. 1
0
    # Training loop: stream batches until the DataProcessor signals the end
    # of an epoch, then log running averages, checkpoint, and reset counters.
    # NOTE(review): relies on enclosing-scope state (sess, arg, saver,
    # data_processor, loss, line, step, num_loss, epochs, placeholders,
    # vocabularies) — this fragment is the interior of a larger function.
    while True:
        # Lazily (re)open the training files at the start of each epoch.
        # Fixed: identity comparison with None uses `is`, not `==` (PEP 8).
        if data_processor is None:
            data_processor = DataProcessor(os.path.join(full_train_path, arg.input_file), os.path.join(full_train_path, arg.slot_file), os.path.join(full_train_path, arg.intent_file), in_vocab, slot_vocab, intent_vocab)
        in_data, slot_data, slot_weight, length, intents, _, _, _ = data_processor.get_batch(arg.batch_size)
        feed_dict = {input_data.name: in_data, slots.name: slot_data, slot_weights.name: slot_weight, sequence_length.name: length, intent.name: intents}
        # ret[0]: global step, ret[1]: per-batch loss (see uses below).
        ret = sess.run(training_outputs, feed_dict)
        loss += np.mean(ret[1])

        line += arg.batch_size
        step = ret[0]
        num_loss += 1

        # End of epoch: report, save a checkpoint, and reset the accumulators.
        if data_processor.end == 1:
            line = 0
            data_processor.close()
            data_processor = None  # forces a fresh DataProcessor next iteration
            epochs += 1
            logging.info('Step: ' + str(step))
            logging.info('Epochs: ' + str(epochs))
            logging.info('Loss: ' + str(loss/num_loss))
            num_loss = 0
            loss = 0.0

            save_path = os.path.join(arg.model_path, '_step_' + str(step) + '_epochs_' + str(epochs) + '.ckpt')
            saver.save(sess, save_path)
            def valid(in_path, slot_path, intent_path):
                data_processor_valid = DataProcessor(in_path, slot_path, intent_path, in_vocab, slot_vocab, intent_vocab)

                pred_intents = []
Example no. 2
0
            def valid(in_path, slot_path, intent_path):
                """Run one full pass over a validation set and report metrics.

                Streams batches from a DataProcessor, collects intent and slot
                predictions, then computes and logs intent accuracy, slot F1
                (via computeF1Score) and the semantic error (fraction of
                utterances where intent or any slot tag is wrong).

                NOTE(review): relies on enclosing-scope names — sess, arg,
                input_data, sequence_length, inference_outputs, in_vocab,
                slot_vocab, intent_vocab, DataProcessor, computeF1Score.

                Returns:
                    (f1, accuracy, semantic_error, pred_intents,
                     correct_intents, slot_outputs, correct_slots,
                     input_words, gate_seq)
                """
                data_processor_valid = DataProcessor(in_path, slot_path, intent_path, in_vocab, slot_vocab, intent_vocab)

                pred_intents = []
                correct_intents = []
                slot_outputs = []
                correct_slots = []
                input_words = []

                # Gate sequences: never populated in this code path; returned
                # empty to keep the return tuple's shape (used by gating variants).
                gate_seq = []
                while True:
                    in_data, slot_data, slot_weight, length, intents, in_seq, slot_seq, intent_seq = data_processor_valid.get_batch(arg.batch_size)
                    feed_dict = {input_data.name: in_data, sequence_length.name: length}
                    ret = sess.run(inference_outputs, feed_dict)
                    # ret[0]: per-example intent logits -> argmax = predicted class id.
                    for i in ret[0]:
                        pred_intents.append(np.argmax(i))
                    for i in intents:
                        correct_intents.append(i)

                    # ret[1]: flattened slot logits; restore (batch, time, n_slots).
                    pred_slots = ret[1].reshape((slot_data.shape[0], slot_data.shape[1], -1))
                    for p, t, i, l in zip(pred_slots, slot_data, in_data, length):
                        p = np.argmax(p, 1)
                        tmp_pred = []
                        tmp_correct = []
                        tmp_input = []
                        # Only the first `l` (true sequence length) positions are
                        # real tokens; map ids back via the reverse vocabularies.
                        for j in range(l):
                            tmp_pred.append(slot_vocab['rev'][p[j]])
                            tmp_correct.append(slot_vocab['rev'][t[j]])
                            tmp_input.append(in_vocab['rev'][i[j]])

                        slot_outputs.append(tmp_pred)
                        correct_slots.append(tmp_correct)
                        input_words.append(tmp_input)

                    if data_processor_valid.end == 1:
                        break

                pred_intents = np.array(pred_intents)
                correct_intents = np.array(correct_intents)
                accuracy = (pred_intents==correct_intents)
                # Deliberate aliasing: semantic_error shares the boolean array;
                # the astype(float) below REBINDS `accuracy` to a new array, so
                # mutating semantic_error in the loop further down is safe.
                semantic_error = accuracy
                accuracy = accuracy.astype(float)
                accuracy = np.mean(accuracy)*100.0

                index = 0
                for t, p in zip(correct_slots, slot_outputs):
                    # Process Semantic Error: an utterance counts as correct only
                    # when the intent AND every slot tag match.
                    if len(t) != len(p):
                        raise ValueError('Error!!')

                    for j in range(len(t)):
                        if p[j] != t[j]:
                            semantic_error[index] = False
                            break
                    index += 1
                semantic_error = semantic_error.astype(float)
                semantic_error = np.mean(semantic_error)*100.0

                f1, precision, recall = computeF1Score(correct_slots, slot_outputs)
                logging.info('slot f1: ' + str(f1))
                logging.info('intent accuracy: ' + str(accuracy))
                logging.info('semantic error(intent, slots are all correct): ' + str(semantic_error))

                data_processor_valid.close()
                return f1,accuracy,semantic_error,pred_intents,correct_intents,slot_outputs,correct_slots,input_words,gate_seq
Example no. 3
0
            def valid(full_path, in_vocab):
                """Evaluate the triplet model on the data at full_path.

                Streams (anchor, positive, negative) batches, accumulates the
                evaluation loss and the pairwise scores, then computes ROC-AUC
                and mean average precision treating positive pairs as class 1
                and negative pairs as class 0.

                NOTE(review): relies on enclosing-scope names — sess, arg,
                model, inference_outputs, DataProcessor, sklearn, pd.

                Returns:
                    (mean eval loss, AUC, DataFrame of per-pair score/class).
                """
                data_processor_valid = DataProcessor(full_path, in_vocab)
                pred_scores_pos = []
                pred_scores_neg = []
                eval_loss = 0
                num_loss = 0

                while True:
                    a_ids_data, a_context_ids_data, a_keyword_index, a_len_data, p_ids_data, p_context_ids_data, \
                    p_keyword_index, p_len_data, n_ids_data, n_context_ids_data, n_keyword_index, n_len_data = \
                        data_processor_valid.get_batch_triple(arg)
                    # The final read may return an empty batch; skip the run but
                    # still honor the end-of-data flag below.
                    if len(a_ids_data) != 0:
                        feed_dict = {
                            model.input_a.name: a_ids_data,
                            model.input_a_context.name: a_context_ids_data,
                            model.input_a_keyword_index.name: a_keyword_index,
                            model.input_a_len.name: a_len_data,
                            model.input_n.name: n_ids_data,
                            model.input_n_context.name: n_context_ids_data,
                            model.input_n_keyword_index.name: n_keyword_index,
                            model.input_n_len.name: n_len_data,
                            model.input_p.name: p_ids_data,
                            model.input_p_context.name: p_context_ids_data,
                            model.input_p_keyword_index.name: p_keyword_index,
                            model.input_p_len.name: p_len_data,
                        }

                        ret = sess.run(inference_outputs, feed_dict)
                        eval_loss += np.mean(ret[1])
                        num_loss += 1
                        # ret[0][0]: anchor-negative scores; ret[0][1]: anchor-positive.
                        pred_scores_neg.append(ret[0][0])
                        pred_scores_pos.append(ret[0][1])

                    if data_processor_valid.end == 1:
                        break

                # Hoist the concatenations: the original recomputed each of
                # these three times. Behavior is unchanged.
                pos_scores = np.concatenate(pred_scores_pos)
                neg_scores = np.concatenate(pred_scores_neg)
                pred_scores = np.concatenate([pos_scores, neg_scores], axis=0)
                # Score targets: +1 for positive pairs, -1 for negative pairs.
                true_scores = np.concatenate(
                    [np.ones(pos_scores.shape), -np.ones(neg_scores.shape)], axis=0)
                # Binary labels (1/0) for average-precision.
                true_labels = np.concatenate(
                    [np.ones(pos_scores.shape), np.zeros(neg_scores.shape)], axis=0)
                fpr, tpr, thresholds = sklearn.metrics.roc_curve(true_scores,
                                                                 pred_scores,
                                                                 pos_label=1)
                auc = sklearn.metrics.auc(fpr, tpr)
                # Renamed from `map` to avoid shadowing the builtin.
                mean_ap = sklearn.metrics.average_precision_score(true_labels,
                                                                  pred_scores,
                                                                  average='micro')
                df = pd.DataFrame({
                    'model': 'triplet',
                    'score': pred_scores,
                    'class': true_scores
                })
                logging.info('Loss: ' + str(eval_loss / num_loss))
                logging.info('AUC: ' + str(auc))
                logging.info('MAP: ' + str(mean_ap))
                data_processor_valid.close()
                return (eval_loss / num_loss), auc, df