Example No. 1
    def validate_per_intent(model_path, dataset_path, batch_size):
        """
        validation will write mistaken samples to files and make scores.
        """

        model = torch.load(model_path, map_location='cpu')
        dataset = torch.load(dataset_path, map_location='cpu')

        # Sentence list from the test split (loaded here but unused below).
        sent_list = dataset.test_sentence

        pred_slot, real_slot, exp_pred_intent, real_intent, pred_intent = Processor.prediction(
            model, dataset, "test", batch_size)

        all_data = defaultdict(dict)
        for i in set(real_intent):
            all_data[i] = {
                'pred_slot': [],
                'real_slot': [],
                'exp_pred_intent': [],
                'real_intent': [],
                'pred_intent': []
            }

        length = len(real_intent)
        for i in range(length):
            all_data[real_intent[i]]['pred_slot'].append(pred_slot[i])
            all_data[real_intent[i]]['real_slot'].append(real_slot[i])
            all_data[real_intent[i]]['exp_pred_intent'].append(
                exp_pred_intent[i])
            all_data[real_intent[i]]['real_intent'].append(real_intent[i])
            all_data[real_intent[i]]['pred_intent'].append(pred_intent[i])

        slot_f1 = miulab.computeF1Score(pred_slot, real_slot)[0]
        intent_acc = Evaluator.accuracy(exp_pred_intent, real_intent)
        sent_acc = Evaluator.semantic_acc(pred_slot, real_slot,
                                          exp_pred_intent, real_intent)

        result_per_intent = defaultdict(list)
        result_per_intent['overall'] = [
            slot_f1, intent_acc, sent_acc,
            len(real_intent)
        ]
        for intent in all_data:
            # These names shadow the overall lists above; that is safe only
            # because the overall metrics were already computed.
            pred_slot = all_data[intent]['pred_slot']
            real_slot = all_data[intent]['real_slot']
            exp_pred_intent = all_data[intent]['exp_pred_intent']
            real_intent = all_data[intent]['real_intent']
            pred_intent = all_data[intent]['pred_intent']
            length = len(exp_pred_intent)

            slot_f1 = miulab.computeF1Score(pred_slot, real_slot)[0]
            intent_acc = Evaluator.accuracy(exp_pred_intent, real_intent)
            sent_acc = Evaluator.semantic_acc(pred_slot, real_slot,
                                              exp_pred_intent, real_intent)
            result_per_intent[intent] = [slot_f1, intent_acc, sent_acc, length]

        return result_per_intent
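
A minimal usage sketch for validate_per_intent; the checkpoint and dataset paths are hypothetical placeholders, and each result entry is the [slot_f1, intent_acc, sent_acc, num_samples] list built above:

    # Hypothetical paths; point these at your own saved model and dataset.
    results = validate_per_intent("save/model.pkl", "save/dataset.pkl",
                                  batch_size=32)
    for intent, (slot_f1, intent_acc, sent_acc, n) in sorted(results.items()):
        print("{:<20} slot F1 {:.2f}  intent acc {:.2f}  sent acc {:.2f}"
              "  ({} samples)".format(intent, slot_f1, intent_acc, sent_acc, n))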
Example No. 2
    def validate(model_path, dataset_path, batch_size):
        """
        validation will write mistaken samples to files and make scores.
        """

        model = torch.load(model_path)
        dataset = torch.load(dataset_path)

        # Get the sentence list in test dataset.
        sent_list = dataset.test_sentence

        pred_slot, real_slot, exp_pred_intent, real_intent, pred_intent = Processor.prediction(
            model, dataset, "test", batch_size)

        # Make sure the directory for saving error predictions exists.
        mistake_dir = os.path.join(dataset.save_dir, "error")
        if not os.path.exists(mistake_dir):
            os.mkdir(mistake_dir)

        slot_file_path = os.path.join(mistake_dir, "slot.txt")
        intent_file_path = os.path.join(mistake_dir, "intent.txt")
        both_file_path = os.path.join(mistake_dir, "both.txt")

        # Write samples whose slot predictions are wrong.
        with open(slot_file_path, 'w') as fw:
            for w_list, r_slot_list, p_slot_list in zip(
                    sent_list, real_slot, pred_slot):
                if r_slot_list != p_slot_list:
                    for w, r, p in zip(w_list, r_slot_list, p_slot_list):
                        fw.write(w + '\t' + r + '\t' + p + '\n')
                    fw.write('\n')

        # Write samples whose intent predictions are wrong.
        with open(intent_file_path, 'w') as fw:
            for w_list, p_intent_list, r_intent, p_intent in zip(
                    sent_list, pred_intent, real_intent, exp_pred_intent):
                if p_intent != r_intent:
                    for w, p in zip(w_list, p_intent_list):
                        fw.write(w + '\t' + p + '\n')
                    fw.write(r_intent + '\t' + p_intent + '\n\n')

        # Write samples that have a slot error or an intent error (or both).
        with open(both_file_path, 'w') as fw:
            for w_list, r_slot_list, p_slot_list, p_intent_list, r_intent, p_intent in \
                    zip(sent_list, real_slot, pred_slot, pred_intent, real_intent, exp_pred_intent):

                if r_slot_list != p_slot_list or r_intent != p_intent:
                    for w, r_slot, p_slot, p_intent_ in zip(
                            w_list, r_slot_list, p_slot_list, p_intent_list):
                        fw.write(w + '\t' + r_slot + '\t' + p_slot + '\t' +
                                 p_intent_ + '\n')
                    fw.write(r_intent + '\t' + p_intent + '\n\n')

        slot_f1 = miulab.computeF1Score(pred_slot, real_slot)[0]
        intent_acc = Evaluator.accuracy(exp_pred_intent, real_intent)
        sent_acc = Evaluator.semantic_acc(pred_slot, real_slot,
                                          exp_pred_intent, real_intent)

        return slot_f1, intent_acc, sent_acc
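
For reference, slot.txt ends up in a word / gold / predicted three-column layout (tab-separated), one token per line with a blank line between mispredicted samples. The tokens and labels below are invented purely for illustration:

    flights    O    O
    to    O    O
    denver    B-toloc.city_name    B-fromloc.city_name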
Example No. 3
    def validate(model_path, dataset, batch_size, num_intent, args):
        """
        validation will write mistaken samples to files and make scores.
        """

        if args.gpu:
            model = torch.load(model_path)
        else:
            model = torch.load(model_path, map_location=torch.device('cpu'))

        ss, pred_slot, real_slot, pred_intent, real_intent = Processor.prediction(
            model, dataset, "test", batch_size, args)

        # Make sure the error directory exists (note that the error file
        # below is actually written to args.save_dir).
        mistake_dir = os.path.join(dataset.save_dir, "error")
        if not os.path.exists(mistake_dir):
            os.mkdir(mistake_dir)

        slot_f1_score = miulab.computeF1Score(ss, real_slot, pred_slot,
                                              args)[0]
        intent_f1_score = f1_score(
            instance2onehot(dataset.intent_alphabet.get_index, num_intent,
                            real_intent),
            instance2onehot(dataset.intent_alphabet.get_index, num_intent,
                            pred_intent),
            average='macro')
        intent_acc_score = Evaluator.intent_acc(pred_intent, real_intent)
        sent_acc = Evaluator.semantic_acc(pred_slot, real_slot, pred_intent,
                                          real_intent)
        print(
            "slot f1: {}, intent f1: {}, intent acc: {}, exact acc: {}".format(
                slot_f1_score, intent_f1_score, intent_acc_score, sent_acc))
        # Write every sample's predicted/gold intents and slot labels.

        with open(os.path.join(args.save_dir, 'error.txt'),
                  'w',
                  encoding="utf8") as fw:
            for p_slot_list, r_slot_list, p_intent_list, r_intent in \
                    zip(pred_slot, real_slot, pred_intent, real_intent):
                fw.write(','.join(p_intent_list) + '\t' + ','.join(r_intent) +
                         '\n')
                for p_slot, r_slot in zip(p_slot_list, r_slot_list):
                    fw.write(p_slot + '\t' + r_slot + '\n')
                fw.write('\n\n')
        # with open(os.path.join(args.save_dir, 'slot_right.txt'), 'w', encoding="utf8") as fw:
        #     for p_slot_list, r_slot_list, tokens in \
        #             zip(pred_slot, real_slot, ss):
        #         if p_slot_list != r_slot_list:
        #             continue
        #         fw.write(' '.join(tokens) + '\n' + ' '.join(r_slot_list) + '\n' + ' '.join(p_slot_list) + '\n' + '\n\n')

        return slot_f1_score, intent_f1_score, intent_acc_score, sent_acc
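
instance2onehot is project-local and not shown in these examples; here is a minimal sketch of what the calls above appear to expect, assuming each instance is a single intent label or a list of labels (multi-intent) that get_index maps to integer indices:

    import numpy as np

    def instance2onehot(get_index, num_intent, instances):
        # One row per sample; set a 1 at every intent index the sample carries.
        onehot = np.zeros((len(instances), num_intent), dtype=np.int64)
        for row, labels in enumerate(instances):
            if not isinstance(labels, (list, tuple)):
                labels = [labels]
            for label in labels:
                onehot[row, get_index(label)] = 1
        return onehot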
Example No. 4
    def estimate(self, if_dev, test_batch=100):
        """
        Estimate the performance of the model on the dev or test dataset.
        """
        if if_dev:
            pred_slot, real_slot, pred_intent, real_intent, _ = self.prediction(
                self.__model, self.__dataset, "dev", test_batch)
        else:
            pred_slot, real_slot, pred_intent, real_intent, _ = self.prediction(
                self.__model, self.__dataset, "test", test_batch)

        slot_f1_score = miulab.computeF1Score(pred_slot, real_slot)[0]
        intent_acc = Evaluator.accuracy(pred_intent, real_intent)
        sent_acc = Evaluator.semantic_acc(pred_slot, real_slot, pred_intent,
                                          real_intent)

        return slot_f1_score, intent_acc, sent_acc
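
Evaluator.semantic_acc is likewise project-local. The standard definition of sentence-level (semantic) accuracy in slot-filling work, and a plausible reading of the calls in these examples, is an exact match on both the slot sequence and the intent; a sketch under that assumption:

    def semantic_acc(pred_slot, real_slot, pred_intent, real_intent):
        # A sample counts only if its whole slot sequence and its intent match.
        hits = sum(ps == rs and pi == ri
                   for ps, rs, pi, ri in zip(pred_slot, real_slot,
                                             pred_intent, real_intent))
        return hits / len(real_intent)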
Example No. 5
    def estimate(self, if_dev, test_batch=100):
        """
        Estimate the performance of model on dev or test dataset.
        """

        if if_dev:
            pred_slot, real_slot, pred_intent, real_intent, _ = self.prediction(
                self.__model, self.__dataset, "dev", test_batch)
        else:
            pred_slot, real_slot, pred_intent, real_intent, _ = self.prediction(
                self.__model, self.__dataset, "test", test_batch)
        # Used to compute the F1 score.

        slot_f1_score = miulab.computeF1Score(pred_slot, real_slot)[0]
        # slot_f1_score = Evaluator.f1_score(pred_slot, real_slot)

        intent_acc = Evaluator.accuracy(pred_intent, real_intent)
        sent_acc = Evaluator.semantic_acc(pred_slot, real_slot, pred_intent,
                                          real_intent)

        return slot_f1_score, intent_acc, sent_acc
Example No. 6
    def estimate(self, if_dev, args, test_batch=100):
        """
        Estimate the performance of model on dev or test dataset.
        """

        if if_dev:
            ss, pred_slot, real_slot, pred_intent, real_intent = self.prediction(
                self.__model, self.__dataset, "dev", test_batch, args)
        else:
            ss, pred_slot, real_slot, pred_intent, real_intent = self.prediction(
                self.__model, self.__dataset, "test", test_batch, args)

        num_intent = len(self.__dataset.intent_alphabet)
        slot_f1_score = miulab.computeF1Score(ss, real_slot, pred_slot,
                                              args)[0]
        intent_f1_score = f1_score(
            instance2onehot(self.__dataset.intent_alphabet.get_index,
                            num_intent, real_intent),
            instance2onehot(self.__dataset.intent_alphabet.get_index,
                            num_intent, pred_intent),
            average='macro')
        intent_acc_score = Evaluator.intent_acc(pred_intent, real_intent)
        sent_acc = Evaluator.semantic_acc(pred_slot, real_slot, pred_intent,
                                          real_intent)
        print(
            "slot f1: {}, intent f1: {}, intent acc: {}, exact acc: {}".format(
                slot_f1_score, intent_f1_score, intent_acc_score, sent_acc))
        # Write every sample's predicted/gold intents and slot labels.
        with open(os.path.join(args.save_dir, 'error.txt'),
                  'w',
                  encoding="utf8") as fw:
            for p_slot_list, r_slot_list, p_intent_list, r_intent in \
                    zip(pred_slot, real_slot, pred_intent, real_intent):
                fw.write(','.join(p_intent_list) + '\t' + ','.join(r_intent) +
                         '\n')
                for p_slot, r_slot in zip(p_slot_list, r_slot_list):
                    fw.write(p_slot + '\t' + r_slot + '\n')
                fw.write('\n\n')

        return slot_f1_score, intent_f1_score, intent_acc_score, sent_acc
    def get_f1(pred_list, real_list):
        return computeF1Score(real_list, pred_list)[0]
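
Note that the examples disagree about computeF1Score's argument order: Examples 1, 2, 4 and 5 call miulab.computeF1Score(pred_slot, real_slot), Examples 3 and 6 pass a token list first as computeF1Score(ss, real_slot, pred_slot, args), and the helper above passes (real_list, pred_list). Check the signature of the miulab version you are using before relying on any one of these orderings.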