Example #1
    def test_eval(self):
        data = DATAMultiWOZ(debug=False, data_dir=self.data_dir)
        test_examples = data.read_examples(
            os.path.join(self.data_dir, 'test.json'))
        print('Number of eval examples:', len(test_examples))

        dialogueID = [x.guid for x in test_examples]

        test_features = data.convert_examples_to_features(
            test_examples, self.tokenizer, self.max_seq_length)
        test_input_ids = torch.tensor(
            data.select_field(test_features, 'input_ids'), dtype=torch.long)
        test_input_mask = torch.tensor(
            data.select_field(test_features, 'input_mask'), dtype=torch.long)
        test_segment_ids = torch.tensor(
            data.select_field(test_features, 'segment_ids'), dtype=torch.long)
        test_utterance_mask = torch.tensor(
            data.select_field(test_features, 'utterance_mask'),
            dtype=torch.long)
        test_domainslot_mask = torch.tensor(
            data.select_field(test_features, 'domainslot_mask'),
            dtype=torch.long)
        test_label_tokens_start = torch.tensor(
            [f.label_tokens_start for f in test_features], dtype=torch.long)
        test_label_tokens_end = torch.tensor(
            [f.label_tokens_end for f in test_features], dtype=torch.long)
        test_label_sentence_domainslot = torch.tensor(
            [f.label_sentence_domainslot for f in test_features],
            dtype=torch.long)
        test_label_tokens_domainslot = torch.tensor(
            [f.label_tokens_domainslot for f in test_features],
            dtype=torch.long)

        test_hist_tokens = [f.hist_token for f in test_features]

        test_data = TensorDataset(
            test_input_ids, test_input_mask, test_segment_ids,
            test_utterance_mask, test_domainslot_mask, test_label_tokens_start,
            test_label_tokens_end, test_label_sentence_domainslot,
            test_label_tokens_domainslot)
        # Run prediction for full data
        test_sampler = SequentialSampler(test_data)
        test_dataloader = DataLoader(test_data,
                                     sampler=test_sampler,
                                     batch_size=self.eval_batch_size)

        config = BertConfig.from_pretrained(self.model_name_or_path)
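        # Load the fine-tuned checkpoint; from_pretrained forwards the extra
        # positional argument (self.args) to the model constructor, which
        # suggests a project-specific BertForTokenClassification subclass
        # rather than the stock transformers class.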
        model = BertForTokenClassification.from_pretrained(
            os.path.join(self.output_dir, "pytorch_model.bin"),
            self.args,
            config=config)
        model.to(self.device)
        model.eval()

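        # Accumulate per-batch logits and gold labels; they are concatenated
        # into corpus-level arrays after the loop.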
        gold_labels_tokens_start = []
        gold_labels_tokens_end = []
        gold_label_sentence_domainslot = []
        gold_label_tokens_domainslot = []
        scores_tokens_start = []
        scores_tokens_end = []
        scores_sentence_domainslot = []
        scores_tokens_domainslot = []

        for (input_ids, input_mask, segment_ids,
             utterance_mask, domainslot_mask,
             label_tokens_start, label_tokens_end,
             label_sentence_domainslot,
             label_tokens_domainslot) in test_dataloader:

            input_ids = input_ids.to(self.device)
            input_mask = input_mask.to(self.device)
            segment_ids = segment_ids.to(self.device)
            utterance_mask = utterance_mask.to(self.device)
            domainslot_mask = domainslot_mask.to(self.device)
            label_tokens_start = label_tokens_start.to(self.device)
            label_tokens_end = label_tokens_end.to(self.device)
            label_sentence_domainslot = label_sentence_domainslot.to(
                self.device)
            label_tokens_domainslot = label_tokens_domainslot.to(self.device)

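            # Two forward passes per batch: the call with labels returns the
            # four eval losses (computed but never used below); the call
            # without labels returns the raw logits used for scoring.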
            with torch.no_grad():
                (batch_eval_loss_tokens_start, batch_eval_loss_tokens_end,
                 batch_eval_loss_sentence_domainslot,
                 batch_eval_loss_tokens_domainslot) = model(
                     input_ids=input_ids,
                     token_type_ids=segment_ids,
                     attention_mask=input_mask,
                     utterance_mask=utterance_mask,
                     domainslot_mask=domainslot_mask,
                     label_tokens_start=label_tokens_start,
                     label_tokens_end=label_tokens_end,
                     label_sentence_domainslot=label_sentence_domainslot,
                     label_tokens_domainslot=label_tokens_domainslot)
                (logits_tokens_start, logits_tokens_end,
                 logits_sentence_domainslot, logits_tokens_domainslot) = model(
                     input_ids=input_ids,
                     token_type_ids=segment_ids,
                     attention_mask=input_mask,
                     utterance_mask=utterance_mask,
                     domainslot_mask=domainslot_mask)

            logits_tokens_start = logits_tokens_start.view(-1, 2).cpu().numpy()
            logits_tokens_end = logits_tokens_end.view(-1, 2).cpu().numpy()
            logits_tokens_domainslot = logits_tokens_domainslot.view(
                -1, 2).cpu().numpy()
            logits_sentence_domainslot = logits_sentence_domainslot.view(
                -1, 2).cpu().numpy()

            label_tokens_start = label_tokens_start.view(-1).cpu().numpy()
            label_tokens_end = label_tokens_end.view(-1).cpu().numpy()
            label_sentence_domainslot = label_sentence_domainslot.cpu().numpy()
            label_tokens_domainslot = label_tokens_domainslot.cpu().numpy()

            scores_tokens_start.append(logits_tokens_start)
            scores_tokens_end.append(logits_tokens_end)
            scores_sentence_domainslot.append(logits_sentence_domainslot)
            scores_tokens_domainslot.append(logits_tokens_domainslot)

            gold_labels_tokens_start.append(label_tokens_start)
            gold_labels_tokens_end.append(label_tokens_end)
            gold_label_sentence_domainslot.append(label_sentence_domainslot)
            gold_label_tokens_domainslot.append(label_tokens_domainslot)

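        # Stitch the per-batch arrays into single corpus-level arrays.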
        gold_labels_tokens_start = np.concatenate(gold_labels_tokens_start, 0)
        gold_labels_tokens_end = np.concatenate(gold_labels_tokens_end, 0)
        gold_label_sentence_domainslot = np.concatenate(
            gold_label_sentence_domainslot, 0)
        gold_label_tokens_domainslot = np.concatenate(
            gold_label_tokens_domainslot, 0)

        scores_tokens_start = np.concatenate(scores_tokens_start, 0)
        scores_tokens_end = np.concatenate(scores_tokens_end, 0)
        scores_sentence_domainslot = np.concatenate(
            scores_sentence_domainslot, 0)
        scores_tokens_domainslot = np.concatenate(scores_tokens_domainslot, 0)

        # Compute evaluation metrics
        eval_F1_tokenstart, eval_F1_tokenend, F1_sentence_domainslot, F1_token_domainslot = compute_jointGoal_domainslot_1_(
            dialogueID, test_hist_tokens, scores_tokens_start,
            scores_tokens_end, scores_sentence_domainslot,
            scores_tokens_domainslot, gold_labels_tokens_start,
            gold_labels_tokens_end, gold_label_sentence_domainslot,
            gold_label_tokens_domainslot)

        print('F1_token_domainslot', F1_token_domainslot,
              'F1_sentence_domainslot', F1_sentence_domainslot,
              'eval_F1_tokenstart', eval_F1_tokenstart, 'eval_F1_tokenend',
              eval_F1_tokenend)
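
The metric helper compute_jointGoal_domainslot_1_ is defined elsewhere in the project. For orientation only: the start/end logits above are reshaped to (num_tokens, 2), i.e. a binary boundary decision per token. A minimal, hypothetical sketch of decoding such logits into value spans follows; the function name and the greedy start/end pairing are assumptions, not the repository's code:

    def decode_token_spans(logits_start, logits_end):
        # logits_*: numpy arrays of shape (num_tokens, 2), as produced
        # above by view(-1, 2); class 1 marks a boundary at that token.
        import numpy as np
        starts = np.argmax(logits_start, axis=1)
        ends = np.argmax(logits_end, axis=1)
        spans, open_start = [], None
        for i, (s, e) in enumerate(zip(starts, ends)):
            if s == 1:          # token opens a span
                open_start = i
            if e == 1 and open_start is not None:
                spans.append((open_start, i))   # inclusive token span
                open_start = None
        return spans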
Example #2
    def create_dataloader(self):
        data = DATAMultiWOZ(debug=False, data_dir=self.data_dir)
        train_examples = data.read_examples(
            os.path.join(self.data_dir, 'train.json'))
        train_features = data.convert_examples_to_features(
            train_examples, self.tokenizer, self.max_seq_length)
        all_input_ids = torch.tensor(
            data.select_field(train_features, 'input_ids'), dtype=torch.long)
        all_input_mask = torch.tensor(
            data.select_field(train_features, 'input_mask'), dtype=torch.long)
        all_segment_ids = torch.tensor(
            data.select_field(train_features, 'segment_ids'),
            dtype=torch.long)
        all_utterance_mask = torch.tensor(
            data.select_field(train_features, 'utterance_mask'),
            dtype=torch.long)
        all_domainslot_mask = torch.tensor(
            data.select_field(train_features, 'domainslot_mask'),
            dtype=torch.long)
        all_label_tokens_start = torch.tensor(
            [f.label_tokens_start for f in train_features], dtype=torch.long)
        all_label_tokens_end = torch.tensor(
            [f.label_tokens_end for f in train_features], dtype=torch.long)
        all_label_sentence_domainslot = torch.tensor(
            [f.label_sentence_domainslot for f in train_features],
            dtype=torch.long)
        all_label_tokens_domainslot = torch.tensor(
            [f.label_tokens_domainslot for f in train_features],
            dtype=torch.long)

        all_hist_tokens = [f.hist_token for f in train_features]

        train_data = TensorDataset(all_input_ids, all_input_mask,
                                   all_segment_ids, all_utterance_mask,
                                   all_domainslot_mask, all_label_tokens_start,
                                   all_label_tokens_end,
                                   all_label_sentence_domainslot,
                                   all_label_tokens_domainslot)

        train_sampler = RandomSampler(train_data)
        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=self.train_batch_size)

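        # Build the evaluation split the same way, but sample it sequentially.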
        eval_examples = data.read_examples(
            os.path.join(self.data_dir, 'test.json'))
        eval_features = data.convert_examples_to_features(
            eval_examples, self.tokenizer, self.max_seq_length)
        eval_input_ids = torch.tensor(
            data.select_field(eval_features, 'input_ids'), dtype=torch.long)
        eval_input_mask = torch.tensor(
            data.select_field(eval_features, 'input_mask'), dtype=torch.long)
        eval_segment_ids = torch.tensor(
            data.select_field(eval_features, 'segment_ids'), dtype=torch.long)
        eval_utterance_mask = torch.tensor(
            data.select_field(eval_features, 'utterance_mask'),
            dtype=torch.long)
        eval_domainslot_mask = torch.tensor(
            data.select_field(eval_features, 'domainslot_mask'),
            dtype=torch.long)
        eval_label_tokens_start = torch.tensor(
            [f.label_tokens_start for f in eval_features], dtype=torch.long)
        eval_label_tokens_end = torch.tensor(
            [f.label_tokens_end for f in eval_features], dtype=torch.long)
        eval_label_sentence_domainslot = torch.tensor(
            [f.label_sentence_domainslot for f in eval_features],
            dtype=torch.long)
        eval_label_tokens_domainslot = torch.tensor(
            [f.label_tokens_domainslot for f in eval_features],
            dtype=torch.long)

        eval_hist_tokens = [f.hist_token for f in eval_features]

        eval_data = TensorDataset(
            eval_input_ids, eval_input_mask, eval_segment_ids,
            eval_utterance_mask, eval_domainslot_mask, eval_label_tokens_start,
            eval_label_tokens_end, eval_label_sentence_domainslot,
            eval_label_tokens_domainslot)
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=self.eval_batch_size)

        return train_dataloader, eval_dataloader, train_examples, eval_examples, all_hist_tokens, eval_hist_tokens
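
All six values are returned so that downstream training and evaluation code can reuse the examples and history tokens. Assuming an instance named trainer (a hypothetical name), a caller would unpack them as:

    (train_dataloader, eval_dataloader, train_examples, eval_examples,
     all_hist_tokens, eval_hist_tokens) = trainer.create_dataloader()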
Example #3
    def test_eval(self):
        data = DATAMultiWOZ(debug=False, data_dir=self.data_dir)
        test_examples = data.read_examples(
            os.path.join(self.data_dir, 'test.json'))
        print('Number of eval examples:', len(test_examples))

        dialogueID = [x.guid for x in test_examples]
        utterance_text = [x.text_history for x in test_examples]

        test_features = data.convert_examples_to_features(
            test_examples, self.tokenizer, self.max_seq_length)
        all_input_ids = torch.tensor(
            data.select_field(test_features, 'input_ids'), dtype=torch.long)
        all_input_mask = torch.tensor(
            data.select_field(test_features, 'input_mask'), dtype=torch.long)
        all_segment_ids = torch.tensor(
            data.select_field(test_features, 'segment_ids'), dtype=torch.long)
        eval_labels_domainslot = torch.tensor(
            [f.labels_domainslot for f in test_features], dtype=torch.float)
        eval_labels_domain = torch.tensor(
            [f.labels_domain for f in test_features], dtype=torch.long)
        eval_labels_dependcy = torch.tensor(
            [f.labels_dependcy for f in test_features], dtype=torch.long)

        test_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids, eval_labels_domainslot,
                                  eval_labels_domain, eval_labels_dependcy)
        # Run prediction for full data
        test_sampler = SequentialSampler(test_data)
        test_dataloader = DataLoader(test_data,
                                     sampler=test_sampler,
                                     batch_size=self.eval_batch_size)

        config = BertConfig.from_pretrained(self.model_name_or_path)
        model = BertForTokenClassification.from_pretrained(
            os.path.join(self.output_dir, "pytorch_model.bin"),
            self.args,
            config=config)
        model.to(self.device)
        model.eval()

        gold_labels_domain = []
        gold_labels_dependcy = []
        gold_labels_domainslot = []
        scores_domainslot = []
        scores_domain = []
        scores_dependcy = []

        for (input_ids, input_mask, segment_ids, label_domainslot,
             label_domain, label_dependcy) in test_dataloader:
            input_ids = input_ids.to(self.device)
            input_mask = input_mask.to(self.device)
            segment_ids = segment_ids.to(self.device)
            label_domainslot = label_domainslot.to(self.device)
            label_domain = label_domain.to(self.device)
            label_dependcy = label_dependcy.to(self.device)

            with torch.no_grad():
                logits_domainslot, logits_domain, logits_dependcy = model(
                    input_ids=input_ids,
                    token_type_ids=segment_ids,
                    attention_mask=input_mask)
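            # Multi-label domain-slot head: sigmoid each logit and binarize
            # with a 0.4 decision threshold.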
            logits_domainslot = torch.sigmoid(logits_domainslot)
            logits_domainslot = (logits_domainslot > 0.4).float()
            logits_domainslot = logits_domainslot.cpu().long().numpy()

            logits_domain = logits_domain.view(
                -1, self.num_labels_domain).cpu().numpy()
            logits_dependcy = logits_dependcy.view(
                -1, self.num_labels_dependcy).cpu().numpy()

            label_domainslot = label_domainslot.to('cpu').numpy()
            label_domain = label_domain.view(-1).to('cpu').numpy()
            label_dependcy = label_dependcy.view(-1).to('cpu').numpy()

            scores_domainslot.append(logits_domainslot)
            scores_domain.append(logits_domain)
            scores_dependcy.append(logits_dependcy)

            gold_labels_domainslot.append(label_domainslot)
            gold_labels_domain.append(label_domain)
            gold_labels_dependcy.append(label_dependcy)

        gold_labels_domainslot = np.concatenate(gold_labels_domainslot, 0)
        gold_labels_domain = np.concatenate(gold_labels_domain, 0)
        gold_labels_dependcy = np.concatenate(gold_labels_dependcy, 0)
        scores_domainslot = np.concatenate(scores_domainslot, 0)
        scores_domain = np.concatenate(scores_domain, 0)
        scores_dependcy = np.concatenate(scores_dependcy, 0)

        # Compute evaluation metrics
        assert (scores_domain.shape[0] == scores_dependcy.shape[0] ==
                gold_labels_domain.shape[0] == gold_labels_dependcy.shape[0])
        eval_accuracy_domain = accuracyF1(scores_domain,
                                          gold_labels_domain,
                                          mode='domain',
                                          report=True)
        eval_accuracy_dependcy = accuracyF1(scores_dependcy,
                                            gold_labels_dependcy,
                                            mode='dependcy',
                                            report=True)
        eval_jointGoal = compute_jointGoal_domainslot(
            dialogueID, utterance_text, scores_domainslot,
            gold_labels_domainslot, scores_domain, gold_labels_domain,
            scores_dependcy, gold_labels_dependcy)
        print('eval_accuracy_domain', eval_accuracy_domain)
        print('eval_accuracy_dependcy', eval_accuracy_dependcy)
        print('eval_jointGoal', eval_jointGoal)
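
accuracyF1 and compute_jointGoal_domainslot are project helpers that these examples call but do not define. Given how accuracyF1 is called above (class logits plus integer gold labels, with an optional per-class report), a minimal sketch of a plausible implementation follows; the argmax-then-micro-F1 logic and the scikit-learn calls are assumptions, not the repository's code:

    import numpy as np
    from sklearn.metrics import classification_report, f1_score

    def accuracyF1(scores, gold_labels, mode='domain', report=False):
        # scores: (N, num_labels) logits; gold_labels: (N,) integer labels.
        preds = np.argmax(scores, axis=1)
        if report:
            # 'mode' only labels the printout in this sketch.
            print('classification report ({}):'.format(mode))
            print(classification_report(gold_labels, preds, digits=4))
        return f1_score(gold_labels, preds, average='micro')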