Example No. 1
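    # Assumed context: this is a method of an inferer class exposing
    # self.tokenizer, self.model and self.opt; it relies on module-level
    # imports of numpy as np, torch, torch.nn.functional as F, and the
    # project helpers dependency_adj_matrix, to_relation_word_message
    # and word_util.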
    def evaluate(self, text, aspect):
        aspect = aspect.lower().strip()
        text_left, _, text_right = [
            s.strip() for s in text.lower().partition(aspect)
        ]

        text_indices = self.tokenizer.text_to_sequence(text_left + " " +
                                                       aspect + " " +
                                                       text_right)
        context_indices = self.tokenizer.text_to_sequence(text_left + " " +
                                                          text_right)
        left_indices = self.tokenizer.text_to_sequence(text_left)
        left_with_aspect_indices = self.tokenizer.text_to_sequence(text_left +
                                                                   " " +
                                                                   aspect)
        right_indices = self.tokenizer.text_to_sequence(text_right,
                                                        reverse=True)
        right_with_aspect_indices = self.tokenizer.text_to_sequence(
            aspect + " " + text_right, reverse=True)
        aspect_indices = self.tokenizer.text_to_sequence(aspect)
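        # Index 0 is the padding id, so counting non-zero entries gives the
        # true (unpadded) token lengths; aspect_boundary then holds the
        # inclusive [start, end] token positions of the aspect span.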
        left_len = np.sum(left_indices != 0)
        aspect_len = np.sum(aspect_indices != 0)
        aspect_boundary = np.asarray([left_len, left_len + aspect_len - 1],
                                     dtype=np.int64)

        text_len = np.sum(text_indices != 0)
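        # BERT-style segment ids: [CLS] + text + [SEP] maps to segment 0
        # (text_len + 2 positions), aspect + [SEP] maps to segment 1.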
        concat_segments_indices = [0] * (text_len + 2) + [1] * (aspect_len + 1)
        concat_segments_indices = word_util.pad_and_truncate(
            concat_segments_indices, self.tokenizer.max_seq_len)

        dependency_graph = dependency_adj_matrix(text)[0]
        head_vector, behead_vector, relation_vector = to_relation_word_message(
            dependency_graph)

        data = {
            'concat_segments_indices': concat_segments_indices,
            'text_indices': text_indices,
            'context_indices': context_indices,
            'left_indices': left_indices,
            'left_with_aspect_indices': left_with_aspect_indices,
            'right_indices': right_indices,
            'right_with_aspect_indices': right_with_aspect_indices,
            'aspect_indices': aspect_indices,
            'aspect_boundary': aspect_boundary,
            'dependency_graph': dependency_graph,
            'head_vector': head_vector,
            'behead_vector': behead_vector,
            'relation_vector': relation_vector
        }

        t_inputs = [
            torch.tensor([data[col]], device=self.opt.device)
            for col in self.opt.input_controller
        ]
        t_outputs = self.model(t_inputs)
        t_probs = F.softmax(t_outputs, dim=-1).cpu().numpy()

        return t_probs
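A minimal invocation sketch, assuming the method lives on a hypothetical Inferer class that is constructed elsewhere with a trained model, a fitted tokenizer, and an opt namespace (none of which appear in the excerpt):

inferer = Inferer(opt)  # hypothetical wrapper holding model, tokenizer, opt
probs = inferer.evaluate('The food was tasty but the service was slow.',
                         'service')
print(probs.argmax(axis=-1))  # index of the most probable sentiment class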
Example No. 2
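    # Assumed context: same inferer-class setup as Example No. 1; this
    # variant takes only the raw text and builds batch tensors directly.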
    def evaluate(self, raw_text):
        text_seqs = [self.tokenizer.text_to_sequence(raw_text.lower())]
        # aspect_seqs = [self.tokenizer.text_to_sequence(aspect.lower())]
        # left_seqs = [self.tokenizer.text_to_sequence(raw_text.lower().split(aspect.lower())[0])]
        text_indices = torch.tensor(text_seqs, dtype=torch.int64)
        # aspect_indices = torch.tensor(aspect_seqs, dtype=torch.int64)
        # left_indices = torch.tensor(left_seqs, dtype=torch.int64)
        dependency_graph = torch.tensor(
            [dependency_adj_matrix(raw_text.lower())])
        data = {
            'text_indices': text_indices,
            'dependency_graph': dependency_graph
        }
        t_inputs = [
            data[col].to(self.opt.device) for col in self.opt.inputs_cols
        ]
        t_outputs = self.model(t_inputs)

        t_probs = F.softmax(t_outputs, dim=-1).cpu().numpy()
        return t_probs
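A sketch of how this variant might be called, under the same assumption of a wrapping Inferer class (the class name is hypothetical):

inferer = Inferer(opt)  # hypothetical wrapper, as above
probs = inferer.evaluate('Great screen, terrible battery life.')
print(probs)  # one row of class probabilities for the whole sentence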
Example No. 3
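    # Assumed context: same inferer-class setup as Example No. 1, with
    # pad_and_truncate imported at module level; this variant additionally
    # prepares BERT-style inputs.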
    def evaluate(self, text, aspect):
        aspect = aspect.lower().strip()
        text_left, _, text_right = [
            s.strip() for s in text.lower().partition(aspect)
        ]

        text_indices = self.tokenizer.text_to_sequence(text_left + " " +
                                                       aspect + " " +
                                                       text_right)
        context_indices = self.tokenizer.text_to_sequence(text_left + " " +
                                                          text_right)
        left_indices = self.tokenizer.text_to_sequence(text_left)
        left_with_aspect_indices = self.tokenizer.text_to_sequence(text_left +
                                                                   " " +
                                                                   aspect)
        right_indices = self.tokenizer.text_to_sequence(text_right,
                                                        reverse=True)
        right_with_aspect_indices = self.tokenizer.text_to_sequence(
            aspect + " " + text_right, reverse=True)
        aspect_indices = self.tokenizer.text_to_sequence(aspect)
        left_len = np.sum(left_indices != 0)
        aspect_len = np.sum(aspect_indices != 0)
        aspect_boundary = np.asarray([left_len, left_len + aspect_len - 1],
                                     dtype=np.int64)

        text_len = np.sum(text_indices != 0)
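        # Sentence-pair encoding "[CLS] text [SEP] aspect [SEP]", matching
        # the segment ids built below: segment 0 covers [CLS] + text + [SEP],
        # segment 1 covers aspect + [SEP].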
        concat_bert_indices = self.tokenizer.text_to_sequence('[CLS] ' +
                                                              text_left + " " +
                                                              aspect + " " +
                                                              text_right +
                                                              ' [SEP] ' +
                                                              aspect +
                                                              " [SEP]")
        concat_segments_indices = [0] * (text_len + 2) + [1] * (aspect_len + 1)
        concat_segments_indices = pad_and_truncate(concat_segments_indices,
                                                   self.tokenizer.max_seq_len)

        text_bert_indices = self.tokenizer.text_to_sequence("[CLS] " +
                                                            text_left + " " +
                                                            aspect + " " +
                                                            text_right +
                                                            " [SEP]")
        aspect_bert_indices = self.tokenizer.text_to_sequence("[CLS] " +
                                                              aspect +
                                                              " [SEP]")

        dependency_graph = dependency_adj_matrix(text)

        data = {
            'concat_bert_indices': concat_bert_indices,
            'concat_segments_indices': concat_segments_indices,
            'text_bert_indices': text_bert_indices,
            'aspect_bert_indices': aspect_bert_indices,
            'text_indices': text_indices,
            'context_indices': context_indices,
            'left_indices': left_indices,
            'left_with_aspect_indices': left_with_aspect_indices,
            'right_indices': right_indices,
            'right_with_aspect_indices': right_with_aspect_indices,
            'aspect_indices': aspect_indices,
            'aspect_boundary': aspect_boundary,
            'dependency_graph': dependency_graph,
        }

        t_inputs = [
            torch.tensor([data[col]], device=self.opt.device)
            for col in self.opt.inputs_cols
        ]
        t_outputs = self.model(t_inputs)
        t_probs = F.softmax(t_outputs, dim=-1).cpu().numpy()

        return t_probs
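As with the first example, a hypothetical call might look like this (Inferer and its construction are assumptions, not part of the excerpt):

inferer = Inferer(opt)  # hypothetical wrapper holding the BERT-based model
probs = inferer.evaluate('The pasta was excellent, the wine list less so.',
                         'wine list')
print(probs.argmax(axis=-1))  # predicted sentiment class for "wine list"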