Example #1
    def predict(self, inputs):
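        # Each input item is expected to be (sentence, 'entity1 entity2').
        # The sentence is converted to ids, and an indicator vector marks the
        # first entity's span with 1 and the second entity's span with -1;
        # predicted ids are mapped back to relation labels via data_loader.id2rel.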
        total_sen = []
        total_indicator = []
        length = []
        for n in inputs:
            sen = n[0]
            en1, en2 = n[1].split()
            word_id = data_loader.word2id(list(sen), self.vocab)
            total_sen.append(word_id)
            en_indicator = [0] * len(sen)
            en_indicator[sen.index(en1):sen.index(en1) +
                         len(en1)] = [1] * len(en1)
            en_indicator[sen.index(en2):sen.index(en2) +
                         len(en2)] = [-1] * len(en2)
            total_indicator.append(en_indicator)
            length.append(len(sen))

        total_sen = tf.keras.preprocessing.sequence.pad_sequences(
            total_sen, dtype='int64', padding='post')
        total_indicator = tf.keras.preprocessing.sequence.pad_sequences(
            total_indicator, dtype='int64', padding='post')
        length = np.array(length)

        predict_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={
                'word_id': total_sen,
                'en_indicator': total_indicator,
                'length': length
            },
            batch_size=512,
            num_epochs=1,
            shuffle=False)
        results = list(self.estimator.predict(input_fn=predict_input_fn))
        results = data_loader.id2rel(results, self.rel_dict)
        return results
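All of these examples rely on the same post-padding step. A minimal standalone illustration of what tf.keras.preprocessing.sequence.pad_sequences(..., padding='post') does to ragged id lists (the id values below are made up):

    import tensorflow as tf

    seqs = [[3, 1, 4], [1, 5]]
    padded = tf.keras.preprocessing.sequence.pad_sequences(
        seqs, dtype='int64', padding='post')
    # padded is a 2-D array: [[3, 1, 4], [1, 5, 0]] -- shorter rows are
    # right-padded with 0 up to the length of the longest row.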
Example #2
    def predict(self, text):
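        # Convert each text to word ids and per-character images, pad both to
        # a common length, normalize the images to [-1, 1], and decode the
        # predicted label ids back to tags via data_loader.id2label, trimmed
        # to each text's true length.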
        if not isinstance(text, list):
            text = [text]
        length = np.array([len(t) for t in text])
        word_id = [data_loader.word2id(list(t), self.vocab) for t in text]
        char_images = [
            data_loader.word2image(list(t), self.char2image) for t in text
        ]
        word_id = tf.keras.preprocessing.sequence.pad_sequences(
            word_id, dtype='int64', padding='post')
        char_images = tf.keras.preprocessing.sequence.pad_sequences(
            char_images, dtype='float32', padding='post')
        char_images = (char_images / 255 - 0.5) * 2

        predict_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={
                "word_id": word_id,
                'char_image': char_images,
                'length': length
            },
            batch_size=512,
            num_epochs=1,
            shuffle=False)
        labels = list(self.estimator.predict(input_fn=predict_input_fn))
        return [
            data_loader.id2label(labels[i][:length[i]], self.tag)
            for i in range(len(text))
        ]
Example #3
    def predict(self, sen, pos):
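        # Inputs are token and POS sequences; a dummy ROOT position (id 0) is
        # prepended to each. Head indices are decoded from the arc logits with
        # mst(), the best label is taken per arc, and each token is returned
        # as a (word, pos, head, dependency-label) tuple.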
        assert len(sen) == len(pos)
        if not isinstance(sen[0], list):
            sen = [sen]
        if not isinstance(pos[0], list):
            pos = [pos]

        length = np.array([len(s) + 1 for s in sen])
        word_id = [[0] + data_loader.word2id(s, self.vocab) for s in sen]
        pos_id = [[0] + data_loader.pos2id(p, self.pos_dict) for p in pos]
        word_id = tf.keras.preprocessing.sequence.pad_sequences(word_id, dtype='int64', padding='post')
        pos_id = tf.keras.preprocessing.sequence.pad_sequences(pos_id, dtype='int64', padding='post')

        predict_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"word_id": word_id, 'pos_id': pos_id, 'length': length},
            batch_size=128,
            num_epochs=1,
            shuffle=False)
        pred = list(self.estimator.predict(input_fn=predict_input_fn))
        results = []
        for i in range(len(pred)):
            arc = mst(pred[i]['arc_logits'][:length[i], :length[i]])[1:]
            label = np.argmax(pred[i]['label_logits'][range(1, length[i]), arc, :], -1)
            label = data_loader.id2dep(label, self.dep_dict)
            result = [(w, p, str(a), l)
                      for w, p, a, l in zip(sen[i], pos[i], arc, label)]
            results.append(result)
        return results
Example #4
    def predict(self, inputs):
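        # Each input item is (premise_words, premise_tags, hypothesis_words,
        # hypothesis_tags). Sequences are padded to a fixed length, converted
        # to ids, and the character images are normalized from [0, 255] to
        # [-1, 1].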
        premise_words = [data_loader.pad_to_fixed_len(n[0]) for n in inputs]
        premise_tags = [data_loader.pad_to_fixed_len(n[1]) for n in inputs]
        hypothesis_words = [data_loader.pad_to_fixed_len(n[2]) for n in inputs]
        hypothesis_tags = [data_loader.pad_to_fixed_len(n[3]) for n in inputs]

        p_word_id = np.array([data_loader.word2id(s, self.vocab) for s in premise_words], np.int64)
        p_pos_id = np.array([data_loader.pos2id(p, self.pos_dict) for p in premise_tags], np.int64)
        h_word_id = np.array([data_loader.word2id(s, self.vocab) for s in hypothesis_words], np.int64)
        h_pos_id = np.array([data_loader.pos2id(p, self.pos_dict) for p in hypothesis_tags], np.int64)
        p_char_images = np.array([data_loader.word2image(s) for s in premise_words], np.float32) / 127.5 - 1
        h_char_images = np.array([data_loader.word2image(s) for s in hypothesis_words], np.float32) / 127.5 - 1

        predict_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'p_word_id': p_word_id, 'p_pos_id': p_pos_id, 'h_word_id': h_word_id, 'h_pos_id': h_pos_id,
               'p_char_images': p_char_images, 'h_char_images': h_char_images},
            batch_size=20,
            num_epochs=1,
            shuffle=False)
        results = list(self.estimator.predict(input_fn=predict_input_fn))
        return results
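A quick standalone check of the character-image normalization used here: dividing by 127.5 and subtracting 1 maps pixel values from [0, 255] to [-1, 1], the same transform as the (x / 255 - 0.5) * 2 form in Example #2:

    import numpy as np

    pixels = np.array([0.0, 127.5, 255.0], np.float32)
    print(pixels / 127.5 - 1)        # [-1.  0.  1.]
    print((pixels / 255 - 0.5) * 2)  # [-1.  0.  1.] -- same transform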
Example #5
    def predict(self, text):
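        # Character-level sequence labeling: convert each text to ids, pad to
        # a common length, and look each predicted id up in the id2label
        # mapping, trimmed to each text's true length.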
        if not isinstance(text, list):
            text = [text]
        length = np.array([len(t) for t in text])
        word_id = [data_loader.word2id(list(t), self.vocab) for t in text]
        word_id = tf.keras.preprocessing.sequence.pad_sequences(word_id,
                                                                padding='post')

        predict_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={
                "word_id": word_id,
                'length': length
            },
            batch_size=1024,
            num_epochs=1,
            shuffle=False)
        labels = list(self.estimator.predict(input_fn=predict_input_fn))
        return [[id2label[l] for l in labels[i][:length[i]]]
                for i in range(len(text))]
Example #6
    def predict(self, text):
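        # Convert each text to character ids, pad or truncate every sequence
        # to Config.data.max_sequence_length, and map the estimator's
        # predictions back to labels via data_loader.id2label.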
        if not isinstance(text, list):
            text = [text]
        word_id = [data_loader.word2id(list(t), self.vocab) for t in text]
        word_id = tf.keras.preprocessing.sequence.pad_sequences(
            word_id,
            maxlen=Config.data.max_sequence_length,
            dtype='int64',
            padding='post',
            truncating='post')

        predict_input_fn = tf.estimator.inputs.numpy_input_fn(
            x=word_id, batch_size=512, num_epochs=1, shuffle=False)
        labels = list(self.estimator.predict(input_fn=predict_input_fn))
        return [
            data_loader.id2label(labels[i], self.tag) for i in range(len(text))
        ]
Example #7
    def predict(self, sen, pos):
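        # Prediction from word and POS ids: pred_head[n] lists the predicted
        # head position(s) of token n (-1 entries are skipped) and pred_dep
        # holds the matching dependency label ids; every kept arc is returned
        # as a (word, pos, head, label) tuple.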
        assert len(sen) == len(pos)
        if not isinstance(sen[0], list):
            sen = [sen]
        if not isinstance(pos[0], list):
            pos = [pos]

        length = np.array([len(s) for s in sen])
        word_id = [data_loader.word2id(s, self.vocab) for s in sen]
        pos_id = [data_loader.pos2id(p, self.pos_dict) for p in pos]
        word_id = tf.keras.preprocessing.sequence.pad_sequences(word_id,
                                                                dtype='int64',
                                                                padding='post')
        pos_id = tf.keras.preprocessing.sequence.pad_sequences(pos_id,
                                                               dtype='int64',
                                                               padding='post')

        predict_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={
                "word_id": word_id,
                'pos_id': pos_id,
                'length': length
            },
            batch_size=128,
            num_epochs=1,
            shuffle=False)
        pred = list(self.estimator.predict(input_fn=predict_input_fn))
        results = []
        for i in range(len(pred)):
            result = []
            sample_head = pred[i]['pred_head'][:length[i]]
            sample_dep = pred[i]['pred_dep'][:length[i]]
            for n, word_head in enumerate(sample_head):
                for j, head in enumerate(word_head):
                    if head == -1:
                        continue
                    else:
                        result.append(
                            (sen[i][n], pos[i][n], str(head),
                             data_loader.id2dep(sample_dep[n][j],
                                                self.dep_dict)))
            results.append(result)
        return results
Example #8
    def predict(self, inputs):
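        # Each input item is (words, tags, predicate_word). A binary indicator
        # marks the predicate's position (first occurrence) in the word
        # sequence; word ids, tag ids and the indicator are padded together,
        # and predicted label ids are mapped back to strings, trimmed to each
        # sentence's length.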
        word_id = []
        tag_id = []
        predicate = []
        length = []
        for n in inputs:
            word_id.append(data_loader.word2id(n[0], self.vocab))
            tag_id.append(data_loader.tag2id(n[1], self.tag_dict))
            length.append(len(n[0]))
            temp = [0] * len(n[0])
            temp[n[0].index(n[2])] = 1
            predicate.append(temp)

        length = np.array(length)
        word_id = tf.keras.preprocessing.sequence.pad_sequences(word_id,
                                                                dtype='int64',
                                                                padding='post')
        tag_id = tf.keras.preprocessing.sequence.pad_sequences(tag_id,
                                                               dtype='int64',
                                                               padding='post')
        predicate = tf.keras.preprocessing.sequence.pad_sequences(
            predicate, dtype='int64', padding='post')

        predict_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={
                'word_id': word_id,
                'tag_id': tag_id,
                'predicate': predicate,
                'length': length
            },
            batch_size=512,
            num_epochs=1,
            shuffle=False)
        labels = list(self.estimator.predict(input_fn=predict_input_fn))
        return [
            data_loader.id2label(labels[i][:length[i]], self.label_dict)
            for i in range(len(inputs))
        ]
Example #9
    def predict(self, inputs):
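        # Each input item is (sentence, 'entity1 entity2'). Besides word ids,
        # the features include the end position of each entity and a relative
        # position sequence per entity (negative before the entity, 0 inside
        # it, positive after it), encoded via data_loader.pos_encode.
        # Predictions are mapped back to relation labels with data_loader.id2rel.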
        total_sen = []
        total_en1_end = []
        total_en2_end = []
        total_pos_1 = []
        total_pos_2 = []
        for n in inputs:
            sen = n[0]
            en1, en2 = n[1].split()
            en1_start = sen.index(en1)
            en1_end = en1_start + len(en1) - 1
            total_en1_end.append(en1_end)
            en2_start = sen.index(en2)
            en2_end = en2_start + len(en2) - 1
            total_en2_end.append(en2_end)
            word_id = data_loader.word2id(list(sen), self.vocab)
            total_sen.append(word_id)
            pos_1 = []
            pos_2 = []
            for idx in range(len(sen)):
                if idx < en1_start:
                    pos_1.append(idx - en1_start)
                elif en1_start <= idx <= en1_end:
                    pos_1.append(0)
                else:
                    pos_1.append(idx - en1_end)

                if idx < en2_start:
                    pos_2.append(idx - en2_start)
                elif en2_start <= idx <= en2_end:
                    pos_2.append(0)
                else:
                    pos_2.append(idx - en2_end)
            total_pos_1.append(data_loader.pos_encode(pos_1))
            total_pos_2.append(data_loader.pos_encode(pos_2))

        total_sen = tf.keras.preprocessing.sequence.pad_sequences(
            total_sen, dtype='int64', padding='post')
        total_pos_1 = tf.keras.preprocessing.sequence.pad_sequences(
            total_pos_1, dtype='int64', padding='post')
        total_pos_2 = tf.keras.preprocessing.sequence.pad_sequences(
            total_pos_2, dtype='int64', padding='post')
        total_en1_end = np.array(total_en1_end)
        total_en2_end = np.array(total_en2_end)

        predict_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={
                'word_id': total_sen,
                'pos_1': total_pos_1,
                'pos_2': total_pos_2,
                'en1_pos': total_en1_end,
                'en2_pos': total_en2_end
            },
            batch_size=512,
            num_epochs=1,
            shuffle=False)
        results = list(self.estimator.predict(input_fn=predict_input_fn))
        results = data_loader.id2rel(results, self.rel_dict)
        return results
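A standalone sketch of the relative-position features built in the inner loop above, for a made-up 8-character sentence with an entity spanning positions 2-4; data_loader.pos_encode (not shown in the snippet) would then be applied on top of these raw offsets:

    def relative_positions(sen_len, ent_start, ent_end):
        # Negative offsets before the entity, 0 inside it, positive after it,
        # mirroring how pos_1 and pos_2 are computed in the example.
        out = []
        for i in range(sen_len):
            if i < ent_start:
                out.append(i - ent_start)
            elif ent_start <= i <= ent_end:
                out.append(0)
            else:
                out.append(i - ent_end)
        return out

    print(relative_positions(8, 2, 4))  # [-2, -1, 0, 0, 0, 1, 2, 3]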