def predict(self, inputs):
    """Predict relation labels for (sentence, entity-pair) inputs.

    Args:
        inputs: iterable of pairs where item[0] is the sentence string and
            item[1] is a whitespace-separated "en1 en2" entity pair; both
            entities must occur verbatim in the sentence (str.index raises
            ValueError otherwise, as in the original).

    Returns:
        Relation names decoded via data_loader.id2rel.
    """
    total_sen = []
    total_indicator = []
    length = []
    for item in inputs:
        sen = item[0]
        en1, en2 = item[1].split()
        total_sen.append(data_loader.word2id(list(sen), self.vocab))
        # Entity-span indicator: +1 over entity 1, -1 over entity 2, 0
        # elsewhere. sen.index() is hoisted so each entity is searched
        # once (the original scanned the sentence twice per entity).
        en_indicator = [0] * len(sen)
        en1_start = sen.index(en1)
        en_indicator[en1_start:en1_start + len(en1)] = [1] * len(en1)
        en2_start = sen.index(en2)
        en_indicator[en2_start:en2_start + len(en2)] = [-1] * len(en2)
        total_indicator.append(en_indicator)
        length.append(len(sen))
    total_sen = tf.keras.preprocessing.sequence.pad_sequences(
        total_sen, dtype='int64', padding='post')
    total_indicator = tf.keras.preprocessing.sequence.pad_sequences(
        total_indicator, dtype='int64', padding='post')
    length = np.array(length)
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(x={
        'word_id': total_sen,
        'en_indicator': total_indicator,
        'length': length
    }, batch_size=512, num_epochs=1, shuffle=False)
    results = list(self.estimator.predict(input_fn=predict_input_fn))
    results = data_loader.id2rel(results, self.rel_dict)
    return results
def predict(self, text):
    """Run character-level sequence labeling over one string or a list
    of strings.

    Returns one decoded label sequence per input, truncated back to the
    input's unpadded length.
    """
    if not isinstance(text, list):
        text = [text]
    lengths = np.array([len(sample) for sample in text])
    ids = [data_loader.word2id(list(sample), self.vocab) for sample in text]
    images = [
        data_loader.word2image(list(sample), self.char2image)
        for sample in text
    ]
    ids = tf.keras.preprocessing.sequence.pad_sequences(
        ids, dtype='int64', padding='post')
    images = tf.keras.preprocessing.sequence.pad_sequences(
        images, dtype='float32', padding='post')
    # Rescale pixel values from [0, 255] into [-1, 1].
    images = (images / 255 - 0.5) * 2
    features = {'word_id': ids, 'char_image': images, 'length': lengths}
    input_fn = tf.estimator.inputs.numpy_input_fn(
        x=features, batch_size=512, num_epochs=1, shuffle=False)
    predictions = list(self.estimator.predict(input_fn=input_fn))
    return [
        data_loader.id2label(predictions[i][:lengths[i]], self.tag)
        for i in range(len(text))
    ]
def predict(self, sen, pos):
    """Dependency parsing via MST decoding over arc/label logits.

    Args:
        sen: one sentence (list of words) or a list of sentences.
        pos: matching POS tag list(s); must align 1:1 with `sen`.

    Returns:
        Per sentence, a list of (word, tag, head_index_str, dep_label)
        tuples.
    """
    assert len(sen) == len(pos)
    if not isinstance(sen[0], list):
        sen = [sen]
    if not isinstance(pos[0], list):
        pos = [pos]
    # The prepended 0 id and the +1 length add an artificial token at
    # position 0 — presumably the ROOT node; TODO confirm in data_loader.
    length = np.array([len(s) + 1 for s in sen])
    word_id = [[0] + data_loader.word2id(s, self.vocab) for s in sen]
    pos_id = [[0] + data_loader.pos2id(p, self.pos_dict) for p in pos]
    word_id = tf.keras.preprocessing.sequence.pad_sequences(word_id,
                                                            dtype='int64',
                                                            padding='post')
    pos_id = tf.keras.preprocessing.sequence.pad_sequences(pos_id,
                                                           dtype='int64',
                                                           padding='post')
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"word_id": word_id, 'pos_id': pos_id, 'length': length},
        batch_size=128,
        num_epochs=1,
        shuffle=False)
    pred = list(self.estimator.predict(input_fn=predict_input_fn))
    results = []
    for i in range(len(pred)):
        # Decode heads on the unpadded score matrix; [1:] drops the
        # artificial position-0 token from the output.
        arc = mst(pred[i]['arc_logits'][:length[i], :length[i]])[1:]
        label = np.argmax(
            pred[i]['label_logits'][range(1, length[i]), arc, :], -1)
        label = data_loader.id2dep(label, self.dep_dict)
        # Build the result directly instead of the original
        # side-effecting [result.append(...) for ...] comprehension.
        results.append([(w, p, str(a), l)
                        for w, p, a, l in zip(sen[i], pos[i], arc, label)])
    return results
def predict(self, inputs):
    """Predict over (premise_words, premise_tags, hypothesis_words,
    hypothesis_tags) tuples and return the raw estimator outputs.
    """
    def _as_ids(seqs, mapper, table):
        # Convert token/tag sequences to a fixed-shape int64 id matrix.
        return np.array([mapper(s, table) for s in seqs], np.int64)

    def _as_images(seqs):
        # word2image pixels rescaled from [0, 255] into [-1, 1].
        return np.array([data_loader.word2image(s) for s in seqs],
                        np.float32) / 127.5 - 1

    premise_words = [data_loader.pad_to_fixed_len(item[0]) for item in inputs]
    premise_tags = [data_loader.pad_to_fixed_len(item[1]) for item in inputs]
    hypothesis_words = [data_loader.pad_to_fixed_len(item[2]) for item in inputs]
    hypothesis_tags = [data_loader.pad_to_fixed_len(item[3]) for item in inputs]

    features = {
        'p_word_id': _as_ids(premise_words, data_loader.word2id, self.vocab),
        'p_pos_id': _as_ids(premise_tags, data_loader.pos2id, self.pos_dict),
        'h_word_id': _as_ids(hypothesis_words, data_loader.word2id, self.vocab),
        'h_pos_id': _as_ids(hypothesis_tags, data_loader.pos2id, self.pos_dict),
        'p_char_images': _as_images(premise_words),
        'h_char_images': _as_images(hypothesis_words),
    }
    input_fn = tf.estimator.inputs.numpy_input_fn(
        x=features, batch_size=20, num_epochs=1, shuffle=False)
    return list(self.estimator.predict(input_fn=input_fn))
def predict(self, text):
    """Sequence labeling over one string or a list of strings.

    Returns one label sequence per input, each truncated back to the
    unpadded length of its text.
    """
    if not isinstance(text, list):
        text = [text]
    length = np.array([len(t) for t in text])
    word_id = [data_loader.word2id(list(t), self.vocab) for t in text]
    # dtype='int64' added for consistency with the sibling predict()
    # implementations (pad_sequences otherwise defaults to int32).
    word_id = tf.keras.preprocessing.sequence.pad_sequences(word_id,
                                                            dtype='int64',
                                                            padding='post')
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(x={
        "word_id": word_id,
        'length': length
    }, batch_size=1024, num_epochs=1, shuffle=False)
    labels = list(self.estimator.predict(input_fn=predict_input_fn))
    # NOTE(review): id2label here is a module-level mapping, unlike the
    # data_loader.id2label helper used elsewhere — confirm they agree.
    return [[id2label[l] for l in labels[i][:length[i]]]
            for i in range(len(text))]
def predict(self, text):
    """Predict labels for one string or a list of strings.

    Inputs are padded/truncated to Config.data.max_sequence_length
    before being fed to the estimator.
    """
    if not isinstance(text, list):
        text = [text]
    ids = [data_loader.word2id(list(sample), self.vocab) for sample in text]
    ids = tf.keras.preprocessing.sequence.pad_sequences(
        ids,
        maxlen=Config.data.max_sequence_length,
        dtype='int64',
        padding='post',
        truncating='post')
    input_fn = tf.estimator.inputs.numpy_input_fn(
        x=ids, batch_size=512, num_epochs=1, shuffle=False)
    predictions = list(self.estimator.predict(input_fn=input_fn))
    return [
        data_loader.id2label(predictions[i], self.tag)
        for i in range(len(text))
    ]
def predict(self, sen, pos):
    """Head-selection dependency prediction.

    Args:
        sen: one sentence (list of words) or a list of sentences.
        pos: matching POS tag list(s); must align 1:1 with `sen`.

    Returns:
        Per sentence, (word, tag, head_str, dep_label) tuples for every
        predicted head slot that is not the -1 sentinel.
    """
    assert len(sen) == len(pos)
    if not isinstance(sen[0], list):
        sen = [sen]
    if not isinstance(pos[0], list):
        pos = [pos]
    lengths = np.array([len(s) for s in sen])
    word_id = tf.keras.preprocessing.sequence.pad_sequences(
        [data_loader.word2id(s, self.vocab) for s in sen],
        dtype='int64', padding='post')
    pos_id = tf.keras.preprocessing.sequence.pad_sequences(
        [data_loader.pos2id(p, self.pos_dict) for p in pos],
        dtype='int64', padding='post')
    features = {"word_id": word_id, 'pos_id': pos_id, 'length': lengths}
    input_fn = tf.estimator.inputs.numpy_input_fn(
        x=features, batch_size=128, num_epochs=1, shuffle=False)
    predictions = list(self.estimator.predict(input_fn=input_fn))
    results = []
    for i in range(len(predictions)):
        heads = predictions[i]['pred_head'][:lengths[i]]
        deps = predictions[i]['pred_dep'][:lengths[i]]
        parsed = []
        for w_idx, word_heads in enumerate(heads):
            for h_idx, head in enumerate(word_heads):
                if head == -1:
                    # -1 marks an empty head slot; skip it.
                    continue
                parsed.append(
                    (sen[i][w_idx], pos[i][w_idx], str(head),
                     data_loader.id2dep(deps[w_idx][h_idx], self.dep_dict)))
        results.append(parsed)
    return results
def predict(self, inputs):
    """Semantic-role labeling over (words, tags, predicate_word) triples.

    The predicate feature is a one-hot vector marking the first
    occurrence of the predicate word in the sentence (list.index raises
    ValueError if it is absent, as in the original).
    """
    word_id, tag_id, predicate, length = [], [], [], []
    for item in inputs:
        words, tags, pred_word = item[0], item[1], item[2]
        word_id.append(data_loader.word2id(words, self.vocab))
        tag_id.append(data_loader.tag2id(tags, self.tag_dict))
        length.append(len(words))
        indicator = [0] * len(words)
        indicator[words.index(pred_word)] = 1
        predicate.append(indicator)
    length = np.array(length)
    pad = tf.keras.preprocessing.sequence.pad_sequences
    word_id = pad(word_id, dtype='int64', padding='post')
    tag_id = pad(tag_id, dtype='int64', padding='post')
    predicate = pad(predicate, dtype='int64', padding='post')
    features = {
        'word_id': word_id,
        'tag_id': tag_id,
        'predicate': predicate,
        'length': length
    }
    input_fn = tf.estimator.inputs.numpy_input_fn(
        x=features, batch_size=512, num_epochs=1, shuffle=False)
    labels = list(self.estimator.predict(input_fn=input_fn))
    return [
        data_loader.id2label(labels[i][:length[i]], self.label_dict)
        for i in range(len(inputs))
    ]
def predict(self, inputs):
    """Predict relation labels using entity-relative position features.

    Args:
        inputs: iterable of pairs where item[0] is the sentence string and
            item[1] is a whitespace-separated "en1 en2" entity pair; both
            entities must occur verbatim in the sentence.

    Returns:
        Relation names decoded via data_loader.id2rel.
    """
    total_sen = []
    total_en1_end = []
    total_en2_end = []
    total_pos_1 = []
    total_pos_2 = []
    for item in inputs:
        sen = item[0]
        en1, en2 = item[1].split()
        en1_start = sen.index(en1)
        en1_end = en1_start + len(en1) - 1
        total_en1_end.append(en1_end)
        en2_start = sen.index(en2)
        en2_end = en2_start + len(en2) - 1
        total_en2_end.append(en2_end)
        total_sen.append(data_loader.word2id(list(sen), self.vocab))
        # Relative-position features: 0 inside an entity span, signed
        # distance to the nearest span edge outside it. The inner loop
        # index is renamed from `n` — the original shadowed the outer
        # loop variable, a latent-bug hazard.
        pos_1 = []
        pos_2 = []
        for i in range(len(sen)):
            if i < en1_start:
                pos_1.append(i - en1_start)
            elif i <= en1_end:
                pos_1.append(0)
            else:
                pos_1.append(i - en1_end)
            if i < en2_start:
                pos_2.append(i - en2_start)
            elif i <= en2_end:
                pos_2.append(0)
            else:
                pos_2.append(i - en2_end)
        total_pos_1.append(data_loader.pos_encode(pos_1))
        total_pos_2.append(data_loader.pos_encode(pos_2))
    total_sen = tf.keras.preprocessing.sequence.pad_sequences(
        total_sen, dtype='int64', padding='post')
    total_pos_1 = tf.keras.preprocessing.sequence.pad_sequences(
        total_pos_1, dtype='int64', padding='post')
    total_pos_2 = tf.keras.preprocessing.sequence.pad_sequences(
        total_pos_2, dtype='int64', padding='post')
    total_en1_end = np.array(total_en1_end)
    total_en2_end = np.array(total_en2_end)
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(x={
        'word_id': total_sen,
        'pos_1': total_pos_1,
        'pos_2': total_pos_2,
        'en1_pos': total_en1_end,
        'en2_pos': total_en2_end
    }, batch_size=512, num_epochs=1, shuffle=False)
    results = list(self.estimator.predict(input_fn=predict_input_fn))
    results = data_loader.id2rel(results, self.rel_dict)
    return results