def parse_line(tf_line):
  """Convert one serialized JSON QA example into tensors.

  Args:
    tf_line: Scalar string tensor holding a JSON object with a "question"
      field and an "answer" field (presumably a list of answer strings,
      given the `[None]` shape below — confirm against the data format).

  Returns:
    Dict with a scalar string tensor under "question" and a 1-D string
    tensor under "answers".
  """

  def _extract(serialized):
    # Runs as a py_func: plain Python JSON decoding per line.
    parsed = json.loads(serialized)
    return parsed["question"], parsed["answer"]

  question, answers = tensor_utils.shaped_py_func(
      func=_extract,
      inputs=[tf_line],
      types=[tf.string, tf.string],
      shapes=[[], [None]],
      stateful=False)
  return {"question": question, "answers": answers}
def parse_fever_line(tf_line):
  """Convert FEVER sample from a string.

  Args:
    tf_line: Scalar string tensor holding a JSON object with "claim" and
      "label" fields; the label is mapped through the module-level
      `_FEVER_LABEL_DICT` to an integer id.

  Returns:
    Dict with a scalar string tensor under "question" (the claim) and a
    scalar int64 tensor under "answers" (the label id).
  """

  def _extract(serialized):
    # Runs as a py_func: decode JSON and look up the integer label.
    parsed = json.loads(serialized)
    return parsed["claim"], _FEVER_LABEL_DICT[parsed["label"]]

  claim, label = tensor_utils.shaped_py_func(
      func=_extract,
      inputs=[tf_line],
      types=[tf.string, tf.int64],
      shapes=[[], []],
      stateful=False)
  return {"question": claim, "answers": label}
def test_shaped_py_func(self):
  """shaped_py_func should run a numpy fn and attach the static shape."""

  def _sum_and_product(a, b):
    return np.array([a + b, a * b])

  with tf.Graph().as_default():
    result, = tensor_utils.shaped_py_func(
        func=_sum_and_product,
        inputs=[tf.constant(4), tf.constant(7)],
        types=[tf.int32],
        shapes=[2])
    # Static shape must be set on the returned tensor (py_func alone
    # would leave it unknown).
    self.assertAllEqual(result.get_shape(), [2])
    with tf.Session() as sess:
      # 4 + 7 = 11 and 4 * 7 = 28.
      self.assertAllEqual(sess.run(result), [11, 28])
def _nli_line_to_tensors(tf_line, tokenizer):
  """Map a tensor line from a NLI file to tensor dictionary.

  Args:
    tf_line: Scalar string tensor holding a JSON object with
      "sentence1", "sentence2", "pairID", and "gold_label" fields.
    tokenizer: Object with a `tokenize(text) -> list of tokens` method
      (project type; exact token dtype assumed string — confirm).

  Returns:
    Dict with 1-D string tensors "premise" and "hypothesis", a scalar
    string "id", and a scalar int32 "label" (-1 for labels missing from
    `NLI_LABEL_MAP`).
  """

  def _decode(serialized):
    # Runs as a py_func: JSON decode, tokenize both sentences, and map
    # the gold label to an integer (-1 when unrecognized).
    parsed = json.loads(serialized)
    label_id = NLI_LABEL_MAP.get(parsed["gold_label"], -1)
    premise_tokens = tokenizer.tokenize(parsed["sentence1"])
    hypothesis_tokens = tokenizer.tokenize(parsed["sentence2"])
    return (premise_tokens, hypothesis_tokens, parsed["pairID"],
            np.int32(label_id))

  premise, hypothesis, pair_id, label = tensor_utils.shaped_py_func(
      func=_decode,
      inputs=[tf_line],
      types=[tf.string, tf.string, tf.string, tf.int32],
      shapes=[[None], [None], [], []],
      stateful=False)
  return {
      "premise": premise,
      "hypothesis": hypothesis,
      "id": pair_id,
      "label": label
  }