def test_get_input_fn(self):
  """Smoke-checks the VCR reader: builds one batch and logs its contents.

  Builds the input_fn from a text-format Reader proto, pulls a single batch,
  and logs every tensor's shape plus example values of the answer/rationale
  choice fields. Fails implicitly (KeyError) if an expected field is missing.
  """
  options_str = r"""
    vcr_reader {
      input_pattern: "output/uncased/VCR-RAW/val.record-00000-of-00005"
      shuffle_buffer_size: 10
      interleave_cycle_length: 1
      batch_size: 8
      prefetch_buffer_size: 8000
      desired_size: 600
      vocab_file: "data/bert/tf1.x/BERT-Base/vocab.txt"
      out_of_vocabulary_token_id: 100
    }
  """
  options = text_format.Merge(options_str, reader_pb2.Reader())
  dataset = reader.get_input_fn(options, is_training=False)()
  for elem in dataset.take(1):
    # NOTE(review): removed leftover `import pdb; pdb.set_trace()` — an
    # interactive breakpoint hangs any non-interactive test run.
    for key, value in elem.items():
      logging.info('%s: %s', key, value.shape)
    logging.info('Examples:')
    logging.info('answer_choices: %s', elem['answer_choices'][0])
    logging.info('answer_choices_tag: %s', elem['answer_choices_tag'][0])
    logging.info('answer_choices_len: %s', elem['answer_choices_len'][0])
    logging.info('rationale_choices: %s', elem['rationale_choices'][0])
    logging.info('rationale_choices_tag: %s', elem['rationale_choices_tag'][0])
    logging.info('rationale_choices_len: %s', elem['rationale_choices_len'][0])
def test_get_input_fn(self):
  """Checks that with decode_jpeg=false the reader omits raw image tensors.

  Pulls one batch and asserts the image data/size fields are absent, while
  the FRCNN detection fields are present (indexing raises KeyError if not).
  All batch contents are logged for manual inspection.
  """
  options_str = r"""
    vcr_reader {
      input_pattern: "/own_files/yekeren/VCR-2stages-allboxes/val.record-*-of-00005"
      shuffle_buffer_size: 10
      interleave_cycle_length: 1
      batch_size: 20
      prefetch_buffer_size: 8000
      frcnn_feature_dims: 1536
      bert_feature_dims: 768
      decode_jpeg: false
    }
  """
  options = text_format.Merge(options_str, reader_pb2.Reader())
  dataset = reader.get_input_fn(options, is_training=False)()
  for elem in dataset.take(1):
    # decode_jpeg is disabled, so raw image tensors must not be emitted.
    self.assertNotIn(InputFields.img_data, elem)
    self.assertNotIn(InputFields.img_width, elem)
    self.assertNotIn(InputFields.img_height, elem)
    # Presence check: indexing raises KeyError if any detection field is
    # missing. The values themselves are not asserted on here.
    (num_objects, object_bboxes, object_labels, object_scores,
     object_features) = (elem[InputFields.num_objects],
                         elem[InputFields.object_bboxes],
                         elem[InputFields.object_labels],
                         elem[InputFields.object_scores],
                         elem[InputFields.object_features])
    del num_objects, object_bboxes, object_labels, object_scores
    del object_features  # Unpacked only for the presence check above.
    # NOTE(review): removed leftover `import pdb; pdb.set_trace()` — an
    # interactive breakpoint hangs any non-interactive test run.
    for key, value in elem.items():
      logging.info('=' * 64)
      logging.info(key)
      logging.info(value)
def test_get_input_fn(self):
  """Verifies shapes and dtypes emitted by the caption-graph reader.

  Pulls one batch and checks the proposal tensors have the configured
  static shapes, and that the graph tensors are padded to the batch maxima
  of n_node / n_edge with the expected dtypes.
  """
  batch_size = 17
  # input_pattern: "data-mscoco/tfrecords_v2_prop50_iou/coco_sgs.tfreocrd-00000-of-00020"
  options_str = r"""
    caption_graph_reader {
      input_pattern: "/own_files/yekeren/WSSGG/data-vspnet/tfrecords/caption-graph-hanwang-v1/train.tfrecord*"
      batch_size: %i
      shuffle_buffer_size: 500
      prefetch_buffer_size: 500
      feature_dimensions: 1536
      max_n_proposal: 20
    }
  """ % (batch_size)
  options = text_format.Merge(options_str, reader_pb2.Reader())
  dataset = reader.get_input_fn(options, is_training=False)()
  for elem in dataset.take(1):
    # NOTE(review): removed leftover `import pdb; pdb.set_trace()` — an
    # interactive breakpoint hangs any non-interactive test run.
    self.assertAllEqual(elem['id'].shape, [batch_size])
    self.assertAllEqual(elem['image/n_proposal'].shape, [batch_size])
    self.assertAllEqual(elem['image/proposal'].shape, [batch_size, 20, 4])
    self.assertAllEqual(elem['image/proposal/feature'].shape,
                        [batch_size, 20, 1536])

    self.assertDTypeEqual(elem['id'], np.int64)
    self.assertDTypeEqual(elem['image/n_proposal'], np.int32)
    self.assertDTypeEqual(elem['image/proposal'], np.float32)
    self.assertDTypeEqual(elem['image/proposal/feature'], np.float32)

    # Check graphs. `np.object` was removed in NumPy 1.24; the builtin
    # `object` denotes the same dtype (np.object_) for these string tensors.
    for scope in ['caption_graph']:
      self.assertDTypeEqual(elem[scope + '/caption'], object)

      max_n_node = elem[scope + '/n_node'].numpy().max()
      max_n_edge = elem[scope + '/n_edge'].numpy().max()
      self.assertAllEqual(elem[scope + '/nodes'].shape,
                          [batch_size, max_n_node])
      self.assertAllEqual(elem[scope + '/edges'].shape,
                          [batch_size, max_n_edge])
      self.assertAllEqual(elem[scope + '/senders'].shape,
                          [batch_size, max_n_edge])
      self.assertAllEqual(elem[scope + '/receivers'].shape,
                          [batch_size, max_n_edge])

      self.assertDTypeEqual(elem[scope + '/n_node'], np.int32)
      self.assertDTypeEqual(elem[scope + '/n_edge'], np.int32)
      self.assertDTypeEqual(elem[scope + '/nodes'], object)
      self.assertDTypeEqual(elem[scope + '/edges'], object)
      self.assertDTypeEqual(elem[scope + '/senders'], np.int32)
      self.assertDTypeEqual(elem[scope + '/receivers'], np.int32)
def setUp(self):
  """Builds the Reader proto used by the tests from its text representation."""
  reader_options = r"""
    input_pattern: "output/VOC2007_test_ssbox.record-00001-of-00020"
    interleave_cycle_length: 2
    is_training: true
    shuffle_buffer_size: 10
    batch_size: 7
    max_num_proposals: 2000
    image_resizer {
      fixed_shape_resizer {
        height: 448
        width: 448
      }
    }
  """
  # text_format.Merge returns the message it populated.
  self._options = text_format.Merge(reader_options, reader_pb2.Reader())
def test_get_input_fn(self):
  """Smoke-checks the VCR text+FRCNN reader by logging one batch.

  Builds the input_fn from a text-format Reader proto, pulls a single batch,
  and logs each key/value pair for manual inspection.
  """
  options_str = r"""
    vcr_text_frcnn_reader {
      input_pattern: "output/uncased/VCR-text_and_frcnn/val.record-*-of-00005"
      shuffle_buffer_size: 10
      interleave_cycle_length: 1
      batch_size: 60
      prefetch_buffer_size: 8000
      frcnn_feature_dims: 1536
      num_parallel_calls: 10
    }
  """
  options = text_format.Merge(options_str, reader_pb2.Reader())
  dataset = reader.get_input_fn(options, is_training=False)()
  # NOTE(review): the original iterated take(10000) only to hit a leftover
  # `pdb.set_trace()` breakpoint and a dead `j = 1`. One batch is enough for
  # a smoke check and keeps the test fast and non-interactive.
  for elem in dataset.take(1):
    for key, value in elem.items():
      logging.info('=' * 64)
      logging.info(key)
      logging.info(value)
def test_get_input_fn(self):
  """Smoke-checks the VCR text-only reader by logging one batch.

  Builds the input_fn from a text-format Reader proto, pulls a single batch,
  and logs each key/value pair for manual inspection.
  """
  options_str = r"""
    vcr_text_only_reader {
      input_pattern: "output/uncased/VCR-text_only/val.record-*-of-00005"
      shuffle_buffer_size: 10
      interleave_cycle_length: 1
      batch_size: 3
      prefetch_buffer_size: 8000
    }
  """
  options = text_format.Merge(options_str, reader_pb2.Reader())
  dataset = reader.get_input_fn(options, is_training=False)()
  for elem in dataset.take(1):
    for key, value in elem.items():
      logging.info('=' * 64)
      logging.info(key)
      logging.info(value)
    # NOTE(review): removed leftover `import pdb; pdb.set_trace()` breakpoint
    # and the dead assignment `j = 1`.