def to_proto(self) -> proto_lib.SplitInfo:
    """Build a `proto_lib.SplitInfo` message from this object's fields.

    Returns:
        A `proto_lib.SplitInfo` populated with `name`, `shard_lengths` and
        `num_bytes`. `statistics` is forwarded only when its `ByteSize()` is
        truthy; otherwise `None` is passed in its place.
    """
    # A falsy ByteSize() means there is nothing worth forwarding.
    if self.statistics.ByteSize():
        stats = self.statistics
    else:
        stats = None
    return proto_lib.SplitInfo(
        name=self.name,
        shard_lengths=self.shard_lengths,
        num_bytes=self.num_bytes,
        statistics=stats,
    )
def test_from_proto(self):
    """`SplitDict.from_proto` exposes exactly the split given in the protos."""
    split_protos = [
        proto.SplitInfo(name="validation", shard_lengths=[5], num_bytes=0)
    ]
    split_dict = splits.SplitDict.from_proto("ds_name", split_protos)

    self.assertIn("validation", split_dict)
    # Splits that were never supplied must not appear.
    for absent_name in ("train", "test"):
        self.assertNotIn(absent_name, split_dict)
def to_proto(self) -> proto_lib.SplitInfo:
    """Build a `proto_lib.SplitInfo` message from this object's fields.

    Returns:
        A `proto_lib.SplitInfo` populated with `name`, `shard_lengths`,
        `num_bytes`, `statistics` and `filepath_template`.
        `filepath_template` is `self.filename_template.template` when
        `self.filename_template` is truthy, else
        `naming.DEFAULT_FILENAME_TEMPLATE`. `statistics` is forwarded only
        when its `ByteSize()` is truthy; otherwise `None` is passed.
    """
    # Fall back to the default template when none is set on this object.
    template_str = (
        self.filename_template.template
        if self.filename_template
        else naming.DEFAULT_FILENAME_TEMPLATE
    )
    # A falsy ByteSize() means there is nothing worth forwarding.
    stats = self.statistics if self.statistics.ByteSize() else None
    return proto_lib.SplitInfo(
        name=self.name,
        shard_lengths=self.shard_lengths,
        num_bytes=self.num_bytes,
        statistics=stats,
        filepath_template=template_str,
    )
def test_from_proto(self):
    """`SplitDict.from_proto` exposes exactly the split given in the protos."""
    template = naming.ShardedFileTemplate(
        dataset_name='ds_name',
        data_dir='/path',
        filetype_suffix='tfrecord',
    )
    split_protos = [
        proto.SplitInfo(name='validation', shard_lengths=[5], num_bytes=0)
    ]
    split_dict = splits.SplitDict.from_proto(
        filename_template=template,
        repeated_split_infos=split_protos,
    )

    self.assertIn('validation', split_dict)
    # Splits that were never supplied must not appear.
    for absent_name in ('train', 'test'):
        self.assertNotIn(absent_name, split_dict)
def test_from_proto(self):
    """`SplitDict.from_proto` exposes exactly the split given in the protos."""
    split_protos = [proto.SplitInfo(name="validation", num_shards=5)]
    split_dict = splits.SplitDict.from_proto("ds_name", split_protos)

    self.assertIn("validation", split_dict)
    # Splits that were never supplied must not appear.
    for absent_name in ("train", "test"):
        self.assertNotIn(absent_name, split_dict)
def test_from_proto(self):
    """`SplitDict.from_proto` exposes exactly the split given in the protos.

    Builds a `SplitDict` from a single "validation" `SplitInfo` proto and
    checks membership: "validation" is present, "train" and "test" are not.
    """
    sd = splits.SplitDict.from_proto(
        [proto.SplitInfo(name="validation", num_shards=5)])
    # assertIn/assertNotIn report the member and the container on failure,
    # unlike assertTrue/assertFalse on an `in` expression.
    self.assertIn("validation", sd)
    self.assertNotIn("train", sd)
    self.assertNotIn("test", sd)
def split_for(name: str, shard_lengths: Sequence[int]) -> proto.SplitInfo:
    """Create a `proto.SplitInfo` with the given name and shard lengths.

    Args:
        name: Value for the proto's `name` field.
        shard_lengths: Value for the proto's `shard_lengths` field.

    Returns:
        The newly constructed `proto.SplitInfo`.
    """
    split_info = proto.SplitInfo(name=name, shard_lengths=shard_lengths)
    return split_info