示例#1
0
        def _inner_iter(self, fpath):
            """Yield one ExampleIdVisitor.ExampleIdItem per example id in *fpath*.

            Every TFRecord in the file holds a serialized LiteExampleIds
            proto. Its features are re-wrapped as a tf.train.Example,
            converted into a column dict, and then split row-by-row; each
            row keeps only the SYNC_ALLOWED_OPTIONAL_FIELDS columns that
            are present and non-empty.
            """
            with make_tf_record_iter(fpath) as rec_iter:
                for serialized in rec_iter:
                    lite_ids = dj_pb.LiteExampleIds()
                    lite_ids.ParseFromString(serialized)
                    columns = convert_tf_example_to_dict(
                        tf.train.Example(features=lite_ids.features))
                    row_count = len(columns['example_id'])
                    for row_idx in range(row_count):
                        # Keep only allowed optional fields that exist and
                        # have at least one value.
                        row = {
                            fn: columns[fn][row_idx]
                            for fn in SYNC_ALLOWED_OPTIONAL_FIELDS
                            if fn in columns and len(columns[fn]) > 0
                        }
                        yield ExampleIdVisitor.ExampleIdItem(
                            row_idx + lite_ids.begin_index, row)
示例#2
0
 def _inner_iter(self, fpath):
     """Yield a TfExampleItem wrapping each raw TFRecord in *fpath*.

     The record reader honors the compression type configured in
     self._options.
     """
     tfr_options = tf.io.TFRecordOptions(
             compression_type=self._options.compressed_type
         )
     with common.make_tf_record_iter(fpath, tfr_options) as rec_iter:
         yield from (TfExampleItem(raw) for raw in rec_iter)
示例#3
0
 def _sync_dumped_data_block_meta(self):
     """Rescan the data-block directory and rebuild the in-memory meta list.

     Walks the partition's data-block directory, pairs data-block files
     with their meta files, removes unknown or orphaned files, loads each
     DataBlockMeta proto, and validates that the metas form a consecutive,
     monotonically increasing sequence before publishing them under the
     lock. Aborts the process (os._exit) on unrecoverable inconsistency,
     matching the surrounding code's fatal-error convention.
     """
     dumped_data_block_path = {}
     dumped_data_block_meta_path = {}
     dumped_data_block_meta = []
     data_block_dir = self._data_block_dir()
     if not gfile.Exists(data_block_dir):
         gfile.MakeDirs(data_block_dir)
     elif not gfile.IsDirectory(data_block_dir):
         logging.fatal("%s must be the directory of data block for "\
                       "partition %d", data_block_dir, self._partition_id)
         os._exit(-1)  # pylint: disable=protected-access
     for fpath in self._list_data_block_dir():
         fname = ntpath.basename(fpath)
         if fname.endswith(DataBlockSuffix):
             ftag = fname[:-len(DataBlockSuffix)]
             dumped_data_block_path[ftag] = fpath
         elif fname.endswith(DataBlockMetaSuffix):
             ftag = fname[:-len(DataBlockMetaSuffix)]
             dumped_data_block_meta_path[ftag] = fpath
         else:
             # Unrecognized file in the data-block dir: clean it up.
             gfile.Remove(fpath)
     for (ftag, fpath) in dumped_data_block_meta_path.items():
         if ftag not in dumped_data_block_path:
             # Orphan meta with no matching data block: drop it.
             # BUGFIX: the original also called
             # gfile.Remove(dumped_data_block_path[ftag]) here, which
             # always raised KeyError since ftag is known to be absent.
             gfile.Remove(fpath)
         else:
             with make_tf_record_iter(fpath) as record_iter:
                 dbm = dj_pb.DataBlockMeta()
                 dbm.ParseFromString(next(record_iter))
                 dumped_data_block_meta.append(dbm)
     dumped_data_block_meta = sorted(dumped_data_block_meta,
                                     key=lambda meta: meta.data_block_index)
     for (idx, meta) in enumerate(dumped_data_block_meta):
         # Indices must be exactly 0..N-1 after sorting.
         if meta.data_block_index != idx:
             logging.fatal("data_block_index is not consecutive")
             os._exit(-1)  # pylint: disable=protected-access
         if idx == 0:
             continue
         prev_meta = dumped_data_block_meta[idx - 1]
         if prev_meta.follower_restart_index > meta.follower_restart_index:
             logging.fatal("follower_restart_index is not Incremental")
             os._exit(-1)  # pylint: disable=protected-access
         if prev_meta.leader_start_index >= meta.leader_start_index:
             logging.fatal("leader_start_index is not Incremental")
             os._exit(-1)  # pylint: disable=protected-access
         if prev_meta.leader_end_index >= meta.leader_end_index:
             logging.fatal("leader_end_index is not Incremental")
             os._exit(-1)  # pylint: disable=protected-access
     with self._lock:
         # Only publish if the rescan found at least as much as we had;
         # never shrink the published meta list.
         if len(dumped_data_block_meta) > len(self._dumped_data_block_meta):
             self._dumped_data_block_meta = dumped_data_block_meta
示例#4
0
 def _inner_iter(self, fpath):
     """Yield ExampleIdVisitor.ExampleIdItem entries decoded from *fpath*.

     Each TFRecord is a serialized LiteExampleIds proto carrying parallel
     example_id and event_time arrays plus a begin_index base offset.
     """
     with make_tf_record_iter(fpath) as rec_iter:
         for serialized in rec_iter:
             lite_ids = dj_pb.LiteExampleIds()
             lite_ids.ParseFromString(serialized)
             example_id_num = len(lite_ids.example_id)
             event_time_num = len(lite_ids.event_time)
             assert example_id_num == event_time_num, \
                 "the size of example id and event time must the "\
                 "same. {} != {}".format(example_id_num,
                                         event_time_num)
             base_index = lite_ids.begin_index
             for offset, (eid, etime) in enumerate(
                     zip(lite_ids.example_id, lite_ids.event_time)):
                 yield ExampleIdVisitor.ExampleIdItem(
                     eid, etime, offset + base_index)
示例#5
0
 def _inner_iter(self, fpath):
     """Yield a SyncedExampleId proto parsed from each record in *fpath*."""
     with make_tf_record_iter(fpath) as rec_iter:
         for serialized in rec_iter:
             synced_id = dj_pb.SyncedExampleId()
             synced_id.ParseFromString(serialized)
             yield synced_id
示例#6
0
 def _inner_iter(self, fpath):
     """Wrap every raw TFRecord in *fpath* as a TfExampleItem."""
     with common.make_tf_record_iter(fpath) as rec_iter:
         yield from map(TfExampleItem, rec_iter)