def _gen(self):
    """Generator yielding raw records fetched task-by-task from the worker.

    Supports the iter() protocol (i.e. it is a generator function), which
    is used to create a dataset for RecordIO. Pulls tasks from the worker
    until the master signals WAIT (more data may arrive later) or sends a
    task with no shard file (no more work at all).
    """
    while True:
        task = self._worker.get_task()
        # A task without a shard file name is a control message, not data.
        if not task.shard_file_name:
            if task.type == elasticdl_pb2.WAIT:
                # Current dataset is exhausted for now; a new one may
                # follow, so remember that a dataset is pending.
                self._pending_dataset = True
                logger.info(
                    "Finish current dataset, maybe more data later")
            else:
                logger.info("No more task, stopping")
            break
        with self._lock:
            # When training with evaluation, evaluation tasks are queued
            # separately and are not read by this generator.
            if (self._training_with_evaluation
                    and task.type == elasticdl_pb2.EVALUATION):
                self._pending_eval_tasks.append(task)
                continue
            # Track cumulative record count alongside each pending task.
            self._record_count += task.end - task.start
            self._pending_tasks_with_counts.append(
                (task, self._record_count))
            # The first pending task is the one currently being consumed.
            if len(self._pending_tasks_with_counts) == 1:
                self._current_task = task
        # Stream every record in the task's [start, end) shard range.
        with closing(
                recordio.Scanner(
                    task.shard_file_name, task.start,
                    task.end - task.start)) as reader:
            while True:
                record = reader.record()
                if record:
                    yield record
                else:
                    break
def read_records(self, start, end):
    """Yield the raw records in the global index range [start, end).

    Args:
        start: global index of the first record to read.
        end: global index one past the last record to read.

    Yields:
        Each raw record in order, spanning file boundaries as needed.
    """
    # Resolve the global range into (file_path, local_offset, count)
    # chunks, one per underlying record file.
    target_files = self._get_record_file(start, end)
    # Bug fix: the original loop variable `start` shadowed the `start`
    # parameter; use distinct names for the per-file offset.
    for file_path, file_start, count in target_files:
        with closing(
                recordio.Scanner(file_path, file_start, count)) as reader:
            while True:
                record = reader.record()
                if record:
                    yield record
                else:
                    # A falsy record signals the end of this chunk.
                    break
def read_records(self, task):
    """Yield every raw record covered by *task*'s shard range.

    Args:
        task: a task object carrying ``shard_name``, ``start`` and
            ``end`` attributes describing the records to read.

    Yields:
        Each raw record in the range [task.start, task.end).
    """
    num_records = task.end - task.start
    scanner = recordio.Scanner(task.shard_name, task.start, num_records)
    with closing(scanner) as reader:
        # Prime the first read, then keep yielding until the scanner
        # returns a falsy value marking the end of the range.
        record = reader.record()
        while record:
            yield record
            record = reader.record()
def gen(self):
    """Yield raw records from every configured shard, in shard order.

    Each entry of ``self._shards`` supplies a file path, a start index
    and an end index; the records in [start, end) are streamed from
    each shard before moving on to the next one.
    """
    for shard in self._shards:
        shard_file, begin, end = shard[0], shard[1], shard[2]
        scanner = recordio.Scanner(shard_file, begin, end - begin)
        with closing(scanner) as reader:
            # Prime the first read, then yield until the scanner is
            # exhausted (falsy record).
            record = reader.record()
            while record:
                yield record
                record = reader.record()