def test_composite_reader(self): ws = workspace.C.Workspace() session = LocalSession(ws) num_srcs = 3 names = ["src_{}".format(i) for i in range(num_srcs)] size = 100 offsets = [i * size for i in range(num_srcs)] src_dses = [make_source_dataset(ws, offset=offset, size=size, name=name) for (name, offset) in zip(names, offsets)] data = [ws.fetch_blob(str(src.field_blobs[0])) for src in src_dses] # Sanity check we didn't overwrite anything for d, offset in zip(data, offsets): npt.assert_array_equal(d, range(offset, offset + size)) # Make an identically-sized empty destnation dataset dst_ds_schema = schema.Struct( *[ (name, src_ds.content().clone_schema()) for name, src_ds in zip(names, src_dses) ] ) dst_ds = make_destination_dataset(ws, dst_ds_schema) with TaskGroup() as tg: reader = CompositeReader(names, [src_ds.reader() for src_ds in src_dses]) pipe(reader, dst_ds.writer(), num_runtime_threads=3) session.run(tg) for i in range(num_srcs): written_data = sorted( ws.fetch_blob(str(dst_ds.content()[names[i]].label()))) npt.assert_array_equal(data[i], written_data, "i: {}".format(i))
def build_cache_step(self, overwrite=False): """Build a step for generating cache DB file. If self.db_path exists and not overwritting, build an empty step. Overwise, build a step as follows. Pipe original reader to the _DatasetWriter, so that dataset field blobs are populated. Then save these blobs into a file. Args: overwrite: bool. If true, ignore the existing file and build a new one overwritting the existing one anyway. Returns: build_cache_step: ExcutionStep. The step to be run for building a cache DB file. """ if os.path.exists(self.db_path) and not overwrite: # cache already exists, no need to rebuild it return core.execution_step('build_step', []) init_net = core.Net('init') self._init_field_blobs_as_empty(init_net) with Cluster(), core.NameScope(self.name), TaskGroup() as copy_tg: pipe(self.original_reader, self.ds.writer(), num_threads=16) copy_step = copy_tg.to_task().get_step() save_net = core.Net('save') self._save_field_blobs_to_db_file(save_net) return core.execution_step('build_cache', [init_net, copy_step, save_net])
def test_composite_reader_builder(self): ws = workspace.C.Workspace() session = LocalSession(ws) num_srcs = 3 names = ["src_{}".format(i) for i in range(num_srcs)] size = 100 offsets = [i * size for i in range(num_srcs)] src_ds_builders = [ TestReaderBuilder(offset=offset, size=size, name=name) for (name, offset) in zip(names, offsets) ] # Make an identically-sized empty destnation dataset dst_ds_schema = schema.Struct( *[ (name, src_ds_builder.schema()) for name, src_ds_builder in zip(names, src_ds_builders) ] ) dst_ds = make_destination_dataset(ws, dst_ds_schema) with TaskGroup() as tg: reader_builder = CompositeReaderBuilder( names, src_ds_builders) reader_builder.setup(ws=ws) pipe(reader_builder.new_reader(), dst_ds.writer(), num_runtime_threads=3) session.run(tg) for name, offset in zip(names, offsets): written_data = sorted( ws.fetch_blob(str(dst_ds.content()[name].label()))) npt.assert_array_equal(range(offset, offset + size), written_data, "name: {}".format(name))
def test_composite_reader_builder(self): ws = workspace.C.Workspace() session = LocalSession(ws) num_srcs = 3 names = ["src_{}".format(i) for i in range(num_srcs)] size = 100 offsets = [i * size for i in range(num_srcs)] src_ds_builders = [ TestReaderBuilder(offset=offset, size=size, name=name) for (name, offset) in zip(names, offsets) ] # Make an identically-sized empty destnation dataset dst_ds_schema = schema.Struct( *[(name, src_ds_builder.schema()) for name, src_ds_builder in zip(names, src_ds_builders)]) dst_ds = make_destination_dataset(ws, dst_ds_schema) with TaskGroup() as tg: reader_builder = CompositeReaderBuilder(names, src_ds_builders) reader_builder.setup(ws=ws) pipe(reader_builder.new_reader(), dst_ds.writer(), num_runtime_threads=3) session.run(tg) for name, offset in zip(names, offsets): written_data = sorted( ws.fetch_blob(str(dst_ds.content()[name].label()))) npt.assert_array_equal(range(offset, offset + size), written_data, "name: {}".format(name))
def build_cache_step(self, overwrite=False): """Build a step for generating cache DB file. If self.db_path exists and not overwritting, build an empty step. Overwise, build a step as follows. Pipe original reader to the _DatasetWriter, so that dataset field blobs are populated. Then save these blobs into a file. Args: overwrite: bool. If true, ignore the existing file and build a new one overwritting the existing one anyway. Returns: build_cache_step: ExecutionStep. The step to be run for building a cache DB file. """ if os.path.exists(self.db_path) and not overwrite: # cache already exists, no need to rebuild it return core.execution_step('build_step', []) init_net = core.Net('init') self._init_field_blobs_as_empty(init_net) with Cluster(), core.NameScope(self.name), TaskGroup() as copy_tg: pipe(self.original_reader, self.ds.writer(), num_threads=16) copy_step = copy_tg.to_task().get_step() save_net = core.Net('save') self._save_field_blobs_to_db_file(save_net) return core.execution_step('build_cache', [init_net, copy_step, save_net])
def build_job(node_id): all_outputs = [] with Job() as job: with Node('reader' + str(node_id)): with job.init_group: init_net = core.Net('init_net' + str(node_id)) data_arr = Struct(('val', np.array(range(10)))) data = ConstRecord(init_net, data_arr) ds = Dataset(data, name='dataset' + str(node_id)) full_reader = ds.reader(init_net) total = init_net.Const([100]) Task(step=init_net) def inc_total(rec): net = core.Net('inc_total' + str(node_id)) net.Add([total, rec.val()], [total]) return [net] epoch_reader = ReaderWithLimit(full_reader, num_iter=3) pipe(epoch_reader, processor=inc_total) job.add_stop_signal(epoch_reader.data_finished()) all_outputs.append(total) total_fetcher = Task(step=core.Net('empty'), outputs=all_outputs) return job, total_fetcher
def test_composite_reader(self): ws = workspace.C.Workspace() session = LocalSession(ws) num_srcs = 3 names = ["src_{}".format(i) for i in range(num_srcs)] size = 100 offsets = [i * size for i in range(num_srcs)] src_dses = [ make_source_dataset(ws, offset=offset, size=size, name=name) for (name, offset) in zip(names, offsets) ] data = [ws.fetch_blob(str(src.field_blobs[0])) for src in src_dses] # Sanity check we didn't overwrite anything for d, offset in zip(data, offsets): npt.assert_array_equal(d, range(offset, offset + size)) # Make an identically-sized empty destnation dataset dst_ds_schema = schema.Struct( *[(name, src_ds.content().clone_schema()) for name, src_ds in zip(names, src_dses)]) dst_ds = make_destination_dataset(ws, dst_ds_schema) with TaskGroup() as tg: reader = CompositeReader(names, [src_ds.reader() for src_ds in src_dses]) pipe(reader, dst_ds.writer(), num_runtime_threads=3) session.run(tg) for i in range(num_srcs): written_data = sorted( ws.fetch_blob(str(dst_ds.content()[names[i]].label()))) npt.assert_array_equal(data[i], written_data, "i: {}".format(i))
def test_reader_with_limit(self): ws = workspace.C.Workspace() session = LocalSession(ws) """ 1. feed full dataset """ src_init = core.Net('src_init') src_values = Struct(('label', np.array(range(100)))) src_blobs = NewRecord(src_init, src_values) src_ds = Dataset(src_blobs) FeedRecord(src_blobs, src_values, ws) ws.run(src_init) """ 2. Read with limit smaller than size of dataset """ dst_init = core.Net('dst_init') dst_ds = Dataset(src_values.clone_schema()) dst_ds.init_empty(dst_init) ws.run(dst_init) with TaskGroup() as tg: reader = ReaderWithLimit(src_ds.reader(), num_iter=10) pipe(reader, dst_ds.writer(), num_threads=8) session.run(tg) self.assertFalse(ws.blobs[str(reader.data_finished())].fetch()) self.assertEquals( sorted(ws.blobs[str(dst_ds.content().label())].fetch()), range(10)) """ 3. Read with limit larger than size of dataset """ ws.run(dst_init) with TaskGroup() as tg: reader = ReaderWithLimit(src_ds.reader(), num_iter=110) pipe(reader, dst_ds.writer(), num_threads=8) session.run(tg) self.assertEquals( sorted(ws.blobs[str(dst_ds.content().label())].fetch()), range(100)) self.assertTrue(ws.blobs[str(reader.data_finished())].fetch())
def _test_limit_reader_shared(self, reader_class, size, expected_read_len, expected_finish, num_threads, read_delay, **limiter_args): ws, session, src_ds, dst_init, dst_ds = \ self._test_limit_reader_init_shared(size) # Read without limiter # WorkspaceType.GLOBAL is required because we are fetching # reader.data_finished() after the TaskGroup finishes. with TaskGroup(workspace_type=WorkspaceType.GLOBAL) as tg: if read_delay > 0: reader = reader_class(ReaderWithDelay(src_ds.reader(), read_delay), **limiter_args) else: reader = reader_class(src_ds.reader(), **limiter_args) pipe(reader, dst_ds.writer(), num_runtime_threads=num_threads) session.run(tg) read_len = len(sorted(ws.blobs[str(dst_ds.content().label())].fetch())) self.assertEqual(read_len, expected_read_len) self.assertEqual( sorted(ws.blobs[str(dst_ds.content().label())].fetch()), list(range(expected_read_len)) ) self.assertEqual(ws.blobs[str(reader.data_finished())].fetch(), expected_finish)
def test_reader_with_limit(self): ws = workspace.C.Workspace() session = LocalSession(ws) """ 1. feed full dataset """ src_init = core.Net('src_init') src_values = Struct(('label', np.array(range(100)))) src_blobs = NewRecord(src_init, src_values) src_ds = Dataset(src_blobs) FeedRecord(src_blobs, src_values, ws) ws.run(src_init) """ 2. Read with limit smaller than size of dataset """ dst_init = core.Net('dst_init') dst_ds = Dataset(src_values.clone_schema()) dst_ds.init_empty(dst_init) ws.run(dst_init) with TaskGroup() as tg: reader = ReaderWithLimit(src_ds.reader(), num_iter=10) pipe(reader, dst_ds.writer(), num_threads=8) session.run(tg) self.assertFalse(ws.blobs[str(reader.data_finished())].fetch()) self.assertEquals( sorted(ws.blobs[str(dst_ds.content().label())].fetch()), range(10)) """ 3. Read with limit larger than size of dataset """ ws.run(dst_init) with TaskGroup() as tg: reader = ReaderWithLimit(src_ds.reader(), num_iter=110) pipe(reader, dst_ds.writer(), num_threads=8) session.run(tg) self.assertEquals( sorted(ws.blobs[str(dst_ds.content().label())].fetch()), range(100)) self.assertTrue(ws.blobs[str(reader.data_finished())].fetch())
def _test_limit_reader_shared(self, reader_class, size, expected_read_len, expected_finish, num_threads, read_delay, **limiter_args): ws, session, src_ds, dst_ds = \ self._test_limit_reader_init_shared(size) # Read without limiter # WorkspaceType.GLOBAL is required because we are fetching # reader.data_finished() after the TaskGroup finishes. with TaskGroup(workspace_type=WorkspaceType.GLOBAL) as tg: if read_delay > 0: reader = reader_class( ReaderWithDelay(src_ds.reader(), read_delay), **limiter_args) else: reader = reader_class(src_ds.reader(), **limiter_args) pipe(reader, dst_ds.writer(), num_runtime_threads=num_threads) session.run(tg) read_len = len(sorted(ws.blobs[str(dst_ds.content().label())].fetch())) self.assertEqual(read_len, expected_read_len) self.assertEqual( sorted(ws.blobs[str(dst_ds.content().label())].fetch()), list(range(expected_read_len))) self.assertEqual(ws.blobs[str(reader.data_finished())].fetch(), expected_finish)
def _read_all_data(ws, reader, session): dst_ds = make_destination_dataset(ws, reader.schema().clone_schema()) with TaskGroup() as tg: pipe(reader, dst_ds.writer(), num_runtime_threads=8) session.run(tg) return ws.blobs[str(dst_ds.content().label())].fetch()
def _read_all_data(ws, reader, session): dst_ds = make_destination_dataset(ws, reader.schema().clone_schema()) with TaskGroup() as tg: pipe(reader, dst_ds.writer(), num_runtime_threads=8) session.run(tg) return ws.blobs[str(dst_ds.content().label())].fetch()
def test_dequeue_many(self): init_net = core.Net('init') N = 17 NUM_DEQUEUE_RECORDS = 3 src_values = Struct( ('uid', np.array(range(N))), ('value', 0.1 * np.array(range(N)))) expected_dst = Struct( ('uid', 2 * np.array(range(N))), ('value', np.array(N * [0.0]))) with core.NameScope('init'): src_blobs = NewRecord(init_net, src_values) dst_blobs = InitEmptyRecord(init_net, src_values.clone_schema()) counter = init_net.Const(0) ONE = init_net.Const(1) def proc1(rec): with core.NameScope('proc1'): out = NewRecord(ops, rec) ops.Add([rec.uid(), rec.uid()], [out.uid()]) out.value.set(blob=rec.value(), unsafe=True) return out def proc2(rec): with core.NameScope('proc2'): out = NewRecord(ops, rec) out.uid.set(blob=rec.uid(), unsafe=True) ops.Sub([rec.value(), rec.value()], [out.value()]) ops.Add([counter, ONE], [counter]) return out src_ds = Dataset(src_blobs) dst_ds = Dataset(dst_blobs) with TaskGroup() as tg: out1 = pipe( src_ds.reader(), output=Queue( capacity=11, num_dequeue_records=NUM_DEQUEUE_RECORDS), processor=proc1) out2 = pipe(out1, processor=proc2) pipe(out2, dst_ds.writer()) ws = workspace.C.Workspace() FeedRecord(src_blobs, src_values, ws) session = LocalSession(ws) session.run(init_net) session.run(tg) output = FetchRecord(dst_blobs, ws=ws) num_dequeues = ws.blobs[str(counter)].fetch() self.assertEquals( num_dequeues, int(math.ceil(float(N) / NUM_DEQUEUE_RECORDS))) for a, b in zip(output.field_blobs(), expected_dst.field_blobs()): np.testing.assert_array_equal(a, b)
def test_dequeue_many(self): init_net = core.Net('init') N = 17 NUM_DEQUEUE_RECORDS = 3 src_values = Struct( ('uid', np.array(range(N))), ('value', 0.1 * np.array(range(N)))) expected_dst = Struct( ('uid', 2 * np.array(range(N))), ('value', np.array(N * [0.0]))) with core.NameScope('init'): src_blobs = NewRecord(init_net, src_values) dst_blobs = InitEmptyRecord(init_net, src_values.clone_schema()) counter = init_net.Const(0) ONE = init_net.Const(1) def proc1(rec): with core.NameScope('proc1'): out = NewRecord(ops, rec) ops.Add([rec.uid(), rec.uid()], [out.uid()]) out.value.set(blob=rec.value(), unsafe=True) return out def proc2(rec): with core.NameScope('proc2'): out = NewRecord(ops, rec) out.uid.set(blob=rec.uid(), unsafe=True) ops.Sub([rec.value(), rec.value()], [out.value()]) ops.Add([counter, ONE], [counter]) return out src_ds = Dataset(src_blobs) dst_ds = Dataset(dst_blobs) with TaskGroup() as tg: out1 = pipe( src_ds.reader(), output=Queue( capacity=11, num_dequeue_records=NUM_DEQUEUE_RECORDS), processor=proc1) out2 = pipe(out1, processor=proc2) pipe(out2, dst_ds.writer()) ws = workspace.C.Workspace() FeedRecord(src_blobs, src_values, ws) session = LocalSession(ws) session.run(init_net) session.run(tg) output = FetchRecord(dst_blobs, ws=ws) num_dequeues = ws.blobs[str(counter)].fetch() self.assertEquals( num_dequeues, int(math.ceil(float(N) / NUM_DEQUEUE_RECORDS))) for a, b in zip(output.field_blobs(), expected_dst.field_blobs()): np.testing.assert_array_equal(a, b)
def read_all_data(ws, reader, session): dst_init = core.Net('dst_init') with core.NameScope('dst'): dst_ds = Dataset(reader.schema().clone_schema()) dst_ds.init_empty(dst_init) session.run(dst_init) with TaskGroup(workspace_type=WorkspaceType.GLOBAL) as tg: pipe(reader, dst_ds.writer(), num_runtime_threads=8) session.run(tg) return ws.blobs[str(dst_ds.content().label())].fetch()
def read_all_data(ws, reader, session): dst_init = core.Net('dst_init') with core.NameScope('dst'): dst_ds = Dataset(reader.schema().clone_schema()) dst_ds.init_empty(dst_init) session.run(dst_init) with TaskGroup(workspace_type=WorkspaceType.GLOBAL) as tg: pipe(reader, dst_ds.writer(), num_runtime_threads=8) session.run(tg) return ws.blobs[str(dst_ds.content().label())].fetch()
def build_pipeline(node_id): with Node('trainer_%d' % node_id): with Job.current().init_group, Task(): data_arr = Struct(('val', np.array(list(range(10))))) data = ConstRecord(ops, data_arr) ds = Dataset(data, name='dataset:%d' % node_id) full_reader = ds.reader(ops) total = ops.Const([100]) def inc_total(rec): ops.Add([total, rec.val()], [total]) epoch_reader = ReaderWithLimit(full_reader, num_iter=3) pipe(epoch_reader, processor=inc_total) Job.current().add_stop_signal(epoch_reader.data_finished()) return [total]
def build_pipeline(node_id): with Node('trainer_%d' % node_id): with Job.current().init_group, Task(): data_arr = Struct(('val', np.array(list(range(10))))) data = ConstRecord(ops, data_arr) ds = Dataset(data, name='dataset:%d' % node_id) full_reader = ds.reader(ops) total = ops.Const([100]) def inc_total(rec): ops.Add([total, rec.val()], [total]) epoch_reader = ReaderWithLimit(full_reader, num_iter=3) pipe(epoch_reader, processor=inc_total) Job.current().add_stop_signal(epoch_reader.data_finished()) return [total]
def build_cache(self, cache_path, overwrite=False): if not self.has_cache() or overwrite: self.cache_path = cache_path if self.has_cache() and not overwrite: # cache already exists, no need to rebuild it return core.execution_step('build_step', []) init_net = core.Net('init') self._init_dataset(init_net) with Cluster(), core.NameScope(self.name), TaskGroup() as copy_tg: pipe(self.original_reader, self.ds.writer(), num_threads=16) copy_step = copy_tg.to_task().get_step() save_net = core.Net('save') self._save_to_file(save_net) return core.execution_step('build_cache', [init_net, copy_step, save_net])
def test_local_session(self): init_net = core.Net('init') src_values = Struct( ('uid', np.array([1, 2, 6])), ('value', np.array([1.4, 1.6, 1.7]))) expected_dst = Struct( ('uid', np.array([2, 4, 12])), ('value', np.array([0.0, 0.0, 0.0]))) with core.NameScope('init'): src_blobs = NewRecord(init_net, src_values) dst_blobs = InitEmptyRecord(init_net, src_values.clone_schema()) def proc1(rec): net = core.Net('proc1') with core.NameScope('proc1'): out = NewRecord(net, rec) net.Add([rec.uid(), rec.uid()], [out.uid()]) out.value.set(blob=rec.value(), unsafe=True) return [net], out def proc2(rec): net = core.Net('proc2') with core.NameScope('proc2'): out = NewRecord(net, rec) out.uid.set(blob=rec.uid(), unsafe=True) net.Sub([rec.value(), rec.value()], [out.value()]) return [net], out src_ds = Dataset(src_blobs) dst_ds = Dataset(dst_blobs) with TaskGroup() as tg: out1 = pipe(src_ds.reader(), processor=proc1) out2 = pipe(out1, processor=proc2) pipe(out2, dst_ds.writer()) ws = workspace.C.Workspace() FeedRecord(src_blobs, src_values, ws) session = LocalSession(ws) session.run(init_net) session.run(tg) output = FetchRecord(dst_blobs, ws=ws) for a, b in zip(output.field_blobs(), expected_dst.field_blobs()): np.testing.assert_array_equal(a, b)
def test_local_session(self): init_net = core.Net('init') src_values = Struct( ('uid', np.array([1, 2, 6])), ('value', np.array([1.4, 1.6, 1.7]))) expected_dst = Struct( ('uid', np.array([2, 4, 12])), ('value', np.array([0.0, 0.0, 0.0]))) with core.NameScope('init'): src_blobs = NewRecord(init_net, src_values) dst_blobs = InitEmptyRecord(init_net, src_values.clone_schema()) def proc1(rec): net = core.Net('proc1') with core.NameScope('proc1'): out = NewRecord(net, rec) net.Add([rec.uid(), rec.uid()], [out.uid()]) out.value.set(blob=rec.value(), unsafe=True) return [net], out def proc2(rec): net = core.Net('proc2') with core.NameScope('proc2'): out = NewRecord(net, rec) out.uid.set(blob=rec.uid(), unsafe=True) net.Sub([rec.value(), rec.value()], [out.value()]) return [net], out src_ds = Dataset(src_blobs) dst_ds = Dataset(dst_blobs) with TaskGroup() as tg: out1 = pipe(src_ds.reader(), processor=proc1) out2 = pipe(out1, processor=proc2) pipe(out2, dst_ds.writer()) ws = workspace.C.Workspace() FeedRecord(src_blobs, src_values, ws) session = LocalSession(ws) session.run(init_net) session.run(tg) output = FetchRecord(dst_blobs, ws=ws) for a, b in zip(output.field_blobs(), expected_dst.field_blobs()): np.testing.assert_array_equal(a, b)
def build_job(): with Node('reader'): with Job() as job: with job.init_group: init_net = core.Net('init_net') data_arr = Struct(('val', np.array(range(10)))) data = ConstRecord(init_net, data_arr) ds = Dataset(data) full_reader = ds.reader(init_net) total = init_net.Const([100]) Task(step=init_net) def inc_total(rec): net = core.Net('inc_total') net.Add([total, rec.val()], [total]) return [net] epoch_reader = ReaderWithLimit(full_reader, num_iter=3) pipe(epoch_reader, processor=inc_total) job.add_stop_signal(epoch_reader.data_finished()) total_fetcher = Task(step=core.Net('empty'), outputs=[total]) return job, total_fetcher
def test_reader_with_limit(self): ws = workspace.C.Workspace() session = LocalSession(ws) """ 1. feed full dataset """ src_ds = init_dataset(ws) """ 2. Read with limit smaller than size of dataset """ dst_init = core.Net('dst_init') with core.NameScope('dst'): dst_ds = Dataset(src_ds.content().clone_schema()) dst_ds.init_empty(dst_init) ws.run(dst_init) # WorkspaceType.GLOBAL is required because we are fetching # reader.data_finished() after the TaskGroup finishes. with TaskGroup(workspace_type=WorkspaceType.GLOBAL) as tg: reader = ReaderWithLimit(src_ds.reader(), num_iter=10) pipe(reader, dst_ds.writer(), num_threads=8) session.run(tg) self.assertFalse(ws.blobs[str(reader.data_finished())].fetch()) self.assertEquals( sorted(ws.blobs[str(dst_ds.content().label())].fetch()), list(range(10)) ) """ 3. Read with limit larger than size of dataset """ ws.run(dst_init) with TaskGroup(workspace_type=WorkspaceType.GLOBAL) as tg: reader = ReaderWithLimit(src_ds.reader(), num_iter=110) pipe(reader, dst_ds.writer(), num_runtime_threads=8) session.run(tg) self.assertEquals( sorted(ws.blobs[str(dst_ds.content().label())].fetch()), list(range(100)) ) self.assertTrue(ws.blobs[str(reader.data_finished())].fetch()) """ 4. Read without counter """ ws.run(dst_init) with TaskGroup(workspace_type=WorkspaceType.GLOBAL) as tg: reader = ReaderWithLimit(src_ds.reader(), num_iter=None) pipe(reader, dst_ds.writer(), num_threads=8) session.run(tg) self.assertEquals( sorted(ws.blobs[str(dst_ds.content().label())].fetch()), list(range(100)) ) self.assertTrue(ws.blobs[str(reader.data_finished())].fetch()) """ 5. Read using the same reader without resetting workspace """ session.run(tg) self.assertEquals( sorted(ws.blobs[str(dst_ds.content().label())].fetch()), sorted(list(range(100)) * 2) )
def test_runtime_threads(self): ws = workspace.C.Workspace() session = LocalSession(ws) src_ds = init_dataset(ws) totals = [None] * 3 def proc(rec): # executed once with ops.task_init(): counter1 = ops.CreateCounter([], ['global_counter']) counter2 = ops.CreateCounter([], ['global_counter2']) counter3 = ops.CreateCounter([], ['global_counter3']) # executed once per thread with ops.task_instance_init(): task_counter = ops.CreateCounter([], ['task_counter']) # executed on each iteration ops.CountUp(counter1) ops.CountUp(task_counter) # executed once per thread with ops.task_instance_exit(): with ops.loop(ops.RetrieveCount(task_counter)): ops.CountUp(counter2) ops.CountUp(counter3) # executed once with ops.task_exit(): totals[0] = final_output(ops.RetrieveCount(counter1)) totals[1] = final_output(ops.RetrieveCount(counter2)) totals[2] = final_output(ops.RetrieveCount(counter3)) return rec # Read full data set from original reader with TaskGroup() as tg: pipe(src_ds.reader(), num_runtime_threads=8, processor=proc) session.run(tg) self.assertEqual(totals[0].fetch(), 100) self.assertEqual(totals[1].fetch(), 100) self.assertEqual(totals[2].fetch(), 8) # Read with a count-limited reader with TaskGroup() as tg: q1 = pipe(src_ds.reader(), num_runtime_threads=2) q2 = pipe( ReaderWithLimit(q1.reader(), num_iter=25), num_runtime_threads=3) pipe(q2, processor=proc, num_runtime_threads=6) session.run(tg) self.assertEqual(totals[0].fetch(), 25) self.assertEqual(totals[1].fetch(), 25) self.assertEqual(totals[2].fetch(), 6)
def test_runtime_threads(self): ws = workspace.C.Workspace() session = LocalSession(ws) src_ds = init_dataset(ws) totals = [None] * 3 def proc(rec): # executed once with ops.task_init(): counter1 = ops.CreateCounter([], ['global_counter']) counter2 = ops.CreateCounter([], ['global_counter2']) counter3 = ops.CreateCounter([], ['global_counter3']) # executed once per thread with ops.task_instance_init(): task_counter = ops.CreateCounter([], ['task_counter']) # executed on each iteration ops.CountUp(counter1) ops.CountUp(task_counter) # executed once per thread with ops.task_instance_exit(): with ops.loop(ops.RetrieveCount(task_counter)): ops.CountUp(counter2) ops.CountUp(counter3) # executed once with ops.task_exit(): totals[0] = final_output(ops.RetrieveCount(counter1)) totals[1] = final_output(ops.RetrieveCount(counter2)) totals[2] = final_output(ops.RetrieveCount(counter3)) return rec """ 1. Feed full dataset """ with TaskGroup() as tg: pipe(src_ds.reader(), num_runtime_threads=8, processor=proc) session.run(tg) self.assertEquals(totals[0].fetch(), 100) self.assertEquals(totals[1].fetch(), 100) self.assertEquals(totals[2].fetch(), 8) """ 2. Add a few steps in between """ with TaskGroup() as tg: q1 = pipe(src_ds.reader(), num_runtime_threads=2) q2 = pipe( ReaderWithLimit(q1.reader(), num_iter=25), num_runtime_threads=3) pipe(q2, processor=proc, num_runtime_threads=6) session.run(tg) self.assertEquals(totals[0].fetch(), 25) self.assertEquals(totals[1].fetch(), 25) self.assertEquals(totals[2].fetch(), 6)