def test_create_plan_from_proto_correctly(self):
    from caffe2.python.net_builder import ops
    with Node('trainer'), Task(name='my_task', num_instances=2) as task:
        with ops.task_init():
            globl = ops.Const(0)
        with ops.task_instance_init():
            local = ops.Const(0)
        with ops.loop(100):
            ops.Copy(globl, local)
        with ops.task_instance_exit():
            ops.Add([globl, local], [globl])
        with ops.task_exit():
            ops.Mul([globl, globl], [globl])

    plan = core.Plan(task.get_step())
    test_plan = core.Plan.create_from_proto(plan.Proto())

    self.assertEqual(len(plan.Steps()), 1)
    self.assertEqual(len(test_plan.Steps()), 1)
    self.assertEqual(len(plan.Proto().network), 9)
    self.assertEqual(len(test_plan.Proto().network), 9)
    self.assertEqual(len(plan.Proto().execution_step), 1)
    self.assertEqual(len(test_plan.Proto().execution_step), 1)
    self.assertEqual(plan.Steps()[0].Name(), test_plan.Steps()[0].Name())
    self.assertEqual(len(plan.Nets()), len(test_plan.Nets()))
    for idx in range(0, len(plan.Nets())):
        # When we create the Nets for test_plan, they end up with new
        # names that carry a postfix.
        net_1 = plan.Nets()[idx]
        net_2 = test_plan.Nets()[idx]
        trim_size = len(net_1.Name())
        self.assertEqual(net_1.Name(), net_2.Name()[:trim_size])
def example_task():
    with Task():
        with ops.task_init():
            one = ops.Const(1)
            two = ops.Add([one, one])
        with ops.task_init():
            three = ops.Const(3)
        accum = ops.Add([two, three])
        # here, accum should be 5
        with ops.task_exit():
            # here, accum should be 6, since this executes after lines below
            seven_1 = ops.Add([accum, one])
        six = ops.Add([accum, one])
        ops.Add([accum, one], [accum])
        seven_2 = ops.Add([accum, one])
        o6 = final_output(six)
        o7_1 = final_output(seven_1)
        o7_2 = final_output(seven_2)

    with Task(num_instances=2):
        with ops.task_init():
            one = ops.Const(1)
        with ops.task_instance_init():
            local = ops.Const(2)
        ops.Add([one, local], [one])
        ops.LogInfo('ble')

    return o6, o7_1, o7_2
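# A minimal usage sketch (not part of the original snippets): example_task()
# only builds Tasks, so it would be called inside a TaskGroup and the group
# then executed by a session, mirroring the TaskGroup/LocalSession pattern
# used in test_multi_instance and test_setup below. The function name
# run_example_task is an illustrative assumption.
def run_example_task():
    with TaskGroup() as tg:
        o6, o7_1, o7_2 = example_task()
    with LocalSession() as session:
        session.run(tg)
    # The final outputs become plain values once the run has finished.
    return o6.fetch(), o7_1.fetch(), o7_2.fetch()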
def _runtime_threads_task(name, group, final_outputs, reader, num_threads,
                          output, capacity):
    node_name = str(Node.current())
    profiler_name = "{0}/{1}/{2}/{3}/{4}".format(
        node_name,
        "pipe",
        name,
        processor_name(reader) if reader else "NoInput",
        processor_name(output) if output else "NoOutput")

    with Task(name=name, group=group, outputs=final_outputs,
              num_instances=num_threads) as task:
        # Global init/exit nets run once per task; instance init/exit nets
        # run once per worker thread.
        global_exit_net = core.Net('pipe:exit')
        global_init_net = core.Net('pipe:init')
        reader.setup_ex(global_init_net, global_exit_net)

        init_net = core.Net('pipe:instance:init')
        exit_net = core.Net('pipe:instance:exit')
        read_nets, status, rec = reader.read_record_ex(init_net, exit_net)
        init_net.ConstantFill(
            [], [status],
            shape=[],
            value=False,
            dtype=core.DataType.BOOL
        )

        if rec is not None:
            out_queue, writer = _init_output(
                output, capacity, global_init_net, global_exit_net)
            write_nets, _ = writer.write_record_ex(
                rec, init_net, exit_net, status)
        else:
            out_queue = None
            write_nets = []

        with ops.task_init():
            ops.net(global_init_net)
        with ops.task_instance_init():
            ops.net(init_net)

        timer_start_net = core.Net('timer_start')
        timer = timer_start_net.TimerBegin([], counter_name=profiler_name)
        timer_end_net = core.Net('timer_end')
        timer_end_net.TimerEnd(timer, [])

        # Loop the read/write body until the reader sets the status blob.
        ops.net(core.execution_step(
            'body',
            [timer_start_net] + list(read_nets) + list(write_nets) +
            [timer_end_net],
            should_stop_blob=status))
        ops.net(timer_end_net)

        with ops.task_instance_exit():
            ops.net(exit_net)
        with ops.task_exit():
            ops.net(global_exit_net)

    return out_queue, task
def _timed_task(self, cp_op_name, add_op):
    """
    Build a Task that measures the time span of a checkpoint operation.
    Once the operation is done, the time can be read from
    _current_checkpoint_duration.

    Args:
        cp_op_name: A string name of the checkpoint operation.
        add_op: A functor that adds the checkpoint operation.

    Returns:
        A task with a timer.
    """
    with Task(name=cp_op_name) as task:
        with ops.task_init():
            timer = ops.TimerBegin([], counter_name=self._node_name)
        add_op()
        with ops.task_exit():
            time_span_blob = ops.TimerGetAndEnd(timer)
        self._current_checkpoint_duration = final_output(time_span_blob)
    return task
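# A hedged illustration (not from the original source) of how _timed_task
# might be invoked. 'save_checkpoint_ops' is a hypothetical functor standing
# in for the real checkpoint-save ops; after the returned Task runs,
# self._current_checkpoint_duration holds the measured time span.
def _example_timed_save(self):
    def save_checkpoint_ops():
        # hypothetical placeholder for the ops that actually write the checkpoint
        ops.LogInfo('saving checkpoint')
    return self._timed_task('checkpoint_save', save_checkpoint_ops)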
def test_multi_instance(self):
    NUM_INSTANCES = 10
    NUM_ITERS = 15
    with TaskGroup() as tg:
        with Task(num_instances=NUM_INSTANCES):
            with ops.task_init():
                counter1 = ops.CreateCounter([], ['global_counter'])
                counter2 = ops.CreateCounter([], ['global_counter2'])
                counter3 = ops.CreateCounter([], ['global_counter3'])
            # both task_counter and local_counter should be thread local
            with ops.task_instance_init():
                task_counter = ops.CreateCounter([], ['task_counter'])
            local_counter = ops.CreateCounter([], ['local_counter'])
            with ops.loop(NUM_ITERS):
                ops.CountUp(counter1)
                ops.CountUp(task_counter)
                ops.CountUp(local_counter)
            # gather sum of squares of local counters to make sure that
            # each local counter counted exactly up to NUM_ITERS, and
            # that there was no false sharing of counter instances.
            with ops.task_instance_exit():
                count2 = ops.RetrieveCount(task_counter)
                with ops.loop(ops.Mul([count2, count2])):
                    ops.CountUp(counter2)
            # This should have the same effect as the above
            count3 = ops.RetrieveCount(local_counter)
            with ops.loop(ops.Mul([count3, count3])):
                ops.CountUp(counter3)
            # The code below will only run once
            with ops.task_exit():
                total1 = final_output(ops.RetrieveCount(counter1))
                total2 = final_output(ops.RetrieveCount(counter2))
                total3 = final_output(ops.RetrieveCount(counter3))
    with LocalSession() as session:
        session.run(tg)
    self.assertEqual(total1.fetch(), NUM_INSTANCES * NUM_ITERS)
    self.assertEqual(total2.fetch(), NUM_INSTANCES * (NUM_ITERS ** 2))
    self.assertEqual(total3.fetch(), NUM_INSTANCES * (NUM_ITERS ** 2))
def test_setup(self):
    with Task() as task:
        with ops.task_init():
            one = ops.Const(1)
            two = ops.Add([one, one])
        with ops.task_init():
            three = ops.Const(3)
        accum = ops.Add([two, three])
        # here, accum should be 5
        with ops.task_exit():
            # here, accum should be 6, since this executes after lines below
            seven_1 = ops.Add([accum, one])
        six = ops.Add([accum, one])
        ops.Add([accum, one], [accum])
        seven_2 = ops.Add([accum, one])
        o6 = final_output(six)
        o7_1 = final_output(seven_1)
        o7_2 = final_output(seven_2)
    with LocalSession() as session:
        session.run(task)
    self.assertEqual(o6.fetch(), 6)
    self.assertEqual(o7_1.fetch(), 7)
    self.assertEqual(o7_2.fetch(), 7)
def proc(rec):
    # executed once
    with ops.task_init():
        counter1 = ops.CreateCounter([], ['global_counter'])
        counter2 = ops.CreateCounter([], ['global_counter2'])
        counter3 = ops.CreateCounter([], ['global_counter3'])
    # executed once per thread
    with ops.task_instance_init():
        task_counter = ops.CreateCounter([], ['task_counter'])
    # executed on each iteration
    ops.CountUp(counter1)
    ops.CountUp(task_counter)
    # executed once per thread
    with ops.task_instance_exit():
        with ops.loop(ops.RetrieveCount(task_counter)):
            ops.CountUp(counter2)
        ops.CountUp(counter3)
    # executed once
    with ops.task_exit():
        totals[0] = final_output(ops.RetrieveCount(counter1))
        totals[1] = final_output(ops.RetrieveCount(counter2))
        totals[2] = final_output(ops.RetrieveCount(counter3))
    return rec
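# A hedged usage sketch (not part of the original snippets): attaching proc()
# as the record processor of a data pipeline via caffe2.python.pipeline.pipe.
# 'reader', the thread count, and 'totals' being the module-level list that
# proc() closes over are assumptions made for illustration only.
from caffe2.python.pipeline import pipe

def run_proc_pipeline(reader):
    totals[:] = [None, None, None]
    with TaskGroup() as tg:
        pipe(reader, processor=proc, num_threads=8)
    with LocalSession() as session:
        session.run(tg)
    # totals[0..2] now hold the counter values registered in task_exit().
    return [t.fetch() for t in totals]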