def benchmark(mod, dry_run=10, iterations=10):
    """Time training steps of ``mod`` on synthetic data with the profiler on.

    A random batch is built from ``mod.data_shapes`` and ``mod.label_shapes``.
    ``dry_run`` untimed warm-up steps are executed first; afterwards the
    profiler is switched to 'run' and ``iterations`` forward/backward/update
    steps are timed.

    Args:
        mod: Module-like object exposing ``_context``, ``data_shapes``,
            ``label_shapes``, ``forward``, ``backward``, ``update`` and
            ``get_outputs``.
        dry_run: Number of warm-up steps executed before timing starts.
        iterations: Number of timed (and profiled) steps.

    Returns:
        Average elapsed wall-clock time per timed step, in milliseconds.

    Raises:
        ZeroDivisionError: If ``iterations`` is 0.
    """
    # Use the module's own device when it has exactly one; otherwise build
    # the synthetic batch on the CPU.
    if len(mod._context) == 1:
        ctx = mod._context[0]
    else:
        ctx = mx.cpu()

    data = [mx.random.uniform(-1.0, 1.0, shape=shape, ctx=ctx)
            for _, shape in mod.data_shapes]
    label = [mx.nd.array(np.random.randint(1, 100, size=shape), ctx=ctx)
             for _, shape in mod.label_shapes]
    batch = mx.io.DataBatch(data, label)

    # Dry run: warm-up steps that are neither timed nor profiled.
    for _ in range(dry_run):
        mod.forward(batch, is_train=True)
        mod.backward()
        # Wait on the outputs so each warm-up step has really finished.
        for output in mod.get_outputs(merge_multi_context=False)[0]:
            output.wait_to_read()
        mod.update()

    # time.clock() was removed in Python 3.8 and measured CPU time on Unix;
    # time.time() gives elapsed wall-clock time on every supported version.
    t0 = time.time()

    profiler.profiler_set_state('run')
    try:
        # Real run: the steps that are timed and profiled.
        for _ in range(iterations):
            mod.forward(batch, is_train=True)
            mod.backward()
            mod.update()
            for output in mod.get_outputs(merge_multi_context=False)[0]:
                output.wait_to_read()
    finally:
        # Never leave the profiler running if a step raises.
        profiler.profiler_set_state('stop')

    t1 = time.time()

    return (t1 - t0) * 1000.0 / iterations
def test_profiler():
    """Profile a window of 4096x4096 matrix multiplications on ``mx.cpu(1)``.

    Binds ``dot(A, B)`` to an executor, runs it ``iter_num`` times and turns
    the profiler on for iterations ``[begin_profiling_iter,
    end_profiling_iter)``. The profile is configured to be written to
    ``test_profile.json``; the elapsed time of the profiled window and the
    average time per profiled operator are printed.
    """
    profile_filename = "test_profile.json"
    iter_num = 100
    # The window must be non-empty: with begin == end the profiler was
    # started and stopped in the same iteration, before any forward() ran,
    # so nothing was recorded. Ten iterations are profiled here.
    begin_profiling_iter = 50
    end_profiling_iter = 60
    profiled_iters = end_profiling_iter - begin_profiling_iter

    profiler.profiler_set_config(mode='symbolic', filename=profile_filename)
    print('profile file save to {0}'.format(profile_filename))

    A = mx.sym.Variable('A')
    B = mx.sym.Variable('B')
    C = mx.symbol.dot(A, B)

    executor = C.simple_bind(mx.cpu(1), 'write', A=(4096, 4096), B=(4096, 4096))

    # Fill both operands with random values.
    a = mx.random.uniform(-1.0, 1.0, shape=(4096, 4096))
    b = mx.random.uniform(-1.0, 1.0, shape=(4096, 4096))
    a.copyto(executor.arg_dict['A'])
    b.copyto(executor.arg_dict['B'])

    print("execution begin")
    for i in range(iter_num):
        if i == begin_profiling_iter:
            # time.clock() was removed in Python 3.8; time.time() measures
            # elapsed wall-clock time on every supported version.
            t0 = time.time()
            profiler.profiler_set_state('run')
        if i == end_profiling_iter:
            t1 = time.time()
            profiler.profiler_set_state('stop')
        executor.forward()
        c = executor.outputs[0]
        # Wait for the result so each iteration is complete before the next.
        c.wait_to_read()
    print("execution end")

    duration = t1 - t0
    print('duration: {0}s'.format(duration))
    # One dot operator runs per iteration, so average over the profiled
    # iterations only, not over all iter_num iterations.
    print(' {0}ms/operator'.format(duration * 1000 / profiled_iters))
def test_profiler():
    """Profile a window of 4096x4096 matrix multiplications on ``mx.cpu(1)``.

    Binds ``dot(A, B)`` to an executor, runs it ``iter_num`` times and turns
    the profiler on for iterations ``[begin_profiling_iter,
    end_profiling_iter)``. The profile is configured to be written to
    ``test_profile.json``; the elapsed time of the profiled window and the
    average time per profiled operator are printed.
    """
    profile_filename = "test_profile.json"
    iter_num = 100
    # The window must be non-empty: with begin == end the profiler was
    # started and stopped in the same iteration, before any forward() ran,
    # so nothing was recorded. Ten iterations are profiled here.
    begin_profiling_iter = 50
    end_profiling_iter = 60
    profiled_iters = end_profiling_iter - begin_profiling_iter

    profiler.profiler_set_config(mode='symbolic', filename=profile_filename)
    print('profile file save to {0}'.format(profile_filename))

    A = mx.sym.Variable('A')
    B = mx.sym.Variable('B')
    C = mx.symbol.dot(A, B)

    executor = C.simple_bind(mx.cpu(1), 'write', A=(4096, 4096), B=(4096, 4096))

    # Fill both operands with random values.
    a = mx.random.uniform(-1.0, 1.0, shape=(4096, 4096))
    b = mx.random.uniform(-1.0, 1.0, shape=(4096, 4096))
    a.copyto(executor.arg_dict['A'])
    b.copyto(executor.arg_dict['B'])

    print("execution begin")
    for i in range(iter_num):
        if i == begin_profiling_iter:
            # time.clock() was removed in Python 3.8; time.time() measures
            # elapsed wall-clock time on every supported version.
            t0 = time.time()
            profiler.profiler_set_state('run')
        if i == end_profiling_iter:
            t1 = time.time()
            profiler.profiler_set_state('stop')
        executor.forward()
        c = executor.outputs[0]
        # Wait for the result so each iteration is complete before the next.
        c.wait_to_read()
    print("execution end")

    duration = t1 - t0
    print('duration: {0}s'.format(duration))
    # One dot operator runs per iteration, so average over the profiled
    # iterations only, not over all iter_num iterations.
    print(' {0}ms/operator'.format(duration * 1000 / profiled_iters))
def switch_profiler(param):
    """Toggle the profiler at fixed batch indices of the first epoch.

    When ``param.epoch`` is 0, the profiler is set to 'run' at
    ``param.nbatch == 100``; at ``param.nbatch == 110`` it is set to 'stop'
    and the profile is dumped. Any other epoch or batch index is a no-op.

    Args:
        param: Object exposing ``epoch`` and ``nbatch`` attributes.
    """
    # Only the first epoch is ever profiled.
    if param.epoch != 0:
        return

    batch_index = param.nbatch
    if batch_index == 100:
        profiler.profiler_set_state('run')
    elif batch_index == 110:
        profiler.profiler_set_state('stop')
        profiler.dump_profile()