def testSimpleCodeView(self):
  """Profiles the small model in 'code' view and checks the dump's header row.

  The profile is written to a file in the test temp dir; only the first 80
  characters of the cleaned-up dump are compared against the expected header.
  """
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')

  # TODO(xpan): Test 'micros'. Since the execution time changes each run,
  # it's a bit difficult to test it now.
  selected_columns = [
      'bytes', 'params', 'float_ops', 'num_hidden_ops', 'device',
      'input_shapes'
  ]
  # Same option chain as a single fluent expression, spelled out step by step.
  opts_builder = builder(builder.trainable_variables_parameter())
  opts_builder = opts_builder.with_file_output(dump_path)
  opts_builder = opts_builder.with_accounted_types(['.*'])
  opts_builder = opts_builder.with_node_names(
      show_name_regexes=['.*model_analyzer_testlib.*'])
  opts_builder = opts_builder.account_displayed_op_only(False)
  opts = opts_builder.select(selected_columns).build()

  with session.Session() as sess:
    model_output = lib.BuildSmallModel()
    sess.run(variables.global_variables_initializer())

    # Run one fully traced step so the profiler has run metadata to consume.
    run_meta = config_pb2.RunMetadata()
    trace_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    _ = sess.run(model_output, options=trace_options, run_metadata=run_meta)

    model_analyzer.profile(sess.graph, run_meta, cmd='code', options=opts)

    with gfile.Open(dump_path, 'r') as dump_file:
      dump_head = lib.CheckAndRemoveDoc(dump_file.read())[:80]

    # pylint: disable=line-too-long
    expected_head = 'node name | requested bytes | # parameters | # float_ops | assigned devices | in'
    # pylint: enable=line-too-long
    self.assertEqual(expected_head, dump_head)
def testComplexCodeView(self):
  """Profiles the full model by Python code location via a ProfileContext.

  Checks that the dump has more than five lines and starts with the expected
  header, then checks the totals and the ordered child names of the returned
  root node.
  """
  # NOTE(review): another `testComplexCodeView` is defined later in this file;
  # if both live in the same class, the later definition replaces this one.
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')

  opts_builder = builder(builder.trainable_variables_parameter())
  opts_builder = opts_builder.with_file_output(dump_path)
  opts_builder = opts_builder.with_accounted_types(['.*'])
  opts_builder = opts_builder.with_node_names(
      show_name_regexes=['.*model_analyzer_testlib.py.*'])
  opts_builder = opts_builder.account_displayed_op_only(False)
  opts = opts_builder.select(['params', 'float_ops']).build()

  with profile_context.ProfileContext(
      test.get_temp_dir(), trace_steps=[], dump_steps=[]) as pctx:
    with session.Session() as sess:
      model_output = lib.BuildFullModel()
      sess.run(variables.global_variables_initializer())
      # No steps are traced automatically (trace_steps=[]), so request one.
      pctx.trace_next_step()
      _ = sess.run(model_output)
      tfprof_node = pctx.profiler.profile_python(options=opts)

      # pylint: disable=line-too-long
      with gfile.Open(dump_path, 'r') as dump_file:
        dump_lines = dump_file.read().split('\n')
        self.assertGreater(len(dump_lines), 5)
        # Keep at most the first 80 characters of every line.
        clipped_dump = '\n'.join([row[:80] for row in dump_lines])
        cleaned_dump = compat.as_text(lib.CheckAndRemoveDoc(clipped_dump))
        self.assertTrue(
            cleaned_dump.startswith('node name | # parameters | # float_ops'))
      # pylint: enable=line-too-long

      self.assertLess(0, tfprof_node.total_exec_micros)
      self.assertEqual(2844, tfprof_node.total_parameters)
      self.assertLess(168800, tfprof_node.total_float_ops)
      self.assertEqual(8, len(tfprof_node.children))
      self.assertEqual('_TFProfRoot', tfprof_node.name)

      # Children are compared position by position, in this exact order.
      expected_child_names = (
          'model_analyzer_testlib.py:63:BuildFullModel',
          'model_analyzer_testlib.py:63:BuildFullModel (gradient)',
          'model_analyzer_testlib.py:67:BuildFullModel',
          'model_analyzer_testlib.py:67:BuildFullModel (gradient)',
          'model_analyzer_testlib.py:69:BuildFullModel',
          'model_analyzer_testlib.py:70:BuildFullModel',
          'model_analyzer_testlib.py:70:BuildFullModel (gradient)',
          'model_analyzer_testlib.py:72:BuildFullModel',
      )
      for position, expected_name in enumerate(expected_child_names):
        self.assertEqual(expected_name, tfprof_node.children[position].name)
def testOpView(self):
  """Profiles the full model in 'op' view and checks the dump and node chain.

  Two things are verified:
    * the whitespace-stripped dump starts with the expected 170-character
      header row;
    * the returned nodes form a single chain (every node with children has
      exactly one child) along which total execution time decreases by the
      parent's own execution time, the number of graph nodes per op never
      increases, and at least one input shape was recorded.
  """
  ops.reset_default_graph()
  outfile = os.path.join(test.get_temp_dir(), 'dump')

  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(outfile)
          .with_accounted_types(['.*'])
          .with_min_occurrence(10)
          .order_by('occurrence')
          .select(['params', 'micros', 'bytes', 'peak_bytes', 'residual_bytes',
                   'output_bytes', 'occurrence', 'input_shapes'])
          .build())

  with session.Session() as sess:
    x = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())

    # Run one fully traced step so the profiler has run metadata to consume.
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(
        x,
        options=config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE),
        run_metadata=run_meta)

    tfprof_node = model_analyzer.profile(
        sess.graph, run_meta, cmd='op', options=opts)

    with gfile.Open(outfile, 'r') as f:
      # Tabs and spaces are removed before comparing, so column padding in
      # the dump does not affect the check.
      # pylint: disable=line-too-long
      self.assertEqual(
          'nodename|requestedbytes|peakbytes|residualbytes|outputbytes|totalexecutiontime|acceleratorexecutiontime|cpuexecutiontime|#parameters|opoccurrence(run|defined)|inputshapes',
          lib.CheckAndRemoveDoc(f.read()).replace('\t', '').replace(' ', '')[0:170])
      # pylint: enable=line-too-long

    # Sentinel larger than any real graph-node count for the first comparison.
    last_occurrence = 1e32
    input_shapes = 0
    last_total_micros = tfprof_node.total_exec_micros
    last_micros = tfprof_node.exec_micros

    while tfprof_node.children:
      # Count the input shapes recorded on the current node before descending.
      input_shapes += sum(
          len(gnode.input_shapes) for gnode in tfprof_node.graph_nodes)
      self.assertEqual(len(tfprof_node.children), 1)
      tfprof_node = tfprof_node.children[0]

      # The parent's total time is its own time plus the child's total time.
      self.assertEqual(last_total_micros,
                       tfprof_node.total_exec_micros + last_micros)
      last_total_micros = tfprof_node.total_exec_micros
      last_micros = tfprof_node.exec_micros

      # Graph-node counts must be non-increasing along the chain.
      self.assertLessEqual(len(tfprof_node.graph_nodes), last_occurrence)
      last_occurrence = len(tfprof_node.graph_nodes)

    self.assertGreater(input_shapes, 0)
def testComplexCodeView(self):
  """Profiles the full model by Python code location and checks the exact dump.

  Every line of the dump is cut to at most 80 characters and the result is
  compared, as bytes, with the expected table. The totals and the ordered
  child names of the returned root node are checked afterwards.
  """
  # NOTE(review): another `testComplexCodeView` is defined earlier in this
  # file; if both live in the same class, this later definition wins.
  ops.reset_default_graph()
  outfile = os.path.join(test.get_temp_dir(), 'dump')

  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(outfile)
          .with_accounted_types(['.*'])
          .with_node_names(show_name_regexes=['.*model_analyzer_testlib.py.*'])
          .account_displayed_op_only(False)
          .select(['params', 'float_ops'])
          .build())

  # Expected dump, one entry per row. Rows are indented two spaces per level
  # below the root. The rows that end mid-token were cut by the 80-character
  # limit applied below, so each of them must be exactly 80 characters long;
  # that only holds with this indentation.
  # pylint: disable=line-too-long
  expected_rows = [
      'node name | # parameters | # float_ops',
      '_TFProfRoot (--/2.84k params, --/168.86k flops)',
      '  model_analyzer_testlib.py:63:BuildFullModel (0/1.80k params, 0/45.37k flops)',
      '    model_analyzer_testlib.py:40:BuildSmallModel (0/0 params, 0/0 flops)',
      '    model_analyzer_testlib.py:44:BuildSmallModel (0/4 params, 0/8 flops)',
      '    model_analyzer_testlib.py:48:BuildSmallModel (0/648 params, 0/1.30k flops)',
      '    model_analyzer_testlib.py:49:BuildSmallModel (0/0 params, 0/23.33k flops)',
      '    model_analyzer_testlib.py:53:BuildSmallModel (0/1.15k params, 0/2.30k flops)',
      '    model_analyzer_testlib.py:54:BuildSmallModel (0/0 params, 0/18.43k flops)',
      '  model_analyzer_testlib.py:63:BuildFullModel (gradient) (0/0 params, 0/67.39k f',
      '    model_analyzer_testlib.py:49:BuildSmallModel (gradient) (0/0 params, 0/46.66',
      '    model_analyzer_testlib.py:54:BuildSmallModel (gradient) (0/0 params, 0/20.74',
      '  model_analyzer_testlib.py:67:BuildFullModel (0/1.04k params, 0/18.58k flops)',
      '  model_analyzer_testlib.py:67:BuildFullModel (gradient) (0/0 params, 0/37.00k f',
      '  model_analyzer_testlib.py:69:BuildFullModel (0/0 params, 0/0 flops)',
      '  model_analyzer_testlib.py:70:BuildFullModel (0/0 params, 0/258 flops)',
      '  model_analyzer_testlib.py:70:BuildFullModel (gradient) (0/0 params, 0/129 flop',
      '  model_analyzer_testlib.py:72:BuildFullModel (0/0 params, 0/141 flops)',
  ]
  # pylint: enable=line-too-long
  # Every row, including the last one, is terminated by a newline.
  expected_dump = ''.join(row + '\n' for row in expected_rows)

  with profile_context.ProfileContext(
      test.get_temp_dir(), trace_steps=[], dump_steps=[]) as pctx:
    with session.Session() as sess:
      x = lib.BuildFullModel()
      sess.run(variables.global_variables_initializer())
      # No steps are traced automatically (trace_steps=[]), so request one.
      pctx.trace_next_step()
      _ = sess.run(x)
      tfprof_node = pctx.profiler.profile_python(options=opts)

      with gfile.Open(outfile, 'r') as f:
        lines = f.read().split('\n')
        # Slicing already clamps to the line length, so short lines are kept
        # whole and long ones are cut at 80 characters.
        result = '\n'.join([l[:80] for l in lines])
        self.assertEqual(
            compat.as_bytes(expected_dump),
            compat.as_bytes(lib.CheckAndRemoveDoc(result)))

      self.assertLess(0, tfprof_node.total_exec_micros)
      self.assertEqual(2844, tfprof_node.total_parameters)
      self.assertEqual(168863, tfprof_node.total_float_ops)
      self.assertEqual(8, len(tfprof_node.children))
      self.assertEqual('_TFProfRoot', tfprof_node.name)

      # Children are compared position by position, in this exact order.
      expected_child_names = (
          'model_analyzer_testlib.py:63:BuildFullModel',
          'model_analyzer_testlib.py:63:BuildFullModel (gradient)',
          'model_analyzer_testlib.py:67:BuildFullModel',
          'model_analyzer_testlib.py:67:BuildFullModel (gradient)',
          'model_analyzer_testlib.py:69:BuildFullModel',
          'model_analyzer_testlib.py:70:BuildFullModel',
          'model_analyzer_testlib.py:70:BuildFullModel (gradient)',
          'model_analyzer_testlib.py:72:BuildFullModel',
      )
      for index, expected_name in enumerate(expected_child_names):
        self.assertEqual(expected_name, tfprof_node.children[index].name)
def testDumpToFile(self):
  """Profiles the small model's graph into a file and checks the whole dump."""
  ops.reset_default_graph()
  dump_path = os.path.join(test.get_temp_dir(), 'dump')

  opts_builder = builder(builder.trainable_variables_parameter())
  opts = opts_builder.with_file_output(dump_path).build()

  # NOTE(review): the leading whitespace of the child rows below is copied
  # as found; confirm it matches the indentation tfprof actually emits.
  expected_dump = (u'node name | # parameters\n'
                   '_TFProfRoot (--/451 params)\n'
                   ' DW (3x3x3x6, 162/162 params)\n'
                   ' DW2 (2x2x6x12, 288/288 params)\n'
                   ' ScalarW (1, 1/1 params)\n')

  with session.Session() as sess:
    # The model is only built, never run: the profile uses the graph alone.
    _ = lib.BuildSmallModel()
    model_analyzer.profile(sess.graph, options=opts)

    with gfile.Open(dump_path, 'r') as dump_file:
      actual_dump = lib.CheckAndRemoveDoc(dump_file.read())
      self.assertEqual(expected_dump, actual_dump)
def testSelectEverythingDetail(self):
  """Profiles the small model by name scope with all detail columns selected.

  Checks the header row of the dump, then inspects the 'Conv2D ' and
  'DW (3x3x3x6' rows to make sure timing, device, float-op, op-count,
  input-shape and parameter columns were filled in. Finally the profile file
  dumped during the run is loaded again and must produce an identical dump.
  """
  ops.reset_default_graph()
  # Queried once; the result drives both device placement and the checks.
  has_gpu = test.is_gpu_available()
  dev = '/device:GPU:0' if has_gpu else '/device:CPU:0'
  outfile = os.path.join(test.get_temp_dir(), 'dump')

  opts = (builder(builder.trainable_variables_parameter())
          .with_file_output(outfile)
          .with_accounted_types(['.*'])
          .select(['micros', 'bytes', 'params', 'float_ops', 'occurrence',
                   'device', 'op_types', 'input_shapes'])
          .build())

  def assert_positive_pair(pattern, text):
    """Asserts `pattern` matches `text` and both captured numbers are > 0."""
    mat = re.search(pattern, text)
    # Fail with a readable message instead of an AttributeError on `None`.
    self.assertIsNotNone(mat, '%r does not match %r' % (text, pattern))
    self.assertGreater(float(mat.group(1)), 0.0)
    self.assertGreater(float(mat.group(2)), 0.0)

  def row_metrics(row):
    """Returns the comma-separated fields between the first '(' and ')'."""
    return row[row.find('(') + 1:row.find(')')].split(',')

  with profile_context.ProfileContext(
      test.get_temp_dir(), trace_steps=[], dump_steps=[]) as pctx:
    with session.Session() as sess, ops.device(dev):
      x = lib.BuildSmallModel()
      sess.run(variables.global_variables_initializer())
      # No steps are traced or dumped automatically, so request both.
      pctx.trace_next_step()
      pctx.dump_next_step()
      _ = sess.run(x)

      pctx.profiler.profile_name_scope(options=opts)

      with gfile.Open(outfile, 'r') as f:
        dump_str = lib.CheckAndRemoveDoc(f.read())
      outputs = dump_str.split('\n')

      # pylint: disable=line-too-long
      self.assertEqual(
          outputs[0],
          'node name | # parameters | # float_ops | requested bytes | total execution time | accelerator execution time | cpu execution time | assigned devices | op types | op count (run|defined) | input shapes'
      )
      # pylint: enable=line-too-long

      for o in outputs[1:]:
        # `> 0` (not `>= 0`): a row is only selected when the text appears
        # after its first character.
        if o.find('Conv2D ') > 0:
          metrics = row_metrics(o)
          # Make sure time is profiled. With a GPU fields 3, 4 and 5 are
          # checked; without one only fields 3 and 5.
          gap = 1 if has_gpu else 2
          for i in range(3, 6, gap):
            assert_positive_pair('(.*)[um]s/(.*)[um]s', metrics[i])
          # Make sure device is profiled.
          if has_gpu:
            self.assertIn('gpu', metrics[6])
            self.assertNotIn('cpu', metrics[6])
          else:
            self.assertNotIn('gpu', metrics[6])
            self.assertIn('cpu', metrics[6])
          # Make sure float_ops is profiled.
          assert_positive_pair('(.*)k/(.*)k flops', metrics[1].strip())
          # Make sure op_count is profiled.
          self.assertEqual(metrics[8].strip(), '1/1|1/1')
          # Make sure input_shapes is profiled.
          self.assertEqual(metrics[9].strip(), '0:2x6x6x3|1:3x3x3x6')

        if o.find('DW (3x3x3x6') > 0:
          metrics = row_metrics(o)
          assert_positive_pair('(.*)/(.*) params', metrics[1].strip())

    # Test that profiler restored from profile file gives the same result.
    gfile.Remove(outfile)
    profile_file = os.path.join(test.get_temp_dir(), 'profile_1')
    with lib.ProfilerFromFile(profile_file) as profiler:
      profiler.profile_name_scope(options=opts)
      with gfile.Open(outfile, 'r') as f:
        self.assertEqual(dump_str, lib.CheckAndRemoveDoc(f.read()))