Example #1
  def testAutoProfiling(self):
    ops.reset_default_graph()
    time_dir = os.path.join(test.get_temp_dir(), 'time')
    memory_dir = os.path.join(test.get_temp_dir(), 'memory')
    profile_dir = os.path.join(test.get_temp_dir(), 'dir/dir2/profile')
    # TODO(xpan): Should we create parent directory for them?
    gfile.MkDir(time_dir)
    gfile.MkDir(memory_dir)

    time_opts = (builder(builder.time_and_memory())
                 .with_file_output(os.path.join(time_dir, 'profile'))
                 .select(['micros']).build())
    memory_opts = (builder(builder.time_and_memory())
                   .with_file_output(os.path.join(memory_dir, 'profile'))
                   .select(['bytes']).build())

    time_steps = [2, 3]
    memory_steps = [1, 3]
    dump_steps = [3, 4]

    x = lib.BuildSmallModel()
    with profile_context.ProfileContext(profile_dir,
                                        trace_steps=[1, 2, 3],
                                        dump_steps=[3, 4]) as pctx:
      pctx.add_auto_profiling('scope', time_opts, time_steps)
      pctx.add_auto_profiling('scope', memory_opts, memory_steps)

      self._trainLoop(x, 10, time_dir, time_steps,
                      memory_dir, memory_steps, profile_dir, dump_steps)
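The snippets in these examples omit their import block. A minimal set of import aliases that would make them runnable, assuming the standard TensorFlow 1.x module layout (an inference; the original file's imports are not shown here), is sketched below:

# Assumed imports (hypothetical reconstruction, not part of the original excerpts).
import os
import random
import re

from tensorflow.core.profiler import tfprof_log_pb2
from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.client import session
from tensorflow.python.eager import context
from tensorflow.python.framework import ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import gfile
from tensorflow.python.platform import test
from tensorflow.python.profiler import model_analyzer
from tensorflow.python.profiler import option_builder
from tensorflow.python.profiler import profile_context
from tensorflow.python.profiler.internal import model_analyzer_testlib as lib

# `builder` is used throughout the examples as shorthand for the option builder.
builder = option_builder.ProfileOptionBuilder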
Example #2
  def testCodeViewLeafGraphNode(self):
    ops.reset_default_graph()
    opts = (builder(builder.trainable_variables_parameter())
            .with_empty_output()
            .with_accounted_types(['.*'])
            .account_displayed_op_only(False)
            .select(['bytes', 'params', 'float_ops', 'device']).build())

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      x = lib.BuildSmallModel()

      self.evaluate(variables.global_variables_initializer())
      run_meta = config_pb2.RunMetadata()
      _ = sess.run(x,
                   options=config_pb2.RunOptions(
                       trace_level=config_pb2.RunOptions.FULL_TRACE),
                   run_metadata=run_meta)

      tfprof_node = model_analyzer.profile(
          sess.graph, run_meta, cmd='code', options=opts)

      leaf = tfprof_node
      while leaf.children:
        self.assertEqual(0, len(leaf.graph_nodes))
        leaf = leaf.children[0]
      self.assertEqual(1, len(leaf.graph_nodes))
Example #3
  def testSimpleCodeView(self):
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), 'dump')
    # TODO(xpan): Test 'micros'. Since the execution time changes each run,
    # it's a bit difficult to test it now.
    opts = (builder(builder.trainable_variables_parameter())
            .with_file_output(outfile)
            .with_accounted_types(['.*'])
            .with_node_names(show_name_regexes=['.*model_analyzer_testlib.*'])
            .account_displayed_op_only(False)
            .select(['bytes', 'params', 'float_ops', 'num_hidden_ops', 'device',
                     'input_shapes']).build())

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      x = lib.BuildSmallModel()

      self.evaluate(variables.global_variables_initializer())
      run_meta = config_pb2.RunMetadata()
      _ = sess.run(x,
                   options=config_pb2.RunOptions(
                       trace_level=config_pb2.RunOptions.FULL_TRACE),
                   run_metadata=run_meta)

      model_analyzer.profile(
          sess.graph, run_meta, cmd='code', options=opts)

      with gfile.Open(outfile, 'r') as f:
        # pylint: disable=line-too-long
        self.assertEqual(
            'node name | requested bytes | # parameters | # float_ops | assigned devices | in',
            lib.CheckAndRemoveDoc(f.read())[0:80])
Example #4
  def testSelectEverything(self):
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), 'dump')
    opts = (builder(builder.trainable_variables_parameter())
            .with_file_output(outfile)
            .with_accounted_types(['.*'])
            .select(['params', 'float_ops', 'occurrence', 'device', 'op_types',
                     'input_shapes']).build())

    rewriter_config = rewriter_config_pb2.RewriterConfig(
        disable_model_pruning=True)
    graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
    config = config_pb2.ConfigProto(graph_options=graph_options)
    with session.Session(config=config) as sess, ops.device('/device:CPU:0'):
      x = lib.BuildSmallModel()

      sess.run(variables.global_variables_initializer())
      run_meta = config_pb2.RunMetadata()
      _ = sess.run(x,
                   options=config_pb2.RunOptions(
                       trace_level=config_pb2.RunOptions.FULL_TRACE),
                   run_metadata=run_meta)

      model_analyzer.profile(
          sess.graph, run_meta, options=opts)

      with gfile.Open(outfile, 'r') as f:
        # pylint: disable=line-too-long
        self.assertEqual(
            'node name | # parameters | # float_ops | assigned devices | op types | op count (run|defined) | input shapes\n_TFProfRoot (--/451 params, --/11.34k flops, _kTFScopeParent, --/8|--/36, )\n  Conv2D (0/0 params, 5.83k/5.83k flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Conv2D, 1/1|1/1, 0:2x6x6x3|1:3x3x3x6)\n  Conv2D_1 (0/0 params, 4.61k/4.61k flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Conv2D, 1/1|1/1, 0:2x3x3x6|1:2x2x6x12)\n  DW (3x3x3x6, 162/162 params, 0/324 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n    DW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:3x3x3x6|1:3x3x3x6)\n    DW/Initializer (0/0 params, 0/324 flops, _kTFScopeParent, 0/0|1/7, )\n      DW/Initializer/random_normal (0/0 params, 162/324 flops, Add, 0/0|1/6, 0:3x3x3x6|1:1)\n        DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n        DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW/Initializer/random_normal/mul (0/0 params, 162/162 flops, Mul, 0/0|1/1, 0:3x3x3x6|1:1)\n        DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n    DW/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Identity, 1/1|1/1, 0:3x3x3x6)\n  DW2 (2x2x6x12, 288/288 params, 0/576 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n    DW2/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:2x2x6x12|1:2x2x6x12)\n    DW2/Initializer (0/0 params, 0/576 flops, _kTFScopeParent, 0/0|1/7, )\n      DW2/Initializer/random_normal (0/0 params, 288/576 flops, Add, 0/0|1/6, 0:2x2x6x12|1:1)\n        DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n        DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW2/Initializer/random_normal/mul (0/0 params, 288/288 flops, Mul, 0/0|1/1, 0:2x2x6x12|1:1)\n        DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n    DW2/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Identity, 1/1|1/1, 0:2x2x6x12)\n  ScalarW (1, 1/1 params, 0/2 flops, VariableV2|_trainable_variables, 0/0|1/10, )\n    ScalarW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:1|1:1)\n    ScalarW/Initializer (0/0 params, 0/2 flops, _kTFScopeParent, 0/0|1/7, )\n      ScalarW/Initializer/random_normal (0/0 params, 1/2 flops, Add, 0/0|1/6, 0:1|1:1)\n        ScalarW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:0)\n        ScalarW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        ScalarW/Initializer/random_normal/mul (0/0 params, 1/1 flops, Mul, 0/0|1/1, 0:1|1:1)\n        ScalarW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        ScalarW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n    ScalarW/read (0/0 params, 0/0 flops, Identity, 0/0|1/1, 0:1)\n'
            '  _retval_Conv2D_1_0_0 (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|_retval_Conv2D_1_0_0, 1/1|1/1, )\n  init (0/0 params, 0/0 flops, NoOp, 0/0|1/1, 0:1|1:3x3x3x6|2:2x2x6x12)\n  zeros (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Const, 1/1|1/1, )\n',
            f.read())
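Every example calls lib.BuildSmallModel() without showing it. From the expected output above (a scalar ScalarW, a 3x3x3x6 kernel DW, a 2x2x6x12 kernel DW2, and two Conv2D ops on a 2x6x6x3 input), a plausible sketch of that helper, assuming random-normal initializers and stride-2 SAME convolutions, would be:

# Hypothetical sketch of model_analyzer_testlib.BuildSmallModel, reconstructed
# from the profiled node names and shapes above; not the verbatim helper.
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import variable_scope


def BuildSmallModel():
  image = array_ops.zeros([2, 6, 6, 3])  # 0:2x6x6x3 in the input_shapes column.
  _ = variable_scope.get_variable(
      'ScalarW', [], dtypes.float32,
      initializer=init_ops.random_normal_initializer(stddev=0.001))
  kernel = variable_scope.get_variable(
      'DW', [3, 3, 3, 6], dtypes.float32,
      initializer=init_ops.random_normal_initializer(stddev=0.001))
  x = nn_ops.conv2d(image, kernel, [1, 2, 2, 1], padding='SAME')
  kernel = variable_scope.get_variable(
      'DW2', [2, 2, 6, 12], dtypes.float32,
      initializer=init_ops.random_normal_initializer(stddev=0.001))
  x = nn_ops.conv2d(x, kernel, [1, 2, 2, 1], padding='SAME')
  return x

The 451 total parameters (162 + 288 + 1) and the ~5.83k/~4.61k flop counts reported above are consistent with this shape layout.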
Example #5
    def testSelectOption(self):
        ops.reset_default_graph()
        outfile = os.path.join(test.get_temp_dir(), 'dump')

        def check_selection(selected, not_selected):
            with gfile.Open(outfile, 'r') as f:
                s = f.read()
                for attr in selected:
                    self.assertTrue(s.find(attr) > 0, s)
                for attr in not_selected:
                    self.assertFalse(s.find(attr) > 0, s)

        with session.Session() as sess:
            x = lib.BuildSmallModel()
            sess.run(variables.global_variables_initializer())
            run_meta = config_pb2.RunMetadata()
            _ = sess.run(x,
                         options=config_pb2.RunOptions(
                             trace_level=config_pb2.RunOptions.FULL_TRACE),
                         run_metadata=run_meta)

            opts = builder(
                builder.time_and_memory()).with_file_output(outfile).select(
                    ['micros']).build()
            _ = model_analyzer.profile(sess.graph,
                                       run_meta=run_meta,
                                       options=opts)
            check_selection(
                ['total execution time', 'accelerator execution time'],
                ['bytes'])

            opts = builder(
                builder.time_and_memory()).with_file_output(outfile).select(
                    ['bytes']).build()
            _ = model_analyzer.profile(sess.graph,
                                       run_meta=run_meta,
                                       options=opts)
            check_selection(['requested bytes'],
                            ['peak bytes', 'residual bytes', 'output bytes'])

            opts = builder(
                builder.time_and_memory()).with_file_output(outfile).select(
                    ['peak_bytes', 'residual_bytes', 'output_bytes']).build()
            _ = model_analyzer.profile(sess.graph,
                                       run_meta=run_meta,
                                       options=opts)
            check_selection(['peak bytes', 'residual bytes', 'output bytes'],
                            ['requested bytes'])
Example #6
  def testDumpToFile(self):
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), 'dump')
    opts = builder(builder.trainable_variables_parameter()
                  ).with_file_output(outfile).build()

    with session.Session() as sess:
      _ = lib.BuildSmallModel()
      model_analyzer.profile(sess.graph, options=opts)

      with gfile.Open(outfile, 'r') as f:
        self.assertEqual(u'node name | # parameters\n'
                         '_TFProfRoot (--/451 params)\n'
                         '  DW (3x3x3x6, 162/162 params)\n'
                         '  DW2 (2x2x6x12, 288/288 params)\n'
                         '  ScalarW (1, 1/1 params)\n',
                         f.read())
Example #7
    def testDumpToFile(self):
        ops.reset_default_graph()
        opts = model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS.copy()
        outfile = os.path.join(test.get_temp_dir(), 'dump')
        opts['output'] = 'file:outfile=' + outfile

        with session.Session() as sess:
            _ = lib.BuildSmallModel()
            model_analyzer.profile(sess.graph, options=opts)

            with gfile.Open(outfile, 'r') as f:
                self.assertEqual(
                    u'node name | # parameters\n'
                    '_TFProfRoot (--/451 params)\n'
                    '  DW (3x3x3x6, 162/162 params)\n'
                    '  DW2 (2x2x6x12, 288/288 params)\n'
                    '  ScalarW (1, 1/1 params)\n', f.read())
Example #8
  def testMultipleProfilePerStep(self):
    ops.reset_default_graph()
    opts = (builder(builder.trainable_variables_parameter())
            .with_empty_output()
            .with_accounted_types(['.*'])
            .select(['micros', 'bytes', 'peak_bytes',
                     'residual_bytes', 'output_bytes']).build())

    r = lib.BuildSmallModel()
    sess = session.Session()
    profiler = model_analyzer.Profiler(sess.graph)

    init_var_run_meta = config_pb2.RunMetadata()
    sess.run(variables.global_variables_initializer(),
             options=config_pb2.RunOptions(
                 trace_level=config_pb2.RunOptions.FULL_TRACE),
             run_metadata=init_var_run_meta)

    train_run_meta = config_pb2.RunMetadata()
    sess.run(r,
             options=config_pb2.RunOptions(
                 trace_level=config_pb2.RunOptions.FULL_TRACE),
             run_metadata=train_run_meta)

    profiler.add_step(0, train_run_meta)
    ret1 = profiler.profile_name_scope(opts)
    n1 = lib.SearchTFProfNode(
        ret1, 'DW/Initializer/random_normal/RandomStandardNormal')
    # Without the var initialization run_meta, it doesn't have the
    # information of var_initialization.
    self.assertEqual(n1.exec_micros, 0)
    self.assertEqual(n1.requested_bytes, 0)
    self.assertEqual(n1.peak_bytes, 0)
    self.assertEqual(n1.residual_bytes, 0)

    profiler.add_step(0, init_var_run_meta)
    ret2 = profiler.profile_name_scope(opts)
    n2 = lib.SearchTFProfNode(
        ret2, 'DW/Initializer/random_normal/RandomStandardNormal')
    # After adding the var initialization run_meta.
    self.assertGreater(n2.exec_micros, 0)
    self.assertGreater(n2.requested_bytes, 0)
    self.assertGreater(n2.peak_bytes, 0)
    self.assertGreater(n2.residual_bytes, 0)
Example #9
    def testSelectEverything(self):
        ops.reset_default_graph()
        outfile = os.path.join(test.get_temp_dir(), 'dump')
        opts = (builder(
            builder.trainable_variables_parameter()).with_file_output(
                outfile).with_accounted_types(['.*']).select([
                    'params', 'float_ops', 'occurrence', 'device', 'op_types',
                    'input_shapes'
                ]).build())

        with session.Session(config=self._no_rewrite_session_config()
                             ) as sess, ops.device('/device:CPU:0'):
            x = lib.BuildSmallModel()

            self.evaluate(variables.global_variables_initializer())
            run_meta = config_pb2.RunMetadata()
            _ = sess.run(x,
                         options=config_pb2.RunOptions(
                             trace_level=config_pb2.RunOptions.FULL_TRACE),
                         run_metadata=run_meta)

            model_analyzer.profile(sess.graph, run_meta, options=opts)
Example #10
    def testEager(self):
        ops.reset_default_graph()
        with context.eager_mode():
            outfile = os.path.join(test.get_temp_dir(), 'dump')
            opts = builder(
                builder.time_and_memory()).with_file_output(outfile).build()
            context.enable_run_metadata()
            lib.BuildSmallModel()

            profiler = model_analyzer.Profiler()
            profiler.add_step(0, context.export_run_metadata())
            context.disable_run_metadata()
            profiler.profile_operations(opts)
            with gfile.Open(outfile, 'r') as f:
                out_str = f.read()
                self.assertTrue('Conv2D' in out_str)
                self.assertTrue('VarHandleOp' in out_str)

            with gfile.Open('/tmp/eager_profile', 'wb') as f:
                profile_pb = tfprof_log_pb2.ProfileProto()
                profile_pb.ParseFromString(profiler.serialize_to_string())
                profile_pb_str = '%s' % profile_pb
                self.assertTrue('Conv2D' in profile_pb_str)
                self.assertTrue('VarHandleOp' in profile_pb_str)
Example #11
    def testSelectEverythingDetail(self):
        ops.reset_default_graph()
        dev = '/device:GPU:0' if test.is_gpu_available() else '/device:CPU:0'
        outfile = os.path.join(test.get_temp_dir(), 'dump')
        opts = (builder(
            builder.trainable_variables_parameter()).with_file_output(
                outfile).with_accounted_types(['.*']).select([
                    'micros', 'bytes', 'params', 'float_ops', 'occurrence',
                    'device', 'op_types', 'input_shapes'
                ]).build())

        with profile_context.ProfileContext(test.get_temp_dir(),
                                            trace_steps=[],
                                            dump_steps=[]) as pctx:
            with session.Session() as sess, ops.device(dev):
                x = lib.BuildSmallModel()

                sess.run(variables.global_variables_initializer())
                pctx.trace_next_step()
                pctx.dump_next_step()
                _ = sess.run(x)

                pctx.profiler.profile_name_scope(options=opts)

                with gfile.Open(outfile, 'r') as f:
                    # pylint: disable=line-too-long
                    dump_str = lib.CheckAndRemoveDoc(f.read())
                    outputs = dump_str.split('\n')

                    self.assertEqual(
                        outputs[0],
                        'node name | # parameters | # float_ops | requested bytes | total execution time | accelerator execution time | cpu execution time | assigned devices | op types | op count (run|defined) | input shapes'
                    )
                    for o in outputs[1:]:
                        if o.find('Conv2D ') > 0:
                            metrics = o[o.find('(') + 1:o.find(')')].split(',')
                            # Make sure time is profiled.
                            gap = 1 if test.is_gpu_available() else 2
                            for i in range(3, 6, gap):
                                mat = re.search('(.*)[um]s/(.*)[um]s',
                                                metrics[i])
                                self.assertGreater(float(mat.group(1)), 0.0)
                                self.assertGreater(float(mat.group(2)), 0.0)
                            # Make sure device is profiled.
                            if test.is_gpu_available():
                                self.assertTrue(metrics[6].find('gpu') > 0)
                                self.assertFalse(metrics[6].find('cpu') > 0)
                            else:
                                self.assertFalse(metrics[6].find('gpu') > 0)
                                self.assertTrue(metrics[6].find('cpu') > 0)
                            # Make sure float_ops is profiled.
                            mat = re.search('(.*)k/(.*)k flops',
                                            metrics[1].strip())
                            self.assertGreater(float(mat.group(1)), 0.0)
                            self.assertGreater(float(mat.group(2)), 0.0)
                            # Make sure op_count is profiled.
                            self.assertEqual(metrics[8].strip(), '1/1|1/1')
                            # Make sure input_shapes is profiled.
                            self.assertEqual(metrics[9].strip(),
                                             '0:2x6x6x3|1:3x3x3x6')

                        if o.find('DW (3x3x3x6') > 0:
                            metrics = o[o.find('(') + 1:o.find(')')].split(',')
                            mat = re.search('(.*)/(.*) params',
                                            metrics[1].strip())
                            self.assertGreater(float(mat.group(1)), 0.0)
                            self.assertGreater(float(mat.group(2)), 0.0)
                    # pylint: enable=line-too-long

        # Test that profiler restored from profile file gives the same result.
        gfile.Remove(outfile)
        profile_file = os.path.join(test.get_temp_dir(), 'profile_1')
        with lib.ProfilerFromFile(profile_file) as profiler:
            profiler.profile_name_scope(options=opts)
            with gfile.Open(outfile, 'r') as f:
                self.assertEqual(dump_str, lib.CheckAndRemoveDoc(f.read()))
Example #12
    def testMinOption(self):
        ops.reset_default_graph()

        def check_min(nodes, mm=0, mam=0, mcm=0, mb=0, mpb=0, mrb=0, mob=0):
            for n in nodes:
                if mm > 0:
                    self.assertGreaterEqual(n.exec_micros, mm)
                if mam > 0:
                    self.assertGreaterEqual(n.accelerator_exec_micros, mam)
                if mcm > 0:
                    self.assertGreaterEqual(n.cpu_exec_micros, mcm)
                if mb > 0:
                    self.assertGreaterEqual(n.requested_bytes, mb)
                if mpb > 0:
                    self.assertGreaterEqual(n.peak_bytes, mpb)
                if mrb > 0:
                    self.assertGreaterEqual(n.residual_bytes, mrb)
                if mob > 0:
                    self.assertGreaterEqual(n.output_bytes, mob)
                check_min(n.children, mm, mam, mcm, mb, mpb, mrb, mob)

        with session.Session() as sess:
            x = lib.BuildSmallModel()
            sess.run(variables.global_variables_initializer())
            run_meta = config_pb2.RunMetadata()
            _ = sess.run(x,
                         options=config_pb2.RunOptions(
                             trace_level=config_pb2.RunOptions.FULL_TRACE),
                         run_metadata=run_meta)

            min_val = random.randint(0, 10000)

            opts = builder(builder.time_and_memory(
                min_micros=min_val)).with_empty_output().build()
            tfprof_node = model_analyzer.profile(sess.graph,
                                                 run_meta=run_meta,
                                                 options=opts)
            check_min(tfprof_node.children, mm=min_val)

            opts = builder(
                builder.time_and_memory(min_accelerator_micros=min_val)
            ).with_empty_output().build()
            tfprof_node = model_analyzer.profile(sess.graph,
                                                 run_meta=run_meta,
                                                 options=opts)
            check_min(tfprof_node.children, mam=min_val)

            opts = builder(builder.time_and_memory(
                min_cpu_micros=min_val)).with_empty_output().build()
            tfprof_node = model_analyzer.profile(sess.graph,
                                                 run_meta=run_meta,
                                                 options=opts)
            check_min(tfprof_node.children, mcm=min_val)

            opts = builder(builder.time_and_memory(
                min_bytes=min_val)).with_empty_output().build()
            tfprof_node = model_analyzer.profile(sess.graph,
                                                 run_meta=run_meta,
                                                 options=opts)
            check_min(tfprof_node.children, mb=min_val)

            opts = builder(builder.time_and_memory(
                min_peak_bytes=min_val)).with_empty_output().build()
            tfprof_node = model_analyzer.profile(sess.graph,
                                                 run_meta=run_meta,
                                                 options=opts)
            check_min(tfprof_node.children, mpb=min_val)

            opts = builder(builder.time_and_memory(
                min_residual_bytes=min_val)).with_empty_output().build()
            tfprof_node = model_analyzer.profile(sess.graph,
                                                 run_meta=run_meta,
                                                 options=opts)
            check_min(tfprof_node.children, mrb=min_val)

            opts = builder(builder.time_and_memory(
                min_output_bytes=min_val)).with_empty_output().build()
            tfprof_node = model_analyzer.profile(sess.graph,
                                                 run_meta=run_meta,
                                                 options=opts)
            check_min(tfprof_node.children, mob=min_val)
Example #13
    def testSelectEverythingDetail(self):
        ops.reset_default_graph()
        dev = '/gpu:0' if test.is_gpu_available() else '/cpu:0'
        outfile = os.path.join(test.get_temp_dir(), 'dump')
        opts = (builder(
            builder.trainable_variables_parameter()).with_file_output(
                outfile).with_accounted_types(['.*']).select([
                    'micros', 'bytes', 'params', 'float_ops', 'occurrence',
                    'device', 'op_types', 'input_shapes'
                ]).build())

        config = config_pb2.ConfigProto()
        with session.Session(config=config) as sess, ops.device(dev):
            x = lib.BuildSmallModel()

            sess.run(variables.global_variables_initializer())
            run_meta = config_pb2.RunMetadata()
            _ = sess.run(x,
                         options=config_pb2.RunOptions(
                             trace_level=config_pb2.RunOptions.FULL_TRACE),
                         run_metadata=run_meta)

            model_analyzer.profile(sess.graph, run_meta, options=opts)

            with gfile.Open(outfile, 'r') as f:
                # pylint: disable=line-too-long
                outputs = f.read().split('\n')

                self.assertEqual(
                    outputs[0],
                    'node name | # parameters | # float_ops | requested bytes | total execution time | accelerator execution time | cpu execution time | assigned devices | op types | op count (run|defined) | input shapes'
                )
                for o in outputs[1:]:
                    if o.find('Conv2D ') > 0:
                        metrics = o[o.find('(') + 1:o.find(')')].split(',')
                        # Make sure time is profiled.
                        gap = 1 if test.is_gpu_available() else 2
                        for i in range(3, 6, gap):
                            mat = re.search('(.*)us/(.*)us', metrics[i])
                            self.assertGreater(float(mat.group(1)), 0.0)
                            self.assertGreater(float(mat.group(2)), 0.0)
                        # Make sure device is profiled.
                        if test.is_gpu_available():
                            self.assertTrue(metrics[6].find('gpu') > 0)
                            self.assertFalse(metrics[6].find('cpu') > 0)
                        else:
                            self.assertFalse(metrics[6].find('gpu') > 0)
                            self.assertTrue(metrics[6].find('cpu') > 0)
                        # Make sure float_ops is profiled.
                        mat = re.search('(.*)k/(.*)k flops',
                                        metrics[1].strip())
                        self.assertGreater(float(mat.group(1)), 0.0)
                        self.assertGreater(float(mat.group(2)), 0.0)
                        # Make sure op_count is profiled.
                        self.assertEqual(metrics[8].strip(), '1/1|1/1')
                        # Make sure input_shapes is profiled.
                        self.assertEqual(metrics[9].strip(),
                                         '0:2x6x6x3|1:3x3x3x6')

                    if o.find('DW (3x3x3x6') > 0:
                        metrics = o[o.find('(') + 1:o.find(')')].split(',')
                        mat = re.search('(.*)/(.*) params', metrics[1].strip())
                        self.assertGreater(float(mat.group(1)), 0.0)
                        self.assertGreater(float(mat.group(2)), 0.0)