Example #1
def test_linearize():
  nodes = make_caterpillar_graph(5)
  linearize.linearize()

  sess = create_session()

  import memory_util
  memory_util.vlog(1)
  with memory_util.capture_stderr() as stderr:
    sess.run(nodes[-1].op)
  memory_util.print_memory_timeline(stderr, ignore_less_than_bytes=1000)
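Both memory_util calls above hinge on TensorFlow's C++ VLOG output: memory_util.vlog(1) presumably raises the verbosity so allocator events are written to stderr, where capture_stderr can record them. A minimal sketch of the same effect by hand, assuming the standard TF_CPP_MIN_VLOG_LEVEL environment variable (which must be set before TensorFlow is first imported):

import os
os.environ["TF_CPP_MIN_VLOG_LEVEL"] = "1"  # raise the C++ VLOG level
import tensorflow as tf  # import only after the variable is set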
Example #2
def run_and_analyze(in_shape):
    from functools import reduce  # reduce is no longer a builtin in Python 3

    with memory_util.capture_stderr() as stderr:
        res = sess.run(y, feed_dict={x: np.random.randn(*in_shape)})

    print(res.shape)

    expected_mem = reduce(lambda i, j: i * j, in_shape)  # inputs
    expected_mem += kernel_size**2 * in_channels * out_channels  # weights
    expected_mem += reduce(lambda i, j: i * j, res.shape)  # outputs
    expected_mem *= 4  # 4 bytes per float

    peak_mem = memory_util.peak_memory(stderr)

    print('expected mem usage (MB): ', expected_mem / BYTES_PER_MB)
    print('peak     mem usage (MB): ', peak_mem / BYTES_PER_MB)
    print('peak:expected mem ratio: ', peak_mem / float(expected_mem))
    memory_util.print_memory_timeline(stderr)  # prints its own output; returns None
    memory_util.plot_memory_timeline(plt, stderr)

    import ipdb
    ipdb.set_trace()  # drop into the debugger to inspect interactively
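To make the expected_mem estimate concrete, here is a worked instance with hypothetical sizes (a 3x3 convolution taking 16 channels to 32, stride 1, "same" padding; none of these numbers come from the example itself):

in_shape = (64, 32, 32, 16)  # NHWC input
kernel_size, in_channels, out_channels = 3, 16, 32
out_shape = (64, 32, 32, 32)  # NHWC output of the conv

floats = 64 * 32 * 32 * 16  # inputs:  1,048,576
floats += kernel_size**2 * in_channels * out_channels  # weights: 4,608
floats += 64 * 32 * 32 * 32  # outputs: 2,097,152
print(floats * 4 / 2**20)  # ~12.0 MB at 4 bytes per float

A peak:expected ratio well above 1 then means the runtime held intermediate buffers beyond this lower bound.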
Example #3
import numpy as np
import tensorflow as tf
import memory_util


def create_session():
    config = tf.ConfigProto(
        log_device_placement=False,
        graph_options=tf.GraphOptions(optimizer_options=tf.OptimizerOptions(
            opt_level=tf.OptimizerOptions.L0)))  # L0: graph optimizations off
    return tf.InteractiveSession(config=config)


node_mbs = 1
length = 5

dtype = np.float32
n = node_mbs * 250000
a0_ = tf.ones((n, ), dtype=dtype)
a0 = tf.Variable(a0_, name="a0")
a = a0
for i in range(1, length):
    name = "a" + str(i)
    a = tf.tanh(a, name=name)

grad = tf.gradients([a], [a0])[0]
sess = create_session()

sess.run(tf.global_variables_initializer())

with memory_util.capture_stderr() as stderr:
    sess.run(grad.op)

peak_memory = memory_util.peak_memory(stderr)
memory_util.print_memory_timeline(stderr)
print("Peak memory: %d" % (peak_memory, ))
Example #4
def main():
  import memory_util
  memory_util.vlog(1)   # vlog=2 on GPU machine will spam gpu "polling" msgs
  
  tf.reset_default_graph()
  n = 3

  # TODO: fix edge case with n=2
  nodes = make_chain_tanh(n)
  a0 = nodes[0]
  a = nodes[-1]
  #grad = memory_saving_gradients.gradients_memory([a], [a0])[0]
  grad = tf.gradients(a, [a0])[0]

  sess = create_session()
  sess.run(tf.global_variables_initializer())

#  feed_dict = {a0, 
  with memory_util.capture_stderr() as stderr:
    sess.run(grad.op)

  peak_memory1 = memory_util.peak_memory(stderr.getvalue())
  # 20 mem used with following tensors picked automatically as bottlenecks
  # ['a10:0', 'a19:0', 'a28:0', 'a37:0', 'a46:0', 'a55:0', 'a64:0', 'a73:0',
  # 'a82:0', 'a91:0']

  # method 2
  mem_op = tf.contrib.memory_stats.MaxBytesInUse()
  peak_memory2 = sess.run(mem_op)

  # method 3
  run_metadata = tf.RunMetadata()
  run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

  sess.run(grad.op, run_metadata=run_metadata, options=run_options)
  print(run_metadata)
  peak_memory3 = memory_util.peak_from_metadata(run_metadata)['gpu']
  print(peak_memory1, "VLOG_MEMORY")
  print(peak_memory2, "MaxBytesInUse")
  print(peak_memory3, "metadata")

  cpu, gpu = memory_util._retrieve_cpu_gpu_stats(run_metadata)
  bytes_in_use_cpu = [node.memory[0].allocator_bytes_in_use for node in cpu]
  bytes_in_use_gpu = [node.memory[0].allocator_bytes_in_use for node in gpu]

  # fall back to CPU stats when no GPU nodes were profiled
  peak_memory4 = max(bytes_in_use_gpu if gpu else bytes_in_use_cpu)
  print(peak_memory4, "metadata max")

  # fourth way would parse "allocator_bytes_in_use" out of the printed metadata:
  # node_stats {
  #   node_name: "Square"
  #   all_start_micros: 1509664297214870
  #   op_start_rel_micros: 4
  #   op_end_rel_micros: 115
  #   all_end_rel_micros: 136
  #   memory {
  #     allocator_name: "GPU_0_bfc"
  #     allocator_bytes_in_use: 6013952
  #   }
  # }
  expected_peak = 3 * 10**6 
  util.report_memory(peak_memory1, expected_peak)

  assert abs(peak_memory3 - expected_peak) < 10000, "Difference too large."
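The "fourth way" from the closing comment can be written out directly against the RunMetadata protobuf, whose step_stats layout is exactly what the comment shows. A minimal sketch (the helper name is hypothetical, not part of memory_util):

def peak_bytes_from_metadata(run_metadata, device_substr="gpu"):
  """Max allocator_bytes_in_use over profiled nodes on matching devices."""
  peak = 0
  for dev_stats in run_metadata.step_stats.dev_stats:
    if device_substr not in dev_stats.device.lower():
      continue  # skip devices we were not asked about
    for node_stats in dev_stats.node_stats:
      for mem in node_stats.memory:
        peak = max(peak, mem.allocator_bytes_in_use)
  return peak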