def test_chain():
  """Runs regular chain gradient, makes sure memory usage makes sense."""

  tf.reset_default_graph()
  tf_dev = tf.device('/cpu:0')
  tf_dev.__enter__()
  

  n = 5
  nodes = make_chain_tanh(n)
  a0 = nodes[0]
  a = nodes[-1]
  with tf.control_dependencies([a]):
      grad = tf.gradients([a], [a0])[0]

  #linearize_lib.linearize()

  sess = create_session()
  sessrun(tf.global_variables_initializer())

  sessrun(grad.op)

  peak_memory = cpu_peak()
  expected_peak = n*10**6  # one ~1 MB unit per node, including the "loss" tensor

  assert peak_memory > 2e6

  util.report_memory(peak_memory, expected_peak)
  if not REMOVE_ASSERTS:
    assert (peak_memory - expected_peak) < 1e6+10000, "Difference too large."
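
# NOTE: make_chain_tanh comes from the shared test utilities. The definition
# below is only an assumed sketch (guarded so it never shadows the real
# helper), included to make the memory arithmetic concrete: each node holds
# ~250k float32 values, i.e. roughly 10**6 bytes, which is the "1 unit" used
# in the expected_peak calculations.
if "make_chain_tanh" not in globals():
  def make_chain_tanh(length=100, name_prefix="a", node_mbs=1):
    """Chain of `length` nodes: a ~1 MB Variable followed by tanh ops."""
    n = node_mbs * 250000  # 250k float32 values ~= 1 MB per activation
    a0 = tf.Variable(tf.ones((n,), dtype=tf.float32), name=name_prefix + "00")
    nodes = [a0]
    a = a0
    for i in range(1, length):
      a = tf.tanh(a, name="%s%02d" % (name_prefix, i))
      nodes.append(a)
    return nodes
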
def test_resnet_rewrite_memory(linearize=False):
  tf.reset_default_graph()
  tf_dev = tf.device('/cpu:0')
  tf_dev.__enter__()
  
  n = 6   # use n>5 (see test_chain_memory)

  nodes = make_resnet(n)
  a0 = nodes[0]
  a = nodes[-1]


  checkpoints = [nodes[3], nodes[5]] # ['a03_add:0', 'a05_add:0']
  grad = memory_saving_gradients.gradients_memory([a], [a0])[0]
  if linearize:
    added = linearize_lib.linearize(grad.op)

  sess = create_session()
  sessrun(tf.global_variables_initializer())

  sessrun(grad.op)

  peak_memory = cpu_peak()
  # 1 for activation of each tanh node + 1 for initial backprop node
  # + 1 temporary memory for computing the adds,
  # -1 for discarding, then recomputing a1_tanh
  expected_peak = (n+1+1-1)*10**6 
  util.report_memory(peak_memory, expected_peak)

  if not REMOVE_ASSERTS:
    assert (peak_memory - expected_peak) < 1.1*10**6, "Difference too large."
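
# NOTE: make_resnet is also a shared test helper; this is an assumed sketch
# (guarded so it never shadows the real helper) matching the node names
# referenced above ('a03_add:0', 'a05_add:0'): each step feeds a tanh branch
# back into the trunk via an add, so every node is again ~1 MB.
if "make_resnet" not in globals():
  def make_resnet(length=100, name_prefix="a", node_mbs=1):
    """Residual-style chain: a ~1 MB Variable, then repeated tanh + add."""
    n = node_mbs * 250000
    a0 = tf.Variable(tf.ones((n,), dtype=tf.float32), name=name_prefix + "00")
    nodes = [a0]
    a = a0
    for i in range(1, length):
      name = "%s%02d" % (name_prefix, i)
      nonlin = tf.tanh(a, name=name + "_tanh")
      a = tf.add(a, nonlin, name=name + "_add")
      nodes.append(a)
    return nodes
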
def test_resnet_rewrite_tarjan(linearize=False):
  tf.reset_default_graph()
  tf_dev = tf.device('/cpu:0')
  tf_dev.__enter__()
  
  n = 6   # use n>5 (see test_chain_memory)

  nodes = make_resnet(n)
  a0 = nodes[0]
  a = nodes[-1]


  checkpoints = [nodes[3], nodes[5]] # ['a03_add:0', 'a05_add:0']
  grad = memory_saving_gradients.gradients_tarjan([a], [a0])[0]
  if linearize:
    added = linearize_lib.linearize(grad.op)

  sess = create_session()
  sessrun(tf.global_variables_initializer())

  sessrun(grad.op)

  peak_memory = cpu_peak()
  expected_peak = 4e6
  util.report_memory(peak_memory, expected_peak)

  if not REMOVE_ASSERTS:
    assert (peak_memory - expected_peak) < 1.1*10**6, "Difference too large."
def test_long_resnet():
  tf.reset_default_graph()
  tf_dev = tf.device('/cpu:0')
  tf_dev.__enter__()
  
  n = 100
  nodes = make_resnet(n)
  a0 = nodes[0]
  a = nodes[-1]

  with tf.control_dependencies([a]):
      grad = tf.gradients([a], [a0])[0]

  sess = create_session()
  sessrun(tf.global_variables_initializer())

  sessrun(grad.op)

  peak_memory = cpu_peak()
  # 1 for activation of each tanh node + 1 for initial backprop node
  # + 1 temporary memory for computing the adds
  expected_peak = (n+1)*10**6 
  util.report_memory(peak_memory, expected_peak)

  if not REMOVE_ASSERTS:
    assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
def test_long_resnet_rewrite_tarjan(linearize=False):
  tf.reset_default_graph()
  tf_dev = tf.device('/cpu:0')
  tf_dev.__enter__()
  
  n = 100
  nodes = make_resnet(n)
  a0 = nodes[0]
  a = nodes[-1]

  start_time = time.time()
  with tf.control_dependencies([a]):
    grad = memory_saving_gradients.gradients_tarjan([a], [a0])[0]

  start_time = time.time()
  if linearize:
    added = linearize_lib.linearize(grad.op)

  sess = create_session()
  sessrun(tf.global_variables_initializer())

  sessrun(grad.op)

  peak_memory = cpu_peak()
  # 20 mem used with following tensors picked automatically
  # ['a10_add:0', 'a19_add:0', 'a28_add:0', 'a37_add:0', 'a46_add:0',
  # 'a55_add:0', 'a64_add:0', 'a73_add:0', 'a82_add:0', 'a91_add:0']

  expected_peak = 18 * 10**6 
  util.report_memory(peak_memory, expected_peak)

  if not REMOVE_ASSERTS:
    assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
def test_long_chain_memory(linearize=False):
  """Like test_chain, but use automatic rewriting with checkpoints="memory" 
  strategy."""

  tf.reset_default_graph()
  tf_dev = tf.device('/cpu:0')
  tf_dev.__enter__()
  
  n = 100

  nodes = make_chain_tanh_constant(n)
  a0 = nodes[0]
  a = nodes[-1]
  tf.add_to_collection("checkpoints", nodes[10])
  tf.add_to_collection("checkpoints", nodes[20])
  #grad = memory_saving_gradients.gradients_collection([a], [a0])[0]
  grad = memory_saving_gradients.gradients_memory([a], [a0])[0]

  sess = create_session()
  sessrun(tf.global_variables_initializer())

  sessrun(grad.op)

  if linearize:
    added = linearize_lib.linearize()

  peak_memory = cpu_peak()
  # 20 mem used with following tensors picked automatically as bottlenecks
  # ['a10:0', 'a19:0', 'a28:0', 'a37:0', 'a46:0', 'a55:0', 'a64:0', 'a73:0',
  # 'a82:0', 'a91:0']
  expected_peak = 20 * 10**6 
  util.report_memory(peak_memory, expected_peak)

  if not REMOVE_ASSERTS:
    assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
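
# NOTE: make_chain_tanh_constant is assumed to be the same construction as
# make_chain_tanh, except the chain starts from a constant rather than a
# Variable, so the first node's memory is allocated inside the traced run call
# and shows up in the peak measurement. A guarded fallback sketch under that
# assumption:
if "make_chain_tanh_constant" not in globals():
  def make_chain_tanh_constant(length=100, name_prefix="a", node_mbs=1):
    """Chain of `length` nodes: a ~1 MB constant followed by tanh ops."""
    n = node_mbs * 250000
    a0 = tf.ones((n,), dtype=tf.float32, name=name_prefix + "00")
    nodes = [a0]
    a = a0
    for i in range(1, length):
      a = tf.tanh(a, name="%s%02d" % (name_prefix, i))
      nodes.append(a)
    return nodes
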
def test_long_chain_tarjan(linearize=False):
  """Like test_chain, but use automatic rewriting with checkpoints="tarjan" 
  strategy."""

  tf.reset_default_graph()
  tf_dev = tf.device('/cpu:0')
  tf_dev.__enter__()
  
  n = 100

  nodes = make_chain_tanh_constant(n)
  a0 = nodes[0]
  a = nodes[-1]
  grad = memory_saving_gradients.gradients_tarjan([a], [a0])[0]

  sess = create_session()
  sessrun(tf.global_variables_initializer())

  sessrun(grad.op)

  if linearize:
    added = linearize_lib.linearize()

  peak_memory = cpu_peak()
  # checkpoints picked automatically:
  #  a09:0, a19:0, a29:0, a39:0, a49:0, a58:0, a68:0, a78:0, a88:0, a97:0
  expected_peak = 18e6
  util.report_memory(peak_memory, expected_peak)

  # todo: remove "REMOVE_ASSERTS"
  if not REMOVE_ASSERTS:
    assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
def test_chain_tarjan(linearize=False):
  """Like test_chain, but use automatic rewriting with checkpoints="tarjan"
  strategy."""

  tf.reset_default_graph()
  tf_dev = tf.device('/cpu:0')
  tf_dev.__enter__()
  
  n = 6  # for n=5, only choice of a2 saves memory, and alg picks a3
         # hence use n>5 to avoid this edge condition

  nodes = util.make_chain_tanh_fill(n)
  a0 = nodes[0]
  a = nodes[-1]
  grad = memory_saving_gradients.gradients_tarjan([a], [a0])[0]

  sess = create_session()
  sessrun(tf.global_variables_initializer())

  sessrun(grad.op)

  if linearize:
    linearize_lib.linearize()

  peak_memory = cpu_peak()
  expected_peak = 5e6  # originally needed 7 units, now a3,a5 are recomputed
  util.report_memory(peak_memory, expected_peak)
  if not REMOVE_ASSERTS:
    assert (peak_memory - expected_peak) < 1e5, "Difference too large."
def test_chain_memory(linearize=False):
  """Like test_chain, but use automatic rewriting with checkpoints="memory" strat."""

  tf.reset_default_graph()
  tf_dev = tf.device('/cpu:0')
  tf_dev.__enter__()
  
  n = 6  # for n=5, only choice of a2 saves memory, and alg picks a3
         # hence use n>5 to avoid this edge condition

  nodes = make_chain_tanh_constant(n)
  a0 = nodes[0]
  a = nodes[-1]
  grad = memory_saving_gradients.gradients_memory([a], [a0])[0]

  sess = create_session()
  sessrun(tf.global_variables_initializer())

  sessrun(grad.op)

  if linearize:
    linearize_lib.linearize()

  peak_memory = cpu_peak()
  expected_peak = (n+1-1)*10**6  # 1 for each node + 1 for generated - 1 saved
                                 # "loss" tensor
  util.report_memory(peak_memory, expected_peak)
  if not REMOVE_ASSERTS:
    assert (peak_memory - expected_peak) < 10000, "Difference too large."
def test_dual_chain_rewrite():
  """Runs regular chain gradient, makes sure memory usage makes sense."""


  tf.reset_default_graph()
  tf_dev = tf.device('/cpu:0')
  tf_dev.__enter__()
  
  n = 5
  nodes1 = make_chain_tanh_constant(n, "a")
  nodes2 = make_chain_tanh_constant(n, "b")

  a0,b0 = nodes1[0], nodes2[0]
  a, b = nodes1[-1], nodes2[-1]

  grad = memory_saving_gradients.gradients([a+b], [a0, b0],
                                           checkpoints=[nodes1[2], nodes2[2]])

  sess = create_session()
  sessrun(tf.global_variables_initializer())

  sessrun([grad[0].op, grad[1].op])

  peak_memory = cpu_peak()
  # normal usage comes from 2*n nodes + default ygrad node + 2 gradient nodes
  # here we save 2 units of memory by dropping 2 activations (a1/b1) temporarily
  # also, this moves "peak memory" scenario lower down the chain
  # where the final addition node activations are no longer needed (another -1)
  expected_peak = (2*(n-1)+1)*10**6 
  util.report_memory(peak_memory, expected_peak)

  # since the two chains are independent, node scheduling varies between runs,
  # so allow a few MB of slack
  if not REMOVE_ASSERTS:
    assert (peak_memory - expected_peak) < 4.1e6, "Difference too large."
def test_dual_chain():
  """Runs regular chain gradient, makes sure memory usage makes sense."""


  tf.reset_default_graph()
  tf_dev = tf.device('/cpu:0')
  tf_dev.__enter__()
  
  n = 5
  nodes1 = make_chain_tanh_constant(n, "a")
  nodes2 = make_chain_tanh_constant(n, "b")

  a0,b0 = nodes1[0], nodes2[0]
  a, b = nodes1[-1], nodes2[-1]
  grad = tf.gradients([a+b], [a0, b0])

  sess = create_session()
  sessrun(tf.global_variables_initializer())

  sessrun([grad[0].op, grad[1].op])

  peak_memory = cpu_peak()
  expected_peak = (2*n+1)*10**6
  util.report_memory(peak_memory, expected_peak)

  # 1 unit of memory slack since parallel computation chains add
  # scheduling variability
  if not REMOVE_ASSERTS:
    assert (peak_memory - expected_peak) < 1.1*10**9, "Difference too large."
def test_chain_rewrite(linearize=False):
  """Take chain of length 5, save 2 nodes, make sure 2 units of RAM is
  saved."""

  tf.reset_default_graph()
  tf_dev = tf.device('/cpu:0')
  tf_dev.__enter__()
  
  n = 5

  a0, a1, a2, a3, a4 = make_chain_tanh(n)
  grad = memory_saving_gradients.gradients([a4], [a0], checkpoints=[a1,a3])[0]
  expected_peak = (n+1-2)*10**6  # subtract 2 since we recompute 2

  sess = create_session()
  sessrun(tf.global_variables_initializer())

  sessrun(grad.op)
  if linearize:
    linearize_lib.linearize()

  peak_memory = cpu_peak()
  util.report_memory(peak_memory, expected_peak)

  if not REMOVE_ASSERTS:
    assert (peak_memory - expected_peak) < 1e6+10000, "Difference too large."
def test_chain_rewrite_save_first():
  """Take chain of length 5, save first node."""

  tf.reset_default_graph()
  tf_dev = tf.device('/cpu:0')
  tf_dev.__enter__()
  
  n = 5

  a0, a1, a2, a3, a4 = make_chain_tanh_constant(n)
  grad = memory_saving_gradients.gradients([a4], [a0], checkpoints=[a1, a3])[0]
  expected_peak = (n+1-2)*10**6 

  sess = create_session()
  sessrun(tf.global_variables_initializer())

  sessrun(grad.op)

  peak_memory = cpu_peak()
  util.report_memory(peak_memory, expected_peak)

  if not REMOVE_ASSERTS:
    assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
def test_chain_rewrite_save_one_before_last():
  """Take chain of length 5, save a single node (a2)."""

  tf.reset_default_graph()
  tf_dev = tf.device('/cpu:0')
  tf_dev.__enter__()

  n = 5

  a0, a1, a2, a3, a4 = make_chain_tanh_constant(n)
  grad = memory_saving_gradients.gradients([a4], [a0], checkpoints=[a2])[0]
  expected_peak = (n+1-2)*10**6

  sess = create_session()
  sessrun(tf.global_variables_initializer())

  sessrun(grad.op)

  peak_memory = cpu_peak()
  util.report_memory(peak_memory, expected_peak)

  if not REMOVE_ASSERTS:
    assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
def main():
  import memory_util
  memory_util.vlog(1)   # vlog=2 on GPU machine will spam gpu "polling" msgs
  
  tf.reset_default_graph()
  n = 3

  # TODO: fix edge case with n=2
  nodes = make_chain_tanh(n)
  a0 = nodes[0]
  a = nodes[-1]
  #grad = memory_saving_gradients.gradients_memory([a], [a0])[0]
  grad = tf.gradients(a, [a0])[0]

  sess = create_session()
  sess.run(tf.global_variables_initializer())

#  feed_dict = {a0, 
  with memory_util.capture_stderr() as stderr:
    sess.run(grad.op)

  # method 1: parse peak memory from the captured VLOG messages
  peak_memory1 = memory_util.peak_memory(stderr.getvalue())

  # method 2
  mem_op = tf.contrib.memory_stats.MaxBytesInUse()
  peak_memory2 = sess.run(mem_op)

  # method 3
  run_metadata = tf.RunMetadata()
  run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)


  sess.run(grad.op, run_metadata=run_metadata, options=run_options,)
  print(run_metadata)
  peak_memory3 = memory_util.peak_from_metadata(run_metadata)['gpu']
  print(peak_memory1, "VLOG_MEMORY")
  print(peak_memory2, "MaxBytesInUse")
  print(peak_memory3, "metadata")

  cpu, gpu = memory_util._retrieve_cpu_gpu_stats(run_metadata)
  bytes_in_use_cpu, bytes_in_use_gpu = [], []
  if cpu:
    bytes_in_use_cpu = [node.memory[0].allocator_bytes_in_use for node in cpu]
  if gpu:
    bytes_in_use_gpu = [node.memory[0].allocator_bytes_in_use for node in gpu]

  # fall back to CPU stats if no GPU allocator stats were recorded
  peak_memory4 = max(bytes_in_use_gpu) if bytes_in_use_gpu else max(bytes_in_use_cpu)
  print(peak_memory4, "metadata max")

  # a fourth way would parse "allocator_bytes_in_use" directly from node_stats:
  # node_stats {
  #   node_name: "Square"
  #   all_start_micros: 1509664297214870
  #   op_start_rel_micros: 4
  #   op_end_rel_micros: 115
  #   all_end_rel_micros: 136
  #   memory {
  #     allocator_name: "GPU_0_bfc"
  #     allocator_bytes_in_use: 6013952
  #   }
  # }
  expected_peak = 3 * 10**6 
  util.report_memory(peak_memory1, expected_peak)

  assert abs(peak_memory3 - expected_peak) < 10000, "Difference too large."
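
# NOTE: create_session(), sessrun() and cpu_peak() are provided by the test
# harness; the guarded fallback below is an assumed sketch, shown only to make
# the measurement pattern explicit: disable graph rewrites so activations are
# not folded away, run every fetch under FULL_TRACE, keep the RunMetadata, and
# report the largest allocator_bytes_in_use seen on a CPU device.
if "cpu_peak" not in globals():
  _last_run_metadata = None

  def create_session():
    """Session with optimizer rewrites disabled, keeping per-node allocations visible."""
    opt = tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0)
    config = tf.ConfigProto(graph_options=tf.GraphOptions(optimizer_options=opt))
    return tf.InteractiveSession(config=config)

  def sessrun(*fetches):
    """Runs fetches in the default session under full tracing, stashing the metadata."""
    global _last_run_metadata
    _last_run_metadata = tf.RunMetadata()
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    return tf.get_default_session().run(fetches, options=options,
                                        run_metadata=_last_run_metadata)

  def cpu_peak():
    """Peak allocator_bytes_in_use across CPU devices in the last traced run."""
    peak = 0
    for dev_stats in _last_run_metadata.step_stats.dev_stats:
      if "cpu" not in dev_stats.device.lower():
        continue
      for node_stats in dev_stats.node_stats:
        for mem in node_stats.memory:
          peak = max(peak, mem.allocator_bytes_in_use)
    return peak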