def test_chain():
    """Plain tf.gradients over a length-5 tanh chain; peak memory should be ~n MB."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    chain_len = 5
    chain = make_chain_tanh(chain_len)
    first, last = chain[0], chain[-1]
    with tf.control_dependencies([last]):
        grad = tf.gradients([last], [first])[0]
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    observed = cpu_peak()
    predicted = chain_len * 10**6
    # sanity check: the "loss" tensor alone should account for more than 2MB
    assert observed > 2e6
    util.report_memory(observed, predicted)
    if not REMOVE_ASSERTS:
        assert (observed - predicted) < 1e6 + 10000, "Difference too large."
def test_resnet_rewrite_tarjan(linearize=False):
    """Rewrite a 6-block resnet gradient with the "tarjan" checkpoint strategy.

    Checks that peak CPU memory lands near 4 MB.

    Args:
      linearize: if True, also linearize execution order of the gradient graph.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 6  # use n>5 (see test_chain_memory)
    nodes = make_resnet(n)
    a0 = nodes[0]
    a = nodes[-1]
    # gradients_tarjan selects checkpoints automatically; fix: the unused
    # hard-coded list [nodes[3], nodes[5]] (['a03_add:0', 'a05_add:0'])
    # and the unused `added` binding were removed.
    grad = memory_saving_gradients.gradients_tarjan([a], [a0])[0]
    if linearize:
        linearize_lib.linearize(grad.op)
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    peak_memory = cpu_peak()
    expected_peak = 4e6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1 * 10**6, "Difference too large."
def test_chain():
    """Baseline chain-gradient memory check (no gradient rewriting)."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 5
    nodes = make_chain_tanh(n)
    a0, a = nodes[0], nodes[-1]
    with tf.control_dependencies([a]):
        grad = tf.gradients([a], [a0])[0]
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    peak = cpu_peak()
    target = n * 10**6
    assert peak > 2e6  # the "loss" tensor alone exceeds 2MB
    util.report_memory(peak, target)
    if not REMOVE_ASSERTS:
        assert (peak - target) < 1e6 + 10000, "Difference too large."
def test_resnet_rewrite_memory(linearize=False):
    """Rewrite a 6-block resnet gradient with the "memory" checkpoint strategy.

    Args:
      linearize: if True, also linearize execution order of the gradient graph.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 6  # use n>5 (see test_chain_memory)
    nodes = make_resnet(n)
    a0 = nodes[0]
    a = nodes[-1]
    # gradients_memory chooses its own bottlenecks (['a03_add:0', 'a05_add:0']);
    # fix: removed the unused hard-coded `checkpoints` local and the unused
    # `added` binding.
    grad = memory_saving_gradients.gradients_memory([a], [a0])[0]
    if linearize:
        linearize_lib.linearize(grad.op)
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    peak_memory = cpu_peak()
    # 1 for activation of each tanh node + 1 for initial backprop node
    # + 1 temporary memory for computing the adds,
    # -1 for discarding, then recomputing a1_tanh
    expected_peak = (n + 1 + 1 - 1) * 10**6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1 * 10**6, "Difference too large."
def test_resnet_rewrite_tarjan(linearize=False):
    """Resnet gradient via the "tarjan" checkpoint strategy; expect ~4MB peak."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    depth = 6  # depth > 5 avoids the edge case noted in test_chain_memory
    blocks = make_resnet(depth)
    first, last = blocks[0], blocks[-1]
    # ['a03_add:0', 'a05_add:0'] — NOTE(review): unused; tarjan picks its own
    checkpoints = [blocks[3], blocks[5]]
    grad = memory_saving_gradients.gradients_tarjan([last], [first])[0]
    if linearize:
        added = linearize_lib.linearize(grad.op)
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    observed = cpu_peak()
    predicted = 4e6
    util.report_memory(observed, predicted)
    if not REMOVE_ASSERTS:
        assert (observed - predicted) < 1.1 * 10**6, "Difference too large."
def test_long_resnet():
    """Unrewritten gradient of a 100-block resnet; peak should be ~(n+1) MB."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    depth = 100
    blocks = make_resnet(depth)
    first, last = blocks[0], blocks[-1]
    with tf.control_dependencies([last]):
        grad = tf.gradients([last], [first])[0]
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    observed = cpu_peak()
    # one unit per tanh activation, plus one for the initial backprop node,
    # plus temporary memory for computing the adds
    predicted = (depth + 1) * 10**6
    util.report_memory(observed, predicted)
    if not REMOVE_ASSERTS:
        assert (observed - predicted) < 1.1e6, "Difference too large."
def test_long_resnet_rewrite_tarjan(linearize=False):
    """100-block resnet rewritten with "tarjan" checkpoints; ~18 MB peak.

    Args:
      linearize: if True, linearize execution order of the gradient graph.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 100
    nodes = make_resnet(n)
    a0 = nodes[0]
    a = nodes[-1]
    # fix: two dead `start_time = time.time()` assignments removed — the
    # value was never read. Also dropped the unused `added` binding.
    with tf.control_dependencies([a]):
        grad = memory_saving_gradients.gradients_tarjan([a], [a0])[0]
    if linearize:
        linearize_lib.linearize(grad.op)
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    peak_memory = cpu_peak()
    # 20 mem used with following tensors picked automatically
    # ['a10_add:0', 'a19_add:0', 'a28_add:0', 'a37_add:0', 'a46_add:0',
    #  'a55_add:0', 'a64_add:0', 'a73_add:0', 'a82_add:0', 'a91_add:0']
    expected_peak = 18 * 10**6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
def test_long_chain_memory(linearize=False):
    """Like test_chain, but use automatic rewriting with checkpoints="memory"
    strategy on a 100-node chain.

    Args:
      linearize: if True, linearize the graph after the gradient run.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 100
    nodes = make_chain_tanh_constant(n)
    a0 = nodes[0]
    a = nodes[-1]
    # NOTE(review): the collection entries only matter for the (commented out)
    # gradients_collection path; gradients_memory picks bottlenecks itself.
    tf.add_to_collection("checkpoints", nodes[10])
    tf.add_to_collection("checkpoints", nodes[20])
    #grad = memory_saving_gradients.gradients_collection([a], [a0])[0]
    grad = memory_saving_gradients.gradients_memory([a], [a0])[0]
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    # fix: dropped the unused `added` binding. NOTE(review): linearizing after
    # the gradient run cannot affect the measurement above — confirm intent.
    if linearize:
        linearize_lib.linearize()
    peak_memory = cpu_peak()
    # 20 mem used with following tensors picked automatically as bottlenecks
    # ['a10:0', 'a19:0', 'a28:0', 'a37:0', 'a46:0', 'a55:0', 'a64:0', 'a73:0',
    #  'a82:0', 'a91:0']
    expected_peak = 20 * 10**6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
def test_long_chain_tarjan(linearize=False):
    """Like test_chain, but use automatic rewriting with checkpoints="tarjan"
    strategy on a 100-node chain.

    Args:
      linearize: if True, linearize the graph after the gradient run.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 100
    nodes = make_chain_tanh_constant(n)
    a0 = nodes[0]
    a = nodes[-1]
    grad = memory_saving_gradients.gradients_tarjan([a], [a0])[0]
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    # fix: dropped the unused `added` binding.
    if linearize:
        linearize_lib.linearize()
    peak_memory = cpu_peak()
    # points picked
    # a09:0,19:0,a29:0,a39:0,a49:0,a58:0,a68:0,a78:0,a88:0,a97:0
    expected_peak = 18e6
    util.report_memory(peak_memory, expected_peak)
    # todo: remove "REMOVE_ASSERTS"
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
def test_chain_tarjan(linearize=False):
    """Automatic rewriting with checkpoints="tarjan" on a length-6 chain."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    # for n=5 only the choice of a2 saves memory, and the alg picks a3;
    # use n>5 to avoid that edge condition
    n = 6
    chain = util.make_chain_tanh_fill(n)
    first, last = chain[0], chain[-1]
    grad = memory_saving_gradients.gradients_tarjan([last], [first])[0]
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    if linearize:
        linearize_lib.linearize()
    observed = cpu_peak()
    predicted = 5e6  # originally needed 7 units; a3 and a5 now get recomputed
    util.report_memory(observed, predicted)
    if not REMOVE_ASSERTS:
        assert (observed - predicted) < 1e5, "Difference too large."
def test_chain_memory(linearize=False):
    """Automatic rewriting with checkpoints="memory" on a length-6 chain."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    # for n=5 only the choice of a2 saves memory, and the alg picks a3;
    # use n>5 to avoid that edge condition
    n = 6
    chain = make_chain_tanh_constant(n)
    first, last = chain[0], chain[-1]
    grad = memory_saving_gradients.gradients_memory([last], [first])[0]
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    if linearize:
        linearize_lib.linearize()
    observed = cpu_peak()
    # 1 unit per node + 1 for the generated node - 1 for the saved "loss" tensor
    predicted = (n + 1 - 1) * 10**6
    util.report_memory(observed, predicted)
    if not REMOVE_ASSERTS:
        assert (observed - predicted) < 10000, "Difference too large."
def test_dual_chain_rewrite():
    """Gradient of two tanh chains joined by an add, with manual checkpoints."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 5
    chain_a = make_chain_tanh_constant(n, "a")
    chain_b = make_chain_tanh_constant(n, "b")
    a0, b0 = chain_a[0], chain_b[0]
    a, b = chain_a[-1], chain_b[-1]
    grad = memory_saving_gradients.gradients(
        [a + b], [a0, b0], checkpoints=[chain_a[2], chain_b[2]])
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun([grad[0].op, grad[1].op])
    observed = cpu_peak()
    # Normal usage is 2*n nodes + the default ygrad node + 2 gradient nodes.
    # Checkpointing drops two activations (a1/b1) temporarily, which also
    # moves the peak lower down the chain, where the final addition node's
    # activations are no longer needed (another -1).
    predicted = (2 * (n - 1) + 1) * 10**6
    util.report_memory(observed, predicted)
    # two independent chains => node-scheduling variability; allow slack
    if not REMOVE_ASSERTS:
        assert (observed - predicted) < 4.1e6, "Difference too large."
def test_dual_chain():
    """Runs regular (unrewritten) dual-chain gradient, checks peak memory."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 5
    nodes1 = make_chain_tanh_constant(n, "a")
    nodes2 = make_chain_tanh_constant(n, "b")
    a0, b0 = nodes1[0], nodes2[0]
    a, b = nodes1[-1], nodes2[-1]
    grad = tf.gradients([a + b], [a0, b0])
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun([grad[0].op, grad[1].op])
    peak_memory = cpu_peak()
    expected_peak = (2 * n + 1) * 10**6
    util.report_memory(peak_memory, expected_peak)
    # 1 unit of memory slack since parallel computation chains add
    # scheduling variability.
    # fix: the bound was 1.1*10**9 (a gigabyte), which made the assert
    # vacuous; the comment promises ~1 MB of slack, so use 1.1*10**6.
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1 * 10**6, "Difference too large."
def test_chain_rewrite(linearize=False):
    """Chain of length 5 with a1/a3 checkpointed; expect 2 units of RAM saved."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 5
    a0, a1, a2, a3, a4 = make_chain_tanh(n)
    grad = memory_saving_gradients.gradients([a4], [a0], checkpoints=[a1, a3])[0]
    predicted = (n + 1 - 2) * 10**6  # minus 2: two activations get recomputed
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    if linearize:
        linearize_lib.linearize()
    observed = cpu_peak()
    util.report_memory(observed, predicted)
    if not REMOVE_ASSERTS:
        assert (observed - predicted) < 1e6 + 10000, "Difference too large."
def test_chain_rewrite(linearize=False):
    """Take chain of length 5, save 2 nodes, make sure 2 units of RAM is saved."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 5
    chain = make_chain_tanh(n)
    a0, a1, a2, a3, a4 = chain
    grad = memory_saving_gradients.gradients([a4], [a0],
                                             checkpoints=[a1, a3])[0]
    # two activations are recomputed, hence the -2
    expected_peak = (n + 1 - 2) * 10**6
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    if linearize:
        linearize_lib.linearize()
    peak_memory = cpu_peak()
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1e6 + 10000, "Difference too large."
def test_resnet_rewrite_memory(linearize=False):
    """Rewrite a 6-block resnet gradient with the "memory" checkpoint strategy.

    Args:
      linearize: if True, also linearize execution order of the gradient graph.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 6  # use n>5 (see test_chain_memory)
    nodes = make_resnet(n)
    a0 = nodes[0]
    a = nodes[-1]
    # gradients_memory chooses bottlenecks itself (['a03_add:0', 'a05_add:0']);
    # fix: removed the unused hard-coded `checkpoints` local and the unused
    # `added` binding.
    grad = memory_saving_gradients.gradients_memory([a], [a0])[0]
    if linearize:
        linearize_lib.linearize(grad.op)
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    peak_memory = cpu_peak()
    # 1 for activation of each tanh node + 1 for initial backprop node
    # + 1 temporary memory for computing the adds,
    # -1 for discarding, then recomputing a1_tanh
    expected_peak = (n + 1 + 1 - 1) * 10**6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1 * 10**6, "Difference too large."
def test_long_resnet():
    """Plain tf.gradients over a 100-block resnet; peak should be ~(n+1) MB."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 100
    resnet = make_resnet(n)
    a0, a = resnet[0], resnet[-1]
    with tf.control_dependencies([a]):
        grad = tf.gradients([a], [a0])[0]
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    peak = cpu_peak()
    # 1 for activation of each tanh node + 1 for initial backprop node
    # + 1 temporary memory for computing the adds
    target = (n + 1) * 10**6
    util.report_memory(peak, target)
    if not REMOVE_ASSERTS:
        assert (peak - target) < 1.1e6, "Difference too large."
def test_long_resnet_rewrite_tarjan(linearize=False):
    """100-block resnet rewritten with "tarjan" checkpoints; ~18 MB peak.

    Args:
      linearize: if True, linearize execution order of the gradient graph.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 100
    nodes = make_resnet(n)
    a0 = nodes[0]
    a = nodes[-1]
    # fix: removed two dead `start_time = time.time()` assignments (never
    # read) and the unused `added` binding.
    with tf.control_dependencies([a]):
        grad = memory_saving_gradients.gradients_tarjan([a], [a0])[0]
    if linearize:
        linearize_lib.linearize(grad.op)
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    peak_memory = cpu_peak()
    # 20 mem used with following tensors picked automatically
    # ['a10_add:0', 'a19_add:0', 'a28_add:0', 'a37_add:0', 'a46_add:0',
    #  'a55_add:0', 'a64_add:0', 'a73_add:0', 'a82_add:0', 'a91_add:0']
    expected_peak = 18 * 10**6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
def test_long_chain_memory(linearize=False):
    """Like test_chain, but use automatic rewriting with checkpoints="memory"
    strategy on a 100-node chain.

    Args:
      linearize: if True, linearize the graph after the gradient run.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 100
    nodes = make_chain_tanh_constant(n)
    a0 = nodes[0]
    a = nodes[-1]
    # NOTE(review): the collection entries only matter for the (commented out)
    # gradients_collection path; gradients_memory picks bottlenecks itself.
    tf.add_to_collection("checkpoints", nodes[10])
    tf.add_to_collection("checkpoints", nodes[20])
    #grad = memory_saving_gradients.gradients_collection([a], [a0])[0]
    grad = memory_saving_gradients.gradients_memory([a], [a0])[0]
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    # fix: dropped the unused `added` binding. NOTE(review): linearizing after
    # the gradient run cannot affect the measurement above — confirm intent.
    if linearize:
        linearize_lib.linearize()
    peak_memory = cpu_peak()
    # 20 mem used with following tensors picked automatically as bottlenecks
    # ['a10:0', 'a19:0', 'a28:0', 'a37:0', 'a46:0', 'a55:0', 'a64:0', 'a73:0',
    #  'a82:0', 'a91:0']
    expected_peak = 20 * 10**6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
def test_long_chain_tarjan(linearize=False):
    """Automatic rewriting with checkpoints="tarjan" on a 100-node chain."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    length = 100
    chain = make_chain_tanh_constant(length)
    first, last = chain[0], chain[-1]
    grad = memory_saving_gradients.gradients_tarjan([last], [first])[0]
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    if linearize:
        added = linearize_lib.linearize()
    observed = cpu_peak()
    # checkpoints picked automatically:
    # a09:0,19:0,a29:0,a39:0,a49:0,a58:0,a68:0,a78:0,a88:0,a97:0
    predicted = 18e6
    util.report_memory(observed, predicted)
    # todo: remove "REMOVE_ASSERTS"
    if not REMOVE_ASSERTS:
        assert (observed - predicted) < 1.1e6, "Difference too large."
def test_chain_tarjan(linearize=False):
    """Like test_chain, but rewritten automatically with the "tarjan" strategy."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 6  # n>5 avoids the edge case where only a2 saves memory but a3 is picked
    nodes = util.make_chain_tanh_fill(n)
    a0, a = nodes[0], nodes[-1]
    grad = memory_saving_gradients.gradients_tarjan([a], [a0])[0]
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    if linearize:
        linearize_lib.linearize()
    peak = cpu_peak()
    # originally needed 7 units; now a3 and a5 are recomputed
    target = 5e6
    util.report_memory(peak, target)
    if not REMOVE_ASSERTS:
        assert (peak - target) < 1e5, "Difference too large."
def test_chain_memory(linearize=False):
    """Like test_chain, but rewritten automatically with the "memory" strategy."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 6  # n>5 avoids the edge case where only a2 saves memory but a3 is picked
    nodes = make_chain_tanh_constant(n)
    a0, a = nodes[0], nodes[-1]
    grad = memory_saving_gradients.gradients_memory([a], [a0])[0]
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    if linearize:
        linearize_lib.linearize()
    peak = cpu_peak()
    # 1 per node + 1 for the generated node - 1 for the saved "loss" tensor
    target = (n + 1 - 1) * 10**6
    util.report_memory(peak, target)
    if not REMOVE_ASSERTS:
        assert (peak - target) < 10000, "Difference too large."
def test_dual_chain_rewrite():
    """Checkpointed gradient of two tanh chains joined by an add node."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 5
    nodes1 = make_chain_tanh_constant(n, "a")
    nodes2 = make_chain_tanh_constant(n, "b")
    a0, b0 = nodes1[0], nodes2[0]
    a, b = nodes1[-1], nodes2[-1]
    manual_checkpoints = [nodes1[2], nodes2[2]]
    grad = memory_saving_gradients.gradients([a + b], [a0, b0],
                                             checkpoints=manual_checkpoints)
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun([grad[0].op, grad[1].op])
    peak = cpu_peak()
    # Normal usage: 2*n nodes + default ygrad node + 2 gradient nodes.
    # Dropping activations a1/b1 temporarily saves 2 units and shifts the
    # peak lower down the chain, where the final addition activations are
    # no longer needed (another -1).
    target = (2 * (n - 1) + 1) * 10**6
    util.report_memory(peak, target)
    # two independent chains => scheduling variability; allow slack
    if not REMOVE_ASSERTS:
        assert (peak - target) < 4.1e6, "Difference too large."
def test_dual_chain():
    """Runs regular (unrewritten) dual-chain gradient, checks peak memory."""
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 5
    nodes1 = make_chain_tanh_constant(n, "a")
    nodes2 = make_chain_tanh_constant(n, "b")
    a0, b0 = nodes1[0], nodes2[0]
    a, b = nodes1[-1], nodes2[-1]
    grad = tf.gradients([a + b], [a0, b0])
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun([grad[0].op, grad[1].op])
    peak_memory = cpu_peak()
    expected_peak = (2 * n + 1) * 10**6
    util.report_memory(peak_memory, expected_peak)
    # 1 unit of memory slack since parallel computation chains add
    # scheduling variability.
    # fix: the bound was 1.1 * 10**9 (a gigabyte), which made the assert
    # vacuous; the comment promises ~1 MB of slack, so use 1.1 * 10**6.
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1 * 10**6, "Difference too large."
def test_chain_rewrite_save_first():
    """Take chain of length 5, checkpointing a1 and a3.

    fix: the old docstring claimed "save first node", which did not match the
    checkpoints actually passed ([a1, a3]).
    NOTE(review): the function name still says "save_first" — confirm whether
    the name or the checkpoint list is the intended behavior.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 5
    a0, a1, a2, a3, a4 = make_chain_tanh_constant(n)
    grad = memory_saving_gradients.gradients([a4], [a0], checkpoints=[a1, a3])[0]
    expected_peak = (n + 1 - 2) * 10**6  # two activations get recomputed
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    peak_memory = cpu_peak()
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
def test_chain_rewrite_save_one_before_last():
    """Take chain of length 5, checkpointing the single middle node a2.

    fix: the old docstring said "save first node" — a copy-paste from
    test_chain_rewrite_save_first; the code checkpoints [a2].
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()
    n = 5
    a0, a1, a2, a3, a4 = make_chain_tanh_constant(n)
    grad = memory_saving_gradients.gradients([a4], [a0], checkpoints=[a2])[0]
    expected_peak = (n + 1 - 2) * 10**6  # two activations get recomputed
    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)
    peak_memory = cpu_peak()
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
def main():
    """Compares several ways of measuring peak memory for one gradient run."""
    import memory_util
    memory_util.vlog(1)  # vlog=2 on GPU machine will spam gpu "polling" msgs
    tf.reset_default_graph()
    n = 3  # TODO: fix edge case with n=2
    nodes = make_chain_tanh(n)
    a0 = nodes[0]
    a = nodes[-1]
    #grad = memory_saving_gradients.gradients_memory([a], [a0])[0]
    grad = tf.gradients(a, [a0])[0]
    sess = create_session()
    sess.run(tf.global_variables_initializer())
    # feed_dict = {a0,
    # method 1: parse VLOG memory messages captured from stderr
    with memory_util.capture_stderr() as stderr:
        sess.run(grad.op)
    peak_memory1 = memory_util.peak_memory(stderr.getvalue())
    # 20 mem used with following tensors picked automatically as bottlenecks
    # ['a10:0', 'a19:0', 'a28:0', 'a37:0', 'a46:0', 'a55:0', 'a64:0', 'a73:0',
    #  'a82:0', 'a91:0']
    # NOTE(review): the comment above appears copied from the 100-node chain
    # tests; it does not describe this n=3 run.
    # method 2: ask the allocator directly
    mem_op = tf.contrib.memory_stats.MaxBytesInUse()
    peak_memory2 = sess.run(mem_op)
    # method 3: full-trace run metadata
    run_metadata = tf.RunMetadata()
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    sess.run(grad.op, run_metadata=run_metadata, options=run_options,)
    print(run_metadata)
    # NOTE(review): indexes the 'gpu' entry of the metadata summary — confirm
    # this works on a CPU-only machine.
    peak_memory3 = memory_util.peak_from_metadata(run_metadata)['gpu']
    print(peak_memory1, "VLOG_MEMORY")
    print(peak_memory2, "MaxBytesInUse")
    print(peak_memory3, "metadata")
    cpu, gpu = memory_util._retrieve_cpu_gpu_stats(run_metadata)
    if cpu:
        bytes_in_use_cpu = [node.memory[0].allocator_bytes_in_use for node in cpu]
    if gpu:
        bytes_in_use_gpu = [node.memory[0].allocator_bytes_in_use for node in gpu]
        # max of per-node allocator_bytes_in_use from the trace
        peak_memory4 = max(bytes_in_use_gpu)
        print(peak_memory4, "metadata max")
    # fourth way would parse "allocator_bytes_in_use
    # node_stats {
    #   node_name: "Square"
    #   all_start_micros: 1509664297214870
    #   op_start_rel_micros: 4
    #   op_end_rel_micros: 115
    #   all_end_rel_micros: 136
    #   memory {
    #     allocator_name: "GPU_0_bfc"
    #     allocator_bytes_in_use: 6013952
    #   }
    expected_peak = 3 * 10**6
    util.report_memory(peak_memory1, expected_peak)
    assert abs(peak_memory3 - expected_peak) < 10000, "Difference too large."
def main():
    """Cross-checks four peak-memory measurement methods on one backprop run."""
    import memory_util
    memory_util.vlog(1)  # vlog=2 on GPU machine will spam gpu "polling" msgs
    tf.reset_default_graph()
    n = 3  # TODO: fix edge case with n=2
    nodes = make_chain_tanh(n)
    a0 = nodes[0]
    a = nodes[-1]
    #grad = memory_saving_gradients.gradients_memory([a], [a0])[0]
    grad = tf.gradients(a, [a0])[0]
    sess = create_session()
    sess.run(tf.global_variables_initializer())
    # feed_dict = {a0,
    # method 1: parse VLOG memory messages captured from stderr
    with memory_util.capture_stderr() as stderr:
        sess.run(grad.op)
    peak_memory1 = memory_util.peak_memory(stderr.getvalue())
    # 20 mem used with following tensors picked automatically as bottlenecks
    # ['a10:0', 'a19:0', 'a28:0', 'a37:0', 'a46:0', 'a55:0', 'a64:0', 'a73:0',
    #  'a82:0', 'a91:0']
    # NOTE(review): the comment above appears copied from the 100-node chain
    # tests; it does not describe this n=3 run.
    # method 2: ask the allocator directly
    mem_op = tf.contrib.memory_stats.MaxBytesInUse()
    peak_memory2 = sess.run(mem_op)
    # method 3: full-trace run metadata
    run_metadata = tf.RunMetadata()
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    sess.run(
        grad.op,
        run_metadata=run_metadata,
        options=run_options,
    )
    print(run_metadata)
    # NOTE(review): indexes the 'gpu' entry of the metadata summary — confirm
    # this works on a CPU-only machine.
    peak_memory3 = memory_util.peak_from_metadata(run_metadata)['gpu']
    print(peak_memory1, "VLOG_MEMORY")
    print(peak_memory2, "MaxBytesInUse")
    print(peak_memory3, "metadata")
    cpu, gpu = memory_util._retrieve_cpu_gpu_stats(run_metadata)
    if cpu:
        bytes_in_use_cpu = [
            node.memory[0].allocator_bytes_in_use for node in cpu
        ]
    if gpu:
        bytes_in_use_gpu = [
            node.memory[0].allocator_bytes_in_use for node in gpu
        ]
        # max of per-node allocator_bytes_in_use from the trace
        peak_memory4 = max(bytes_in_use_gpu)
        print(peak_memory4, "metadata max")
    # fourth way would parse "allocator_bytes_in_use
    # node_stats {
    #   node_name: "Square"
    #   all_start_micros: 1509664297214870
    #   op_start_rel_micros: 4
    #   op_end_rel_micros: 115
    #   all_end_rel_micros: 136
    #   memory {
    #     allocator_name: "GPU_0_bfc"
    #     allocator_bytes_in_use: 6013952
    #   }
    expected_peak = 3 * 10**6
    util.report_memory(peak_memory1, expected_peak)
    assert abs(peak_memory3 - expected_peak) < 10000, "Difference too large."