def test_articulation_points_resnet():
    """Make sure articulation points are found correctly in resnet."""
    # Same check at two depths: depth-3 has one articulation point,
    # depth-4 has two.
    for depth, expected_points in ((3, ['a01_add']),
                                   (4, ['a01_add', 'a02_add'])):
        tf.reset_default_graph()
        resnet_nodes = util.make_resnet(depth)
        forward_ops = ge.get_forward_walk_ops(seed_ops=resnet_nodes[0].op)
        undirected = nx.Graph(util.tf_ops_to_graph(forward_ops))
        found = util.format_ops(nx.articulation_points(undirected))
        assert util.set_equal(found, expected_points)
# NOTE(review): this function is defined twice in this file with identical
# bodies; the later definition shadows the earlier one. Consider deleting one.
def test_articulation_points_resnet():
    """Make sure articulation points are found correctly in resnet."""
    # Depth-3 resnet: single articulation point.
    tf.reset_default_graph()
    nodes3 = util.make_resnet(3)
    ops3 = ge.get_forward_walk_ops(seed_ops=nodes3[0].op)
    g3 = nx.Graph(util.tf_ops_to_graph(ops3))
    assert util.set_equal(util.format_ops(nx.articulation_points(g3)),
                          ['a01_add'])

    # Depth-4 resnet: two articulation points.
    tf.reset_default_graph()
    nodes4 = util.make_resnet(4)
    ops4 = ge.get_forward_walk_ops(seed_ops=nodes4[0].op)
    g4 = nx.Graph(util.tf_ops_to_graph(ops4))
    assert util.set_equal(util.format_ops(nx.articulation_points(g4)),
                          ['a01_add', 'a02_add'])
def test_resnet_rewrite_tarjan(linearize=False):
    """Check peak CPU memory of gradients_tarjan on a small (n=6) resnet.

    Args:
        linearize: if True, add control dependencies to serialize execution
            before running the backward pass.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()

    n = 6  # use n>5 (see test_chain_memory)
    nodes = make_resnet(n)
    a0 = nodes[0]
    a = nodes[-1]
    # gradients_tarjan selects checkpoints automatically; the expected picks
    # for this graph are ['a03_add:0', 'a05_add:0']. (The original code built
    # an unused `checkpoints` list here — removed as dead code.)
    grad = memory_saving_gradients.gradients_tarjan([a], [a0])[0]
    if linearize:
        linearize_lib.linearize(grad.op)

    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)

    peak_memory = cpu_peak()
    expected_peak = 4e6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1 * 10**6, \
            "Difference too large."
def test_resnet_rewrite_memory(linearize=False):
    """Check peak CPU memory of gradients_memory on a small (n=6) resnet.

    Args:
        linearize: if True, add control dependencies to serialize execution
            before running the backward pass.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()

    n = 6  # use n>5 (see test_chain_memory)
    nodes = make_resnet(n)
    a0 = nodes[0]
    a = nodes[-1]
    # gradients_memory selects checkpoints automatically; the expected picks
    # for this graph are ['a03_add:0', 'a05_add:0']. (The original code built
    # an unused `checkpoints` list here — removed as dead code.)
    grad = memory_saving_gradients.gradients_memory([a], [a0])[0]
    if linearize:
        linearize_lib.linearize(grad.op)

    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)

    peak_memory = cpu_peak()
    # 1 for activation of each tanh node + 1 for initial backprop node
    # + 1 temporary memory for computing the adds,
    # -1 for discarding, then recomputing a1_tanh
    expected_peak = (n + 1 + 1 - 1) * 10**6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1 * 10**6, \
            "Difference too large."
def test_long_resnet_rewrite_tarjan(linearize=False):
    """Check peak CPU memory of gradients_tarjan on a deep (n=100) resnet.

    Args:
        linearize: if True, add control dependencies to serialize execution
            before running the backward pass.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()

    n = 100
    nodes = make_resnet(n)
    a0 = nodes[0]
    a = nodes[-1]
    # (The original code assigned `start_time = time.time()` twice without
    # ever reading it — removed as dead code.)
    with tf.control_dependencies([a]):
        grad = memory_saving_gradients.gradients_tarjan([a], [a0])[0]
    if linearize:
        linearize_lib.linearize(grad.op)

    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)

    peak_memory = cpu_peak()
    # 20 mem used with following tensors picked automatically
    # ['a10_add:0', 'a19_add:0', 'a28_add:0', 'a37_add:0', 'a46_add:0',
    #  'a55_add:0', 'a64_add:0', 'a73_add:0', 'a82_add:0', 'a91_add:0']
    expected_peak = 18 * 10**6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
def test_long_resnet():
    """Measure peak CPU memory of plain tf.gradients on a deep (n=100) resnet."""
    tf.reset_default_graph()
    cpu_device = tf.device('/cpu:0')
    cpu_device.__enter__()

    depth = 100
    resnet_nodes = make_resnet(depth)
    first, last = resnet_nodes[0], resnet_nodes[-1]
    with tf.control_dependencies([last]):
        grad = tf.gradients([last], [first])[0]

    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)

    peak_memory = cpu_peak()
    # 1 for activation of each tanh node + 1 for initial backprop node
    # + 1 temporary memory for computing the adds
    expected_peak = (depth + 1) * 10**6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
# NOTE(review): duplicate definition of test_resnet_rewrite_tarjan in this
# file; the later definition shadows the earlier one. Consider deleting one.
def test_resnet_rewrite_tarjan(linearize=False):
    """Check peak CPU memory of gradients_tarjan on a small (n=6) resnet.

    Args:
        linearize: if True, add control dependencies to serialize execution
            before running the backward pass.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()

    n = 6  # use n>5 (see test_chain_memory)
    nodes = make_resnet(n)
    a0 = nodes[0]
    a = nodes[-1]
    # gradients_tarjan selects checkpoints automatically; expected picks for
    # this graph are ['a03_add:0', 'a05_add:0']. The unused `checkpoints`
    # local from the original was removed as dead code.
    grad = memory_saving_gradients.gradients_tarjan([a], [a0])[0]
    if linearize:
        linearize_lib.linearize(grad.op)

    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)

    peak_memory = cpu_peak()
    expected_peak = 4e6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1 * 10**6, \
            "Difference too large."
# NOTE(review): duplicate definition of test_resnet_rewrite_memory in this
# file; the later definition shadows the earlier one. Consider deleting one.
def test_resnet_rewrite_memory(linearize=False):
    """Check peak CPU memory of gradients_memory on a small (n=6) resnet.

    Args:
        linearize: if True, add control dependencies to serialize execution
            before running the backward pass.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()

    n = 6  # use n>5 (see test_chain_memory)
    nodes = make_resnet(n)
    a0 = nodes[0]
    a = nodes[-1]
    # gradients_memory selects checkpoints automatically; expected picks for
    # this graph are ['a03_add:0', 'a05_add:0']. The unused `checkpoints`
    # local from the original was removed as dead code.
    grad = memory_saving_gradients.gradients_memory([a], [a0])[0]
    if linearize:
        linearize_lib.linearize(grad.op)

    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)

    peak_memory = cpu_peak()
    # 1 for activation of each tanh node + 1 for initial backprop node
    # + 1 temporary memory for computing the adds,
    # -1 for discarding, then recomputing a1_tanh
    expected_peak = (n + 1 + 1 - 1) * 10**6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1 * 10**6, \
            "Difference too large."
# NOTE(review): duplicate definition of test_long_resnet_rewrite_tarjan in
# this file; the later definition shadows the earlier one. Consider deleting one.
def test_long_resnet_rewrite_tarjan(linearize=False):
    """Check peak CPU memory of gradients_tarjan on a deep (n=100) resnet.

    Args:
        linearize: if True, add control dependencies to serialize execution
            before running the backward pass.
    """
    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()

    n = 100
    nodes = make_resnet(n)
    a0 = nodes[0]
    a = nodes[-1]
    # The original assigned `start_time = time.time()` twice and never read
    # it — removed as dead code.
    with tf.control_dependencies([a]):
        grad = memory_saving_gradients.gradients_tarjan([a], [a0])[0]
    if linearize:
        linearize_lib.linearize(grad.op)

    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)

    peak_memory = cpu_peak()
    # 20 mem used with following tensors picked automatically
    # ['a10_add:0', 'a19_add:0', 'a28_add:0', 'a37_add:0', 'a46_add:0',
    #  'a55_add:0', 'a64_add:0', 'a73_add:0', 'a82_add:0', 'a91_add:0']
    expected_peak = 18 * 10**6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
# NOTE(review): duplicate definition of test_long_resnet in this file; the
# later definition shadows the earlier one. Consider deleting one.
def test_long_resnet():
    """Measure peak CPU memory of plain tf.gradients on a deep (n=100) resnet."""
    tf.reset_default_graph()
    device_ctx = tf.device('/cpu:0')
    device_ctx.__enter__()

    n = 100
    chain = make_resnet(n)
    head, tail = chain[0], chain[-1]
    with tf.control_dependencies([tail]):
        grad = tf.gradients([tail], [head])[0]

    sess = create_session()
    sessrun(tf.global_variables_initializer())
    sessrun(grad.op)

    observed_peak = cpu_peak()
    # 1 for activation of each tanh node + 1 for initial backprop node
    # + 1 temporary memory for computing the adds
    predicted_peak = (n + 1) * 10**6
    util.report_memory(observed_peak, predicted_peak)
    if not REMOVE_ASSERTS:
        assert (observed_peak - predicted_peak) < 1.1e6, \
            "Difference too large."
def test_resnet_structure():
    """sanity check on TF resnet structure."""
    tf.reset_default_graph()
    resnet_nodes = util.make_resnet(3)
    forward_ops = ge.get_forward_walk_ops(seed_ops=resnet_nodes[0].op)
    # Adjacency of a depth-3 resnet: each add feeds both the next tanh and
    # the skip connection past it.
    expected = {0: [1, 2], 1: [2], 2: [3, 4], 3: [4]}
    actual = util.tf_ops_to_graph(forward_ops)
    assert util.graphs_isomorphic(actual, expected)
# NOTE(review): duplicate definition of test_resnet_structure in this file;
# the later definition shadows the earlier one. Consider deleting one.
def test_resnet_structure():
    """sanity check on TF resnet structure."""
    tf.reset_default_graph()
    nodes = util.make_resnet(3)
    ops = ge.get_forward_walk_ops(seed_ops=nodes[0].op)
    built = util.tf_ops_to_graph(ops)
    wanted = {0: [1, 2], 1: [2], 2: [3, 4], 3: [4]}
    assert util.graphs_isomorphic(built, wanted)