for i in range(args.max_epoch):
    sess.run(iterator.initializer)
    _ = sess.run(inc_epoch_op)
    while True:
        try:
            images_train, labels_train = sess.run(next_element)
            feed_dict = {inputs: images_train, labels: labels_train,
                         phase_train_placeholder: True}
            start = time.time()
            _, total_loss_val, inference_loss_val, reg_loss_val, _, acc_val = \
                sess.run([train_op, total_loss, inference_loss,
                          regularization_losses, inc_global_step_op,
                          Accuracy_Op],
                         feed_dict=feed_dict,
                         options=config_pb2.RunOptions(
                             report_tensor_allocations_upon_oom=True))
            end = time.time()
            pre_sec = args.train_batch_size / (end - start)
            count += 1

            # print training information
            if count > 0 and count % args.show_info_interval == 0:
                print('epoch %d, total_step %d, total loss is %.2f , '
                      'inference loss is %.2f, reg_loss is %.2f, '
                      'training accuracy is %.6f, time %.3f samples/sec' %
                      (i, count, total_loss_val, inference_loss_val,
                       np.sum(reg_loss_val), acc_val, pre_sec))

            # save summary
            if count > 0 and count % args.summary_interval == 0:
                feed_dict = {inputs: images_train,
def before_run(self, run_context):
  self._curr_iter += 1
  if self._start_iter < self._curr_iter <= self._end_iter:
    return tf.estimator.SessionRunArgs(
        None,
        options=config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE))
  else:
    return None
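For context, here is a minimal sketch of the full hook this before_run could belong to: a SessionRunHook that turns on full tracing for a window of global steps. The _start_iter/_end_iter attributes match the method above, but the constructor and the estimator wiring are assumptions for illustration, not taken from the original source.

import tensorflow as tf
from tensorflow.core.protobuf import config_pb2


class TraceWindowHook(tf.estimator.SessionRunHook):
  """Hypothetical hook built around the before_run above (illustrative)."""

  def __init__(self, start_iter, end_iter):
    self._curr_iter = 0  # assumed initialization
    self._start_iter = start_iter
    self._end_iter = end_iter

  def before_run(self, run_context):
    self._curr_iter += 1
    if self._start_iter < self._curr_iter <= self._end_iter:
      # Request a full trace for this step; the collected step_stats land in
      # the run_values passed to after_run().
      return tf.estimator.SessionRunArgs(
          None,
          options=config_pb2.RunOptions(
              trace_level=config_pb2.RunOptions.FULL_TRACE))
    return None

# Illustrative wiring: estimator.train(input_fn, hooks=[TraceWindowHook(100, 110)])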
def setUpClass(cls):
  cls._dump_root = tempfile.mkdtemp()
  cls._is_gpu_available = test.is_gpu_available()
  if cls._is_gpu_available:
    cls._main_device = "/job:localhost/replica:0/task:0/gpu:0"
  else:
    cls._main_device = "/job:localhost/replica:0/task:0/cpu:0"

  with session.Session() as sess:
    u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
    v_init_val = np.array([[2.0], [-1.0]])

    u_name = "simple_mul_add/u"
    v_name = "simple_mul_add/v"

    u_init = constant_op.constant(u_init_val, shape=[2, 2])
    u = variables.Variable(u_init, name=u_name)
    v_init = constant_op.constant(v_init_val, shape=[2, 1])
    v = variables.Variable(v_init, name=v_name)

    w = math_ops.matmul(u, v, name="simple_mul_add/matmul")
    x = math_ops.add(w, w, name="simple_mul_add/add")

    u.initializer.run()
    v.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(run_options,
                            sess.graph,
                            debug_ops=["DebugIdentity"],
                            debug_urls="file://%s" % cls._dump_root)

    # Invoke Session.run().
    run_metadata = config_pb2.RunMetadata()
    sess.run(x, options=run_options, run_metadata=run_metadata)

  cls._debug_dump = debug_data.DebugDumpDir(
      cls._dump_root, partition_graphs=run_metadata.partition_graphs)

  # Construct the analyzer.
  cls._analyzer = analyzer_cli.DebugAnalyzer(cls._debug_dump)

  # Construct the handler registry.
  cls._registry = debugger_cli_common.CommandHandlerRegistry()

  # Register command handlers.
  cls._registry.register_command_handler(
      "list_tensors",
      cls._analyzer.list_tensors,
      cls._analyzer.get_help("list_tensors"),
      prefix_aliases=["lt"])
  cls._registry.register_command_handler(
      "node_info",
      cls._analyzer.node_info,
      cls._analyzer.get_help("node_info"),
      prefix_aliases=["ni"])
  cls._registry.register_command_handler(
      "print_tensor",
      cls._analyzer.print_tensor,
      cls._analyzer.get_help("print_tensor"),
      prefix_aliases=["pt"])
def testArithmeticOptimizationActive(self):
  """Tests that tfdbg can dump the tensor from nodes created by Grappler."""
  with session.Session(config=_grappler_enabled_session_config()) as sess:
    u = variables.VariableV1([[1, 2], [3, 4]], name="u", dtype=dtypes.float32)
    # The next two ops should be optimized by Grappler into a single op:
    # either an AddN op or a Mul op.
    x = math_ops.add(u, u)
    x = math_ops.add(x, u)
    y = math_ops.multiply(x, u)

    sess.run(variables.global_variables_initializer())

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(run_options,
                            sess.graph,
                            debug_ops=["DebugIdentity"],
                            debug_urls=[self._debug_url])

    run_metadata = config_pb2.RunMetadata()
    run_result = sess.run(y, options=run_options, run_metadata=run_metadata)
    self.assertAllClose(run_result, [[3, 12], [27, 48]])

    dump_data = debug_data.DebugDumpDir(
        self._dump_root,
        partition_graphs=run_metadata.partition_graphs,
        validate=True)

    original_node_names = set(
        [op.name for op in sess.graph.get_operations()])
    dumped_node_names = set(dump_data.nodes())
    grappler_created_node_names = dumped_node_names - original_node_names
    grappler_removed_node_names = original_node_names - dumped_node_names

    # Assert that Grappler should have replaced some of the nodes from the
    # original graph with new nodes.
    self.assertTrue(grappler_created_node_names)
    self.assertTrue(grappler_removed_node_names)

    # Iterate through the nodes created by Grappler. One of them should be
    # the result of replacing the original add ops with an AddN op or a
    # Mul op.
    found_optimized_node = False
    for grappler_node_name in grappler_created_node_names:
      node_op_type = dump_data.node_op_type(grappler_node_name)
      # Look for the node created by Grappler's arithmetic optimization.
      if ((test_util.IsMklEnabled() and
           node_op_type in ("_MklAddN", "Mul")) or
          node_op_type in ("AddN", "Mul")):
        datum = dump_data.get_tensors(grappler_node_name, 0, "DebugIdentity")
        self.assertEqual(1, len(datum))
        self.assertAllClose(datum[0], [[3, 6], [9, 12]])
        found_optimized_node = True
        break
    self.assertTrue(
        found_optimized_node,
        "Failed to find optimized node created by Grappler's arithmetic "
        "optimization.")
def testToggleEnableTwoDebugWatchesNoCrosstalkBetweenDebugNodes(self):
  with session.Session(
      config=session_debug_testlib.no_rewrite_session_config()) as sess:
    v_1 = variables.VariableV1(50.0, name="v_1")
    v_2 = variables.VariableV1(-50.0, name="v_2")
    delta_1 = constant_op.constant(5.0, name="delta_1")
    delta_2 = constant_op.constant(-5.0, name="delta_2")
    inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
    inc_v_2 = state_ops.assign_add(v_2, delta_2, name="inc_v_2")

    sess.run([v_1.initializer, v_2.initializer])

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity(gated_grpc=true)",
                   "DebugNumericSummary(gated_grpc=true)"],
        debug_urls=[self._debug_server_url_1])

    for i in xrange(4):
      self._server_1.clear_data()

      if i % 2 == 0:
        self._server_1.request_watch("delta_1", 0, "DebugIdentity")
        self._server_1.request_watch("delta_2", 0, "DebugIdentity")
        self._server_1.request_unwatch("delta_1", 0, "DebugNumericSummary")
        self._server_1.request_unwatch("delta_2", 0, "DebugNumericSummary")
      else:
        self._server_1.request_unwatch("delta_1", 0, "DebugIdentity")
        self._server_1.request_unwatch("delta_2", 0, "DebugIdentity")
        self._server_1.request_watch("delta_1", 0, "DebugNumericSummary")
        self._server_1.request_watch("delta_2", 0, "DebugNumericSummary")

      sess.run([inc_v_1, inc_v_2],
               options=run_options, run_metadata=run_metadata)

      # Watched debug tensors are:
      #   Run 0: delta_[1,2]:0:DebugIdentity
      #   Run 1: delta_[1,2]:0:DebugNumericSummary
      #   Run 2: delta_[1,2]:0:DebugIdentity
      #   Run 3: delta_[1,2]:0:DebugNumericSummary
      self.assertEqual(2, len(self._server_1.debug_tensor_values))
      if i % 2 == 0:
        self.assertAllClose(
            [5.0],
            self._server_1.debug_tensor_values["delta_1:0:DebugIdentity"])
        self.assertAllClose(
            [-5.0],
            self._server_1.debug_tensor_values["delta_2:0:DebugIdentity"])
      else:
        self.assertAllClose(
            [[1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 5.0, 5.0, 0.0,
              1.0, 0.0]],
            self._server_1.debug_tensor_values[
                "delta_1:0:DebugNumericSummary"])
        self.assertAllClose(
            [[1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, -5.0, -5.0, -5.0, 0.0,
              1.0, 0.0]],
            self._server_1.debug_tensor_values[
                "delta_2:0:DebugNumericSummary"])
def train_step(sess, train_op, global_step, train_step_kwargs):
  """Function that takes a gradient step and specifies whether to stop.

  Args:
    sess: The current session.
    train_op: An `Operation` that evaluates the gradients and returns the
      total loss.
    global_step: A `Tensor` representing the global training step.
    train_step_kwargs: A dictionary of keyword arguments.

  Returns:
    The total loss and a boolean indicating whether or not to stop training.

  Raises:
    ValueError: if 'should_trace' is in `train_step_kwargs` but `logdir` is
      not.
  """
  start_time = time.time()

  trace_run_options = None
  run_metadata = None
  if 'should_trace' in train_step_kwargs:
    if 'logdir' not in train_step_kwargs:
      raise ValueError('logdir must be present in train_step_kwargs when '
                       'should_trace is present')
    if sess.run(train_step_kwargs['should_trace']):
      trace_run_options = config_pb2.RunOptions(
          trace_level=config_pb2.RunOptions.FULL_TRACE)
      run_metadata = config_pb2.RunMetadata()

  total_loss, np_global_step = sess.run([train_op, global_step],
                                        options=trace_run_options,
                                        run_metadata=run_metadata)
  time_elapsed = time.time() - start_time

  if run_metadata is not None:
    tl = timeline.Timeline(run_metadata.step_stats)
    trace = tl.generate_chrome_trace_format()
    trace_filename = os.path.join(train_step_kwargs['logdir'],
                                  'tf_trace-%d.json' % np_global_step)
    logging.info('Writing trace to %s', trace_filename)
    file_io.write_string_to_file(trace_filename, trace)
    if 'summary_writer' in train_step_kwargs:
      train_step_kwargs['summary_writer'].add_run_metadata(
          run_metadata, 'run_metadata-%d' % np_global_step)

  if 'should_log' in train_step_kwargs:
    if sess.run(train_step_kwargs['should_log']):
      logging.info('global step %d: loss = %.4f (%.2f sec/step)',
                   np_global_step, total_loss, time_elapsed)

  # TODO(nsilberman): figure out why we can't put this into sess.run. The
  # issue right now is that the stop check depends on the global step. The
  # increment of global step often happens via the train op, which is
  # created using optimizer.apply_gradients.
  #
  # Since running `train_op` causes the global step to be incremented, one
  # would expect that using a control dependency would allow the
  # should_stop check to be run in the same session.run call:
  #
  #   with ops.control_dependencies([train_op]):
  #     should_stop_op = ...
  #
  # However, this actually seems not to work on certain platforms.
  if 'should_stop' in train_step_kwargs:
    should_stop = sess.run(train_step_kwargs['should_stop'])
  else:
    should_stop = False

  return total_loss, should_stop
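The train_step_kwargs protocol above is driven entirely by which keys are present. As a minimal sketch of how a caller might assemble the dictionary; the step intervals, step budget, and logdir path here are illustrative assumptions, not values from the original source:

import tensorflow as tf

global_step = tf.train.get_or_create_global_step()
train_step_kwargs = {
    'logdir': '/tmp/train_logs',                          # assumed path
    'should_trace': tf.equal(global_step % 100, 0),       # trace every 100th step
    'should_log': tf.equal(global_step % 10, 0),          # log every 10th step
    'should_stop': tf.greater_equal(global_step, 10000),  # assumed step budget
}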
def testDumpCausalityCheck(self):
  with session.Session() as sess:
    u_name = "testDumpCausalityCheck/u"
    v_name = "testDumpCausalityCheck/v"
    w_name = "testDumpCausalityCheck/w"

    u_init = constant_op.constant([2.0, 4.0])
    u = variables.Variable(u_init, name=u_name)
    v = math_ops.add(u, u, name=v_name)
    w = math_ops.add(v, v, name=w_name)

    u.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(run_options,
                            sess.graph,
                            debug_ops=["DebugIdentity"],
                            debug_urls=self._debug_urls())

    run_metadata = config_pb2.RunMetadata()
    sess.run(w, options=run_options, run_metadata=run_metadata)

    self.assertEqual(self._expected_partition_graph_count,
                     len(run_metadata.partition_graphs))

    # First, loading the original dump without supplying the
    # partition_graphs should not cause a RuntimeError: validation occurs
    # only when partition_graphs are loaded.
    debug_data.DebugDumpDir(self._dump_root)

    # Now, loading the original dump with partition graphs supplied should
    # succeed. The validation should pass quietly.
    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    # Get the dump file names and compute their timestamps.
    self.assertEqual(
        1, len(dump.get_tensor_file_paths(u_name, 0, "DebugIdentity")))
    u_file_path = dump.get_tensor_file_paths(u_name, 0, "DebugIdentity")[0]

    self.assertEqual(
        1, len(dump.get_tensor_file_paths(v_name, 0, "DebugIdentity")))
    v_file_path = dump.get_tensor_file_paths(v_name, 0, "DebugIdentity")[0]

    u_timestamp = int(u_file_path[u_file_path.rindex("_") + 1:])
    v_timestamp = int(v_file_path[v_file_path.rindex("_") + 1:])

    # Swap the time stamps.
    new_u_file_path = u_file_path[:u_file_path.rindex("_")] + (
        "_%d" % v_timestamp)
    new_v_file_path = v_file_path[:v_file_path.rindex("_")] + (
        "_%d" % u_timestamp)

    os.rename(u_file_path, new_u_file_path)
    os.rename(v_file_path, new_v_file_path)

    # Load the dump directory again. Now a ValueError is expected to be
    # raised due to the timestamp swap.
    with self.assertRaisesRegexp(ValueError, "Causality violated"):
      dump = debug_data.DebugDumpDir(
          self._dump_root, partition_graphs=run_metadata.partition_graphs)

    # Loading the dump directory with kwarg "validate" set explicitly to
    # False should get rid of the error.
    dump = debug_data.DebugDumpDir(
        self._dump_root,
        partition_graphs=run_metadata.partition_graphs,
        validate=False)
def before_run(self, run_context):
  if not self._session_wrapper:
    self._session_wrapper = local_cli_wrapper.LocalCLIDebugWrapperSession(
        run_context.session,
        ui_type=self._ui_type,
        dump_root=self._dump_root,
        thread_name_filter=self._thread_name_filter)

    # Actually register tensor filters registered prior to the construction
    # of the underlying LocalCLIDebugWrapperSession object.
    for filter_name in self._pending_tensor_filters:
      self._session_wrapper.add_tensor_filter(
          filter_name, self._pending_tensor_filters[filter_name])

  # Increment run call counter.
  self._session_wrapper.increment_run_call_count()

  # Adapt run_context to an instance of OnRunStartRequest for invoking
  # superclass on_run_start().
  on_run_start_request = framework.OnRunStartRequest(
      run_context.original_args.fetches, run_context.original_args.feed_dict,
      None, None, self._session_wrapper.run_call_count)

  on_run_start_response = self._session_wrapper.on_run_start(
      on_run_start_request)
  self._performed_action = on_run_start_response.action

  run_args = session_run_hook.SessionRunArgs(
      None, feed_dict=None, options=config_pb2.RunOptions())
  if self._performed_action == framework.OnRunStartAction.DEBUG_RUN:
    # pylint: disable=protected-access
    self._session_wrapper._decorate_run_options_for_debug(
        run_args.options,
        on_run_start_response.debug_urls,
        debug_ops=on_run_start_response.debug_ops,
        node_name_regex_whitelist=(
            on_run_start_response.node_name_regex_whitelist),
        op_type_regex_whitelist=(
            on_run_start_response.op_type_regex_whitelist),
        tensor_dtype_regex_whitelist=(
            on_run_start_response.tensor_dtype_regex_whitelist),
        tolerate_debug_op_creation_failures=(
            on_run_start_response.tolerate_debug_op_creation_failures))
    # pylint: enable=protected-access
  elif self._performed_action == framework.OnRunStartAction.PROFILE_RUN:
    # pylint: disable=protected-access
    self._session_wrapper._decorate_run_options_for_profile(run_args.options)
    # pylint: enable=protected-access
  elif self._performed_action == framework.OnRunStartAction.INVOKE_STEPPER:
    # The _finalized property must be set to False so that the NodeStepper
    # can insert ops for retrieving TensorHandles.
    # pylint: disable=protected-access
    run_context.session.graph._finalized = False
    # pylint: enable=protected-access

    with stepper.NodeStepper(
        run_context.session,
        run_context.original_args.fetches,
        run_context.original_args.feed_dict) as node_stepper:
      self._session_wrapper.invoke_node_stepper(
          node_stepper, restore_variable_values_on_exit=True)

  return run_args
def testFindNodesWithBadTensorValues(self):
  with session.Session() as sess:
    u_name = "testFindNodesWithBadTensorValues/u"
    v_name = "testFindNodesWithBadTensorValues/v"
    w_name = "testFindNodesWithBadTensorValues/w"
    x_name = "testFindNodesWithBadTensorValues/x"
    y_name = "testFindNodesWithBadTensorValues/y"
    z_name = "testFindNodesWithBadTensorValues/z"

    u_init = constant_op.constant([2.0, 4.0])
    u = variables.Variable(u_init, name=u_name)
    v_init = constant_op.constant([2.0, 1.0])
    v = variables.Variable(v_init, name=v_name)

    # Expected output: [0.0, 3.0]
    w = math_ops.sub(u, v, name=w_name)

    # Expected output: [inf, 1.3333]
    x = math_ops.div(u, w, name=x_name)

    # Expected output: [nan, 4.0]
    y = math_ops.mul(w, x, name=y_name)
    z = math_ops.mul(y, y, name=z_name)

    u.initializer.run()
    v.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(run_options,
                            sess.graph,
                            debug_ops=["DebugIdentity"],
                            debug_urls=self._debug_urls())

    run_metadata = config_pb2.RunMetadata()
    sess.run(z, options=run_options, run_metadata=run_metadata)

    self.assertEqual(self._expected_partition_graph_count,
                     len(run_metadata.partition_graphs))

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    def has_bad_value(_, tensor):
      return np.any(np.isnan(tensor)) or np.any(np.isinf(tensor))

    # Find all "offending tensors".
    bad_data = dump.find(has_bad_value)

    # Verify that the nodes with bad values are caught through running find
    # on the debug dump.
    self.assertEqual(3, len(bad_data))
    self.assertEqual(x_name, bad_data[0].node_name)
    self.assertEqual(y_name, bad_data[1].node_name)
    self.assertEqual(z_name, bad_data[2].node_name)

    # Test first_n kwarg of find(): Find the first offending tensor.
    first_bad_datum = dump.find(has_bad_value, first_n=1)
    self.assertEqual(1, len(first_bad_datum))
    self.assertEqual(x_name, first_bad_datum[0].node_name)
def testDumpGraphStructureLookup(self):
  # TODO(cais): Separate this test into multiple test methods.
  with session.Session() as sess:
    u_name = "testDumpGraphStructureLookup/u"
    v_name = "testDumpGraphStructureLookup/v"
    w_name = "testDumpGraphStructureLookup/w"

    u_init = constant_op.constant([2.0, 4.0])
    u = variables.Variable(u_init, name=u_name)
    v = math_ops.add(u, u, name=v_name)
    w = math_ops.add(v, v, name=w_name)

    u.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(run_options,
                            sess.graph,
                            debug_ops=["DebugIdentity"],
                            debug_urls=self._debug_urls())

    run_metadata = config_pb2.RunMetadata()
    sess.run(w, options=run_options, run_metadata=run_metadata)

    self.assertEqual(self._expected_partition_graph_count,
                     len(run_metadata.partition_graphs))

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    u_read_name = u_name + "/read"

    # Test node name list lookup of the DebugDumpDir object.
    node_names = dump.nodes()
    self.assertTrue(u_name in node_names)
    self.assertTrue(u_read_name in node_names)

    # Test querying node attributes.
    u_attr = dump.node_attributes(u_name)
    self.assertEqual(dtypes.float32, u_attr["dtype"].type)
    self.assertEqual(1, len(u_attr["shape"].shape.dim))
    self.assertEqual(2, u_attr["shape"].shape.dim[0].size)

    with self.assertRaisesRegexp(ValueError, "No node named \"foo\" exists"):
      dump.node_attributes("foo")

    # Test querying the debug watch keys with node names.
    self.assertEqual(["%s:0:DebugIdentity" % u_name],
                     dump.debug_watch_keys(u_name))
    self.assertEqual(["%s:0:DebugIdentity" % v_name],
                     dump.debug_watch_keys(v_name))
    self.assertEqual(["%s:0:DebugIdentity" % w_name],
                     dump.debug_watch_keys(w_name))
    self.assertEqual([], dump.debug_watch_keys("foo"))

    # Test querying debug datum instances from debug watch.
    u_data = dump.watch_key_to_data(dump.debug_watch_keys(u_name)[0])
    self.assertEqual(1, len(u_data))
    self.assertEqual(u_name, u_data[0].node_name)
    self.assertEqual(0, u_data[0].output_slot)
    self.assertEqual("DebugIdentity", u_data[0].debug_op)
    self.assertGreaterEqual(u_data[0].timestamp, 0)

    self.assertEqual([], dump.watch_key_to_data("foo"))

    # Test the inputs lookup of the DebugDumpDir object.
    self.assertEqual([], dump.node_inputs(u_name))
    self.assertEqual([u_name], dump.node_inputs(u_read_name))
    self.assertEqual([u_read_name] * 2, dump.node_inputs(v_name))
    self.assertEqual([v_name] * 2, dump.node_inputs(w_name))

    self.assertEqual([], dump.node_inputs(u_name, is_control=True))
    self.assertEqual([], dump.node_inputs(u_read_name, is_control=True))
    self.assertEqual([], dump.node_inputs(v_name, is_control=True))
    self.assertEqual([], dump.node_inputs(w_name, is_control=True))

    # Test the outputs recipient lookup of the DebugDumpDir object.
    self.assertTrue(u_read_name in dump.node_recipients(u_name))
    self.assertEqual(2, dump.node_recipients(u_read_name).count(v_name))
    self.assertEqual(2, dump.node_recipients(v_name).count(w_name))

    self.assertEqual([], dump.node_recipients(u_name, is_control=True))
    self.assertEqual([], dump.node_recipients(u_read_name, is_control=True))
    self.assertEqual([], dump.node_recipients(v_name, is_control=True))
    self.assertEqual([], dump.node_recipients(w_name, is_control=True))

    # Test errors raised on invalid node names.
    with self.assertRaisesRegexp(ValueError,
                                 "does not exist in partition graphs"):
      dump.node_inputs(u_name + "foo")
    with self.assertRaisesRegexp(ValueError,
                                 "does not exist in partition graphs"):
      dump.node_recipients(u_name + "foo")

    # Test transitive_inputs().
    self.assertEqual([], dump.transitive_inputs(u_name))
    self.assertEqual([u_name], dump.transitive_inputs(u_read_name))
    self.assertEqual(set([u_name, u_read_name]),
                     set(dump.transitive_inputs(v_name)))
    self.assertEqual(set([u_name, u_read_name, v_name]),
                     set(dump.transitive_inputs(w_name)))

    with self.assertRaisesRegexp(ValueError,
                                 "does not exist in partition graphs"):
      dump.transitive_inputs(u_name + "foo")

    # Test num_devices().
    self.assertEqual(self._expected_num_devices, len(dump.devices()))

    # Test node_device().
    self.assertEqual(self._main_device, dump.node_device(u_name))

    with self.assertRaisesRegexp(ValueError,
                                 "does not exist in partition graphs"):
      dump.node_device(u_name + "foo")

    # Test node_exists().
    self.assertTrue(dump.node_exists(u_name))
    self.assertTrue(dump.node_exists(u_name + "/read"))
    self.assertFalse(dump.node_exists(u_name + "/read" + "/foo"))

    # Test node_op_type().
    self.assertEqual("Variable", dump.node_op_type(u_name))
    self.assertEqual("Identity", dump.node_op_type(u_name + "/read"))
    self.assertEqual("Add", dump.node_op_type(v_name))
    self.assertEqual("Add", dump.node_op_type(w_name))

    with self.assertRaisesRegexp(ValueError,
                                 "does not exist in partition graphs"):
      dump.node_op_type(u_name + "foo")
def testDumpToFileWhileLoop(self):
  with session.Session() as sess:
    num_iter = 10

    # "u" is the Variable being updated in the loop.
    u_name = "testDumpToFileWhileLoop/u"
    u_namespace = u_name.split("/")[0]

    u_init_val = np.array(11.0)
    u_init = constant_op.constant(u_init_val)
    u = variables.Variable(u_init, name=u_name)

    # "v" is the increment.
    v_name = "testDumpToFileWhileLoop/v"
    v_namespace = v_name.split("/")[0]

    v_init_val = np.array(2.0)
    v_init = constant_op.constant(v_init_val)
    v = variables.Variable(v_init, name=v_name)

    u.initializer.run()
    v.initializer.run()

    i = constant_op.constant(0, name="testDumpToFileWhileLoop/i")

    def cond(i):
      return math_ops.less(i, num_iter)

    def body(i):
      new_u = state_ops.assign_add(u, v)
      new_i = math_ops.add(i, 1)
      op = control_flow_ops.group(new_u)
      new_i = control_flow_ops.with_dependencies([op], new_i)
      return [new_i]

    loop = control_flow_ops.while_loop(cond, body, [i],
                                       parallel_iterations=1)

    # Create RunOptions for debug-watching tensors.
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_urls = self._debug_urls()

    # Add debug tensor watch for u.
    debug_utils.add_debug_tensor_watch(run_options, u_name, 0,
                                       debug_urls=debug_urls)
    # Add debug tensor watch for v.
    debug_utils.add_debug_tensor_watch(run_options, "%s/read" % v_name, 0,
                                       debug_urls=debug_urls)
    # Add debug tensor watch for while/Identity.
    debug_utils.add_debug_tensor_watch(run_options, "while/Identity", 0,
                                       debug_urls=debug_urls)
    # Add debug tensor watch for while/Add/y.
    debug_utils.add_debug_tensor_watch(run_options, "while/Add/y", 0,
                                       debug_urls=debug_urls)

    run_metadata = config_pb2.RunMetadata()
    r = sess.run(loop, options=run_options, run_metadata=run_metadata)

    self.assertEqual(self._expected_partition_graph_count,
                     len(run_metadata.partition_graphs))

    self.assertEqual(num_iter, r)

    u_val_final = sess.run(u)
    self.assertAllClose(u_init_val + num_iter * v_init_val, u_val_final)

    # Verify dump files.
    self.assertTrue(os.path.isdir(self._dump_root))
    self.assertTrue(
        os.path.isdir(os.path.join(self._dump_root, u_namespace)))
    self.assertTrue(
        os.path.isdir(os.path.join(self._dump_root, v_namespace, "v")))

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    # Expected dumped tensors: u, v/read, 10 iterations of while/Identity,
    # and 10 iterations of while/Add/y.
    self.assertEqual(1 + 1 + num_iter + num_iter, dump.size)

    # Verify tensor values.
    self.assertAllClose([u_init_val],
                        dump.get_tensors(u_name, 0, "DebugIdentity"))
    self.assertAllClose([v_init_val],
                        dump.get_tensors("%s/read" % v_name, 0,
                                         "DebugIdentity"))

    while_id_tensors = dump.get_tensors("while/Identity", 0, "DebugIdentity")
    self.assertEqual(10, len(while_id_tensors))
    for k in xrange(len(while_id_tensors)):
      self.assertAllClose(np.array(k), while_id_tensors[k])

    # Verify ascending timestamps from the while loops.
    while_id_rel_timestamps = dump.get_rel_timestamps("while/Identity", 0,
                                                      "DebugIdentity")
    self.assertEqual(10, len(while_id_rel_timestamps))
    prev_rel_time = 0
    for rel_time in while_id_rel_timestamps:
      self.assertGreaterEqual(rel_time, prev_rel_time)
      prev_rel_time = rel_time

    # Test querying debug watch keys from node name.
    watch_keys = dump.debug_watch_keys("while/Identity")
    self.assertEqual(["while/Identity:0:DebugIdentity"], watch_keys)

    # Test querying debug datum instances from debug watch key.
    self.assertEqual(10, len(dump.watch_key_to_data(watch_keys[0])))
    self.assertEqual([], dump.watch_key_to_data("foo"))
def testDumpStringTensorsToFileSystem(self):
  with session.Session() as sess:
    str1_init_val = np.array(b"abc")
    str2_init_val = np.array(b"def")

    str1_init = constant_op.constant(str1_init_val)
    str2_init = constant_op.constant(str2_init_val)

    str1_name = "str1"
    str2_name = "str2"
    str1 = variables.Variable(str1_init, name=str1_name)
    str2 = variables.Variable(str2_init, name=str2_name)
    # Concatenate str1 and str2.
    str_concat = math_ops.add(str1, str2, name="str_concat")

    str1.initializer.run()
    str2.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_urls = self._debug_urls()

    # Add debug tensor watch for str1.
    debug_utils.add_debug_tensor_watch(run_options, "%s/read" % str1_name, 0,
                                       debug_urls=debug_urls)
    # Add debug tensor watch for str2.
    debug_utils.add_debug_tensor_watch(run_options, "%s/read" % str2_name, 0,
                                       debug_urls=debug_urls)

    run_metadata = config_pb2.RunMetadata()
    sess.run(str_concat, options=run_options, run_metadata=run_metadata)

    # String ops are located on CPU.
    self.assertEqual(1, len(run_metadata.partition_graphs))

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    self.assertIn(str1_name, dump.nodes())
    self.assertIn(str2_name, dump.nodes())

    self.assertEqual(2, dump.size)

    self.assertEqual([str1_init_val],
                     dump.get_tensors("%s/read" % str1_name, 0,
                                      "DebugIdentity"))
    self.assertEqual([str2_init_val],
                     dump.get_tensors("%s/read" % str2_name, 0,
                                      "DebugIdentity"))

    self.assertGreaterEqual(
        dump.get_rel_timestamps("%s/read" % str1_name, 0,
                                "DebugIdentity")[0], 0)
    self.assertGreaterEqual(
        dump.get_rel_timestamps("%s/read" % str2_name, 0,
                                "DebugIdentity")[0], 0)
def main():
    mkdir_('./tmp/')
    mkdir_('./weights/')
    mkdir_('./data/')
    zipfile.ZipFile('data.zip').extractall('./data')

    width, height = 500, 500
    '''
    batch : number of images trained in one step
    lr    : learning rate used for training
    k     : size of the convolution kernel
    block : number of blocks used in the network model
    ch    : number of channels in each layer
    patch : size of the small patches extracted from one large image
    Epo   : number of passes (epochs) over the training data
    '''
    batch = 32
    lr = 1e-4
    k = 3
    block = 3
    ch = 64
    patch = 64
    Epo = 200
    # Loss type: one of MSE, MAE, or CHA.
    loss_type = 'MSE'

    NumImg = len(os.listdir('./data'))
    NumValid = int(0.2 * NumImg // 2)
    NumTrain = int(0.8 * NumImg // 2)

    tot, scat = [], []
    for i in range(NumImg // 2):
        img = imgload('./data/input%4.4d.raw' % i)
        lab = imgload('./data/label%4.4d.raw' % i)
        v_max = np.max(img)
        v_min = np.min(img)
        img, _, _ = MinMaxNorm(img, v_max, v_min, [width, height])
        lab, _, _ = MinMaxNorm(lab, v_max, v_min, [width, height])
        tot.extend(patch_ext(img, patch))
        scat.extend(patch_ext(lab, patch))

    # Recompute the split sizes in units of patches rather than images.
    NumTrain = int(0.8 * np.shape(tot)[0])
    NumValid = int(0.2 * np.shape(tot)[0])

    idx_train, idx_valid = [], []
    for i in range(NumTrain // batch):
        idx_train.append(i)
    for i in range(NumValid // batch):
        idx_valid.append(NumTrain // batch + i)

    x = tf.placeholder(tf.float32, shape=[batch, patch, patch])
    y = tf.placeholder(tf.float32, shape=[batch, patch, patch])
    x_ = tf.reshape(x, [batch, patch, patch, 1])
    y_ = tf.reshape(y, [batch, patch, patch, 1])

    ######### Network model #########
    net = Network()
    tensor = net.conv2d(x_, [k, k, ch], 'LReLU')
    tensor1 = tensor
    for i in range(block):
        tensor_ = net.conv2d(tensor, [k, k, ch], 'LReLU')
        tensor = net.conv2d(tensor_, [k, k, ch], 'LReLU')
        tensor = net.conv2d(tensor, [k, k, ch], 'LReLU')
        tensor = net.skip_connect(tensor, tensor_)
    tensor = net.skip_connect(tensor, tensor1)
    output = net.conv2d(tensor, [k, k, 1], 'Linear')
    #################################

    tot_loss = net.loss_cal(output, y_, loss_type)
    opt = tf.train.AdamOptimizer(lr).minimize(tot_loss)

    saver = tf.train.Saver(tf.global_variables())
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        start = time.time()
        mkdir_('./weights/error')
        for epoch in range(Epo):
            shuffle(idx_train)
            shuffle(idx_valid)

            E_t = open('./weights/error/E_t' + str(epoch) + '.txt', 'w')
            for idx in range(NumTrain // batch):
                Input = tot[idx_train[idx] * batch:idx_train[idx] * batch + batch]
                Label = scat[idx_train[idx] * batch:idx_train[idx] * batch + batch]
                _, l = sess.run(
                    [opt, tot_loss],
                    feed_dict={x: Input, y: Label},
                    options=config_pb2.RunOptions(
                        report_tensor_allocations_upon_oom=True))
                e = "%0.8f\n" % l
                E_t.write(e)
                end = time.time()
                print("[Epoch %2d (%6d/%d)] loss %.7f\t %.2f sec" %
                      (epoch, idx, NumTrain // batch, l, end - start))
            E_t.close()

            E_v = open('./weights/error/E_v' + str(epoch) + '.txt', 'w')
            for idx in range(NumValid // batch):
                Input = tot[idx_valid[idx] * batch:idx_valid[idx] * batch + batch]
                Label = scat[idx_valid[idx] * batch:idx_valid[idx] * batch + batch]
                l = sess.run(tot_loss, feed_dict={x: Input, y: Label})
                e = "%0.8f\n" % l
                E_v.write(e)
                end = time.time()
                print("[Epoch %2d (%6d/%d)] loss %.7f\t %.2f sec" %
                      (epoch, idx, NumValid // batch, l, end - start))
            E_v.close()

            saver.save(sess, './tmp/weights.ckpt')
            if os.path.isdir('./weights/' + str(epoch + 1)):
                shutil.rmtree('./weights/' + str(epoch + 1))
            shutil.copytree('./tmp', './weights/' + str(epoch + 1))
        sess.close()

    tf.reset_default_graph()
    cuda.select_device(0)
    cuda.close()
def testMinOption(self):
  ops.reset_default_graph()

  def check_min(nodes, mm=0, mam=0, mcm=0, mb=0, mpb=0, mrb=0, mob=0):
    for n in nodes:
      if mm > 0:
        self.assertGreaterEqual(n.exec_micros, mm)
      if mam > 0:
        self.assertGreaterEqual(n.accelerator_exec_micros, mam)
      if mcm > 0:
        self.assertGreaterEqual(n.cpu_exec_micros, mcm)
      if mb > 0:
        self.assertGreaterEqual(n.requested_bytes, mb)
      if mpb > 0:
        self.assertGreaterEqual(n.peak_bytes, mpb)
      if mrb > 0:
        self.assertGreaterEqual(n.residual_bytes, mrb)
      if mob > 0:
        self.assertGreaterEqual(n.output_bytes, mob)
      check_min(n.children, mm, mam, mcm, mb, mpb, mrb, mob)

  with session.Session(config=self._no_rewrite_session_config()) as sess:
    x = lib.BuildSmallModel()
    self.evaluate(variables.global_variables_initializer())
    run_meta = config_pb2.RunMetadata()
    _ = sess.run(x,
                 options=config_pb2.RunOptions(
                     trace_level=config_pb2.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)

    min_val = random.randint(0, 10000)

    opts = builder(builder.time_and_memory(min_micros=min_val)
                  ).with_empty_output().build()
    tfprof_node = model_analyzer.profile(sess.graph, run_meta=run_meta,
                                         options=opts)
    check_min(tfprof_node.children, mm=min_val)

    opts = builder(builder.time_and_memory(min_accelerator_micros=min_val)
                  ).with_empty_output().build()
    tfprof_node = model_analyzer.profile(sess.graph, run_meta=run_meta,
                                         options=opts)
    check_min(tfprof_node.children, mam=min_val)

    opts = builder(builder.time_and_memory(min_cpu_micros=min_val)
                  ).with_empty_output().build()
    tfprof_node = model_analyzer.profile(sess.graph, run_meta=run_meta,
                                         options=opts)
    check_min(tfprof_node.children, mcm=min_val)

    opts = builder(builder.time_and_memory(min_bytes=min_val)
                  ).with_empty_output().build()
    tfprof_node = model_analyzer.profile(sess.graph, run_meta=run_meta,
                                         options=opts)
    check_min(tfprof_node.children, mb=min_val)

    opts = builder(builder.time_and_memory(min_peak_bytes=min_val)
                  ).with_empty_output().build()
    tfprof_node = model_analyzer.profile(sess.graph, run_meta=run_meta,
                                         options=opts)
    check_min(tfprof_node.children, mpb=min_val)

    opts = builder(builder.time_and_memory(min_residual_bytes=min_val)
                  ).with_empty_output().build()
    tfprof_node = model_analyzer.profile(sess.graph, run_meta=run_meta,
                                         options=opts)
    check_min(tfprof_node.children, mrb=min_val)

    opts = builder(builder.time_and_memory(min_output_bytes=min_val)
                  ).with_empty_output().build()
    tfprof_node = model_analyzer.profile(sess.graph, run_meta=run_meta,
                                         options=opts)
    check_min(tfprof_node.children, mob=min_val)
def testWatchingUnconnectedOutputTensor(self):
  """Watch an output slot not emitting any edges.

  (Not even control edges from the node.)
  """
  with session.Session() as sess:
    x_init = constant_op.constant([2, 2, 3, 5, 5])
    x = variables.Variable(x_init, name="unconnected/x")

    # The UniqueOp (tf.unique) has two output slots. Use only slot 0 in the
    # graph. Let the debugger watch the unused slot 1.
    unique_x, _ = tf.unique(x, name="unconnected/unique_x")
    y = tf.add(unique_x, [0, 1, 2], name="unconnected/y")

    x.initializer.run()

    # Verify that only slot 0 of unique_x has recipients, while slot 1 of
    # the same node does not have recipients.
    unique_x_slot_0_recipients = []
    unique_x_slot_1_recipients = []
    for op in sess.graph.get_operations():
      for inp in op.inputs:
        if inp.name == "unconnected/unique_x:0":
          unique_x_slot_0_recipients.append(op.name)
        elif inp.name == "unconnected/unique_x:1":
          unique_x_slot_1_recipients.append(op.name)

    self.assertEqual(["unconnected/y"], unique_x_slot_0_recipients)
    self.assertEqual([], unique_x_slot_1_recipients)

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(run_options,
                            sess.graph,
                            debug_ops=["DebugIdentity"],
                            debug_urls=self._debug_urls())

    run_metadata = config_pb2.RunMetadata()
    result = sess.run(y, options=run_options, run_metadata=run_metadata)
    self.assertAllClose([2, 4, 7], result)

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    # Assert that the connected slot (slot 0) is dumped properly.
    unique_x_slot_0_dumps = dump.watch_key_to_data(
        "unconnected/unique_x:0:DebugIdentity")
    self.assertEqual(1, len(unique_x_slot_0_dumps))
    self.assertEqual("unconnected/unique_x",
                     unique_x_slot_0_dumps[0].node_name)
    self.assertEqual(0, unique_x_slot_0_dumps[0].output_slot)
    self.assertAllClose([2, 3, 5], unique_x_slot_0_dumps[0].get_tensor())

    # Assert that the unconnected slot (slot 1) is dumped properly.
    unique_x_slot_1_dumps = dump.watch_key_to_data(
        "unconnected/unique_x:1:DebugIdentity")
    self.assertEqual(1, len(unique_x_slot_1_dumps))
    self.assertEqual("unconnected/unique_x",
                     unique_x_slot_1_dumps[0].node_name)
    self.assertEqual(1, unique_x_slot_1_dumps[0].output_slot)
    self.assertAllClose([0, 0, 1, 2, 2],
                        unique_x_slot_1_dumps[0].get_tensor())
def run_op_benchmark(self,
                     sess,
                     op_or_tensor,
                     feed_dict=None,
                     burn_iters=2,
                     min_iters=10,
                     store_trace=False,
                     store_memory_usage=True,
                     name=None,
                     extras=None,
                     mbs=0):
  """Run an op or tensor in the given session. Report the results.

  Args:
    sess: `Session` object to use for timing.
    op_or_tensor: `Operation` or `Tensor` to benchmark.
    feed_dict: A `dict` of values to feed for each op iteration (see the
      `feed_dict` parameter of `Session.run`).
    burn_iters: Number of burn-in iterations to run.
    min_iters: Minimum number of iterations to use for timing.
    store_trace: Boolean, whether to run an extra untimed iteration and
      store the trace of that iteration in the returned extras. The trace
      will be stored as a string in Google Chrome trace format in the
      extras field "full_trace_chrome_format". Note that the trace will not
      be stored in the test_log_pb2.TestResults proto.
    store_memory_usage: Boolean, whether to run an extra untimed iteration,
      calculate memory usage, and store that in extras fields.
    name: (optional) Override the BenchmarkEntry name with `name`.
      Otherwise it is inferred from the top-level method name.
    extras: (optional) Dict mapping string keys to additional benchmark
      info. Values may be either floats or values that are convertible to
      strings.
    mbs: (optional) The number of megabytes moved by this op, used to
      calculate the ops throughput.

  Returns:
    A `dict` containing the key-value pairs that were passed to
    `report_benchmark`. If the `store_trace` option is used, then
    `full_chrome_trace_format` will be included in the return dictionary
    even though it is not passed to `report_benchmark` with `extras`.
  """
  for _ in range(burn_iters):
    sess.run(op_or_tensor, feed_dict=feed_dict)

  deltas = [None] * min_iters
  for i in range(min_iters):
    start_time = time.time()
    sess.run(op_or_tensor, feed_dict=feed_dict)
    end_time = time.time()
    delta = end_time - start_time
    deltas[i] = delta

  extras = extras if extras is not None else {}
  unreported_extras = {}
  if store_trace or store_memory_usage:
    run_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_metadata = config_pb2.RunMetadata()
    sess.run(op_or_tensor,
             feed_dict=feed_dict,
             options=run_options,
             run_metadata=run_metadata)
    tl = timeline.Timeline(run_metadata.step_stats)

    if store_trace:
      unreported_extras["full_trace_chrome_format"] = (
          tl.generate_chrome_trace_format())

    if store_memory_usage:
      step_stats_analysis = tl.analyze_step_stats(show_memory=True)
      allocator_maximums = step_stats_analysis.allocator_maximums
      for k, v in allocator_maximums.items():
        extras["allocator_maximum_num_bytes_%s" % k] = v.num_bytes

  def _median(x):
    if not x:
      return -1
    s = sorted(x)
    l = len(x)
    lm1 = l - 1
    return (s[l // 2] + s[lm1 // 2]) / 2.0

  median_delta = _median(deltas)

  benchmark_values = {
      "iters": min_iters,
      "wall_time": median_delta,
      "extras": extras,
      "name": name,
      "throughput": mbs / median_delta
  }
  self.report_benchmark(**benchmark_values)
  benchmark_values["extras"].update(unreported_extras)
  return benchmark_values
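For context, a hedged usage sketch of run_op_benchmark inside a tf.test.Benchmark subclass; the class name, graph, matrix shape, and mbs arithmetic are illustrative assumptions, not taken from the original source.

import tensorflow as tf


class MatmulBenchmark(tf.test.Benchmark):
  """Illustrative benchmark; names and shapes are assumptions."""

  def benchmarkMatmul1024(self):
    with tf.Graph().as_default(), tf.Session() as sess:
      a = tf.random_normal([1024, 1024])
      product = tf.matmul(a, a)
      # mbs: assumed megabytes moved per run, counting the two inputs and
      # one output as float32 matrices of 1024x1024.
      self.run_op_benchmark(sess,
                            product,
                            min_iters=25,
                            store_trace=True,
                            mbs=3 * 1024 * 1024 * 4 / 1e6,
                            name="matmul_1024")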
def testDumpToFileOverlappingParentDir(self):
  with session.Session() as sess:
    u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
    v_init_val = np.array([[2.0], [-1.0]])

    # Use node names with overlapping namespace (i.e., parent directory) to
    # test concurrent, non-racing directory creation.
    u_name = "testDumpToFile/u"
    v_name = "testDumpToFile/v"

    u_init = constant_op.constant(u_init_val, shape=[2, 2])
    u = variables.Variable(u_init, name=u_name)
    v_init = constant_op.constant(v_init_val, shape=[2, 1])
    v = variables.Variable(v_init, name=v_name)

    w = math_ops.matmul(u, v, name="testDumpToFile/matmul")

    u.initializer.run()
    v.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_urls = "file://%s" % self._dump_root

    # Add debug tensor watch for u.
    debug_utils.add_debug_tensor_watch(run_options, "%s/read" % u_name, 0,
                                       debug_urls=debug_urls)
    # Add debug tensor watch for v.
    debug_utils.add_debug_tensor_watch(run_options, "%s/read" % v_name, 0,
                                       debug_urls=debug_urls)

    run_metadata = config_pb2.RunMetadata()

    # Invoke Session.run().
    sess.run(w, options=run_options, run_metadata=run_metadata)

    self.assertEqual(self._expected_partition_graph_count,
                     len(run_metadata.partition_graphs))

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)
    self.assertTrue(dump.loaded_partition_graphs())

    # Verify the dumped tensor values for u and v.
    self.assertEqual(2, dump.size)

    self.assertAllClose([u_init_val],
                        dump.get_tensors("%s/read" % u_name, 0,
                                         "DebugIdentity"))
    self.assertAllClose([v_init_val],
                        dump.get_tensors("%s/read" % v_name, 0,
                                         "DebugIdentity"))

    self.assertGreaterEqual(
        dump.get_rel_timestamps("%s/read" % u_name, 0,
                                "DebugIdentity")[0], 0)
    self.assertGreaterEqual(
        dump.get_rel_timestamps("%s/read" % v_name, 0,
                                "DebugIdentity")[0], 0)
def run_op_benchmark(self,
                     sess,
                     op_or_tensor,
                     feed_dict=None,
                     burn_iters=2,
                     min_iters=10,
                     store_trace=False,
                     name=None,
                     extras=None,
                     mbs=0):
  """Run an op or output in the given session. Report the results.

  Args:
    sess: `Session` object to use for timing.
    op_or_tensor: `Operation` or `Output` to benchmark.
    feed_dict: A `dict` of values to feed for each op iteration (see the
      `feed_dict` parameter of `Session.run`).
    burn_iters: Number of burn-in iterations to run.
    min_iters: Minimum number of iterations to use for timing.
    store_trace: Boolean, whether to run an extra untimed iteration and
      store the trace of that iteration in the benchmark report. The trace
      will be stored as a string in Google Chrome trace format in the
      extras field "full_trace_chrome_format".
    name: (optional) Override the BenchmarkEntry name with `name`.
      Otherwise it is inferred from the top-level method name.
    extras: (optional) Dict mapping string keys to additional benchmark
      info. Values may be either floats or values that are convertible to
      strings.
    mbs: (optional) The number of megabytes moved by this op, used to
      calculate the ops throughput.

  Returns:
    A `dict` containing the key-value pairs that were passed to
    `report_benchmark`.
  """
  for _ in range(burn_iters):
    sess.run(op_or_tensor, feed_dict=feed_dict)

  deltas = [None] * min_iters
  for i in range(min_iters):
    start_time = time.time()
    sess.run(op_or_tensor, feed_dict=feed_dict)
    end_time = time.time()
    delta = end_time - start_time
    deltas[i] = delta

  extras = extras if extras is not None else {}
  if store_trace:
    run_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_metadata = config_pb2.RunMetadata()
    sess.run(op_or_tensor,
             feed_dict=feed_dict,
             options=run_options,
             run_metadata=run_metadata)
    tl = timeline.Timeline(run_metadata.step_stats)
    extras["full_trace_chrome_format"] = tl.generate_chrome_trace_format()

  def _median(x):
    if not x:
      return -1
    s = sorted(x)
    l = len(x)
    lm1 = l - 1
    return (s[l // 2] + s[lm1 // 2]) / 2.0

  median_delta = _median(deltas)

  benchmark_values = {
      "iters": min_iters,
      "wall_time": median_delta,
      "extras": extras,
      "name": name,
      "throughput": mbs / median_delta
  }
  self.report_benchmark(**benchmark_values)
  return benchmark_values
def _run_with_debugging(self,
                        run_start_resp,
                        fetches,
                        feed_dict,
                        options,
                        run_metadata,
                        callable_runner,
                        callable_runner_args,
                        callable_options):
  """Perform a session.run() or callable with debugging."""
  # Decorate RunOptions to fill in debugger tensor watch specifications.
  decorated_run_options = None
  if callable_options:
    callable_options_id = id(callable_options)
    if callable_options_id not in self._cached_callables_from_options:
      # Make a copy of callable_options to avoid mutating it.
      new_callable_options = config_pb2.CallableOptions()
      new_callable_options.CopyFrom(callable_options)
      decorated_run_options = new_callable_options.run_options
  else:
    decorated_run_options = options or config_pb2.RunOptions()

  run_metadata = run_metadata or config_pb2.RunMetadata()

  if decorated_run_options:
    self._decorate_run_options_for_debug(
        decorated_run_options,
        run_start_resp.debug_urls,
        debug_ops=run_start_resp.debug_ops,
        node_name_regex_whitelist=(
            run_start_resp.node_name_regex_whitelist),
        op_type_regex_whitelist=run_start_resp.op_type_regex_whitelist,
        tensor_dtype_regex_whitelist=(
            run_start_resp.tensor_dtype_regex_whitelist),
        tolerate_debug_op_creation_failures=(
            run_start_resp.tolerate_debug_op_creation_failures))

  # Invoke the run() method of the wrapped Session. Catch any TensorFlow
  # runtime errors.
  tf_error = None
  try:
    if callable_runner:
      retvals = callable_runner(*callable_runner_args,
                                options=decorated_run_options,
                                run_metadata=run_metadata)
    elif callable_options:
      # pylint:disable=protected-access
      if callable_options_id in self._cached_callables_from_options:
        callable_object = self._cached_callables_from_options[
            callable_options_id]
      else:
        callable_object = self._sess._make_callable_from_options(
            new_callable_options)
        self._cached_callables_from_options[
            callable_options_id] = callable_object
      # pylint:enable=protected-access
      retvals = callable_object(*callable_runner_args,
                                run_metadata=run_metadata)
    else:
      retvals = self._sess.run(fetches,
                               feed_dict=feed_dict,
                               options=decorated_run_options,
                               run_metadata=run_metadata)
  except errors.OpError as op_error:
    if self._pass_through_operrors:
      raise op_error
    tf_error = op_error
    retvals = op_error

  return retvals, OnRunEndRequest(
      run_start_resp.action,
      run_metadata=run_metadata,
      client_graph_def=self._sess.graph.as_graph_def(),
      tf_error=tf_error)
def testToggleBreakpointsWorks(self):
  with session.Session(
      config=session_debug_testlib.no_rewrite_session_config()) as sess:
    v_1 = variables.VariableV1(50.0, name="v_1")
    v_2 = variables.VariableV1(-50.0, name="v_2")
    delta_1 = constant_op.constant(5.0, name="delta_1")
    delta_2 = constant_op.constant(-5.0, name="delta_2")
    inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
    inc_v_2 = state_ops.assign_add(v_2, delta_2, name="inc_v_2")

    sess.run([v_1.initializer, v_2.initializer])

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity(gated_grpc=true)"],
        debug_urls=[self._debug_server_url_1])

    for i in xrange(4):
      self._server_1.clear_data()

      if i in (0, 2):
        # Enable breakpoint at delta_[1,2]:0:DebugIdentity in runs 0 and 2.
        self._server_1.request_watch(
            "delta_1", 0, "DebugIdentity", breakpoint=True)
        self._server_1.request_watch(
            "delta_2", 0, "DebugIdentity", breakpoint=True)
      else:
        # Disable the breakpoint in runs 1 and 3.
        self._server_1.request_unwatch("delta_1", 0, "DebugIdentity")
        self._server_1.request_unwatch("delta_2", 0, "DebugIdentity")

      output = sess.run([inc_v_1, inc_v_2],
                        options=run_options, run_metadata=run_metadata)
      self.assertAllClose([50.0 + 5.0 * (i + 1), -50 - 5.0 * (i + 1)],
                          output)

      if i in (0, 2):
        # During runs 0 and 2, the server should have received the
        # published debug tensors delta_[1,2]:0:DebugIdentity. The
        # breakpoints should have been unblocked by EventReply responses
        # from the server.
        self.assertAllClose(
            [5.0],
            self._server_1.debug_tensor_values["delta_1:0:DebugIdentity"])
        self.assertAllClose(
            [-5.0],
            self._server_1.debug_tensor_values["delta_2:0:DebugIdentity"])

        # After the runs, the server should have properly registered the
        # breakpoints due to the request_watch calls with breakpoint=True.
        self.assertSetEqual({("delta_1", 0, "DebugIdentity"),
                             ("delta_2", 0, "DebugIdentity")},
                            self._server_1.breakpoints)
      else:
        # After the end of runs 1 and 3, the server has received the
        # requests to disable the breakpoints at
        # delta_[1,2]:0:DebugIdentity.
        self.assertSetEqual(set(), self._server_1.breakpoints)
def testDistributedRunWithGatedGrpcCommunicatesWithDebugServerCorrectly(
    self):
  graph = self._createGraph()
  with session.Session(config=self.session_config,
                       graph=graph,
                       target=self.server_target) as sess:
    sess.run(self.a.initializer)
    sess.run(self.b.initializer)

    run_options = config_pb2.RunOptions()
    debug_utils.watch_graph(run_options,
                            sess.graph,
                            node_name_regex_whitelist=r"a",
                            debug_ops=["DebugIdentity"],
                            debug_urls=[self.debug_server_url])

    # Test gated_grpc for an op located on the worker, i.e., on the same
    # host as where MasterSession is.
    # TODO(cais): gRPC gating of debug ops does not work on partition graphs
    # not located on MasterSession hosts (e.g., parameter servers) yet. Make
    # it work.
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        node_name_regex_whitelist=r"p",
        debug_ops=["DebugIdentity(gated_grpc=True)"],
        debug_urls=[self.debug_server_url])

    for i in xrange(4):
      if i % 2 == 0:
        self.debug_server.request_watch("p", 0, "DebugIdentity")
      else:
        self.debug_server.request_unwatch("p", 0, "DebugIdentity")

      expected_p = (10.0 + 2.0 * (i + 1)) * (100.0 - 5.0 * (i + 1))
      self.assertAllClose(-expected_p,
                          sess.run(self.q, options=run_options))

      self.assertEqual(1,
                       len(self.debug_server.core_metadata_json_strings))
      core_metadata = json.loads(
          self.debug_server.core_metadata_json_strings[0])
      self.assertEqual([], core_metadata["input_names"])
      self.assertEqual(["q:0"], core_metadata["output_names"])
      self.assertEqual(i, core_metadata["executor_step_index"])

      if i == 0:
        self.assertEqual(1, len(self.debug_server.partition_graph_defs))

      # Tensor "a" is from a PS. It may take longer to arrive due to the
      # fact that the stream connection between the PS and the debug server
      # is persistent and not torn down at the end of each Session.run().
      self._pollingAssertDebugTensorValuesAllClose([10.0 + 2.0 * i],
                                                   "a:0:DebugIdentity")

      # Due to the gRPC gating of the debug op for "p", the debug tensor
      # should be available only on even-indexed runs.
      if i % 2 == 0:
        self.assertAllClose(
            [expected_p],
            self.debug_server.debug_tensor_values["p:0:DebugIdentity"])
      else:
        self.assertNotIn("p:0:DebugIdentity",
                         self.debug_server.debug_tensor_values)

      self.assertNotIn("b:0:DebugIdentity",
                       self.debug_server.debug_tensor_values)
      self.debug_server.clear_data()
def testClusterSpecPropagationThreeServersOneCluster(self):
  """Boots 3 servers, ensures appropriate communication across workers.

  Additionally, in this cluster, we ensure the master is not the 0-th
  worker.

  Note: this test only uses one session.
  """
  server1 = server_lib.Server.create_local_server()
  server2 = server_lib.Server.create_local_server()
  server3 = server_lib.Server.create_local_server()
  cluster_def = cluster_pb2.ClusterDef()
  job = cluster_def.job.add()
  job.name = 'worker'
  job.tasks[0] = server3.target[len('grpc://'):]
  job.tasks[1] = server2.target[len('grpc://'):]
  job.tasks[2] = server1.target[len('grpc://'):]
  config = config_pb2.ConfigProto(cluster_def=cluster_def)

  # Add ops to the devices in non-linear order.
  with ops.device('/job:worker/task:1'):
    feed1 = array_ops.placeholder(dtypes.float32, shape=(2))
    const1 = constant_op.constant(2.0)
    mul1 = const1 * feed1

  with ops.device('/job:worker/task:2'):
    feed2 = array_ops.placeholder(dtypes.float32, shape=(2))
    const2 = constant_op.constant(2.0)
    mul2 = const2 * feed2

  with ops.device('/job:worker/task:0'):
    feed0 = array_ops.placeholder(dtypes.float32, shape=(2))
    const0 = constant_op.constant(2.0)
    mul0 = const0 * feed0

  sum_op = mul0 + mul1 + mul2

  ones = np.ones([2])
  run_options = config_pb2.RunOptions(
      trace_level=config_pb2.RunOptions.FULL_TRACE)
  run_metadata = config_pb2.RunMetadata()

  # Run!
  with session.Session(server1.target, config=config) as sess:
    output = sess.run(sum_op,
                      options=run_options,
                      run_metadata=run_metadata,
                      feed_dict={feed1: ones, feed2: ones, feed0: ones})
  self.assertAllEqual(6 * ones, output)

  self.assertEqual(
      3,
      len([
          dev_stats.device
          for dev_stats in run_metadata.step_stats.dev_stats
          for node_stats in dev_stats.node_stats
          if '/job:worker/replica:0/task:' in dev_stats.device and
          node_stats.node_name.startswith('Const')
      ]), run_metadata)
def train_step(sess, train_op, global_step, train_step_kwargs):
  start_time = time.time()

  trace_run_options = None
  run_metadata = None
  if 'should_trace' in train_step_kwargs:
    if 'logdir' not in train_step_kwargs:
      raise ValueError('logdir must be present in train_step_kwargs when '
                       'should_trace is present')
    if sess.run(train_step_kwargs['should_trace']):
      trace_run_options = config_pb2.RunOptions(
          trace_level=config_pb2.RunOptions.FULL_TRACE)
      run_metadata = config_pb2.RunMetadata()

  np_global_step = sess.run(global_step)
  if np_global_step % FLAGS.update_mask_step == 0:
    K.set_learning_phase(False)
    gdp_mask_taylor = train_step_kwargs['gdp_mask_taylor']
    network_filters = []
    mean_tvs = [np.zeros(g[1].shape[0]) for g in gdp_mask_taylor]

    # Temporarily open all masks so the Taylor values can be measured.
    for i in range(len(gdp_mask_taylor)):
      mask_index = sess.graph.get_tensor_by_name(
          gdp_mask_taylor[i][0].name.replace('mask', 'mask_index'))
      mask_value = sess.graph.get_tensor_by_name(
          gdp_mask_taylor[i][0].name.replace('mask', 'mask_value'))
      mask_update = sess.graph.get_operation_by_name(
          gdp_mask_taylor[i][0].name.replace('mask', 'mask_update')[:-2])
      for j in range(gdp_mask_taylor[i][0].shape[0]):
        sess.run(mask_update, feed_dict={mask_index: j, mask_value: 1})

    # Average the Taylor values over FLAGS.taylor_step batches.
    for i in range(FLAGS.taylor_step):
      tvs = sess.run([mt[1] for mt in gdp_mask_taylor])
      for g in range(len(gdp_mask_taylor)):
        mean_tvs[g] += tvs[g]
    for g in range(len(gdp_mask_taylor)):
      mean_tvs[g] /= FLAGS.taylor_step

    for t, mt in enumerate(gdp_mask_taylor):
      for i in range(mean_tvs[t].shape[0]):
        ft = dict()
        ft['mask'] = mt[0]
        ft['index'] = i
        # Rank filters by the averaged Taylor value rather than the last
        # sampled batch.
        ft['tv'] = mean_tvs[t][i]
        network_filters.append(ft)
    network_filters_sorted = sorted(network_filters, key=lambda a: -a['tv'])

    # Keep the top beta fraction of filters; mask out the rest.
    for i, ft in enumerate(network_filters_sorted):
      mask_index = sess.graph.get_tensor_by_name(
          ft['mask'].name.replace('mask', 'mask_index'))
      mask_value = sess.graph.get_tensor_by_name(
          ft['mask'].name.replace('mask', 'mask_value'))
      mask_update = sess.graph.get_operation_by_name(
          ft['mask'].name.replace('mask', 'mask_update')[:-2])
      if i < int(FLAGS.beta * len(network_filters)):
        sess.run(mask_update,
                 feed_dict={mask_index: ft['index'], mask_value: 1})
      else:
        sess.run(mask_update,
                 feed_dict={mask_index: ft['index'], mask_value: 0})

    layer_compression_ratio = tf.get_collection('LAYER_COMPRESSION_RATIO')
    lrc = sess.run(layer_compression_ratio)
    for i in range(len(layer_compression_ratio)):
      print(layer_compression_ratio[i].name[:-2], lrc[i])
    K.set_learning_phase(True)

  total_loss, np_global_step = sess.run([train_op, global_step],
                                        options=trace_run_options,
                                        run_metadata=run_metadata)
  time_elapsed = time.time() - start_time

  if run_metadata is not None:
    tl = timeline.Timeline(run_metadata.step_stats)
    trace = tl.generate_chrome_trace_format()
    trace_filename = os.path.join(train_step_kwargs['logdir'],
                                  'tf_trace-%d.json' % np_global_step)
    logging.info('Writing trace to %s', trace_filename)
    file_io.write_string_to_file(trace_filename, trace)
    if 'summary_writer' in train_step_kwargs:
      train_step_kwargs['summary_writer'].add_run_metadata(
          run_metadata, 'run_metadata-%d' % np_global_step)

  if 'should_log' in train_step_kwargs:
    if sess.run(train_step_kwargs['should_log']):
      logging.info('global step %d: loss = %.4f (%.3f sec/step)',
                   np_global_step, total_loss, time_elapsed)

  if 'should_stop' in train_step_kwargs:
    should_stop = sess.run(train_step_kwargs['should_stop'])
  else:
    should_stop = False

  return total_loss, should_stop
def run(self, fetches, feed_dict=None, options=None, run_metadata=None):
  """Wrapper around Session.run() that inserts tensor watch options.

  Args:
    fetches: Same as the `fetches` arg to regular `Session.run()`.
    feed_dict: Same as the `feed_dict` arg to regular `Session.run()`.
    options: Same as the `options` arg to regular `Session.run()`.
    run_metadata: Same as the `run_metadata` arg to regular `Session.run()`.

  Returns:
    Simply forwards the output of the wrapped `Session.run()` call.

  Raises:
    ValueError: On invalid `OnRunStartAction` value.
  """
  self._run_call_count += 1

  # Invoke on-run-start callback and obtain response.
  run_start_resp = self.on_run_start(
      OnRunStartRequest(fetches, feed_dict, options, run_metadata,
                        self._run_call_count))
  _check_type(run_start_resp, OnRunStartResponse)

  if run_start_resp.action == OnRunStartAction.DEBUG_RUN:
    # Decorate RunOptions to fill in debugger tensor watch specifications.
    decorated_run_options = options or config_pb2.RunOptions()
    run_metadata = run_metadata or config_pb2.RunMetadata()

    self._decorate_run_options(
        decorated_run_options,
        run_start_resp.debug_urls,
        debug_ops=run_start_resp.debug_ops,
        node_name_regex_whitelist=run_start_resp.node_name_regex_whitelist,
        op_type_regex_whitelist=run_start_resp.op_type_regex_whitelist)

    # Invoke the run() method of the wrapped Session. Catch any TensorFlow
    # runtime errors.
    tf_error = None
    try:
      retvals = self._sess.run(fetches,
                               feed_dict=feed_dict,
                               options=decorated_run_options,
                               run_metadata=run_metadata)
    except errors.OpError as op_error:
      tf_error = op_error
      retvals = op_error

    run_end_req = OnRunEndRequest(
        run_start_resp.action,
        run_metadata=run_metadata,
        client_graph_def=self._sess.graph.as_graph_def(),
        tf_error=tf_error)

  elif (run_start_resp.action == OnRunStartAction.NON_DEBUG_RUN or
        run_start_resp.action == OnRunStartAction.INVOKE_STEPPER):
    if run_start_resp.action == OnRunStartAction.INVOKE_STEPPER:
      with stepper.NodeStepper(
          self._sess, fetches, feed_dict) as node_stepper:
        retvals = self.invoke_node_stepper(
            node_stepper, restore_variable_values_on_exit=True)
    else:
      # Invoke run() method of the wrapped session.
      retvals = self._sess.run(fetches,
                               feed_dict=feed_dict,
                               options=options,
                               run_metadata=run_metadata)

    # Prepare arg for the on-run-end callback.
    run_end_req = OnRunEndRequest(run_start_resp.action)

  else:
    raise ValueError("Invalid OnRunStartAction value: %s" %
                     run_start_resp.action)

  # Invoke on-run-end callback and obtain response.
  run_end_resp = self.on_run_end(run_end_req)
  _check_type(run_end_resp, OnRunEndResponse)

  # Currently run_end_resp is only a placeholder. No action is taken on it.
  return retvals
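This run() override is what lets a debug-wrapper session stand in for a plain Session. A brief usage sketch with tfdbg's local CLI wrapper; the fetch is illustrative:

import tensorflow as tf
from tensorflow.python import debug as tf_debug

sess = tf_debug.LocalCLIDebugWrapperSession(tf.Session())
# The call below goes through the wrapper's run() shown above, which
# decorates RunOptions with debug tensor watches before delegating to the
# wrapped session's run().
sess.run(tf.constant([1.0, 2.0]) * 2.0)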
def testAllowsDifferentWatchesOnDifferentRuns(self):
  """Test watching different tensors on different runs of the same graph."""
  with session.Session() as sess:
    u_init_val = [[5.0, 3.0], [-1.0, 0.0]]
    v_init_val = [[2.0], [-1.0]]

    # Use node names with overlapping namespace (i.e., parent directory) to
    # test concurrent, non-racing directory creation.
    u_name = "diff_Watch/u"
    v_name = "diff_Watch/v"

    u_init = constant_op.constant(u_init_val, shape=[2, 2])
    u = variables.Variable(u_init, name=u_name)
    v_init = constant_op.constant(v_init_val, shape=[2, 1])
    v = variables.Variable(v_init, name=v_name)

    w = math_ops.matmul(u, v, name="diff_Watch/matmul")

    u.initializer.run()
    v.initializer.run()

    for i in range(2):
      run_options = config_pb2.RunOptions(output_partition_graphs=True)

      run_dump_root = self._debug_dump_dir(run_number=i)
      debug_urls = self._debug_urls(run_number=i)

      if i == 0:
        # First debug run: Add debug tensor watch for u.
        debug_utils.add_debug_tensor_watch(run_options, "%s/read" % u_name,
                                           0, debug_urls=debug_urls)
      else:
        # Second debug run: Add debug tensor watch for v.
        debug_utils.add_debug_tensor_watch(run_options, "%s/read" % v_name,
                                           0, debug_urls=debug_urls)

      run_metadata = config_pb2.RunMetadata()

      # Invoke Session.run().
      sess.run(w, options=run_options, run_metadata=run_metadata)

      self.assertEqual(self._expected_partition_graph_count,
                       len(run_metadata.partition_graphs))

      dump = debug_data.DebugDumpDir(
          run_dump_root, partition_graphs=run_metadata.partition_graphs)
      self.assertTrue(dump.loaded_partition_graphs())

      # Each run should have generated only one dumped tensor, not two.
      self.assertEqual(1, dump.size)

      if i == 0:
        self.assertAllClose([u_init_val],
                            dump.get_tensors("%s/read" % u_name, 0,
                                             "DebugIdentity"))
        self.assertGreaterEqual(
            dump.get_rel_timestamps("%s/read" % u_name, 0,
                                    "DebugIdentity")[0], 0)
      else:
        self.assertAllClose([v_init_val],
                            dump.get_tensors("%s/read" % v_name, 0,
                                             "DebugIdentity"))
        self.assertGreaterEqual(
            dump.get_rel_timestamps("%s/read" % v_name, 0,
                                    "DebugIdentity")[0], 0)