def test_context_manager_with_options(self):
  logdir = self.get_temp_dir()
  options = profiler.ProfilerOptions(
      host_tracer_level=3, python_tracer_level=1)
  with profiler.Profile(logdir, options):
    with trace.Trace('three_times_five'):
      three = constant_op.constant(3)
      five = constant_op.constant(5)
      product = three * five
    self.assertAllEqual(15, product)
  file_list = gfile.ListDirectory(logdir)
  self.assertEqual(len(file_list), 2)
def test_profile(self):
  profiler.start(options={'host_tracer_level': 3})
  with trace.Trace('three_times_five'):
    three = constant_op.constant(3)
    five = constant_op.constant(5)
    product = three * five
  self.assertAllEqual(15, product)
  with self.assertRaises(profiler.ProfilerAlreadyRunningError):
    profiler.start()

  profile_result = profiler.stop()
  profile_pb = trace_events_pb2.Trace()
  profile_pb.ParseFromString(profile_result)
  devices = frozenset(device.name for device in profile_pb.devices.values())
  self.assertIn('/host:CPU', devices)
  if config.list_physical_devices('GPU'):
    self.assertIn('/device:GPU:0', devices)
  events = frozenset(event.name for event in profile_pb.trace_events)
  self.assertIn('three_times_five', events)
  self.assertIn('Mul', events)
  with self.assertRaises(profiler.ProfilerNotRunningError):
    profiler.stop()
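A minimal sketch of the same start/stop pattern through the public TF 2.x profiler API (the tests above exercise the internal `profiler` and `trace` modules directly; the logdir path below is just an example, not taken from the tests).

import tensorflow as tf

logdir = '/tmp/profile_demo'  # example path, not from the tests above
options = tf.profiler.experimental.ProfilerOptions(
    host_tracer_level=3, python_tracer_level=1)
tf.profiler.experimental.start(logdir, options=options)
with tf.profiler.experimental.Trace('three_times_five'):
  product = tf.constant(3) * tf.constant(5)
tf.profiler.experimental.stop()  # writes the collected trace under `logdir`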
def _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast):
  """Implementation of constant."""
  ctx = context.context()
  if ctx.executing_eagerly():
    if trace.enabled:
      with trace.Trace("tf.constant"):
        return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
    return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)

  g = ops.get_default_graph()
  tensor_value = attr_value_pb2.AttrValue()
  tensor_value.tensor.CopyFrom(
      tensor_util.make_tensor_proto(
          value, dtype=dtype, shape=shape, verify_shape=verify_shape,
          allow_broadcast=allow_broadcast))
  dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
  attrs = {"value": tensor_value, "dtype": dtype_value}
  const_tensor = g._create_op_internal(  # pylint: disable=protected-access
      "Const", [], [dtype_value.type], attrs=attrs, name=name).outputs[0]

  if op_callbacks.should_invoke_op_callbacks():
    # TODO(b/147670703): Once the special-op creation code paths are unified,
    # remove this `if` block.
    callback_outputs = op_callbacks.invoke_op_callbacks(
        "Const", tuple(), attrs, (const_tensor,), op_name=name, graph=g)
    if callback_outputs is not None:
      const_tensor, = callback_outputs
  return const_tensor
def fn():
  with trace.Trace("tf.nn.relu-2x2"):
    tf.nn.relu(x)


def fn():
  with trace.Trace("tf.convert_to_tensor-2x2"):
    tf.convert_to_tensor(x)


def fn():
  with trace.Trace("tf.constant-2x2"):
    tf.constant(x)


def fn():
  with trace.Trace("tf.function-identity"):
    identity(x)
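A hedged sketch of how one of the traced closures above might be driven: call it repeatedly while a profiler session is active, so each `Trace` scope appears as a named event in the resulting timeline. The iteration count, logdir, and timing loop below are illustrative and not part of the original benchmark harness.

import time
import tensorflow as tf

x = tf.random.uniform((2, 2))

def fn():
  with tf.profiler.experimental.Trace("tf.nn.relu-2x2"):
    tf.nn.relu(x)

tf.profiler.experimental.start('/tmp/relu_trace')  # example logdir
num_iters = 100
start = time.time()
for _ in range(num_iters):
  fn()
elapsed = time.time() - start
tf.profiler.experimental.stop()
print('mean wall time per call: %.6f s' % (elapsed / num_iters))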
def fit(self,
        x=None,
        y=None,
        batch_size=None,
        epochs=1,
        verbose=1,
        callbacks=None,
        validation_split=0.,
        validation_data=None,
        shuffle=True,
        class_weight=None,
        sample_weight=None,
        initial_epoch=0,
        steps_per_epoch=None,
        validation_steps=None,
        validation_batch_size=None,
        validation_freq=1,
        max_queue_size=10,
        workers=1,
        use_multiprocessing=False):
  """From tf.keras.Model."""
  training._keras_api_gauge.get_cell('fit').set(True)
  # Legacy graph support is contained in `training_v1.Model`.
  version_utils.disallow_legacy_graph('Model', 'fit')
  self._assert_compile_was_called()
  self._check_call_args('fit')
  training._disallow_inside_tf_function('fit')

  if validation_split:
    # Create the validation data using the training data. Only supported for
    # `Tensor` and `NumPy` input.
    (x, y, sample_weight), validation_data = (
        data_adapter.train_validation_split(
            (x, y, sample_weight), validation_split=validation_split))

  if validation_data:
    val_x, val_y, val_sample_weight = (
        data_adapter.unpack_x_y_sample_weight(validation_data))

  with self.distribute_strategy.scope(), \
       training_utils.RespectCompiledTrainableState(self):
    # Creates a `tf.data.Dataset` and handles batch and epoch iteration.
    data_handler = data_adapter.DataHandler(
        x=x,
        y=y,
        sample_weight=sample_weight,
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        initial_epoch=initial_epoch,
        epochs=epochs,
        shuffle=shuffle,
        class_weight=class_weight,
        max_queue_size=max_queue_size,
        workers=workers,
        use_multiprocessing=use_multiprocessing,
        model=self,
        steps_per_execution=self._steps_per_execution)

    # Container that configures and calls `tf.keras.Callback`s.
    if not isinstance(callbacks, callbacks_module.CallbackList):
      callbacks = callbacks_module.CallbackList(
          callbacks,
          add_history=True,
          add_progbar=verbose != 0,
          model=self,
          verbose=verbose,
          epochs=epochs,
          steps=data_handler.inferred_steps)

    self.stop_training = False
    train_function = self.make_train_function()
    self._train_counter.assign(0)
    callbacks.on_train_begin()
    training_logs = None
    # Handle fault-tolerance for multi-worker.
    # TODO(omalleyt): Fix the ordering issues that mean this has to
    # happen after `callbacks.on_train_begin`.
    data_handler._initial_epoch = (  # pylint: disable=protected-access
        self._maybe_load_initial_epoch_from_ckpt(initial_epoch))
    for epoch, iterator in data_handler.enumerate_epochs():
      self.reset_metrics()
      callbacks.on_epoch_begin(epoch)
      with data_handler.catch_stop_iteration():
        if self._update_cycle > 1:
          self._grad_accumulator.reset()
        for step in data_handler.steps():
          with trace.Trace(
              'TraceContext',
              graph_type='train',
              epoch_num=epoch,
              step_num=step,
              batch_size=batch_size):
            callbacks.on_train_batch_begin(step)
            if self._update_cycle > 1:
              for _ in range(self._update_cycle - 1):
                self.accumulate_function(iterator)
            tmp_logs = train_function(iterator)
            if data_handler.should_sync:
              context.async_wait()
            logs = tmp_logs  # No error, now safe to assign to logs.
            end_step = step + data_handler.step_increment
            callbacks.on_train_batch_end(end_step, logs)
      epoch_logs = copy.copy(logs)

      # Run validation.
      if validation_data and self._should_eval(epoch, validation_freq):
        # Create data_handler for evaluation and cache it.
        if getattr(self, '_eval_data_handler', None) is None:
          self._eval_data_handler = data_adapter.DataHandler(
              x=val_x,
              y=val_y,
              sample_weight=val_sample_weight,
              batch_size=validation_batch_size or batch_size,
              steps_per_epoch=validation_steps,
              initial_epoch=0,
              epochs=1,
              max_queue_size=max_queue_size,
              workers=workers,
              use_multiprocessing=use_multiprocessing,
              model=self,
              steps_per_execution=self._steps_per_execution)
        val_logs = self.evaluate(
            x=val_x,
            y=val_y,
            sample_weight=val_sample_weight,
            batch_size=validation_batch_size or batch_size,
            steps=validation_steps,
            callbacks=callbacks,
            max_queue_size=max_queue_size,
            workers=workers,
            use_multiprocessing=use_multiprocessing,
            return_dict=True)
        val_logs = {'val_' + name: val for name, val in val_logs.items()}
        epoch_logs.update(val_logs)

      callbacks.on_epoch_end(epoch, epoch_logs)
      training_logs = epoch_logs
      if self.stop_training:
        break

    # If the eval data_handler exists, delete it after all epochs are done.
    if getattr(self, '_eval_data_handler', None) is not None:
      del self._eval_data_handler
    callbacks.on_train_end(logs=training_logs)
    return self.history
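For context, a minimal sketch of the gradient-accumulation pattern this custom `fit` relies on (summing gradients over several micro-batches, then applying them once). `self._update_cycle`, `self._grad_accumulator`, and `self.accumulate_function` belong to the custom model above; the standalone names below are illustrative only, not the model's implementation.

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(8,))])
optimizer = tf.keras.optimizers.SGD(0.1)
loss_fn = tf.keras.losses.MeanSquaredError()
update_cycle = 4  # micro-batches accumulated per optimizer step

# One accumulator variable per trainable weight, all starting at zero.
accumulators = [tf.Variable(tf.zeros_like(v)) for v in model.trainable_variables]

def accumulate(x, y):
  """Adds this micro-batch's gradients to the running sums."""
  with tf.GradientTape() as tape:
    loss = loss_fn(y, model(x, training=True))
  grads = tape.gradient(loss, model.trainable_variables)
  for acc, g in zip(accumulators, grads):
    acc.assign_add(g)
  return loss

def apply_and_reset():
  """Applies the averaged gradients once, then clears the accumulators."""
  optimizer.apply_gradients(
      (acc / update_cycle, v)
      for acc, v in zip(accumulators, model.trainable_variables))
  for acc in accumulators:
    acc.assign(tf.zeros_like(acc))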
def fit(
    self,
    x: Optional[
        Union[np.ndarray, tf.Tensor, tf.data.Dataset, tf.keras.utils.Sequence]
    ] = None,
    y: Optional[
        Union[np.ndarray, tf.Tensor, tf.data.Dataset, tf.keras.utils.Sequence]
    ] = None,
    batch_size: Optional[int] = None,
    epochs: int = 1,
    verbose: int = 1,
    callbacks: Optional[List[Callback]] = None,
    validation_split: float = 0.0,
    validation_data: Optional[Any] = None,
    shuffle: bool = True,
    class_weight: Optional[Dict[int, float]] = None,
    sample_weight: Optional[np.ndarray] = None,
    initial_epoch: int = 0,
    steps_per_epoch: Optional[int] = None,
    validation_steps: Optional[int] = None,
    validation_batch_size: Optional[int] = None,
    validation_freq: int = 1,
    max_queue_size: int = 10,
    workers: int = 1,
    use_multiprocessing: bool = False,
) -> History:
    """Trains the model for a fixed number of epochs (iterations on a dataset).

    Args:
        x: Input data.
        y: Target data.
        batch_size: Number of samples per gradient update.
        epochs: Number of epochs to train the model.
        verbose: Verbosity mode. 0 = silent, 1 = progress bar,
            2 = one line per epoch.
        callbacks: List of `keras.callbacks.Callback` instances.
        validation_split: Fraction of the training data to be used as
            validation data.
        validation_data: Data on which to evaluate the loss and any model
            metrics at the end of each epoch.
        shuffle: Whether to shuffle the training data before each epoch.
        class_weight: Optional dictionary mapping class indices (integers) to
            a weight (float) value, used for weighting the loss function
            (during training only).
        sample_weight: Optional NumPy array of weights for the training
            samples, used for weighting the loss function (during training
            only).
        initial_epoch: Epoch at which to start training.
        steps_per_epoch: Total number of steps (batches of samples) before
            declaring one epoch finished and starting the next epoch.
        validation_steps: Total number of steps (batches of samples) to draw
            before stopping when performing validation at the end of every
            epoch.
        validation_batch_size: Number of samples per validation batch.
        validation_freq: Specifies how many training epochs to run before a
            new validation run is performed.
        max_queue_size: Maximum size for the generator queue.
        workers: Maximum number of processes to spin up when using
            process-based threading.
        use_multiprocessing: If `True`, use process-based threading.

    Returns:
        A `History` object. Its `History.history` attribute is a record of
        training loss values and metrics values at successive epochs, as well
        as validation loss values and validation metrics values
        (if applicable).

    Raises:
        RuntimeError: 1. If the model was never compiled, or
            2. If `model.fit` is wrapped in `tf.function`.
        ValueError: In case of mismatch between the provided input data and
            what the model expects.
    """
    base_layer.keras_api_gauge.get_cell("fit").set(True)
    # Legacy graph support is contained in `training_v1.Model`.
    version_utils.disallow_legacy_graph("Model", "fit")
    self._assert_compile_was_called()
    self._check_call_args("fit")
    training._disallow_inside_tf_function("fit")

    if validation_split:
        # Create the validation data using the training data. Only supported
        # for `Tensor` and `NumPy` input.
        (
            (x, y, sample_weight),
            validation_data,
        ) = data_adapter.train_validation_split(
            (x, y, sample_weight), validation_split=validation_split
        )

    if validation_data:
        val_x, val_y, val_sample_weight = data_adapter.unpack_x_y_sample_weight(
            validation_data
        )

    with self.distribute_strategy.scope(), (
        training_utils.RespectCompiledTrainableState(self)
    ):
        # Creates a `tf.data.Dataset` and handles batch and epoch iteration.
        # Use our own custom data handler to handle increasing batch size.
        data_handler = CustomDataHandler(
            x=x,
            y=y,
            sample_weight=sample_weight,
            batch_size=batch_size,
            steps_per_epoch=steps_per_epoch,
            initial_epoch=initial_epoch,
            epochs=epochs,
            shuffle=shuffle,
            class_weight=class_weight,
            max_queue_size=max_queue_size,
            workers=workers,
            use_multiprocessing=use_multiprocessing,
            model=self,
            steps_per_execution=self._steps_per_execution,
        )

        # Container that configures and calls `tf.keras.Callback`s.
        if not isinstance(callbacks, training.callbacks_module.CallbackList):
            callbacks = training.callbacks_module.CallbackList(
                callbacks,
                add_history=True,
                add_progbar=verbose != 0,
                model=self,
                verbose=verbose,
                epochs=epochs,
                steps=data_handler.inferred_steps,
            )

        self.stop_training = False
        train_function = self.make_train_function()
        self._train_counter.assign(0)
        callbacks.on_train_begin()
        training_logs = None
        # Handle fault-tolerance for multi-worker.
        data_handler._initial_epoch = self._maybe_load_initial_epoch_from_ckpt(  # pylint: disable=protected-access # noqa: E501
            initial_epoch
        )
        logs = None
        for epoch, iterator in data_handler.enumerate_epochs():
            self.reset_metrics()
            callbacks.on_epoch_begin(epoch)
            with data_handler.catch_stop_iteration():
                for step in data_handler.steps():
                    with trace.Trace(
                        "TraceContext",
                        graph_type="train",
                        epoch_num=epoch,
                        step_num=step,
                        batch_size=batch_size,
                    ):
                        callbacks.on_train_batch_begin(step)
                        tmp_logs = train_function(iterator)
                        if data_handler.should_sync:
                            context.async_wait()
                        logs = tmp_logs  # No error, now safe to assign to logs.
                        end_step = step + data_handler.step_increment
                        callbacks.on_train_batch_end(end_step, logs)

            logs = tf_utils.sync_to_numpy_or_python_type(logs)
            epoch_logs = copy.copy(logs)

            # Run validation.
            if validation_data and self._should_eval(epoch, validation_freq):
                # Create data_handler for evaluation and cache it.
                if getattr(self, "_eval_data_handler", None) is None:
                    self._eval_data_handler = CustomDataHandler(
                        x=val_x,
                        y=val_y,
                        sample_weight=val_sample_weight,
                        batch_size=validation_batch_size or batch_size,
                        steps_per_epoch=validation_steps,
                        initial_epoch=0,
                        epochs=1,
                        max_queue_size=max_queue_size,
                        workers=workers,
                        use_multiprocessing=use_multiprocessing,
                        model=self,
                        steps_per_execution=self._steps_per_execution,
                    )
                val_logs = self.evaluate(
                    x=val_x,
                    y=val_y,
                    sample_weight=val_sample_weight,
                    batch_size=validation_batch_size or batch_size,
                    steps=validation_steps,
                    callbacks=callbacks,
                    max_queue_size=max_queue_size,
                    workers=workers,
                    use_multiprocessing=use_multiprocessing,
                    return_dict=True,
                )
                val_logs = {
                    "val_" + name: val for name, val in val_logs.items()
                }
                epoch_logs.update(val_logs)

            callbacks.on_epoch_end(epoch, epoch_logs)
            training_logs = epoch_logs
            if self.stop_training:
                break

        # If _eval_data_handler exists, delete it after all epochs are done.
        if getattr(self, "_eval_data_handler", None) is not None:
            del self._eval_data_handler
        callbacks.on_train_end(logs=training_logs)
        return self.history
def RunTest(self, run_params):
  with trace.Trace(run_params.test_name):
    should_run, reason_for_skipping = self.ShouldRunTest(run_params)
    if not should_run:
      return self.skipTest(reason_for_skipping)

    saved_model_dir = self._MakeSavedModel(run_params)

    np.random.seed(12345)  # Fix the seed so the test is deterministic.
    inputs_data = []
    input_specs = self._GetParamsCached().input_specs
    for dim_list in self._GetParamsCached().input_dims:
      assert len(input_specs) == len(dim_list)
      current_input_data = []
      for spec, np_shape in zip(input_specs, dim_list):
        np_dtype = spec.dtype.as_numpy_dtype()
        # Multiply the input by some constant to avoid all zeros input for
        # integer types.
        scale = 10.0 if np.issubdtype(np_dtype, np.integer) else 1.0
        # TODO(laigd): add debug options. E.g. we can set the input data to be
        # continuous natural numbers:
        # seq = np.arange(np.prod(np_shape))
        # seq.resize(np_shape)
        # current_input_data.append(scale * seq.astype(np_dtype))
        data = (scale * np.random.random_sample(np_shape)).astype(np_dtype)
        if run_params.is_v2:
          with ops.device("/GPU:0"):
            data = ops.convert_to_tensor(data)
        current_input_data.append(data)
      inputs_data.append(current_input_data)

    # Verify the original graph.
    self._VerifyGraphDef(run_params, saved_model_dir, saved_model_dir,
                         GraphState.ORIGINAL)

    # Run the original graph without TensorRT to get the reference result.
    logging.info("Running original graph w/o TensorRT\n")
    ref_result = self._RunGraph(
        run_params,
        saved_model_dir,
        inputs_data,
        GraphState.ORIGINAL,
        num_runs=1)

    # Run calibration if necessary.
    if IsQuantizationWithCalibration(run_params):
      infer_saved_model_dir = self._GetCalibratedInferGraph(
          run_params, saved_model_dir, inputs_data)
      self._VerifyGraphDef(run_params, saved_model_dir, infer_saved_model_dir,
                           GraphState.INFERENCE)
    elif not run_params.convert_online:
      infer_saved_model_dir = self._GetInferGraph(run_params, saved_model_dir)
      self._VerifyGraphDef(run_params, saved_model_dir, infer_saved_model_dir,
                           GraphState.INFERENCE)
    else:
      infer_saved_model_dir = saved_model_dir

    # Run the inference graph, either using the converted graph or the
    # original graph with convert_online == True.
    logging.info("Running final inference graph\n")
    result = self._RunGraph(run_params, infer_saved_model_dir, inputs_data,
                            GraphState.INFERENCE)
    self.assertAllClose(
        ref_result,
        result,
        atol=self.ExpectedAbsoluteTolerance(run_params),
        rtol=self.ExpectedRelativeTolerance(run_params))