def on_batch_end(self, batch, logs=None):
    """Stop the profiler when the configured stop step has just finished.

    Nothing happens unless ``batch`` equals ``self.stop_step_in_epoch`` and
    ``self.should_stop`` is set.

    Args:
        batch: Batch index, compared against ``self.stop_step_in_epoch``.
        logs: Unused.
    """
    reached_stop_step = batch == self.stop_step_in_epoch
    if not (reached_stop_step and self.should_stop):
        return

    # Clear the flag before stopping so the stop is attempted only once.
    self.should_stop = False
    profiler.stop()
    logging.info(
        'Profiler saved profiles for steps between %s and %s to %s',
        self.start_step,
        self.stop_step,
        self.log_dir,
    )
def test_save_profile(self):
    """Profile a small traced computation and check the files written.

    After the profiler is stopped the log directory must hold exactly two
    entries: a 'plugins' directory and a file ending in '.profile-empty'.
    The first run directory under plugins/profile must contain one file per
    expected tool, each prefixed with the local hostname.
    """
    logdir = self.get_temp_dir()
    profiler.start(logdir)
    with traceme.TraceMe('three_times_five'):
        three = constant_op.constant(3)
        five = constant_op.constant(5)
        product = three * five
    self.assertAllEqual(15, product)

    profiler.stop()
    file_list = gfile.ListDirectory(logdir)
    self.assertEqual(len(file_list), 2)
    for entry in gfile.ListDirectory(logdir):
        if not gfile.IsDirectory(os.path.join(logdir, entry)):
            self.assertTrue(entry.endswith('.profile-empty'))
        else:
            self.assertEqual(entry, 'plugins')

    profile_dir = os.path.join(logdir, 'plugins', 'profile')
    run = gfile.ListDirectory(profile_dir)[0]
    hostname = socket.gethostname()
    # One output file is expected per tool, checked in this order.
    expected_suffixes = (
        '.overview_page.pb',
        '.input_pipeline.pb',
        '.tensorflow_stats.pb',
        '.kernel_stats.pb',
        '.trace.json.gz',
    )
    for suffix in expected_suffixes:
        tool_file = os.path.join(profile_dir, run, hostname + suffix)
        self.assertTrue(gfile.Exists(tool_file))
def on_train_end(self, logs=None):
    """Stop a 'trace' profiling session and log how long the stop took.

    Only acts when ``self.profiler_type`` is 'trace'. One tab-separated row
    is written to ``self.log_epoch_times`` with the fixed leading fields
    '-1' and '0', followed by the seconds spent inside ``profiler.stop()``.

    Args:
        logs: Unused.
    """
    if self.profiler_type != 'trace':
        return

    stop_began = time.time()
    profiler.stop()
    stop_finished = time.time()

    row = '-1\t0\t{}'.format(stop_finished - stop_began)
    print(row, file=self.log_epoch_times)
    self.log_epoch_times.flush()
def test_profile_exceptions(self):
    """Check the errors raised by out-of-order profiler start/stop calls."""
    logdir = self.get_temp_dir()

    profiler.start(logdir)
    # Starting again while a session is active must raise.
    self.assertRaises(errors.AlreadyExistsError, profiler.start, logdir)

    profiler.stop()
    # Stopping again with no active session must raise.
    self.assertRaises(errors.UnavailableError, profiler.stop)
def _stop_profiler(self):
    """Stop the profiler if this object has it marked as started.

    An ``errors.UnavailableError`` raised by ``profiler.stop()`` is logged
    instead of propagated. The started flag is cleared whether or not the
    stop succeeded.
    """
    if self._profiler_started:
        try:
            profiler.stop()
        except errors.UnavailableError as err:
            # Profiler errors should not be fatal.
            logging.error('Failed to stop profiler: %s', err.message)
        finally:
            self._profiler_started = False
def on_epoch_end(self, epoch, logs=None):
    """Log the epoch duration and, for per-epoch tracing, the stop cost.

    Writes one tab-separated row to ``self.log_epoch_times``: the epoch
    value, the seconds since ``self.epoch_start_time``, and the seconds
    spent stopping the profiler. The last field is zero unless
    ``self.profiler_type`` is 'trace_per_epoch'.

    Args:
        epoch: Epoch value written as the first field of the row.
        logs: Unused.
    """
    epoch_end_time = time.time()

    # Default: no profiler stop happened, so the stop cost is zero.
    profiler_end_time = epoch_end_time
    if self.profiler_type == 'trace_per_epoch':
        profiler.stop()
        profiler_end_time = time.time()

    epoch_seconds = epoch_end_time - self.epoch_start_time
    stop_seconds = profiler_end_time - epoch_end_time
    row = '{}\t{}\t{}'.format(epoch, epoch_seconds, stop_seconds)
    print(row, file=self.log_epoch_times)
    self.log_epoch_times.flush()
def test_single_worker_programmatic_mode(self):
    """Test single worker programmatic mode.

    Profiles a two-epoch ``model.fit`` run between explicit start and stop
    calls, then checks the output via ``self._check_tools_pb_exist``.
    """
    logdir = self.get_temp_dir()
    tracer_levels = {
        'host_tracer_level': 2,
        'python_tracer_level': 0,
        'device_tracer_level': 1,
    }
    options = profiler.ProfilerOptions(**tracer_levels)

    profiler.start(logdir, options)
    _, steps, train_ds, model = _model_setup()
    model.fit(x=train_ds, epochs=2, steps_per_epoch=steps)
    profiler.stop()

    self._check_tools_pb_exist(logdir)
def test_profile_with_options(self):
    """Profile a small traced computation using explicit tracer levels.

    After the profiler is stopped the log directory must contain exactly
    two entries.
    """
    logdir = self.get_temp_dir()
    tracer_levels = {'host_tracer_level': 3, 'python_tracer_level': 1}
    options = profiler.ProfilerOptions(**tracer_levels)

    profiler.start(logdir, options)
    with trace.Trace('three_times_five'):
        three = constant_op.constant(3)
        five = constant_op.constant(5)
        product = three * five
    self.assertAllEqual(15, product)

    profiler.stop()
    entries = gfile.ListDirectory(logdir)
    self.assertEqual(len(entries), 2)
def benchmark_keras_model_functional_fit_run_model_eagerly_with_profiler(
        self):
    """Benchmark Keras model fit with ``run_eagerly=True`` under the profiler.

    The profiler is started with an empty log directory and stopped with
    ``save=False``. The value returned by the stop call must not be None.

    Raises:
        AssertionError: If ``profiler.stop(save=False)`` returns None.
    """
    profiler.start("")
    try:
        model = make_keras_model(initializer="glorot_uniform")
        self._benchmark_keras_model_fit(model, run_eagerly=True)
    finally:
        # Always stop the session, even if building or fitting the model
        # raised, so later profiler.start() calls are not blocked by it.
        result = profiler.stop(save=False)
    assert result is not None
def benchmark_keras_model_functional_fit_graph_mode_with_profiler(self):
    """Benchmark Keras model fit inside ``context.graph_mode()`` under the profiler.

    The profiler is started with an empty log directory and stopped with
    ``save=False``. The value returned by the stop call must not be None.

    Raises:
        AssertionError: If ``profiler.stop(save=False)`` returns None.
    """
    profiler.start("")
    try:
        with context.graph_mode():
            model = make_keras_model(initializer="glorot_uniform")
            self._benchmark_keras_model_fit(model)
    finally:
        # Always stop the session, even if building or fitting the model
        # raised, so later profiler.start() calls are not blocked by it.
        result = profiler.stop(save=False)
    assert result is not None
def on_batch_end(self, batch, logs=None):
    """Write per-batch scalar summaries and stop an active profiling session.

    The running sample count grows by ``logs['size']`` (1 when absent).
    Unless ``self.update_freq`` is 'epoch', summaries are written once the
    samples seen since the last write reach ``self.update_freq``. The total
    batch counter is then incremented, and the profiler is stopped if
    ``self._is_profiling`` is set.

    Args:
        batch: Unused.
        logs: Optional mapping of values for this batch; treated as empty
            when None.
    """
    logs = logs or {}
    self._samples_seen += logs.get('size', 1)
    pending_samples = self._samples_seen - self._samples_seen_at_last_write

    write_due = (self.update_freq != 'epoch'
                 and pending_samples >= self.update_freq)
    if write_due:
        # Don't output batch_size and batch number as TensorBoard summaries
        skipped_keys = ('batch', 'size', 'num_steps')
        batch_logs = {}
        for key, value in logs.items():
            if key not in skipped_keys:
                batch_logs['batch_' + key] = value
        self._write_custom_summaries(self._total_batches_seen, batch_logs)
        self._samples_seen_at_last_write = self._samples_seen

    self._total_batches_seen += 1

    if self._is_profiling:
        profiler.stop()
        self._is_profiling = False
def test_profile_exceptions(self):
    """Check profiler errors for out-of-order calls and a bad log directory."""
    logdir = self.get_temp_dir()

    profiler.start(logdir)
    # Starting again while a session is active must raise.
    self.assertRaises(errors.AlreadyExistsError, profiler.start, logdir)
    profiler.stop()
    # Stopping again with no active session must raise.
    self.assertRaises(errors.UnavailableError, profiler.stop)

    # Test with a bad logdir, and it correctly raises exception and deletes
    # profiler.
    # pylint: disable=anomalous-backslash-in-string
    profiler.start('/\/\/:123')
    # pylint: enable=anomalous-backslash-in-string
    self.assertRaises(Exception, profiler.stop)

    # A fresh session must be startable after the failed stop.
    profiler.start(logdir)
    profiler.stop()
# Build the GAN, its two Adam optimizers and the training wrapper.
nets = GAN(architecture, ngf, ndf, latent_n)
gen_optimizer = tf.keras.optimizers.Adam(lr)
dis_optimizer = tf.keras.optimizers.Adam(lr)
gan_opt = OptGAN(nets, gen_optimizer, dis_optimizer, batch_size, latent_n)

print(f'Running on TF version {tf.__version__}')

# The total timer starts before profiler warmup/start, so the reported total
# includes that setup time.
tic = time.time()
# tf.profiler.experimental.start('./profiler/gan_tf/')
profiler.warmup()
profiler.start('./profiler/gan_tf/')
try:
    mean_time = tfk.metrics.Mean()
    for epoch in range(epochs):
        start_time = time.time()
        gan_opt.train_on_epoch(train_ds)
        end_time = time.time()
        epoch_seconds = end_time - start_time
        mean_time(epoch_seconds)
        print(f'Epoch: {epoch + 1:4d} |{epoch_seconds:4.2f} sec')
    toc = time.time()
finally:
    # Stop the profiler even when training raises or is interrupted, so the
    # session is never left running.
    # tf.profiler.experimental.stop()
    profiler.stop()

print(f'It took {toc-tic:4.2f} sec')
print(f'Mean time per epoch is {mean_time.result():4.2f} sec')

# NOTE(review): the noise width is hard-coded to 100 while the networks are
# built with latent_n -- confirm the two agree.
noise = tf.random.normal([1, 100])
generated_image = nets.gen(noise, training=False)
# plt.show()
def on_train_end(self, logs=None):
    """Stop any active profiling session and close the writer.

    ``self.writer`` is closed and ``self._is_profiling`` is cleared even
    when ``profiler.stop()`` raises; the exception still propagates.

    Args:
        logs: Unused.
    """
    try:
        if self._is_profiling:
            try:
                profiler.stop()
            finally:
                # Clear the flag even on failure so a broken session is not
                # stopped a second time.
                self._is_profiling = False
    finally:
        # The writer must not leak because of a profiler error.
        self.writer.close()
path = "data/Confocal_MICE/raw/training_raw.npy"
# Load the training image
data = np.load(path).astype(np.float32)
# We are loading the histogram from the 'Convallaria-1-CreateNoiseModel' notebook
# histogram = np.load(path + 'noiseModel.npy')
# Create a NoiseModel object from the histogram.
# noiseModel = hist_noise_model.NoiseModel(histogram)
logging.config.fileConfig("configs/logging.conf")
# TODO: how to deal with the noise model being a part of the model config as
# opposed to something generated from the data
model_config = yaml2namespace(join('unittests', 'assets', 'ppn2v_model.yaml'))
training_config = yaml2namespace(join('configs', 'training_config.yaml'))
# data, mean, std = load_data(data, batch_size=training_config.batch_size,
#                             patch_size=training_config.patch_size,
#                             num_pix=100 * 100 // 32, supervised=False)
train_data, val_data, mean, std = load_dataset('data/test_records')
model = PPN2V(model_config, mean, std)

# Profile the training run.
profiler_v2.warmup()
profiler_v2.start(logdir='model_instances/cheese')
try:
    model.train(train_data, training_config)
finally:
    # Stop the profiler even when training raises or is interrupted, so the
    # session is never left running.
    profiler_v2.stop()