def testTimeoutFunction(self, model_type, build_model):
  checkpoint_dir = os.path.join(self.get_temp_dir(), 'checkpoints')

  # Create synthetic data for evaluation. No checkpoint is ever written to
  # `checkpoint_dir`, so the evaluator should log a timeout warning.
  data = np.random.random((1000, 32))
  labels = np.random.random((1000, 10))
  dataset = tf.data.Dataset.from_tensor_slices((data, labels))
  dataset = dataset.batch(_BATCH_SIZE)

  # Create a new model used for evaluation.
  eval_model = _test_model_builder(
      model_type=model_type, compile_model=True, build_model=build_model)

  # Have a sidecar_evaluator evaluate once.
  sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
      eval_model,
      data=dataset,
      checkpoint_dir=checkpoint_dir,
      max_evaluations=1)

  with self.assertLogs() as cm:
    threading.Thread(target=sidecar_evaluator.start, daemon=True).start()
    time.sleep(50)

  metrics_logging = [
      l for l in cm.output if 'No checkpoints appear to be found' in l
  ]
  self.assertGreaterEqual(len(metrics_logging), 1)
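# NOTE: `_BATCH_SIZE` and `_test_model_builder` are module-level helpers that
# these tests rely on but which are not shown in this section. A minimal
# sketch of what they might look like, inferred from the (1000, 32) ->
# (1000, 10) synthetic data and the metric names asserted below; the exact
# `model_type` values, layer sizes, and loss are assumptions:
_BATCH_SIZE = 32


def _test_model_builder(model_type, compile_model, build_model):
  if model_type == 'functional':
    inputs = keras.Input(shape=(32,))
    outputs = keras.layers.Dense(10)(inputs)
    model = keras.Model(inputs, outputs)
  else:  # assumed: a sequential (or subclassed) variant without an input shape
    model = keras.Sequential([keras.layers.Dense(10)])
  if compile_model:
    model.compile(
        optimizer='adam',
        loss='mse',
        metrics=[
            keras.metrics.CategoricalAccuracy(),
            keras.metrics.MeanSquaredError(name='mean_squared_error_1'),
            keras.metrics.MeanSquaredError(name='mean_squared_error_2'),
        ])
  if build_model:
    model.build((None, 32))
  return model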
def testSidecarEvaluatorOutputsSummarySavedWithCallback(self):
  checkpoint_dir = os.path.join(self.get_temp_dir(), 'checkpoints')
  log_dir = os.path.join(self.get_temp_dir(), 'summary')

  # Create a model with synthetic data, and fit for one epoch.
  model = self.createTestModel(compile_model=True)
  data = np.random.random((1000, 32))
  labels = np.random.random((1000, 10))
  dataset = tf.data.Dataset.from_tensor_slices((data, labels))
  dataset = dataset.batch(32)
  save_callback = keras.callbacks.ModelCheckpoint(
      filepath=os.path.join(checkpoint_dir, 'ckpt-{epoch}'),
      save_weights_only=True)
  model.fit(dataset, epochs=1, callbacks=[save_callback])
  self.assertNotEmpty(
      tf.io.gfile.listdir(checkpoint_dir),
      'Checkpoint should have been written and '
      'checkpoint_dir should not be empty.')

  # Create a new model used for evaluation.
  eval_model = self.createTestModel(compile_model=True)

  # Have a sidecar_evaluator evaluate once.
  sidecar_evaluator_lib.SidecarEvaluator(
      eval_model,
      data=dataset,
      checkpoint_dir=checkpoint_dir,
      log_dir=log_dir,
      max_evaluations=1).start()

  # Eval model has been restored to the same state as the original model, so
  # their weights should match. If not, restoration of the model didn't
  # work.
  self.assertModelsSameVariables(model, eval_model)
  self.assertSummaryEventsWritten(log_dir)
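# NOTE: `createTestModel` is a helper method on the test class that is not
# shown in this section. A plausible minimal sketch, assuming a single Dense
# layer matching the (1000, 32) -> (1000, 10) synthetic data used above; the
# optimizer and loss are assumptions:
def createTestModel(self, compile_model):
  model = keras.Sequential([keras.layers.Dense(10, input_shape=(32,))])
  if compile_model:
    model.compile(optimizer='adam', loss='mse')
  return model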
def testSidecarEvaluatorOutputsSummarySavedWithCallback(
    self, model_type, build_model):
  checkpoint_dir = os.path.join(self.get_temp_dir(), 'checkpoints')
  log_dir = os.path.join(self.get_temp_dir(), 'summary')

  # Create a model with synthetic data, and fit for one epoch.
  model = _test_model_builder(
      model_type=model_type, compile_model=True, build_model=False)
  data = np.random.random((1000, 32))
  labels = np.random.random((1000, 10))
  dataset = tf.data.Dataset.from_tensor_slices((data, labels))
  dataset = dataset.batch(_BATCH_SIZE)
  save_callback = keras.callbacks.ModelCheckpoint(
      filepath=os.path.join(checkpoint_dir, 'ckpt-{epoch}'),
      save_weights_only=True)
  model.fit(dataset, epochs=1, callbacks=[save_callback])
  self.assertNotEmpty(
      tf.io.gfile.listdir(checkpoint_dir),
      'Checkpoint should have been written and '
      'checkpoint_dir should not be empty.')

  # Create a new model used for evaluation.
  eval_model = _test_model_builder(
      model_type=model_type, compile_model=True, build_model=build_model)

  # Have a sidecar_evaluator evaluate once.
  sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
      eval_model,
      data=dataset,
      checkpoint_dir=checkpoint_dir,
      max_evaluations=1,
      callbacks=[keras.callbacks.TensorBoard(log_dir=log_dir)])
  with self.assertLogs() as cm:
    sidecar_evaluator.start()

  metrics_logging = [
      line for line in cm.output if 'End of evaluation' in line
  ]
  self.assertLen(metrics_logging, 1)
  expected_logged_metrics = [
      'loss', 'categorical_accuracy', 'mean_squared_error_1',
      'mean_squared_error_2'
  ]
  for metric_name in expected_logged_metrics:
    self.assertRegex(metrics_logging[0], f'{metric_name}=')

  # Eval model has been restored to the same state as the original model, so
  # their weights should match. If not, restoration of the model didn't
  # work.
  self.assertModelsSameVariables(model, eval_model)

  # Check that `iterations` was restored: one epoch over 1000 examples in
  # batches of 32 takes 32 steps, which happens to equal _BATCH_SIZE.
  self.assertEqual(sidecar_evaluator._iterations.numpy(), _BATCH_SIZE)
  self.assertSummaryEventsWritten(os.path.join(log_dir, 'validation'))
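# For context on what these tests exercise: outside of tests, SidecarEvaluator
# is meant to run in a dedicated evaluator process alongside a training
# process that periodically writes checkpoints. A minimal sketch of that
# setup, using only the constructor arguments seen above (the checkpoint path
# is illustrative; `max_evaluations=None` keeps the evaluator polling for new
# checkpoints indefinitely):
eval_model = _test_model_builder(
    model_type='functional', compile_model=True, build_model=True)
eval_data = tf.data.Dataset.from_tensor_slices(
    (np.random.random((1000, 32)), np.random.random((1000, 10)))).batch(32)
sidecar_evaluator_lib.SidecarEvaluator(
    eval_model,
    data=eval_data,
    checkpoint_dir='/tmp/training_checkpoints',  # written by the trainer
    max_evaluations=None,
).start()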
def testModelNotBuiltRaiseError(self, model_type):
  model = _test_model_builder(
      model_type=model_type, compile_model=False, build_model=False)

  checkpoint_dir = self.get_temp_dir()
  checkpoint = tf.train.Checkpoint(model=model)
  checkpoint_manager = tf.train.CheckpointManager(
      checkpoint, checkpoint_dir, max_to_keep=2)
  checkpoint_manager.save()

  sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
      model, data=None, checkpoint_dir=checkpoint_dir)
  with self.assertRaisesRegex(AssertionError, 'Nothing to load.'):
    sidecar_evaluator.start()
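# Why the assertion fires: an uncompiled, unbuilt model has created no
# variables yet, so the checkpoint written above contains nothing for the
# evaluator to restore. A quick illustration, assuming the sketched helper
# above (the `model_type` value is an assumption):
unbuilt = _test_model_builder(
    model_type='sequential', compile_model=False, build_model=False)
assert not unbuilt.weights  # no variables exist before build() or a first call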
def testSidecarEvaluatorOutputsSummary(self, model_type, build_model):
    # Create a model with synthetic data, and fit for one epoch.
    model = _test_model_builder(
        model_type=model_type, compile_model=True, build_model=False
    )
    data = np.random.random((1000, 32))
    labels = np.random.random((1000, 10))
    dataset = tf.data.Dataset.from_tensor_slices((data, labels))
    dataset = dataset.batch(32)
    model.fit(dataset, epochs=1)

    # Save a checkpoint.
    checkpoint_dir = os.path.join(self.get_temp_dir(), "ckpt")
    log_dir = os.path.join(self.get_temp_dir(), "summary")
    logging.info(
        "checkpoint_dir = %s, log_dir = %s", checkpoint_dir, log_dir
    )
    checkpoint = tf.train.Checkpoint(
        model=model, optimizer=model.optimizer
    )
    checkpoint_manager = tf.train.CheckpointManager(
        checkpoint, checkpoint_dir, max_to_keep=2
    )
    logging.info(
        "Checkpoint manager saved to: %s", checkpoint_manager.save()
    )
    self.assertNotEmpty(
        tf.io.gfile.listdir(checkpoint_dir),
        "Checkpoint should have been written and "
        "checkpoint_dir should not be empty.",
    )

    # Create a new model used for evaluation.
    eval_model = _test_model_builder(
        model_type=model_type, compile_model=True, build_model=build_model
    )

    # Have a sidecar_evaluator evaluate once.
    sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
        eval_model,
        data=dataset,
        checkpoint_dir=checkpoint_dir,
        max_evaluations=1,
        callbacks=[keras.callbacks.TensorBoard(log_dir=log_dir)],
    )
    sidecar_evaluator.start()

    # Eval model has been restored to the same state as the original model,
    # so their weights should match. If not, restoration of the model didn't
    # work.
    self.assertModelsSameVariables(model, eval_model)
    self.assertSummaryEventsWritten(os.path.join(log_dir, "validation"))
def testIterationsNotSavedWillRaiseError(self):
  model = self.createTestModel(compile_model=False)

  checkpoint_dir = self.get_temp_dir()
  checkpoint = tf.train.Checkpoint(model=model)
  checkpoint_manager = tf.train.CheckpointManager(
      checkpoint, checkpoint_dir, max_to_keep=2)
  checkpoint_manager.save()

  sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
      model, data=None, checkpoint_dir=checkpoint_dir)
  with self.assertRaisesRegex(
      RuntimeError, '`iterations` cannot be loaded '
      'from the checkpoint file.'):
    sidecar_evaluator.start()
def testIterationsNotSavedWillRaiseError(self, model_type):
    model = _test_model_builder(
        model_type=model_type, compile_model=False, build_model=True
    )
    checkpoint_dir = self.get_temp_dir()
    checkpoint = tf.train.Checkpoint(model=model)
    checkpoint_manager = tf.train.CheckpointManager(
        checkpoint, checkpoint_dir, max_to_keep=2
    )
    checkpoint_manager.save()

    sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
        model, data=None, checkpoint_dir=checkpoint_dir
    )
    with self.assertRaisesRegex(
        RuntimeError,
        "`iterations` cannot be loaded from the checkpoint file.",
    ):
        sidecar_evaluator.start()
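# The RuntimeError in the two tests above stems from how the checkpoint was
# written: SidecarEvaluator restores the optimizer's `iterations` counter to
# tell which training step a checkpoint corresponds to, and a
# `tf.train.Checkpoint(model=model)` built from an uncompiled model contains
# no optimizer state. Including the optimizer when saving a compiled model,
# as testSidecarEvaluatorOutputsSummary does below, avoids the error:
checkpoint = tf.train.Checkpoint(model=model, optimizer=model.optimizer)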
def testSidecarEvaluatorOutputsSummary(self):
  # Create a model with synthetic data, and fit for one epoch.
  model = self.createTestModel(compile_model=True)
  data = np.random.random((1000, 32))
  labels = np.random.random((1000, 10))
  dataset = tf.data.Dataset.from_tensor_slices((data, labels))
  dataset = dataset.batch(32)
  model.fit(dataset, epochs=1)

  # Save a checkpoint.
  checkpoint_dir = os.path.join(self.get_temp_dir(), 'ckpt')
  log_dir = os.path.join(self.get_temp_dir(), 'summary')
  logging.info('checkpoint_dir = %s, log_dir = %s', checkpoint_dir, log_dir)
  checkpoint = tf.train.Checkpoint(model=model, optimizer=model.optimizer)
  checkpoint_manager = tf.train.CheckpointManager(
      checkpoint, checkpoint_dir, max_to_keep=2)
  logging.info('Checkpoint manager saved to: %s', checkpoint_manager.save())
  self.assertNotEmpty(
      tf.io.gfile.listdir(checkpoint_dir),
      'Checkpoint should have been written and '
      'checkpoint_dir should not be empty.')

  # Create a new model used for evaluation.
  eval_model = self.createTestModel(compile_model=True)

  # Have a sidecar_evaluator evaluate once.
  sidecar_evaluator_lib.SidecarEvaluator(
      eval_model,
      data=dataset,
      checkpoint_dir=checkpoint_dir,
      log_dir=log_dir,
      max_evaluations=1).start()

  # Eval model has been restored to the same state as the original model, so
  # their weights should match. If not, restoration of the model didn't
  # work.
  self.assertModelsSameVariables(model, eval_model)
  self.assertSummaryEventsWritten(log_dir)