def testTimeoutFunction(self, model_type, build_model):
  checkpoint_dir = os.path.join(self.get_temp_dir(), 'checkpoints')

  # Create synthetic data for evaluation. No checkpoint is ever written to
  # `checkpoint_dir`, so the evaluator should log a timeout warning.
  data = np.random.random((1000, 32))
  labels = np.random.random((1000, 10))
  dataset = tf.data.Dataset.from_tensor_slices((data, labels))
  dataset = dataset.batch(_BATCH_SIZE)

  # Create a new model used for evaluation.
  eval_model = _test_model_builder(
      model_type=model_type, compile_model=True, build_model=build_model)

  # Have a sidecar_evaluator evaluate once.
  sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
      eval_model,
      data=dataset,
      checkpoint_dir=checkpoint_dir,
      max_evaluations=1)

  with self.assertLogs() as cm:
    threading.Thread(target=sidecar_evaluator.start, daemon=True).start()
    time.sleep(50)

  metrics_logging = [
      l for l in cm.output if 'No checkpoints appear to be found' in l
  ]
  self.assertGreaterEqual(len(metrics_logging), 1)
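# NOTE: `_BATCH_SIZE` and `_test_model_builder` are module-level helpers that
# these tests rely on but which are not shown in this section. A minimal
# sketch of what they might look like, inferred from the (1000, 32) ->
# (1000, 10) synthetic data and the metric names asserted below; the exact
# `model_type` values, layer sizes, and loss are assumptions:
_BATCH_SIZE = 32


def _test_model_builder(model_type, compile_model, build_model):
  if model_type == 'functional':
    inputs = keras.Input(shape=(32,))
    outputs = keras.layers.Dense(10)(inputs)
    model = keras.Model(inputs, outputs)
  else:  # assumed: a sequential (or subclassed) variant without an input shape
    model = keras.Sequential([keras.layers.Dense(10)])
  if compile_model:
    model.compile(
        optimizer='adam',
        loss='mse',
        metrics=[
            keras.metrics.CategoricalAccuracy(),
            keras.metrics.MeanSquaredError(name='mean_squared_error_1'),
            keras.metrics.MeanSquaredError(name='mean_squared_error_2'),
        ])
  if build_model:
    model.build((None, 32))
  return model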
def testSidecarEvaluatorOutputsSummarySavedWithCallback(self):
  checkpoint_dir = os.path.join(self.get_temp_dir(), 'checkpoints')
  log_dir = os.path.join(self.get_temp_dir(), 'summary')

  # Create a model with synthetic data, and fit for one epoch.
  model = self.createTestModel(compile_model=True)
  data = np.random.random((1000, 32))
  labels = np.random.random((1000, 10))
  dataset = tf.data.Dataset.from_tensor_slices((data, labels))
  dataset = dataset.batch(32)
  save_callback = keras.callbacks.ModelCheckpoint(
      filepath=os.path.join(checkpoint_dir, 'ckpt-{epoch}'),
      save_weights_only=True)
  model.fit(dataset, epochs=1, callbacks=[save_callback])
  self.assertNotEmpty(
      tf.io.gfile.listdir(checkpoint_dir),
      'Checkpoint should have been written and '
      'checkpoint_dir should not be empty.')

  # Create a new model used for evaluation.
  eval_model = self.createTestModel(compile_model=True)

  # Have a sidecar_evaluator evaluate once.
  sidecar_evaluator_lib.SidecarEvaluator(
      eval_model,
      data=dataset,
      checkpoint_dir=checkpoint_dir,
      log_dir=log_dir,
      max_evaluations=1).start()

  # Eval model has been restored to the same state as the original model, so
  # their weights should match. If not, restoration of the model didn't
  # work.
  self.assertModelsSameVariables(model, eval_model)
  self.assertSummaryEventsWritten(log_dir)
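# NOTE: `createTestModel` is a helper method on the test class that is not
# shown in this section. A plausible minimal sketch, assuming a single Dense
# layer matching the (1000, 32) -> (1000, 10) synthetic data used above; the
# optimizer and loss are assumptions:
def createTestModel(self, compile_model):
  model = keras.Sequential([keras.layers.Dense(10, input_shape=(32,))])
  if compile_model:
    model.compile(optimizer='adam', loss='mse')
  return model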
def testSidecarEvaluatorOutputsSummarySavedWithCallback(
    self, model_type, build_model):
  checkpoint_dir = os.path.join(self.get_temp_dir(), 'checkpoints')
  log_dir = os.path.join(self.get_temp_dir(), 'summary')

  # Create a model with synthetic data, and fit for one epoch.
  model = _test_model_builder(
      model_type=model_type, compile_model=True, build_model=False)
  data = np.random.random((1000, 32))
  labels = np.random.random((1000, 10))
  dataset = tf.data.Dataset.from_tensor_slices((data, labels))
  dataset = dataset.batch(_BATCH_SIZE)
  save_callback = keras.callbacks.ModelCheckpoint(
      filepath=os.path.join(checkpoint_dir, 'ckpt-{epoch}'),
      save_weights_only=True)
  model.fit(dataset, epochs=1, callbacks=[save_callback])
  self.assertNotEmpty(
      tf.io.gfile.listdir(checkpoint_dir),
      'Checkpoint should have been written and '
      'checkpoint_dir should not be empty.')

  # Create a new model used for evaluation.
  eval_model = _test_model_builder(
      model_type=model_type, compile_model=True, build_model=build_model)

  # Have a sidecar_evaluator evaluate once.
  sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
      eval_model,
      data=dataset,
      checkpoint_dir=checkpoint_dir,
      max_evaluations=1,
      callbacks=[keras.callbacks.TensorBoard(log_dir=log_dir)])
  with self.assertLogs() as cm:
    sidecar_evaluator.start()

  metrics_logging = [
      line for line in cm.output if 'End of evaluation' in line
  ]
  self.assertLen(metrics_logging, 1)
  expected_logged_metrics = [
      'loss', 'categorical_accuracy', 'mean_squared_error_1',
      'mean_squared_error_2'
  ]
  for metric_name in expected_logged_metrics:
    self.assertRegex(metrics_logging[0], f'{metric_name}=')

  # Eval model has been restored to the same state as the original model, so
  # their weights should match. If not, restoration of the model didn't
  # work.
  self.assertModelsSameVariables(model, eval_model)

  # Check that `iterations` was restored: one epoch over 1000 examples in
  # batches of 32 takes 32 steps, which happens to equal _BATCH_SIZE.
  self.assertEqual(sidecar_evaluator._iterations.numpy(), _BATCH_SIZE)
  self.assertSummaryEventsWritten(os.path.join(log_dir, 'validation'))
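# For context on what these tests exercise: outside of tests, SidecarEvaluator
# is meant to run in a dedicated evaluator process alongside a training
# process that periodically writes checkpoints. A minimal sketch of that
# setup, using only the constructor arguments seen above (the checkpoint path
# is illustrative; `max_evaluations=None` keeps the evaluator polling for new
# checkpoints indefinitely):
eval_model = _test_model_builder(
    model_type='functional', compile_model=True, build_model=True)
eval_data = tf.data.Dataset.from_tensor_slices(
    (np.random.random((1000, 32)), np.random.random((1000, 10)))).batch(32)
sidecar_evaluator_lib.SidecarEvaluator(
    eval_model,
    data=eval_data,
    checkpoint_dir='/tmp/training_checkpoints',  # written by the trainer
    max_evaluations=None,
).start()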
def testModelNotBuiltRaiseError(self, model_type):
  model = _test_model_builder(
      model_type=model_type, compile_model=False, build_model=False)

  checkpoint_dir = self.get_temp_dir()
  checkpoint = tf.train.Checkpoint(model=model)
  checkpoint_manager = tf.train.CheckpointManager(
      checkpoint, checkpoint_dir, max_to_keep=2)
  checkpoint_manager.save()

  sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
      model, data=None, checkpoint_dir=checkpoint_dir)
  with self.assertRaisesRegex(AssertionError, 'Nothing to load.'):
    sidecar_evaluator.start()
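# Why the assertion fires: an uncompiled, unbuilt model has created no
# variables yet, so the checkpoint written above contains nothing for the
# evaluator to restore. A quick illustration, assuming the sketched helper
# above (the `model_type` value is an assumption):
unbuilt = _test_model_builder(
    model_type='sequential', compile_model=False, build_model=False)
assert not unbuilt.weights  # no variables exist before build() or a first call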
def testSidecarEvaluatorOutputsSummary(self, model_type, build_model):
    # Create a model with synthetic data, and fit for one epoch.
    model = _test_model_builder(
        model_type=model_type, compile_model=True, build_model=False
    )
    data = np.random.random((1000, 32))
    labels = np.random.random((1000, 10))
    dataset = tf.data.Dataset.from_tensor_slices((data, labels))
    dataset = dataset.batch(32)
    model.fit(dataset, epochs=1)

    # Save a checkpoint.
    checkpoint_dir = os.path.join(self.get_temp_dir(), "ckpt")
    log_dir = os.path.join(self.get_temp_dir(), "summary")
    logging.info(
        "checkpoint_dir = %s, log_dir = %s", checkpoint_dir, log_dir
    )
    checkpoint = tf.train.Checkpoint(
        model=model, optimizer=model.optimizer
    )
    checkpoint_manager = tf.train.CheckpointManager(
        checkpoint, checkpoint_dir, max_to_keep=2
    )
    logging.info(
        "Checkpoint manager saved to: %s", checkpoint_manager.save()
    )
    self.assertNotEmpty(
        tf.io.gfile.listdir(checkpoint_dir),
        "Checkpoint should have been written and "
        "checkpoint_dir should not be empty.",
    )

    # Create a new model used for evaluation.
    eval_model = _test_model_builder(
        model_type=model_type, compile_model=True, build_model=build_model
    )

    # Have a sidecar_evaluator evaluate once.
    sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
        eval_model,
        data=dataset,
        checkpoint_dir=checkpoint_dir,
        max_evaluations=1,
        callbacks=[keras.callbacks.TensorBoard(log_dir=log_dir)],
    )
    sidecar_evaluator.start()

    # Eval model has been restored to the same state as the original model,
    # so their weights should match. If not, restoration of the model didn't
    # work.
    self.assertModelsSameVariables(model, eval_model)
    self.assertSummaryEventsWritten(os.path.join(log_dir, "validation"))
def testIterationsNotSavedWillRaiseError(self):
  model = self.createTestModel(compile_model=False)

  checkpoint_dir = self.get_temp_dir()
  checkpoint = tf.train.Checkpoint(model=model)
  checkpoint_manager = tf.train.CheckpointManager(
      checkpoint, checkpoint_dir, max_to_keep=2)
  checkpoint_manager.save()

  sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
      model, data=None, checkpoint_dir=checkpoint_dir)
  with self.assertRaisesRegex(
      RuntimeError, '`iterations` cannot be loaded '
      'from the checkpoint file.'):
    sidecar_evaluator.start()
def testIterationsNotSavedWillRaiseError(self, model_type):
    model = _test_model_builder(
        model_type=model_type, compile_model=False, build_model=True
    )
    checkpoint_dir = self.get_temp_dir()
    checkpoint = tf.train.Checkpoint(model=model)
    checkpoint_manager = tf.train.CheckpointManager(
        checkpoint, checkpoint_dir, max_to_keep=2
    )
    checkpoint_manager.save()

    sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
        model, data=None, checkpoint_dir=checkpoint_dir
    )
    with self.assertRaisesRegex(
        RuntimeError,
        "`iterations` cannot be loaded from the checkpoint file.",
    ):
        sidecar_evaluator.start()
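# The RuntimeError in the two tests above stems from how the checkpoint was
# written: SidecarEvaluator restores the optimizer's `iterations` counter to
# tell which training step a checkpoint corresponds to, and a
# `tf.train.Checkpoint(model=model)` built from an uncompiled model contains
# no optimizer state. Including the optimizer when saving a compiled model,
# as testSidecarEvaluatorOutputsSummary does below, avoids the error:
checkpoint = tf.train.Checkpoint(model=model, optimizer=model.optimizer)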
def testSidecarEvaluatorOutputsSummary(self):
  # Create a model with synthetic data, and fit for one epoch.
  model = self.createTestModel(compile_model=True)
  data = np.random.random((1000, 32))
  labels = np.random.random((1000, 10))
  dataset = tf.data.Dataset.from_tensor_slices((data, labels))
  dataset = dataset.batch(32)
  model.fit(dataset, epochs=1)

  # Save a checkpoint.
  checkpoint_dir = os.path.join(self.get_temp_dir(), 'ckpt')
  log_dir = os.path.join(self.get_temp_dir(), 'summary')
  logging.info('checkpoint_dir = %s, log_dir = %s', checkpoint_dir, log_dir)
  checkpoint = tf.train.Checkpoint(model=model, optimizer=model.optimizer)
  checkpoint_manager = tf.train.CheckpointManager(
      checkpoint, checkpoint_dir, max_to_keep=2)
  logging.info('Checkpoint manager saved to: %s', checkpoint_manager.save())
  self.assertNotEmpty(
      tf.io.gfile.listdir(checkpoint_dir),
      'Checkpoint should have been written and '
      'checkpoint_dir should not be empty.')

  # Create a new model used for evaluation.
  eval_model = self.createTestModel(compile_model=True)

  # Have a sidecar_evaluator evaluate once.
  sidecar_evaluator_lib.SidecarEvaluator(
      eval_model,
      data=dataset,
      checkpoint_dir=checkpoint_dir,
      log_dir=log_dir,
      max_evaluations=1).start()

  # Eval model has been restored to the same state as the original model, so
  # their weights should match. If not, restoration of the model didn't
  # work.
  self.assertModelsSameVariables(model, eval_model)
  self.assertSummaryEventsWritten(log_dir)