def testEvaluationJob(self):
    """End-to-end check of EvaluationJob.

    Covers: task-completion counting, the finished() transition, the
    ok_to_new_job gate on checkpoint version, and metric aggregation
    across two reported batches.
    """
    model_version = 1
    total_tasks = 5
    latest_chkp_version = 2
    job = EvaluationJob(_eval_metrics_fn(), model_version, total_tasks)

    # A fresh job has zero completed tasks, is not finished, and does
    # not yet allow starting a new job.
    self.assertEqual(0, job._completed_tasks)
    self.assertFalse(job.finished())
    self.assertFalse(self.ok_to_new_job(job, latest_chkp_version))

    # Complete 4 of the 5 tasks: still unfinished.
    for _ in range(4):  # loop index is unused
        job.complete_task()
    self.assertEqual(4, job._completed_tasks)
    self.assertFalse(job.finished())
    self.assertFalse(self.ok_to_new_job(job, latest_chkp_version))

    # The final task pushes the job into the finished state.
    job.complete_task()
    self.assertEqual(5, job._completed_tasks)
    self.assertTrue(job.finished())
    self.assertTrue(self.ok_to_new_job(job, latest_chkp_version))

    # A new job is only ok when a strictly newer checkpoint exists.
    latest_chkp_version = job.model_version
    self.assertFalse(self.ok_to_new_job(job, latest_chkp_version))
    latest_chkp_version = job.model_version + 1
    self.assertTrue(self.ok_to_new_job(job, latest_chkp_version))

    # Report two batches of model outputs vs. labels.
    model_outputs = {}
    model_outputs[MetricsDictKey.MODEL_OUTPUT] = ndarray_to_pb(
        np.array([[1], [6], [3]], np.float32)
    )
    labels = ndarray_to_pb(np.array([[1], [0], [3]], np.float32))
    job.report_evaluation_metrics(model_outputs, labels)
    job.report_evaluation_metrics(
        {
            MetricsDictKey.MODEL_OUTPUT: ndarray_to_pb(
                np.array([[4], [5], [6], [7], [8]], np.float32)
            )
        },
        ndarray_to_pb(np.array([[7], [8], [9], [10], [11]], np.float32)),
    )

    # 2 of the 8 samples match exactly -> accuracy 0.25; the squared
    # errors are 0, 36, 0 and 3^2 * 5 = 45, so mse = 81 / 8 = 10.125.
    expected_acc = 0.25
    evaluation_metrics = job.evaluation_metrics.get_evaluation_summary()
    self.assertAlmostEqual(expected_acc, evaluation_metrics.get("acc"))
    self.assertAlmostEqual(expected_acc, evaluation_metrics.get("acc_fn"))
    self.assertAlmostEqual(10.125, evaluation_metrics.get("mse"))
def verify(array):
    """Round-trip *array* through the protobuf encoding and assert the
    decoded result is element-wise identical to the input."""
    encoded = ndarray_to_pb(array)
    decoded = pb_to_ndarray(encoded)
    np.testing.assert_array_equal(array, decoded)