def test_evaluate(self):
  # Both instances share the name 'StddevWithinRuns', so the results
  # dict is keyed by that single name.
  evaluator = eval_metrics.Evaluator(
      [metrics_online.StddevWithinRuns(),
       metrics_online.StddevWithinRuns()])
  results = evaluator.evaluate(self.run_dirs)
  self.assertEqual(list(results.keys()), ['StddevWithinRuns'])
  self.assertTrue(np.greater(list(results.values()), 0.).all())
def test_evaluate_using_environment_steps(self):
  # Evaluate at a specific environment-step count instead of the default
  # timepoint variable.
  gin.bind_parameter('metrics_online.StddevWithinRuns.eval_points', [2001])
  metric_instances = [
      metrics_online.StddevWithinRuns(),
      metrics_online.StddevWithinRuns()
  ]
  evaluator = eval_metrics.Evaluator(
      metric_instances, timepoint_variable='Metrics/EnvironmentSteps')
  results = evaluator.evaluate(self.run_dirs)
  self.assertEqual(list(results.keys()), ['StddevWithinRuns'])
  self.assertTrue(np.greater(list(results.values()), 0.).all())
def test_evaluate_with_permutations(self):
  evaluator = eval_metrics.Evaluator([metrics_online.StddevWithinRuns()])
  n_permutations = 3
  permutation_start_idx = 100
  random_seed = 50
  outfile_prefix = os.path.join(FLAGS.test_tmpdir,
                                'robustness_results_permuted_')
  results = evaluator.evaluate_with_permutations(
      self.run_dirs, self.run_dirs, outfile_prefix, n_permutations,
      permutation_start_idx, random_seed)

  # Check the length of the results.
  self.assertLen(results, n_permutations)

  # Check a single result.
  one_result = list(results.values())[0]['curves1']
  self.assertEqual(list(one_result.keys()), ['StddevWithinRuns'])
  self.assertTrue(np.greater(list(one_result.values()), 0.).all())

  # Check the output files.
  results_files = io_utils.paths_glob('%s*results.json' % outfile_prefix)
  self.assertLen(results_files, 1)

  # If run again with the same seed, the results should be the same.
  results_same = evaluator.evaluate_with_permutations(
      self.run_dirs, self.run_dirs, outfile_prefix, n_permutations,
      permutation_start_idx, random_seed)
  self._assert_results_same(results, results_same)

  # If run again with a different seed, the results should differ.
  results_different = evaluator.evaluate_with_permutations(
      self.run_dirs, self.run_dirs, outfile_prefix, n_permutations,
      permutation_start_idx, random_seed + 1)
  self._assert_results_different(results, results_different)
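# The same-seed/different-seed assertions above rely on the permutations
# being drawn from a seeded RNG. A minimal sketch of that property, assuming
# a NumPy RandomState (the evaluator's actual RNG handling may differ):
def _sketch_seeded_permutation(n_runs, seed):
  """Deterministically shuffles run indices: same seed, same ordering."""
  rng = np.random.RandomState(seed)
  return rng.permutation(n_runs)

# _sketch_seeded_permutation(6, 50) always returns the same ordering, while
# seed 51 will generally yield a different one.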
def testCorrectStddevWithinRuns(self, timepoints, window_size, baseline,
                                expected):
  # Each curve is a 2 x N array: row 0 holds timepoints, row 1 holds values.
  curves = [
      np.array([[5, 7, 9], [1, 1, 1]]),
      np.array([[5, 7, 9, 11], [2, 3, 4, 5]]),
      np.array([[5, 7, 9, 10], [5, 4, 2, 1]])
  ]
  metric = metrics_online.StddevWithinRuns(window_size, timepoints, baseline)
  result = metric(curves)
  self.assertEqual(metric.name, 'StddevWithinRuns')
  np.testing.assert_allclose(result, expected)
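# For reference, a minimal sketch of the dispersion-within-runs computation
# these fixtures exercise. This is an assumption based on the metric's
# description (detrend each curve by first differences, then take the stddev
# inside a window around each eval point); the authoritative definition is
# metrics_online.StddevWithinRuns, and details such as windowing and baseline
# normalization may differ.
def _sketch_stddev_within_runs(curves, eval_points, window_size):
  """Per-run stddev of first-differenced values near each eval point."""
  results = []
  for curve in curves:
    timepoints, values = curve[0], curve[1]
    detrended = np.diff(values)  # first-order differences remove the trend
    diff_timepoints = timepoints[1:]
    for point in eval_points:
      # Keep detrended values whose timepoint falls inside the window.
      in_window = np.abs(diff_timepoints - point) <= window_size / 2
      results.append(np.std(detrended[in_window]))
  return np.array(results)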