def test_run_std_server(self, mock_server):
  # Arrange.
  tf_config = {
      'cluster': self._cluster_spec(),
      'task': {
          'type': run_config_lib.TaskType.PS,
          'index': 1
      }
  }
  with test.mock.patch.dict('os.environ',
                            {'TF_CONFIG': json.dumps(tf_config)}):
    config = RunConfig(
        master='host2:2222',
        num_cores=15,
        gpu_memory_fraction=0.314,
    )
  for est in self._estimators_for_tests(config):
    ex = Experiment(
        est, train_input_fn='train_input', eval_input_fn='eval_input')

    # Act.
    ex.run_std_server()

    # Assert.
    mock_server.assert_has_calls(
        [test.mock.call().start(), test.mock.call().join()])
def test_continuous_eval_evaluates_checkpoint_once(self):
  for est in self._estimators_for_tests(eval_dict={'global_step': 100}):
    est.fake_checkpoint()

    result = {
        'called': 0,
        'called_with_eval_result': 0,
    }

    # pylint: disable=cell-var-from-loop
    def _predicate_fn(eval_result):
      result['called'] += 1
      if eval_result:
        # If eval_result is not empty nor None, the checkpoint has been
        # evaluated.
        result['called_with_eval_result'] += 1
      # With 300 times of evaluation, this should prove something.
      return result['called'] < 300
    # pylint: enable=cell-var-from-loop

    ex = Experiment(
        est,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_delay_secs=0,
        continuous_eval_throttle_secs=0)
    ex.continuous_eval(evaluate_checkpoint_only_once=True,
                       continuous_eval_predicate_fn=_predicate_fn)

    self.assertEqual(0, est.fit_count)
    self.assertEqual(1, est.eval_count)
    self.assertEqual(300, result['called'])
    self.assertEqual(1, result['called_with_eval_result'])
def test_min_eval_frequency_defaults(self):
  def dummy_model_fn(features, labels):  # pylint: disable=unused-argument
    pass

  # The default value is 1, even when model_dir is on GCS.
  estimator = Estimator(dummy_model_fn, 'gs://dummy_bucket')
  ex = Experiment(estimator, train_input_fn=None, eval_input_fn=None)
  self.assertEqual(ex._eval_every_n_steps, 1)

  # The default value when model_dir is not on GCS is also 1.
  estimator = Estimator(dummy_model_fn, '/tmp/dummy')
  ex = Experiment(estimator, train_input_fn=None, eval_input_fn=None)
  self.assertEqual(ex._eval_every_n_steps, 1)

  # Make sure the default is not used when explicitly set.
  estimator = Estimator(dummy_model_fn, 'gs://dummy_bucket')
  ex = Experiment(estimator,
                  eval_every_n_steps=123,
                  train_input_fn=None,
                  eval_input_fn=None)
  self.assertEqual(ex._eval_every_n_steps, 123)

  # Make sure the default is not used when explicitly set to 0.
  estimator = Estimator(dummy_model_fn, 'gs://dummy_bucket')
  ex = Experiment(estimator,
                  eval_every_n_steps=0,
                  train_input_fn=None,
                  eval_input_fn=None)
  self.assertEqual(ex._eval_every_n_steps, 0)
def test_train(self):
  for est in self._estimators_for_tests():
    ex = Experiment(est,
                    train_input_fn='train_input',
                    train_steps='train_steps',
                    eval_input_fn='eval_input')
    fit_args = ex.train(delay_secs=0)
    self.assertEqual(1, est.fit_count)
    self.assertIn(('max_steps', 'train_steps'), fit_args)
    self.assertEqual(0, est.eval_count)
def test_run_std_server_raises_without_cluster_spec(self, mock_server):
  config = run_config_lib.RunConfig(master='host4:2222')
  for est in self._estimators_for_tests(config):
    with self.assertRaises(ValueError):
      ex = Experiment(est,
                      train_input_fn='train_input',
                      eval_input_fn='eval_input')
      ex.run_std_server()
def test_continuous_train_and_eval_with_invalid_predicate_fn(self):
  for est in self._estimators_for_tests():
    ex = Experiment(
        est, train_input_fn='train_input', eval_input_fn='eval_input')
    with self.assertRaisesRegexp(
        ValueError, '`continuous_eval_predicate_fn` must be a callable'):
      ex.continuous_train_and_evaluate(continuous_eval_predicate_fn='fn')
def test_test(self):
  for est in self._estimators_for_tests():
    export_strategy = make_export_strategy(est, None, exports_to_keep=None)
    ex = Experiment(est,
                    train_input_fn='train_input',
                    eval_input_fn='eval_input',
                    export_strategies=export_strategy)
    ex.test()
    self.assertEqual(1, est.fit_count)
    self.assertEqual(1, est.eval_count)
    self.assertEqual(1, est.export_count)
def test_train_delay(self):
  for est in self._estimators_for_tests():
    ex = Experiment(est,
                    train_input_fn='train_input',
                    eval_input_fn='eval_input')
    for delay in [0, 1, 3]:
      sheep = SheepCounter()
      with test.mock.patch.object(time, 'time', sheep.time):
        with test.mock.patch.object(time, 'sleep', sheep.sleep):
          ex.train(delay_secs=delay)
      self.assertAlmostEqual(delay, sheep.time(), delta=1e-4)
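# The delay tests above patch `time.time` and `time.sleep` with a
# `SheepCounter` helper that is defined elsewhere in the test module. A
# minimal sketch of what such a fake clock could look like (an assumption
# about the helper, not its actual implementation) is:
class SheepCounter(object):
  """Fake clock: simulated time only advances when sleep() is called."""

  def __init__(self):
    self._total_time = 0.0

  def time(self):
    # Return the current simulated time.
    return self._total_time

  def sleep(self, duration):
    # Instead of blocking, advance the simulated clock.
    self._total_time += duration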
def test_export_strategies_reset(self):
  for est in self._estimators_for_tests():
    export_strategy_1 = make_export_strategy(est, None, exports_to_keep=None)
    ex = Experiment(
        est,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        train_steps=100,
        eval_steps=100,
        export_strategies=(export_strategy_1,))
    ex.train_and_evaluate()
    self.assertEqual(1, est.export_count)

    # After reset with empty list (None), the count does not change and the
    # user provided export strategy list should remain intact.
    old_es = ex.reset_export_strategies()
    ex.train_and_evaluate()
    self.assertAllEqual([export_strategy_1], old_es)
    self.assertEqual(1, est.export_count)

    # After reset with list, the count should increase with the number of
    # items.
    export_strategy_2 = make_export_strategy(est, None, exports_to_keep=None)
    export_strategy_3 = make_export_strategy(est, None, exports_to_keep=None)
    old_es = ex.reset_export_strategies(
        [export_strategy_2, export_strategy_3])
    ex.train_and_evaluate()
    self.assertAllEqual([], old_es)
    self.assertEqual(3, est.export_count)
def test_train_server_does_not_start_without_cluster_spec(self, mock_server):
  config = run_config_lib.RunConfig(master='host4:2222')
  for est in self._estimators_for_tests(config):
    ex = Experiment(
        est, train_input_fn='train_input', eval_input_fn='eval_input')
    ex.train()
    # The server should not have started because there was no ClusterSpec.
    self.assertFalse(mock_server.called)
def test_train_server_does_not_start_with_empty_master(self, mock_server):
  tf_config = {'cluster': self._cluster_spec()}
  with test.mock.patch.dict('os.environ',
                            {'TF_CONFIG': json.dumps(tf_config)}):
    config = run_config_lib.RunConfig(master='')
  for est in self._estimators_for_tests(config):
    ex = Experiment(est,
                    train_input_fn='train_input',
                    eval_input_fn='eval_input')
    ex.train()
    # The server should not have started because master was the empty string.
    self.assertFalse(mock_server.called)
def test_train_and_evaluate_with_no_eval_during_training(self):
  for est in self._estimators_for_tests():
    noop_hook = _NoopHook()
    ex = Experiment(est,
                    train_input_fn='train_input',
                    eval_input_fn='eval_input',
                    eval_hooks=[noop_hook],
                    train_steps=100,
                    eval_steps=100)
    ex.train_and_evaluate()
    self.assertEqual(1, est.fit_count)
    self.assertEqual(1, est.eval_count)
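# `_NoopHook` is another helper defined elsewhere in the test module; the
# tests only check that the hook object is forwarded to the estimator
# untouched. A minimal sketch, assuming it is simply a do-nothing
# SessionRunHook, could be:
import tensorflow as tf


class _NoopHook(tf.train.SessionRunHook):
  """A hook that does nothing; used only to verify hook plumbing."""
  pass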
def test_continuous_train_and_eval(self):
  for est in self._estimators_for_tests(eval_dict={'global_step': 100}):
    noop_hook = _NoopHook()
    export_strategy = make_export_strategy(est, None, exports_to_keep=None)
    ex = Experiment(est,
                    train_input_fn='train_input',
                    eval_input_fn='eval_input',
                    eval_hooks=[noop_hook],
                    train_steps=100,
                    eval_steps=100,
                    export_strategies=export_strategy)
    ex.continuous_train_and_evaluate()
    self.assertEqual(1, est.fit_count)
    self.assertEqual(1, est.eval_count)
    self.assertEqual(1, est.export_count)
    self.assertEqual([noop_hook], est.eval_hooks)
def test_evaluate(self):
  for est in self._estimators_for_tests():
    est.fake_checkpoint()
    noop_hook = _NoopHook()
    ex = Experiment(est,
                    train_input_fn='train_input',
                    eval_input_fn='eval_input',
                    eval_hooks=[noop_hook],
                    eval_steps='steps',
                    eval_delay_secs=0)
    ex.evaluate()
    self.assertEqual(0, est.fit_count)
    self.assertEqual(1, est.eval_count)
    self.assertEqual([noop_hook], est.eval_hooks)
def test_evaluate_delay(self):
  for est in self._estimators_for_tests():
    est.fake_checkpoint()
    noop_hook = _NoopHook()
    ex = Experiment(
        est,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_hooks=[noop_hook])
    for delay in [0, 1, 3]:
      sheep = SheepCounter()
      with test.mock.patch.object(time, 'time', sheep.time):
        with test.mock.patch.object(time, 'sleep', sheep.sleep):
          ex.evaluate(delay_secs=delay)
      self.assertAlmostEqual(delay, sheep.time(), delta=1e-4)
      self.assertEqual([noop_hook], est.eval_hooks)
def test_continuous_eval_ends_after_train_step(self):
  for est in self._estimators_for_tests(eval_dict={'global_step': 100}):
    est.fake_checkpoint()
    noop_hook = _NoopHook()
    ex = Experiment(est,
                    train_input_fn='train_input',
                    eval_input_fn='eval_input',
                    eval_hooks=[noop_hook],
                    eval_delay_secs=0,
                    continuous_eval_throttle_secs=0,
                    train_steps=100)
    ex.continuous_eval()
    self.assertEqual(0, est.fit_count)
    self.assertEqual(1, est.eval_count)
    self.assertEqual([noop_hook], est.eval_hooks)
def test_train_default_delay(self):
  for task_id in [0, 1, 3]:
    tf_config = {'task': {'index': task_id}}
    with test.mock.patch.dict('os.environ',
                              {'TF_CONFIG': json.dumps(tf_config)}):
      config = RunConfig()
    for est in self._estimators_for_tests(config):
      ex = Experiment(
          est, train_input_fn='train_input', eval_input_fn='eval_input')
      sheep = SheepCounter()
      with test.mock.patch.object(time, 'time', sheep.time):
        with test.mock.patch.object(time, 'sleep', sheep.sleep):
          ex.train()
      self.assertAlmostEqual(task_id * 5, sheep.time(), delta=1e-4)
def test_invalid_export_strategies(self):
  for est in self._estimators_for_tests():
    with self.assertRaisesRegexp(ValueError, 'ExportStrategy'):
      Experiment(est,
                 train_input_fn='train_input',
                 eval_input_fn='eval_input',
                 train_steps=100,
                 eval_steps=100,
                 export_strategies='not_an_export_strategy')
    with self.assertRaisesRegexp(ValueError, 'ExportStrategy'):
      Experiment(est,
                 train_input_fn='train_input',
                 eval_input_fn='eval_input',
                 train_steps=100,
                 eval_steps=100,
                 export_strategies=['not_an_export_strategy'])
def test_continuous_eval_predicate_fn(self):
  for est in self._estimators_for_tests():
    est.fake_checkpoint()
    noop_hook = _NoopHook()

    def _predicate_fn(unused_eval_result):
      return est.eval_count < 3  # pylint: disable=cell-var-from-loop

    ex = Experiment(est,
                    train_input_fn='train_input',
                    eval_input_fn='eval_input',
                    eval_hooks=[noop_hook],
                    eval_delay_secs=0,
                    continuous_eval_throttle_secs=0)
    ex.continuous_eval(evaluate_checkpoint_only_once=False,
                       continuous_eval_predicate_fn=_predicate_fn)
    self.assertEqual(0, est.fit_count)
    self.assertEqual(3, est.eval_count)
    self.assertEqual([noop_hook], est.eval_hooks)
def test_train_hooks_extend_does_not_mutate_input_hooks(self):
  for est in self._estimators_for_tests():
    noop_hook = _NoopHook()
    input_hooks = [noop_hook]

    ex = Experiment(
        est,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        train_hooks=input_hooks)
    self.assertAllEqual([noop_hook], ex._train_hooks)

    another_noop_hook = _NoopHook()
    # Assert that the extend API mutates the hooks, but not the input hooks.
    ex.extend_train_hooks([another_noop_hook])
    self.assertAllEqual([noop_hook, another_noop_hook], ex._train_hooks)
    self.assertAllEqual([noop_hook], input_hooks)
def test_continuous_train_and_eval_with_adapted_steps_per_iteration(self):
  mock_estimator = test.mock.Mock(Estimator)
  type(mock_estimator).model_dir = test.mock.PropertyMock(
      return_value='test_dir')

  total_steps = 100000000000000
  ex = Experiment(mock_estimator,
                  train_input_fn='train_input',
                  eval_input_fn='eval_input',
                  train_steps=total_steps,
                  train_steps_per_iteration=None)

  def predicate_fn(eval_result):
    # Allows the first invoke only.
    return eval_result is None

  ex.continuous_train_and_evaluate(continuous_eval_predicate_fn=predicate_fn)
  mock_estimator.train.assert_called_once_with(
      input_fn='train_input',
      steps=int(total_steps / 10),
      max_steps=None,
      hooks=[])
def test_continuous_train_and_eval_with_predicate_fn(self):
  for est in self._estimators_for_tests(eval_dict={'global_step': 100}):
    export_strategy = make_export_strategy(est, None, exports_to_keep=None)
    ex = Experiment(
        est,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        train_steps=100000000000,  # A value that makes `ex` never stop.
        eval_steps=100,
        export_strategies=export_strategy)

    def predicate_fn(eval_result):
      del eval_result  # Unused; needed only for the fn signature.
      return False

    ex.continuous_train_and_evaluate(
        continuous_eval_predicate_fn=predicate_fn)
    self.assertEqual(0, est.fit_count)
    self.assertEqual(0, est.eval_count)
    self.assertEqual(1, est.export_count)
def test_train_starts_server(self, mock_server):
  # Arrange.
  tf_config = {
      'cluster': self._cluster_spec(),
      'environment': run_config_lib.Environment.CLOUD,
      'task': {
          'type': run_config_lib.TaskType.WORKER,
          'index': 1
      }
  }
  with test.mock.patch.dict('os.environ',
                            {'TF_CONFIG': json.dumps(tf_config)}):
    config = run_config_lib.RunConfig(
        master='host4:2222', num_cores=15, gpu_memory_fraction=0.314)
  for est in self._estimators_for_tests(config):
    ex = Experiment(est,
                    train_input_fn='train_input',
                    eval_input_fn='eval_input')

    # Act.
    # We want to make sure we discount the time it takes to start the server
    # in our accounting of the delay, so we set a small delay here.
    sheep = SheepCounter()
    with test.mock.patch.object(time, 'time', sheep.time):
      with test.mock.patch.object(time, 'sleep', sheep.sleep):
        ex.train(delay_secs=1)
        # Ensure that the delay takes into account the time to start server.
        self.assertAlmostEqual(1, sheep.time(), delta=1e-4)

    # Assert.
    expected_config_proto = config_pb2.ConfigProto()
    expected_config_proto.inter_op_parallelism_threads = 15
    expected_config_proto.intra_op_parallelism_threads = 15
    expected_config_proto.gpu_options.per_process_gpu_memory_fraction = 0.314
    mock_server.assert_called_with(
        config.cluster_spec,
        job_name=run_config_lib.TaskType.WORKER,
        task_index=1,
        config=expected_config_proto,
        start=False)
    mock_server.assert_has_calls([test.mock.call().start()])
def test_continuous_train_and_eval_with_invalid_train_steps_iterations(self):
  for est in self._estimators_for_tests():
    with self.assertRaisesRegexp(
        ValueError, '`train_steps_per_iteration` must be an integer.'):
      Experiment(est,
                 train_input_fn='train_input',
                 eval_input_fn='eval_input',
                 train_steps_per_iteration='123')
def test_train_raises_if_job_name_is_missing(self):
  tf_config = {
      'cluster': self._cluster_spec(),
      'environment': run_config_lib.Environment.CLOUD,
      'task': {
          'index': 1
      }
  }
  with test.mock.patch.dict(
      'os.environ',
      {'TF_CONFIG': json.dumps(tf_config)}), self.assertRaises(ValueError):
    config = run_config_lib.RunConfig(
        master='host3:2222'  # Normally selected by task type.
    )
    for est in self._estimators_for_tests(config):
      ex = Experiment(
          est, train_input_fn='train_input', eval_input_fn='eval_input')
      ex.train()
def test_continuous_train_and_eval_with_default_steps_per_iteration(self):
  mock_estimator = test.mock.Mock(Estimator)
  type(mock_estimator).model_dir = test.mock.PropertyMock(
      return_value='test_dir')

  ex = Experiment(mock_estimator,
                  train_input_fn='train_input',
                  eval_input_fn='eval_input',
                  train_steps_per_iteration=None,
                  train_steps=None)

  def predicate_fn(eval_result):
    # Allows the first invoke only.
    return eval_result is None

  ex.continuous_train_and_evaluate(continuous_eval_predicate_fn=predicate_fn)
  mock_estimator.train.assert_called_once_with(
      input_fn='train_input',
      steps=1000,
      max_steps=test.mock.ANY,
      hooks=test.mock.ANY)
def test_continuous_eval(self):
  for est in self._estimators_for_tests(eval_dict={'global_step': 100}):
    est.fake_checkpoint()
    noop_hook = _NoopHook()
    ex = Experiment(est,
                    train_input_fn='train_input',
                    eval_input_fn='eval_input',
                    eval_hooks=[noop_hook],
                    eval_delay_secs=0,
                    continuous_eval_throttle_secs=0)
    self.assertRaises(StopIteration,
                      ex.continuous_eval,
                      evaluate_checkpoint_only_once=False)
    self.assertEqual(0, est.fit_count)
    self.assertEqual(6, est.eval_count)
    self.assertEqual([noop_hook], est.eval_hooks)
def prepare_experiment_run(polyaxonfile, experiment_id,
                           task_type=TaskType.MASTER, task_id=0):
  plx_file = PolyaxonFile.read(polyaxonfile)
  cluster, _ = plx_file.get_cluster_def_at(experiment_id)
  if (task_type not in cluster or
      not isinstance(cluster[task_type], int) or
      task_id >= cluster[task_type]):
    raise ValueError('task_type, task_id `{}, {}` is not supported by '
                     'the specification file passed.'.format(task_type,
                                                             task_id))

  env = plx_file.get_environment_at(experiment_id)
  if not env:
    tf.logging.set_verbosity(tf.logging.INFO)
    configs = {TaskType.MASTER: [RunConfig()]}
    delay_workers_by_global_step = False
  else:
    tf.logging.set_verbosity(LOGGING_LEVEL[plx_file.settings.logging.level])
    configs, _ = _get_run_configs(plx_file, experiment_id)
    delay_workers_by_global_step = env.delay_workers_by_global_step

  train_input_fn, train_steps, train_hooks = _get_train(
      plx_file.get_train_at(experiment_id))
  (eval_input_fn, eval_steps, eval_hooks, eval_delay_secs,
   continuous_eval_throttle_secs) = _get_eval(
       plx_file.get_eval_at(experiment_id))

  estimator = getters.get_estimator(
      plx_file.get_model_at(experiment_id),
      configs[task_type][task_id],
      output_dir=plx_file.get_project_path_at(experiment_id))

  return Experiment(
      estimator=estimator,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      train_steps=train_steps,
      eval_steps=eval_steps,
      train_hooks=train_hooks,
      eval_hooks=eval_hooks,
      eval_delay_secs=eval_delay_secs,
      continuous_eval_throttle_secs=continuous_eval_throttle_secs,
      delay_workers_by_global_step=delay_workers_by_global_step,
      export_strategies=plx_file.settings.export_strategies)
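# Hypothetical usage sketch for `prepare_experiment_run`; the file name and
# experiment id below are placeholders, and the call assumes a specification
# file that defines a master task:
#
#   experiment = prepare_experiment_run('polyaxonfile.yml', experiment_id=0,
#                                       task_type=TaskType.MASTER, task_id=0)
#   experiment.continuous_train_and_evaluate()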
def get_experiment(config):
  # Note: `plx_file`, the input functions, step/hook values and
  # `delay_workers_by_global_step` are free variables, presumably captured
  # from the enclosing scope in which this helper is defined.
  estimator = getters.get_estimator(plx_file.model, config,
                                    output_dir=plx_file.project_path)
  return Experiment(
      estimator=estimator,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      train_steps=train_steps,
      eval_steps=eval_steps,
      train_hooks=train_hooks,
      eval_hooks=eval_hooks,
      eval_delay_secs=eval_delay_secs,
      continuous_eval_throttle_secs=continuous_eval_throttle_secs,
      delay_workers_by_global_step=delay_workers_by_global_step,
      export_strategies=plx_file.settings.export_strategies)
def create_experiment(experiment_config):
  """Creates a new `Experiment` instance.

  Args:
    experiment_config: the config to use for creating the experiment.
  """
  # Creates the training input function.
  train_input_data_config = experiment_config.train_input_data_config
  train_input_fn = create_input_data_fn(
      pipeline_config=train_input_data_config.pipeline_config,
      mode=Modes.TRAIN,
      scope='train_input_fn',
      input_type=train_input_data_config.input_type,
      x=train_input_data_config.x,
      y=train_input_data_config.y)

  # Creates the evaluation input function.
  eval_input_data_config = experiment_config.eval_input_data_config
  eval_input_fn = create_input_data_fn(
      pipeline_config=eval_input_data_config.pipeline_config,
      mode=Modes.EVAL,
      scope='eval_input_fn',
      input_type=eval_input_data_config.input_type,
      x=eval_input_data_config.x,
      y=eval_input_data_config.y)

  estimator = getters.get_estimator(experiment_config.estimator_config,
                                    experiment_config.model_config,
                                    experiment_config.run_config)

  train_hooks = getters.get_hooks(experiment_config.train_hooks_config)
  eval_hooks = getters.get_hooks(experiment_config.eval_hooks_config)

  experiment = Experiment(
      estimator=estimator,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      train_steps=experiment_config.train_steps,
      eval_steps=experiment_config.eval_steps,
      train_hooks=train_hooks,
      eval_hooks=eval_hooks,
      eval_delay_secs=experiment_config.eval_delay_secs,
      continuous_eval_throttle_secs=experiment_config.continuous_eval_throttle_secs,
      eval_every_n_steps=experiment_config.eval_every_n_steps,
      delay_workers_by_global_step=experiment_config.delay_workers_by_global_step,
      export_strategies=experiment_config.export_strategies,
      train_steps_per_iteration=experiment_config.train_steps_per_iteration)

  return experiment
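# Hypothetical usage sketch for `create_experiment`; `experiment_config` is
# assumed to be a parsed config object exposing the attributes read above
# (its exact schema is defined elsewhere in the project):
#
#   experiment = create_experiment(experiment_config)
#   experiment.train_and_evaluate()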
def test_continuous_eval_throttle_delay(self):
  for delay in [0, 1, 2]:
    for est in self._estimators_for_tests():
      est.fake_checkpoint()
      noop_hook = _NoopHook()
      ex = Experiment(est,
                      train_input_fn='train_input',
                      eval_input_fn='eval_input',
                      eval_hooks=[noop_hook],
                      continuous_eval_throttle_secs=delay,
                      eval_delay_secs=0)
      sheep = SheepCounter()
      with test.mock.patch.object(time, 'time', sheep.time):
        with test.mock.patch.object(time, 'sleep', sheep.sleep):
          self.assertRaises(StopIteration,
                            ex.continuous_eval,
                            evaluate_checkpoint_only_once=False)
      self.assertAlmostEqual(5 * delay, sheep.time(), delta=1e-4)