def test_uses_training_env_as_evaluation_env(self):
    """When no 'evaluation' environment is configured, the runner must
    reuse its training environment for evaluation.
    """
    tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    self.assertFalse(tf.compat.v1.trainable_variables())

    test_config = copy.deepcopy(CONFIG)
    self.assertNotIn('evaluation', test_config['environment_params'])
    test_config['run_params']['checkpoint_replay_pool'] = True

    runner = ExperimentRunner(config=test_config)
    tf_session = runner._session
    runner._build()

    # The two environment handles must be the very same object,
    # not merely equal ones.
    self.assertIs(runner.training_environment, runner.evaluation_environment)

    # Freshly built runner: all counters at zero, alpha at its initial 1.0.
    algorithm = runner.algorithm
    self.assertEqual(algorithm._epoch, 0)
    self.assertEqual(algorithm._timestep, 0)
    self.assertEqual(algorithm._total_timestep, 0)
    self.assertFalse(algorithm._training_started)
    self.assertEqual(runner.replay_pool.size, 0)
    self.assertEqual(tf_session.run(algorithm._alpha), 1.0)

    # A couple of epochs should run without error.
    for _ in range(2):
        runner.train()
def test_checkpoint_pool_reconstruction(self):
    """Fill the replay pool to capacity, checkpoint, and verify that a
    fresh runner restored from the checkpoint reproduces the pool exactly.

    NOTE(review): several defs in this file share this method name; only
    the last same-named definition in the class is collected by the test
    runner — confirm whether these are alternate versions to be pruned.
    """
    # Use tf.compat.v1 for consistency with the sibling tests; the bare
    # tf.reset_default_graph()/tf.trainable_variables() names only exist
    # in TF1-style namespaces.
    tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    self.assertFalse(tf.compat.v1.trainable_variables())

    # deepcopy: CONFIG.copy() is shallow, so mutating the nested
    # 'run_params' dict below would leak into the shared CONFIG object
    # and affect other tests.
    config = copy.deepcopy(CONFIG)
    config['run_params']['checkpoint_replay_pool'] = True

    experiment_runner = ExperimentRunner(config=config)
    session = experiment_runner._session
    experiment_runner._build()

    # Freshly built runner: nothing trained yet.
    self.assertEqual(experiment_runner.algorithm._epoch, 0)
    self.assertEqual(experiment_runner.algorithm._timestep, 0)
    self.assertEqual(experiment_runner.algorithm._total_timestep, 0)
    self.assertFalse(experiment_runner.algorithm._training_started)
    self.assertEqual(experiment_runner.replay_pool.size, 0)
    self.assertEqual(session.run(experiment_runner.algorithm._alpha), 1.0)

    # Train until the pool reaches capacity, checkpointing every 10 epochs.
    checkpoints = []
    while (experiment_runner.replay_pool.size
           < experiment_runner.replay_pool._max_size):
        for _ in range(10):
            experiment_runner.train()
        checkpoints.append(experiment_runner.save())

    # Restore into a brand-new graph/session.
    tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    self.assertFalse(tf.compat.v1.trainable_variables())

    experiment_runner_2 = ExperimentRunner(config=config)
    session = experiment_runner_2._session
    self.assertFalse(experiment_runner_2._built)
    experiment_runner_2.restore(checkpoints[-1])

    replay_pool_1 = experiment_runner.replay_pool
    replay_pool_2 = experiment_runner_2.replay_pool

    self.assertEqual(replay_pool_1._max_size, replay_pool_2._max_size)
    self.assertEqual(replay_pool_1.size, replay_pool_2.size)
    self.assertEqual(replay_pool_2._max_size, replay_pool_2.size)
    self.assertEqual(
        set(replay_pool_1.fields.keys()),
        set(replay_pool_2.fields.keys()))

    # Field values must round-trip exactly. Iterate keys only — the
    # field attribute metadata was previously unpacked but never used.
    for field_name in replay_pool_1.fields:
        np.testing.assert_array_equal(
            getattr(replay_pool_1, field_name),
            getattr(replay_pool_2, field_name))
def test_checkpoint_pool_reconstruction(self):
    """Verify that a freshly built runner starts from a clean state and
    can train for 10 epochs without error.

    NOTE(review): this def shares its name with other tests in this
    file, so earlier same-named definitions are shadowed and never run —
    consider renaming or removing the stale variants.
    """
    # Use tf.compat.v1 consistently; bare tf.reset_default_graph() and
    # tf.trainable_variables() are TF1-only names and break under the
    # tf.compat.v1 convention the sibling tests follow.
    tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    self.assertFalse(tf.compat.v1.trainable_variables())

    experiment_runner = ExperimentRunner(config=CONFIG)
    session = experiment_runner._session
    experiment_runner._build()

    # Freshly built runner: all counters at zero, alpha at 1.0.
    self.assertEqual(experiment_runner.algorithm._epoch, 0)
    self.assertEqual(experiment_runner.algorithm._timestep, 0)
    self.assertEqual(experiment_runner.algorithm._total_timestep, 0)
    self.assertFalse(experiment_runner.algorithm._training_started)
    self.assertEqual(experiment_runner.replay_pool.size, 0)
    self.assertEqual(session.run(experiment_runner.algorithm._alpha), 1.0)

    for _ in range(10):
        experiment_runner.train()
def test_checkpoint_pool_reconstruction(self):
    """Fill the replay pool, checkpoint, then restore into a new runner
    and verify that the pool contents round-trip exactly (eager-mode
    variant: reads alpha via ``.numpy()``, no explicit session).
    """
    test_config = copy.deepcopy(CONFIG)
    test_config['run_params']['checkpoint_replay_pool'] = True

    runner = ExperimentRunner(config=test_config)
    runner._build()

    # Fresh runner: no training has happened yet.
    algorithm = runner.algorithm
    self.assertEqual(algorithm._epoch, 0)
    self.assertEqual(algorithm._timestep, 0)
    self.assertEqual(algorithm._total_timestep, 0)
    self.assertFalse(algorithm._training_started)
    self.assertEqual(runner.replay_pool.size, 0)
    self.assertEqual(algorithm._alpha.numpy(), 1.0)

    # Train until the pool reaches capacity, saving a checkpoint after
    # every 10 epochs.
    checkpoints = []
    while runner.replay_pool.size < runner.replay_pool._max_size:
        for _ in range(10):
            runner.train()
        checkpoints.append(runner.save())

    # Restore the most recent checkpoint into a brand-new runner.
    restored_runner = ExperimentRunner(config=test_config)
    self.assertFalse(restored_runner._built)
    restored_runner.restore(checkpoints[-1])

    original_pool = runner.replay_pool
    restored_pool = restored_runner.replay_pool

    self.assertEqual(original_pool._max_size, restored_pool._max_size)
    self.assertEqual(original_pool.size, restored_pool.size)
    self.assertEqual(restored_pool._max_size, restored_pool.size)
    self.assertEqual(
        set(original_pool.fields.keys()),
        set(restored_pool.fields.keys()))

    # Every stored field must match element-for-element.
    for field_name in original_pool.fields.keys():
        np.testing.assert_array_equal(
            original_pool.fields[field_name],
            restored_pool.fields[field_name])
def test_uses_training_env_as_evaluation_env(self):
    """Without an explicit 'evaluation' entry in the environment params,
    the runner should evaluate on the training environment itself
    (eager-mode variant).
    """
    test_config = copy.deepcopy(CONFIG)
    self.assertNotIn('evaluation', test_config['environment_params'])
    test_config['run_params']['checkpoint_replay_pool'] = True

    runner = ExperimentRunner(config=test_config)
    runner._build()

    # Must be the same object, not merely an equal one.
    self.assertIs(runner.training_environment, runner.evaluation_environment)

    # Pristine initial state.
    algorithm = runner.algorithm
    self.assertEqual(algorithm._epoch, 0)
    self.assertEqual(algorithm._timestep, 0)
    self.assertEqual(algorithm._total_timestep, 0)
    self.assertFalse(algorithm._training_started)
    self.assertEqual(runner.replay_pool.size, 0)
    self.assertEqual(algorithm._alpha.numpy(), 1.0)

    # Two epochs of training should complete without error.
    for _ in range(2):
        runner.train()
def test_checkpoint_dict(self):
    """End-to-end save/restore test.

    Trains a runner for 10 epochs, pins alpha to a known value,
    checkpoints, restores into a fresh graph/session, and verifies that
    the trainable variables, training counters, and the pinned alpha
    survive the round trip.
    """
    tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    self.assertFalse(tf.compat.v1.trainable_variables())

    config = copy.deepcopy(CONFIG)
    experiment_runner = ExperimentRunner(config=config)
    session = experiment_runner._session
    experiment_runner._build()

    # Freshly built runner: no training has happened yet.
    self.assertEqual(experiment_runner.algorithm._epoch, 0)
    self.assertEqual(experiment_runner.algorithm._timestep, 0)
    self.assertEqual(experiment_runner.algorithm._total_timestep, 0)
    self.assertFalse(experiment_runner.algorithm._training_started)
    self.assertEqual(experiment_runner.replay_pool.size, 0)
    self.assertEqual(session.run(experiment_runner.algorithm._alpha), 1.0)

    initial_policy_weights = experiment_runner.policy.get_weights()
    initial_Qs_weights = [Q.get_weights() for Q in experiment_runner.Qs]

    for i in range(10):
        experiment_runner.train()

    # Counter values after 10 epochs; the exact numbers follow from the
    # epoch/timestep settings in CONFIG (presumably 20 timesteps/epoch —
    # confirm against the CONFIG definition).
    self.assertEqual(experiment_runner.algorithm._epoch, 9)
    self.assertEqual(experiment_runner.algorithm._timestep, 20)
    self.assertEqual(experiment_runner.algorithm._total_timestep, 200)
    self.assertTrue(experiment_runner.algorithm._training_started)
    self.assertNotEqual(
        session.run(experiment_runner.algorithm._alpha), 1.0)
    self.assertEqual(experiment_runner.replay_pool.size, 210)

    policy_weights = experiment_runner.policy.get_weights()
    Qs_weights = [Q.get_weights() for Q in experiment_runner.Qs]

    # Make sure that the training changed all the weights
    assert_weights_not_equal(initial_policy_weights, policy_weights)
    for initial_Q_weights, Q_weights in zip(initial_Qs_weights, Qs_weights):
        assert_weights_not_equal(initial_Q_weights, Q_weights)

    # Pin log_alpha to a known value so the restore can be checked
    # against it. Use tf.compat.v1.assign: bare `tf.assign` does not
    # exist in the TF2 namespace this block's compat.v1 calls imply.
    expected_alpha_value = 5.0
    session.run(
        tf.compat.v1.assign(experiment_runner.algorithm._log_alpha,
                            np.log(expected_alpha_value)))
    self.assertEqual(
        session.run(experiment_runner.algorithm._alpha),
        expected_alpha_value)

    trainable_variables_1 = {
        'policy': experiment_runner.policy.trainable_variables,
        'Q0': experiment_runner.Qs[0].trainable_variables,
        'Q1': experiment_runner.Qs[1].trainable_variables,
        'target_Q0': (
            experiment_runner.algorithm._Q_targets[0].trainable_variables),
        'target_Q1': (
            experiment_runner.algorithm._Q_targets[1].trainable_variables),
        'log_alpha': [experiment_runner.algorithm._log_alpha],
    }
    trainable_variables_1_np = session.run(trainable_variables_1)

    # The dict above must account for every trainable variable in the
    # graph (checkpoint save counters excluded).
    assert set(
        variable for _, variables in trainable_variables_1.items()
        for variable in variables
    ) == set(
        variable for variable in tf.compat.v1.trainable_variables()
        if 'save_counter' not in variable.name)

    optimizer_variables_1 = {
        'Q_optimizer_1': (
            experiment_runner.algorithm._Q_optimizers[0].variables()),
        'Q_optimizer_2': (
            experiment_runner.algorithm._Q_optimizers[1].variables()),
        'policy_optimizer': (
            experiment_runner.algorithm._policy_optimizer.variables()),
        'alpha_optimizer': (
            experiment_runner.algorithm._alpha_optimizer.variables()),
    }
    optimizer_variables_1_np = session.run(optimizer_variables_1)

    checkpoint = experiment_runner.save()

    # Start over with an empty graph and restore from the checkpoint.
    tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    self.assertFalse(tf.compat.v1.trainable_variables())

    experiment_runner_2 = ExperimentRunner(config=config)
    session = experiment_runner_2._session
    self.assertFalse(experiment_runner_2._built)
    experiment_runner_2.restore(checkpoint)

    trainable_variables_2 = {
        'policy': experiment_runner_2.policy.trainable_variables,
        'Q0': experiment_runner_2.Qs[0].trainable_variables,
        'Q1': experiment_runner_2.Qs[1].trainable_variables,
        'target_Q0': (
            experiment_runner_2.algorithm._Q_targets[0].trainable_variables),
        'target_Q1': (
            experiment_runner_2.algorithm._Q_targets[1].trainable_variables),
        'log_alpha': [experiment_runner_2.algorithm._log_alpha],
    }
    trainable_variables_2_np = session.run(trainable_variables_2)

    # Same completeness check on the restored graph.
    assert set(
        variable for _, variables in trainable_variables_2.items()
        for variable in variables
    ) == set(
        variable for variable in tf.compat.v1.trainable_variables()
        if 'save_counter' not in variable.name)

    optimizer_variables_2 = {
        'Q_optimizer_1': (
            experiment_runner_2.algorithm._Q_optimizers[0].variables()),
        'Q_optimizer_2': (
            experiment_runner_2.algorithm._Q_optimizers[1].variables()),
        'policy_optimizer': (
            experiment_runner_2.algorithm._policy_optimizer.variables()),
        'alpha_optimizer': (
            experiment_runner_2.algorithm._alpha_optimizer.variables()),
    }
    optimizer_variables_2_np = session.run(optimizer_variables_2)

    # Compare restored values against the pre-save snapshot.
    # NOTE(review): target-Q variables are deliberately skipped in the
    # assertion below — confirm whether restore is expected to preserve
    # them exactly or re-derive them from the live Q networks.
    for i, (key, variables_1_np) in enumerate(
            trainable_variables_1_np.items()):
        print()  # debug-output separator between variable groups
        variables_1_tf = trainable_variables_1[key]
        variables_2_tf = trainable_variables_2[key]
        variables_2_np = trainable_variables_2_np[key]
        for j, (variable_1_np,
                variable_2_np,
                variable_1_tf,
                variable_2_tf) in enumerate(
                    zip(variables_1_np,
                        variables_2_np,
                        variables_1_tf,
                        variables_2_tf)):
            allclose = np.allclose(variable_1_np, variable_2_np)
            variable_1_name = variable_1_tf.name
            variable_2_name = variable_2_tf.name
            print(f"i: {i}; j: {j}; {key};"
                  f" {allclose}; {variable_1_name}; {variable_2_name}")
            if 'target_Q' in key:
                pass
            else:
                np.testing.assert_allclose(variable_1_np, variable_2_np)

    # Optimizer-state comparison is currently disabled — kept for
    # reference; re-enable once optimizer state is checkpointed.
    # for optimizer_key in optimizer_variables_1_np.keys():
    #     variables_1_np = optimizer_variables_1_np[optimizer_key]
    #     variables_2_np = optimizer_variables_2_np[optimizer_key]
    #     for variable_1_np, variable_2_np in zip(
    #             variables_1_np, variables_2_np):
    #         np.testing.assert_allclose(variable_1_np, variable_2_np)

    # Debug scaffolding: compares the pre-save Qs against the restored
    # target Qs. Computes allclose/names only; makes no assertions.
    for i in (0, 1):
        Q_variables_tf = trainable_variables_1[f'Q{i}']
        Q_variables_np = trainable_variables_1_np[f'Q{i}']
        target_Q_variables_tf = trainable_variables_2[f'target_Q{i}']
        target_Q_variables_np = trainable_variables_2_np[f'target_Q{i}']
        for j, (Q_np, target_Q_np, Q_tf, target_Q_tf) in enumerate(
                zip(Q_variables_np,
                    target_Q_variables_np,
                    Q_variables_tf,
                    target_Q_variables_tf)):
            allclose = np.allclose(Q_np, target_Q_np)
            Q_name = Q_tf.name
            target_Q_name = target_Q_tf.name
            # print(f"i: {i}; {allclose}; {Q_name}; {target_Q_name}")

    # Training counters and the pinned alpha must survive the restore.
    self.assertEqual(experiment_runner_2.algorithm._epoch, 10)
    self.assertEqual(experiment_runner_2.algorithm._timestep, 0)
    self.assertEqual(
        session.run(experiment_runner_2.algorithm._alpha),
        expected_alpha_value)

    # Training must be able to continue from the restored state.
    for i in range(10):
        experiment_runner_2.train()

    self.assertEqual(experiment_runner_2.algorithm._epoch, 19)
    self.assertEqual(experiment_runner_2.algorithm._timestep, 20)
    self.assertEqual(experiment_runner_2.algorithm._total_timestep, 400)
    self.assertTrue(experiment_runner_2.algorithm._training_started)