# Imports assumed by this test module (exact module paths are repo-specific
# placeholders, not verified against the repo layout):
#   import tensorflow.compat.v1 as tf_cv1
#   SoftActorCriticBrain, critic_learning_rate_scheduler  <- the SAC brain module
#   vocab                                                  <- the project vocabulary


def test_SoftActorCritic_brain_Critic_Q_BUILD_PASS(gym_and_tf_SAC_Brain_continuous_setup):
    """Build the two Q_theta critic graphs on top of the gaussian policy graph."""
    obs_t_ph, act_ph, _, _, _, exp_spec, playground = gym_and_tf_SAC_Brain_continuous_setup

    with tf_cv1.variable_scope(vocab.actor_network):
        pi, pi_log_p, policy_mu = SoftActorCriticBrain.build_gaussian_policy_graph(
            obs_t_ph, exp_spec, playground)

    with tf_cv1.variable_scope(vocab.critic_network):
        Q_act_1, Q_pi_1 = SoftActorCriticBrain.build_critic_graph_q_theta(
            obs_t_ph, act_ph, pi, exp_spec, vocab.Q_theta_1)
        Q_act_2, Q_pi_2 = SoftActorCriticBrain.build_critic_graph_q_theta(
            obs_t_ph, act_ph, pi, exp_spec, vocab.Q_theta_2)

def test_SoftActorCritic_brain_Actor_Pi_TRAIN_PASS(gym_and_tf_SAC_Brain_continuous_setup):
    """Build the policy and both Q_theta graphs, then wire the actor training ops."""
    continuous_setup = gym_and_tf_SAC_Brain_continuous_setup
    obs_t_ph, act_ph, obs_t_prime_ph, reward_t_ph, trj_done_t_ph, exp_spec, playground = continuous_setup

    with tf_cv1.variable_scope(vocab.actor_network):
        pi, pi_log_p, policy_mu = SoftActorCriticBrain.build_gaussian_policy_graph(
            obs_t_ph, exp_spec, playground)

    with tf_cv1.variable_scope(vocab.critic_network):
        Q_act_1, Q_pi_1 = SoftActorCriticBrain.build_critic_graph_q_theta(
            obs_t_ph, act_ph, pi, exp_spec, vocab.Q_theta_1)
        Q_act_2, Q_pi_2 = SoftActorCriticBrain.build_critic_graph_q_theta(
            obs_t_ph, act_ph, pi, exp_spec, vocab.Q_theta_2)

    actor_kl_loss, actor_policy_optimizer_op = SoftActorCriticBrain.actor_train(
        pi_log_p, Q_pi_1, Q_pi_2, exp_spec)

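# The sketch below is an addition, not part of the original suite: it extends
# the Actor_Pi_TRAIN test into a one-step smoke run that initializes variables
# and takes a single actor gradient step on a zero-filled observation batch.
# It assumes obs_t_ph has a fully defined trailing dimension and that
# actor_policy_optimizer_op is directly sess.run-able; both are assumptions.
def test_SoftActorCritic_brain_Actor_Pi_TRAIN_SMOKE_RUN_sketch(gym_and_tf_SAC_Brain_continuous_setup):
    import numpy as np
    obs_t_ph, act_ph, _, _, _, exp_spec, playground = gym_and_tf_SAC_Brain_continuous_setup

    with tf_cv1.variable_scope(vocab.actor_network):
        pi, pi_log_p, policy_mu = SoftActorCriticBrain.build_gaussian_policy_graph(
            obs_t_ph, exp_spec, playground)
    with tf_cv1.variable_scope(vocab.critic_network):
        _, Q_pi_1 = SoftActorCriticBrain.build_critic_graph_q_theta(
            obs_t_ph, act_ph, pi, exp_spec, vocab.Q_theta_1)
        _, Q_pi_2 = SoftActorCriticBrain.build_critic_graph_q_theta(
            obs_t_ph, act_ph, pi, exp_spec, vocab.Q_theta_2)
    actor_kl_loss, actor_policy_optimizer_op = SoftActorCriticBrain.actor_train(
        pi_log_p, Q_pi_1, Q_pi_2, exp_spec)

    # Zero-filled batch shaped after the placeholder's static shape
    dummy_obs = np.zeros([4] + obs_t_ph.shape.as_list()[1:])
    with tf_cv1.Session() as sess:
        sess.run(tf_cv1.global_variables_initializer())
        loss_out, _ = sess.run([actor_kl_loss, actor_policy_optimizer_op],
                               feed_dict={obs_t_ph: dummy_obs})
    assert np.all(np.isfinite(loss_out))
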
def test_SoftActorCritic_brain_tensor_entity_call_warning_investigationGYM_KERAS_PASS(
        gym_and_KERAS_DEV_continuous_setup):
    obs_t_ph, _, _, _, _, exp_spec, playground = gym_and_KERAS_DEV_continuous_setup
    exp_spec.set_experiment_spec({'phi_nn_h_layer_topo': (2, 2)})
    pi, pi_log_p, policy_mu = SoftActorCriticBrain.build_gaussian_policy_graph(
        obs_t_ph, exp_spec, playground)

def test_SoftActorCritic_brain_Critic_Q_TRAIN_PASS(gym_and_tf_SAC_Brain_continuous_setup):
    """Build the full SAC graph (pi, V_psi, both Q_theta), then wire the Q_theta training ops."""
    continuous_setup = gym_and_tf_SAC_Brain_continuous_setup
    obs_t_ph, act_ph, obs_t_prime_ph, reward_t_ph, trj_done_t_ph, exp_spec, playground = continuous_setup

    with tf_cv1.variable_scope(vocab.actor_network):
        pi, pi_log_p, policy_mu = SoftActorCriticBrain.build_gaussian_policy_graph(
            obs_t_ph, exp_spec, playground)

    with tf_cv1.variable_scope(vocab.critic_network):
        V_psi, V_psi_frozen = SoftActorCriticBrain.build_critic_graph_v_psi(
            obs_t_ph, obs_t_prime_ph, exp_spec)
        Q_act_1, Q_pi_1 = SoftActorCriticBrain.build_critic_graph_q_theta(
            obs_t_ph, act_ph, pi, exp_spec, vocab.Q_theta_1)
        Q_act_2, Q_pi_2 = SoftActorCriticBrain.build_critic_graph_q_theta(
            obs_t_ph, act_ph, pi, exp_spec, vocab.Q_theta_2)

    critic_lr_schedule, critic_global_grad_step = critic_learning_rate_scheduler(exp_spec)

    q_theta_train_ops = SoftActorCriticBrain.critic_q_theta_train(
        V_psi_frozen, Q_act_1, Q_act_2, reward_t_ph, trj_done_t_ph, exp_spec,
        critic_lr_schedule, critic_global_grad_step)

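# The following sketch is an addition, not from the original suite: it extends
# the Critic_Q_TRAIN test with a single optimization step. In SAC (Haarnoja et
# al., 2018) each Q_theta regresses toward y = r + gamma * (1 - done) *
# V_psi_frozen(s'), so the run needs feeds for every placeholder. The helper
# below and the assumption that q_theta_train_ops is sess.run-able are both
# hypothetical, not guarantees from the brain module.
def _zeros_like_placeholder(ph, batch_size=4):
    """Zero-filled numpy array matching a placeholder's static shape."""
    import numpy as np
    shape = [batch_size if dim is None else dim for dim in ph.shape.as_list()]
    return np.zeros(shape)


def test_SoftActorCritic_brain_Critic_Q_TRAIN_SMOKE_RUN_sketch(gym_and_tf_SAC_Brain_continuous_setup):
    obs_t_ph, act_ph, obs_t_prime_ph, reward_t_ph, trj_done_t_ph, exp_spec, playground = \
        gym_and_tf_SAC_Brain_continuous_setup

    with tf_cv1.variable_scope(vocab.actor_network):
        pi, pi_log_p, policy_mu = SoftActorCriticBrain.build_gaussian_policy_graph(
            obs_t_ph, exp_spec, playground)
    with tf_cv1.variable_scope(vocab.critic_network):
        V_psi, V_psi_frozen = SoftActorCriticBrain.build_critic_graph_v_psi(
            obs_t_ph, obs_t_prime_ph, exp_spec)
        Q_act_1, _ = SoftActorCriticBrain.build_critic_graph_q_theta(
            obs_t_ph, act_ph, pi, exp_spec, vocab.Q_theta_1)
        Q_act_2, _ = SoftActorCriticBrain.build_critic_graph_q_theta(
            obs_t_ph, act_ph, pi, exp_spec, vocab.Q_theta_2)
    critic_lr_schedule, critic_global_grad_step = critic_learning_rate_scheduler(exp_spec)
    q_theta_train_ops = SoftActorCriticBrain.critic_q_theta_train(
        V_psi_frozen, Q_act_1, Q_act_2, reward_t_ph, trj_done_t_ph, exp_spec,
        critic_lr_schedule, critic_global_grad_step)

    # Feed every placeholder with a zero-filled batch of matching shape
    feed = {ph: _zeros_like_placeholder(ph)
            for ph in (obs_t_ph, act_ph, obs_t_prime_ph, reward_t_ph, trj_done_t_ph)}
    with tf_cv1.Session() as sess:
        sess.run(tf_cv1.global_variables_initializer())
        sess.run(q_theta_train_ops, feed_dict=feed)
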
def test_SoftActorCritic_brain_Critic_V_BUILD_PASS(gym_and_tf_SAC_Brain_continuous_setup):
    obs_t_ph, _, obs_t_prime_ph, _, _, exp_spec, _ = gym_and_tf_SAC_Brain_continuous_setup
    V_psi, V_psi_frozen = SoftActorCriticBrain.build_critic_graph_v_psi(
        obs_t_ph, obs_t_prime_ph, exp_spec)

def test_SoftActorCritic_brain_Actor_Pi_BUILD_PASS(gym_and_tf_SAC_Brain_continuous_setup):
    obs_t_ph, _, _, _, _, exp_spec, playground = gym_and_tf_SAC_Brain_continuous_setup
    pi, pi_log_p, policy_mu = SoftActorCriticBrain.build_gaussian_policy_graph(
        obs_t_ph, exp_spec, playground)
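

# A minimal sketch added here for illustration (not from the original suite):
# sample the freshly built policy on a zero observation and confirm that pi
# and policy_mu carry the batch dimension. The batch size of 1 and the
# zero-filled input are arbitrary choices; only the fixture's placeholder and
# playground contract, as used by the tests above, is relied on.
def test_SoftActorCritic_brain_Actor_Pi_SAMPLE_SHAPE_sketch(gym_and_tf_SAC_Brain_continuous_setup):
    import numpy as np
    obs_t_ph, _, _, _, _, exp_spec, playground = gym_and_tf_SAC_Brain_continuous_setup
    pi, pi_log_p, policy_mu = SoftActorCriticBrain.build_gaussian_policy_graph(
        obs_t_ph, exp_spec, playground)

    dummy_obs = np.zeros([1] + obs_t_ph.shape.as_list()[1:])
    with tf_cv1.Session() as sess:
        sess.run(tf_cv1.global_variables_initializer())
        sampled_act, mu = sess.run([pi, policy_mu], feed_dict={obs_t_ph: dummy_obs})
    # Both the sampled action and the deterministic mean keep the batch axis
    assert sampled_act.shape[0] == 1
    assert mu.shape[0] == 1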