def test_SoftActorCritic_brain_Critic_Q_BUILD_PASS(gym_and_tf_SAC_Brain_continuous_setup):
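    """Smoke test: build the twin Q_theta critic graphs on top of the policy head without error."""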
    obs_t_ph, act_ph, _, _, _, exp_spec, playground = gym_and_tf_SAC_Brain_continuous_setup
    
    with tf_cv1.variable_scope(vocab.actor_network):
        pi, pi_log_p, policy_mu = SoftActorCriticBrain.build_gaussian_policy_graph(obs_t_ph, exp_spec, playground)
    
    with tf_cv1.variable_scope(vocab.critic_network):
        Q_act_1, Q_pi_1 = SoftActorCriticBrain.build_critic_graph_q_theta(obs_t_ph, act_ph, pi, exp_spec,
                                                                          vocab.Q_theta_1)
        Q_act_2, Q_pi_2 = SoftActorCriticBrain.build_critic_graph_q_theta(obs_t_ph, act_ph, pi, exp_spec,
                                                                          vocab.Q_theta_2)


def test_SoftActorCritic_brain_Actor_Pi_TRAIN_PASS(gym_and_tf_SAC_Brain_continuous_setup):
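    """Smoke test: build the policy and twin Q_theta graphs, then wire the actor
    KL loss and its policy optimizer op without error."""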
    continuous_setup = gym_and_tf_SAC_Brain_continuous_setup
    obs_t_ph, act_ph, obs_t_prime_ph, reward_t_ph, trj_done_t_ph, exp_spec, playground = continuous_setup
    
    with tf_cv1.variable_scope(vocab.actor_network):
        pi, pi_log_p, policy_mu = SoftActorCriticBrain.build_gaussian_policy_graph(obs_t_ph, exp_spec, playground)
    
    with tf_cv1.variable_scope(vocab.critic_network):
        Q_act_1, Q_pi_1 = SoftActorCriticBrain.build_critic_graph_q_theta(obs_t_ph, act_ph, pi, exp_spec,
                                                                          vocab.Q_theta_1)
        Q_act_2, Q_pi_2 = SoftActorCriticBrain.build_critic_graph_q_theta(obs_t_ph, act_ph, pi, exp_spec,
                                                                          vocab.Q_theta_2)
    
    actor_kl_loss, actor_policy_optimizer_op = SoftActorCriticBrain.actor_train(pi_log_p,
                                                                                Q_pi_1, Q_pi_2, exp_spec)


def test_SoftActorCritic_brain_tensor_entity_call_warning_investigation_GYM_KERAS_PASS(
        gym_and_KERAS_DEV_continuous_setup):
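    """Probe the tensor entity __call__ warning: build the Gaussian policy graph
    under the Keras dev setup with a tiny (2, 2) hidden-layer topology."""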
    obs_t_ph, _, _, _, _, exp_spec, playground = gym_and_KERAS_DEV_continuous_setup
    exp_spec.set_experiment_spec({'phi_nn_h_layer_topo': (2, 2)})
    
    pi, pi_log_p, policy_mu = SoftActorCriticBrain.build_gaussian_policy_graph(obs_t_ph, exp_spec,
                                                                               playground)


def test_SoftActorCritic_brain_Critic_Q_TRAIN_PASS(gym_and_tf_SAC_Brain_continuous_setup):
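    """Smoke test: build V_psi (with its frozen target), the twin Q_theta graphs,
    a critic learning-rate schedule, and the Q_theta training ops without error."""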
    continuous_setup = gym_and_tf_SAC_Brain_continuous_setup
    obs_t_ph, act_ph, obs_t_prime_ph, reward_t_ph, trj_done_t_ph, exp_spec, playground = continuous_setup
    
    with tf_cv1.variable_scope(vocab.actor_network):
        pi, pi_log_p, policy_mu = SoftActorCriticBrain.build_gaussian_policy_graph(obs_t_ph, exp_spec, playground)
    
    with tf_cv1.variable_scope(vocab.critic_network):
        V_psi, V_psi_frozen = SoftActorCriticBrain.build_critic_graph_v_psi(obs_t_ph, obs_t_prime_ph, exp_spec)
        
        Q_act_1, Q_pi_1 = SoftActorCriticBrain.build_critic_graph_q_theta(obs_t_ph, act_ph, pi, exp_spec,
                                                                          vocab.Q_theta_1)
        Q_act_2, Q_pi_2 = SoftActorCriticBrain.build_critic_graph_q_theta(obs_t_ph, act_ph, pi, exp_spec,
                                                                          vocab.Q_theta_2)
    
    critic_lr_schedule, critic_global_grad_step = critic_learning_rate_scheduler(exp_spec)
    
    q_theta_train_ops = SoftActorCriticBrain.critic_q_theta_train(V_psi_frozen, Q_act_1, Q_act_2, reward_t_ph,
                                                                  trj_done_t_ph, exp_spec, critic_lr_schedule,
                                                                  critic_global_grad_step)


def test_SoftActorCritic_brain_Critic_V_BUILD_PASS(gym_and_tf_SAC_Brain_continuous_setup):
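    """Smoke test: build the V_psi critic graph and its frozen target without error."""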
    obs_t_ph, _, obs_t_prime_ph, _, _, exp_spec, _ = gym_and_tf_SAC_Brain_continuous_setup
    V_psi, V_psi_frozen = SoftActorCriticBrain.build_critic_graph_v_psi(obs_t_ph, obs_t_prime_ph, exp_spec)


def test_SoftActorCritic_brain_Actor_Pi_BUILD_PASS(gym_and_tf_SAC_Brain_continuous_setup):
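    """Smoke test: build the Gaussian policy graph without error."""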
    obs_t_ph, _, _, _, _, exp_spec, playground = gym_and_tf_SAC_Brain_continuous_setup
    pi, pi_log_p, policy_mu = SoftActorCriticBrain.build_gaussian_policy_graph(obs_t_ph, exp_spec, playground)
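

# Hedged sketch (not from the original suite): exercise the freshly built policy
# graph end to end by sampling one action in a throwaway session. Assumptions:
# `tf_cv1` is TensorFlow's v1-compatible API, the fixture's observation
# placeholder has a static feature dimension, and the test name and assert
# below are illustrative only.
def test_SoftActorCritic_brain_policy_forward_pass_SKETCH(gym_and_tf_SAC_Brain_continuous_setup):
    """Sketch: sample a single action from the built Gaussian policy graph."""
    import numpy as np

    obs_t_ph, _, _, _, _, exp_spec, playground = gym_and_tf_SAC_Brain_continuous_setup

    with tf_cv1.variable_scope(vocab.actor_network):
        pi, _, _ = SoftActorCriticBrain.build_gaussian_policy_graph(obs_t_ph, exp_spec, playground)

    # Infer the observation dimension from the placeholder itself so the sketch
    # hard-codes no environment-specific constant.
    obs_dim = obs_t_ph.shape.as_list()[-1]

    with tf_cv1.Session() as sess:
        sess.run(tf_cv1.global_variables_initializer())
        sampled_action = sess.run(pi, feed_dict={obs_t_ph: np.random.randn(1, obs_dim)})

    assert sampled_action.shape[0] == 1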