Example No. 1
0
    def test_setup_model_feedforward(self):
        """Validate model setup for the feedforward policy.

        Builds an RLAlgorithm with ``_init_setup_model`` enabled and checks
        that (a) the stored ``policy_kwargs`` equal the feedforward defaults
        merged with the TD3 defaults plus the verbose/num_envs overrides, and
        (b) the TensorFlow graph holds exactly the expected trainable
        variables for the actor, both critics, and their target copies.
        """
        # Instantiate the algorithm with model construction turned on.
        params = dict(self.init_parameters)
        params['policy'] = FeedForwardPolicy
        params['_init_setup_model'] = True
        alg = RLAlgorithm(**params)

        # Expected policy kwargs: feedforward defaults overlaid with the
        # TD3 defaults, then the verbosity / env-count overrides.
        expected_kwargs = dict(FEEDFORWARD_PARAMS, **TD3_PARAMS)
        expected_kwargs['verbose'] = self.init_parameters['verbose']
        expected_kwargs['num_envs'] = self.init_parameters['num_envs']
        self.assertDictEqual(alg.policy_kwargs, expected_kwargs)

        with alg.graph.as_default():
            actual_vars = sorted(var.name for var in get_trainable_vars())

        # Enumerate the expected variable names instead of listing all 36
        # literals: two scopes (model/target), three networks each (pi and
        # the twin critics), three layers per network, bias + kernel per
        # layer. The nesting order below reproduces alphabetical order.
        expected_names = []
        for scope in ('model', 'target'):
            for network, final_layer in (('pi', 'output'),
                                         ('qf_0', 'qf_output'),
                                         ('qf_1', 'qf_output')):
                for layer in ('fc0', 'fc1', final_layer):
                    for suffix in ('bias', 'kernel'):
                        expected_names.append('{}/{}/{}/{}:0'.format(
                            scope, network, layer, suffix))

        self.assertListEqual(actual_vars, expected_names)
Example No. 2
0
    def setUp(self):
        """Create a TF session and the default feedforward policy kwargs."""
        # Base keyword arguments shared by the tests in this case; the
        # feedforward defaults are layered on top afterwards.
        self.policy_params = {
            'sess': tf.compat.v1.Session(),
            'ac_space': Box(low=-1, high=1, shape=(1, )),
            'ob_space': Box(low=-2, high=2, shape=(2, )),
            'co_space': Box(low=-3, high=3, shape=(3, )),
            'verbose': 0,
        }
        # dict.update copies the entries, so no explicit .copy() is needed.
        self.policy_params.update(FEEDFORWARD_PARAMS)
Example No. 3
0
 def setUp(self):
     """Build the default TRPO/feedforward policy parameters for the tests."""
     # Observation / action / context spaces used by every test here.
     spaces = {
         'ac_space': Box(low=-1, high=1, shape=(1,)),
         'ob_space': Box(low=-2, high=2, shape=(2,)),
         'co_space': Box(low=-3, high=3, shape=(3,)),
     }
     self.policy_params = dict(spaces, scope=None, verbose=0)
     # Layer on the TRPO and feedforward defaults (dict.update copies
     # entries, so the source dicts' top level is never aliased).
     self.policy_params.update(TRPO_PARAMS)
     self.policy_params.update(FEEDFORWARD_PARAMS)
     # Force the fully-connected model variant.
     self.policy_params["model_params"]["model_type"] = "fcnet"
Example No. 4
0
    def setUp(self):
        """Create a TF session and TD3-style policy parameters."""
        # Training hyperparameters used across this test case.
        hyperparams = {
            'buffer_size': 200000,
            'batch_size': 128,
            'actor_lr': 3e-4,
            'critic_lr': 3e-4,
            'tau': 0.005,
            'gamma': 0.99,
            'use_huber': False,
        }

        self.policy_params = {
            'sess': tf.compat.v1.Session(),
            'ac_space': Box(low=-1, high=1, shape=(1, )),
            'ob_space': Box(low=-2, high=2, shape=(2, )),
            'co_space': Box(low=-3, high=3, shape=(3, )),
            'verbose': 0,
        }
        self.policy_params.update(hyperparams)
        # dict.update copies the entries, so no explicit .copy() is needed.
        self.policy_params.update(FEEDFORWARD_PARAMS)