Пример #1
0
def test_cc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test a continuous-action BehavioralCloningModel on vector input.

    Builds the model from the ``"RealFakeBrain"`` brain of a mocked
    UnityEnvironment and runs ``sample_action`` and ``policy`` once on a
    batch of two vector observations. The test succeeds when neither graph
    construction nor the session run raises.

    :param mock_communicator: Mock whose ``return_value`` is replaced with a
        MockCommunicator (presumably injected by a ``mock.patch`` decorator
        outside this view -- confirm).
    :param mock_launcher: Unused in the body; presumably the patched
        executable launcher -- confirm.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            # Close the environment even if building or running the model
            # raises, so a failing test does not leave it open.
            try:
                model = BehavioralCloningModel(env.brains["RealFakeBrain"])
                init = tf.global_variables_initializer()
                sess.run(init)

                run_list = [model.sample_action, model.policy]
                feed_dict = {model.batch_size: 2,
                             model.sequence_length: 1,
                             model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                        [3, 4, 5, 3, 4, 5]])}
                sess.run(run_list, feed_dict=feed_dict)
            finally:
                env.close()
def test_visual_cc_bc_model():
    """Smoke-test a continuous-action BehavioralCloningModel with two visual inputs.

    Creates the model from ``make_brain_parameters(discrete_action=False,
    visual_inputs=2)`` and evaluates ``sample_action`` and ``policy`` once,
    feeding a batch of two vector observations plus an array of ones shaped
    ``[2, 40, 30, 3]`` for each visual input. Passing means nothing raised.
    """
    tf.reset_default_graph()
    with tf.Session() as session, tf.variable_scope("FakeGraphScope"):
        brain_params = make_brain_parameters(
            discrete_action=False, visual_inputs=2)
        bc_model = BehavioralCloningModel(brain_params)
        session.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        inputs = {
            bc_model.batch_size: 2,
            bc_model.sequence_length: 1,
            bc_model.vector_in: vector_obs,
        }
        # One all-ones image batch per visual input placeholder.
        for camera_index in (0, 1):
            inputs[bc_model.visual_in[camera_index]] = np.ones([2, 40, 30, 3])

        fetches = [bc_model.sample_action, bc_model.policy]
        session.run(fetches, feed_dict=inputs)
def test_dc_bc_model():
    """Smoke-test a discrete-action BehavioralCloningModel on vector input.

    Creates the model from ``make_brain_parameters(discrete_action=True,
    visual_inputs=0)`` and evaluates ``sample_action`` and ``action_probs``
    once on a batch of two vector observations, with ``dropout_rate`` fed as
    1.0 and ``action_masks`` fed as a ``[2, 2]`` array of ones. Passing means
    nothing raised.
    """
    tf.reset_default_graph()
    with tf.Session() as session, tf.variable_scope("FakeGraphScope"):
        brain_params = make_brain_parameters(
            discrete_action=True, visual_inputs=0)
        bc_model = BehavioralCloningModel(brain_params)
        session.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        inputs = {
            bc_model.batch_size: 2,
            bc_model.dropout_rate: 1.0,
            bc_model.sequence_length: 1,
            bc_model.vector_in: vector_obs,
            bc_model.action_masks: np.ones([2, 2]),
        }

        fetches = [bc_model.sample_action, bc_model.action_probs]
        session.run(fetches, feed_dict=inputs)
Пример #4
0
    def __init__(self, seed, brain, trainer_parameters, load):
        """
        Build the behavioral-cloning model inside this policy's graph and
        either load or initialize that graph.

        :param seed: Random seed.
        :param brain: Assigned Brain object.
        :param trainer_parameters: Defined training parameters. The keys
            "hidden_units", "learning_rate", "num_layers" and "use_recurrent"
            are read here.
        :param load: Whether a pre-trained model will be loaded or a new one created.
        """
        super(BCPolicy, self).__init__(seed, brain, trainer_parameters)

        self.last_loss = 0

        # A single as_default() is enough to make self.graph the default
        # graph while the model's ops are created.
        with self.graph.as_default():
            self.model = BehavioralCloningModel(
                h_size=int(trainer_parameters["hidden_units"]),
                lr=float(trainer_parameters["learning_rate"]),
                n_layers=int(trainer_parameters["num_layers"]),
                m_size=self.m_size,
                normalize=False,
                use_recurrent=trainer_parameters["use_recurrent"],
                brain=brain,
                seed=seed,
            )

        if load:
            self._load_graph()
        else:
            self._initialize_graph()

        # Named model outputs for inference and for an update step.
        self.inference_dict = {
            "action": self.model.sample_action,
            "action_probs": self.model.action_probs
        }
        self.update_dict = {
            "policy_loss": self.model.loss,
            "update_batch": self.model.update,
        }
        if self.use_recurrent:
            # Recurrent models additionally expose their output memory.
            self.inference_dict["memory_out"] = self.model.memory_out

        # NOTE(review): presumably fed as dropout rates at evaluation and
        # update time -- confirm against the methods that read them.
        self.evaluate_rate = 1.0
        self.update_rate = 0.5
Пример #5
0
def test_visual_dc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test a discrete-action BehavioralCloningModel with two visual inputs.

    Builds the model from the ``"RealFakeBrain"`` brain of a mocked
    UnityEnvironment and runs ``sample_action`` and ``action_probs`` once on
    a batch of two vector observations, two ``[2, 40, 30, 3]`` arrays of ones
    for the visual inputs, ``dropout_rate`` 1.0 and a ``[2, 2]`` all-ones
    action mask. The test succeeds when nothing raises.

    :param mock_communicator: Mock whose ``return_value`` is replaced with a
        MockCommunicator (presumably injected by a ``mock.patch`` decorator
        outside this view -- confirm).
    :param mock_launcher: Unused in the body; presumably the patched
        executable launcher -- confirm.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=2)
            env = UnityEnvironment(" ")
            # Close the environment even if building or running the model
            # raises, so a failing test does not leave it open.
            try:
                model = BehavioralCloningModel(env.brains["RealFakeBrain"])
                init = tf.global_variables_initializer()
                sess.run(init)

                run_list = [model.sample_action, model.action_probs]
                feed_dict = {
                    model.batch_size: 2,
                    model.dropout_rate: 1.0,
                    model.sequence_length: 1,
                    model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                               [3, 4, 5, 3, 4, 5]]),
                    model.visual_in[0]: np.ones([2, 40, 30, 3]),
                    model.visual_in[1]: np.ones([2, 40, 30, 3]),
                    model.action_masks: np.ones([2, 2]),
                }
                sess.run(run_list, feed_dict=feed_dict)
            finally:
                env.close()
Пример #6
0
    def __init__(self, seed, brain, trainer_parameters, load):
        """
        Build the behavioral-cloning model inside this policy's graph and
        either load or initialize that graph.

        :param seed: Random seed.
        :param brain: Assigned Brain object.
        :param trainer_parameters: Defined training parameters. The keys
            'hidden_units', 'learning_rate', 'num_layers' and 'use_recurrent'
            are read here.
        :param load: Whether a pre-trained model will be loaded or a new one created.
        """
        super(BCPolicy, self).__init__(seed, brain, trainer_parameters)

        # A single as_default() is enough to make self.graph the default
        # graph while the model's ops are created.
        with self.graph.as_default():
            self.model = BehavioralCloningModel(
                h_size=int(trainer_parameters['hidden_units']),
                lr=float(trainer_parameters['learning_rate']),
                n_layers=int(trainer_parameters['num_layers']),
                m_size=self.m_size,
                normalize=False,
                use_recurrent=trainer_parameters['use_recurrent'],
                brain=brain,
                seed=seed)

        if load:
            self._load_graph()
        else:
            self._initialize_graph()

        # Named model outputs for inference and for an update step.
        self.inference_dict = {'action': self.model.sample_action}
        self.update_dict = {
            'policy_loss': self.model.loss,
            'update_batch': self.model.update
        }
        if self.use_recurrent:
            # Recurrent models additionally expose their output memory.
            self.inference_dict['memory_out'] = self.model.memory_out

        # NOTE(review): presumably fed as dropout rates at evaluation and
        # update time -- confirm against the methods that read them.
        self.evaluate_rate = 1.0
        self.update_rate = 0.5