Пример #1
0
    def build_graph(self):
        """Assemble the model subgraphs and publish their ops on ``self``.

        Two ``Network`` instances (an online one and a target one), an
        ``Actor``, a ``loss.DQNLoss`` and an ``optimizer.Gradients`` subgraph
        are created. Every ``self.op_*`` attribute assigned below wraps one of
        their nodes together with the placeholders it is fed through; the
        keyword names passed to ``self.Op`` are the feed names callers use.
        """
        # NOTE(review): construction order is kept exactly as it was --
        # graph.Initialize() below presumably has to be created after the
        # subgraphs it initializes; confirm before reordering anything.
        online_net = Network()
        target_net = Network()

        action_picker = Actor()

        dqn_loss = loss.DQNLoss(online_net.output, config)
        grads = optimizer.Gradients(online_net.weights, loss=dqn_loss)

        # Op that assigns the online weights onto the target network.
        copy_to_target = graph.AssignWeights(
            target_net.weights, online_net.weights
        ).op

        # Expose public API
        self.op_assign_weights = self.Op(
            online_net.weights.assign,
            weights=online_net.weights.ph_weights)
        self.op_assign_target_weights = self.Op(
            target_net.weights.assign,
            target_weights=target_net.weights.ph_weights)

        self.op_get_q_value = self.Op(
            online_net.output.node, state=online_net.ph_state)
        self.op_get_q_target_value = self.Op(
            target_net.output.node, next_state=target_net.ph_state)

        self.op_get_action = self.Op(
            action_picker,
            local_step=action_picker.ph_local_step,
            q_value=action_picker.ph_q_value)

        initializer = graph.Initialize()

        # Placeholders needed to evaluate the gradients of the loss.
        gradient_feeds = {
            'state': online_net.ph_state,
            'reward': dqn_loss.ph_reward,
            'action': dqn_loss.ph_action,
            'terminal': dqn_loss.ph_terminal,
            'q_next_target': dqn_loss.ph_q_next_target,
            'q_next': dqn_loss.ph_q_next,
        }
        self.op_compute_gradients = self.Op(grads.calculate, **gradient_feeds)

        self.op_update_target_weights = self.Op(copy_to_target)

        self.op_initialize = self.Op(initializer)
Пример #2
0
    def build_graph(self):
        """Build the actor/critic subgraphs and expose their ops on ``self``.

        Creates an ``ActorNetwork`` and a ``CriticNetwork`` plus a second
        instance of each that is used as the target network, sets up three
        ``optimizer.Gradients`` subgraphs (over the actor weights, the critic
        weights and the critic's action placeholder) and publishes the
        ``self.op_*`` wrappers for them.

        When ``cfg.config.no_ps`` is truthy, Adam optimizers are attached to
        the actor and critic gradients, and ``op_get_weights``, the target
        init/update ops, the gradient-apply ops and ``op_initialize`` are
        exposed as well; otherwise none of those attributes are set here.
        """
        # Build graph
        actor_net = ActorNetwork()
        critic_net = CriticNetwork()
        actor_target_net = ActorNetwork()
        critic_target_net = CriticNetwork()

        # Externally fed gradient with respect to the action; its node is
        # negated before being handed to the actor gradients as grad_ys.
        action_grad_ph = graph.Placeholder(
            np.float32, (None, cfg.config.output.action_size))
        actor_grad_kwargs = {
            'loss': actor_net.actor,
            'grad_ys': -action_grad_ph.node,
        }

        if cfg.config.no_ps:
            actor_adam = optimizer.AdamOptimizer(cfg.config.actor_learning_rate)
            actor_grad_kwargs['optimizer'] = actor_adam

        actor_grads = optimizer.Gradients(actor_net.weights, **actor_grad_kwargs)

        critic_loss = loss.DDPGLoss(critic_net, cfg.config)
        critic_grad_kwargs = {'loss': critic_loss}

        if cfg.config.no_ps:
            critic_adam = optimizer.AdamOptimizer(cfg.config.critic_learning_rate)
            critic_grad_kwargs['optimizer'] = critic_adam

        critic_grads = optimizer.Gradients(critic_net.weights, **critic_grad_kwargs)

        # Gradients of the critic output taken over its action placeholder.
        critic_action_grads = optimizer.Gradients(
            critic_net.ph_action, loss=critic_net.critic)

        # Expose public API
        self.op_assign_actor_weights = self.Op(
            actor_net.weights.assign,
            weights=actor_net.weights.ph_weights)
        self.op_assign_critic_weights = self.Op(
            critic_net.weights.assign,
            weights=critic_net.weights.ph_weights)
        self.op_assign_actor_target_weights = self.Op(
            actor_target_net.weights.assign,
            weights=actor_target_net.weights.ph_weights)
        self.op_assign_critic_target_weights = self.Op(
            critic_target_net.weights.assign,
            weights=critic_target_net.weights.ph_weights)

        self.op_get_action = self.Op(
            actor_net.actor,
            state=actor_net.ph_state)
        self.op_get_critic_q = self.Op(
            critic_net.critic,
            state=critic_net.ph_state,
            action=critic_net.ph_action)

        self.op_get_actor_target = self.Op(
            actor_target_net.actor,
            state=actor_target_net.ph_state)
        self.op_get_critic_target = self.Op(
            critic_target_net.critic,
            state=critic_target_net.ph_state,
            action=critic_target_net.ph_action)

        self.op_compute_actor_gradients = self.Op(
            actor_grads.calculate,
            state=actor_net.ph_state,
            grad_ys=action_grad_ph)

        self.op_compute_critic_gradients = self.Op(
            critic_grads.calculate,
            state=critic_net.ph_state,
            action=critic_net.ph_action,
            predicted=critic_loss.ph_predicted)

        self.op_compute_critic_action_gradients = self.Op(
            critic_action_grads.calculate,
            state=critic_net.ph_state,
            action=critic_net.ph_action)

        # Critic loss and gradient global norms, exposed as separate ops.
        # (Original note: integrated with grad computation by log_lvl.)
        self.op_critic_loss = self.Op(
            critic_loss,
            state=critic_net.ph_state,
            action=critic_net.ph_action,
            predicted=critic_loss.ph_predicted)
        self.op_compute_norm_actor_gradients = self.Op(
            actor_grads.global_norm,
            state=actor_net.ph_state,
            grad_ys=action_grad_ph)
        self.op_compute_norm_critic_gradients = self.Op(
            critic_grads.global_norm,
            state=critic_net.ph_state,
            action=critic_net.ph_action,
            predicted=critic_loss.ph_predicted)
        self.op_compute_norm_critic_action_gradients = self.Op(
            critic_action_grads.global_norm,
            state=critic_net.ph_state,
            action=critic_net.ph_action)

        if cfg.config.no_ps:
            actor_w = actor_net.weights
            critic_w = critic_net.weights

            actor_target_w = actor_target_net.weights
            critic_target_w = critic_target_net.weights

            # The target networks need the actor & critic weights assigned
            # onto them: once in full (init) ...
            init_actor_target = graph.AssignWeights(
                actor_target_w, actor_w).op
            init_critic_target = graph.AssignWeights(
                critic_target_w, critic_w).op

            # ... and again with cfg.config.tau as an extra argument
            # (presumably a soft update -- confirm in graph.AssignWeights).
            update_actor_target = graph.AssignWeights(
                actor_target_w, actor_w, cfg.config.tau).op
            update_critic_target = graph.AssignWeights(
                critic_target_w, critic_w, cfg.config.tau).op

            self.op_get_weights = self.Ops(
                actor_w, actor_target_w, critic_w, critic_target_w)

            self.op_init_target_weights = self.Ops(
                init_actor_target, init_critic_target)

            self.op_update_target_weights = self.Ops(
                update_actor_target, update_critic_target)

            # NOTE(review): the actor variant goes through self.Ops while the
            # critic one uses self.Op; kept as found -- confirm it is intended.
            self.op_apply_actor_gradients = self.Ops(
                actor_grads.apply,
                gradients=actor_grads.ph_gradients)
            self.op_apply_critic_gradients = self.Op(
                critic_grads.apply,
                gradients=critic_grads.ph_gradients)

            initializer = graph.Initialize()
            self.op_initialize = self.Op(initializer)
Пример #3
0
    def build_graph(self):
        """Build the shared actor/critic state and expose its ops on ``self``.

        Creates two ``graph.GlobalStep`` counters (one used as the step
        counter, one as the episode counter), actor and critic weights with
        matching target weights, flatten get/set subgraphs over the actor and
        critic weights, Adam-backed ``optimizer.Gradients`` for both weight
        sets and a ``graph.LinearMovingAverage`` for the score. All of them
        are then published through ``self.op_*`` attributes.
        """
        # Build graph
        step_counter = graph.GlobalStep()
        episode_counter = graph.GlobalStep()

        actor_w = ActorNetwork().weights
        critic_w = CriticNetwork().weights

        actor_target_w = ActorNetwork().weights
        critic_target_w = CriticNetwork().weights

        flat_getter = graph.GetVariablesFlatten(
            graph.Variables(actor_w, critic_w))
        flat_setter = graph.SetVariablesFlatten(
            graph.Variables(actor_w, critic_w))

        # The target networks need the actor & critic weights assigned onto
        # them: once in full (init) ...
        init_actor_target = graph.AssignWeights(actor_target_w, actor_w).op
        init_critic_target = graph.AssignWeights(critic_target_w, critic_w).op

        # ... and again with cfg.config.tau as an extra argument
        # (presumably a soft update -- confirm in graph.AssignWeights).
        update_actor_target = graph.AssignWeights(
            actor_target_w, actor_w, cfg.config.tau).op
        update_critic_target = graph.AssignWeights(
            critic_target_w, critic_w, cfg.config.tau).op

        actor_adam = optimizer.AdamOptimizer(cfg.config.actor_learning_rate)
        critic_adam = optimizer.AdamOptimizer(cfg.config.critic_learning_rate)

        actor_grads = optimizer.Gradients(actor_w, optimizer=actor_adam)
        critic_grads = optimizer.Gradients(critic_w, optimizer=critic_adam)

        reward_average = graph.LinearMovingAverage(
            cfg.config.avg_in_num_batches)
        # NOTE(review): created last among the subgraphs, as in the original;
        # presumably so that it covers everything built above -- confirm.
        initializer = graph.Initialize()

        # Expose public API
        self.op_get_weights_signed = self.Ops(
            actor_w, actor_target_w,
            critic_w, critic_target_w,
            step_counter.n)

        self.op_get_weights_flatten = self.Op(flat_getter)
        self.op_set_weights_flatten = self.Op(
            flat_setter, value=flat_setter.ph_value)

        self.op_init_target_weights = self.Ops(
            init_actor_target, init_critic_target)

        self.op_update_target_weights = self.Ops(
            update_actor_target, update_critic_target)

        # Applies both gradient sets and bumps the step counter in one op;
        # the 'gradients' feed is an (actor, critic) placeholder pair.
        self.op_apply_gradients = self.Ops(
            actor_grads.apply,
            critic_grads.apply,
            step_counter.increment,
            gradients=(actor_grads.ph_gradients, critic_grads.ph_gradients),
            increment=step_counter.ph_increment)
        self.op_add_rewards_to_model_score_routine = self.Ops(
            reward_average.add,
            reward_sum=reward_average.ph_sum,
            reward_weight=reward_average.ph_count)
        self.op_score = self.Op(reward_average.average)

        self.op_n_step = self.Op(step_counter.n)
        self.op_inc_step = self.Op(
            step_counter.increment,
            increment=step_counter.ph_increment)

        self.op_get_episode_cnt = self.Op(episode_counter.n)
        self.op_inc_episode_cnt = self.Op(
            episode_counter.increment,
            increment=episode_counter.ph_increment)

        self.op_submit_gradients = self.Call(
            graph.get_gradients_apply_routine(cfg.config))
        self.op_initialize = self.Op(initializer)
Пример #4
0
    def build_graph(self):
        """Build the shared actor/critic state and expose its ops on ``self``.

        Creates two ``graph.GlobalStep`` counters (one used as the step
        counter, one as the episode counter), actor and critic weights with
        matching target weights, and Adam-backed ``optimizer.Gradients`` for
        both weight sets. Actor and critic gradients are applied through
        separate ops; only the actor apply op also increments the step
        counter.
        """
        # Build graph
        step_counter = graph.GlobalStep()
        episode_counter = graph.GlobalStep()

        actor_w = ActorNetwork().weights
        critic_w = CriticNetwork().weights

        actor_target_w = ActorNetwork().weights
        critic_target_w = CriticNetwork().weights

        # The target networks need the actor & critic weights assigned onto
        # them: once in full (init) ...
        init_actor_target = graph.AssignWeights(actor_target_w, actor_w).op
        init_critic_target = graph.AssignWeights(critic_target_w, critic_w).op

        # ... and again with cfg.config.tau as an extra argument
        # (presumably a soft update -- confirm in graph.AssignWeights).
        update_actor_target = graph.AssignWeights(
            actor_target_w, actor_w, cfg.config.tau).op
        update_critic_target = graph.AssignWeights(
            critic_target_w, critic_w, cfg.config.tau).op

        actor_adam = optimizer.AdamOptimizer(cfg.config.actor_learning_rate)
        critic_adam = optimizer.AdamOptimizer(cfg.config.critic_learning_rate)

        actor_grads = optimizer.Gradients(actor_w, optimizer=actor_adam)
        critic_grads = optimizer.Gradients(critic_w, optimizer=critic_adam)

        # NOTE(review): created last among the subgraphs, as in the original;
        # presumably so that it covers everything built above -- confirm.
        initializer = graph.Initialize()

        # Expose public API
        self.op_get_weights = self.Ops(
            actor_w, actor_target_w, critic_w, critic_target_w)

        self.op_init_target_weights = self.Ops(
            init_actor_target, init_critic_target)

        self.op_update_target_weights = self.Ops(
            update_actor_target, update_critic_target)

        # The actor apply op is bundled with the step-counter increment.
        self.op_apply_actor_gradients = self.Ops(
            actor_grads.apply,
            step_counter.increment,
            gradients=actor_grads.ph_gradients,
            increment=step_counter.ph_increment)
        self.op_apply_critic_gradients = self.Op(
            critic_grads.apply, gradients=critic_grads.ph_gradients)

        self.op_n_step = self.Op(step_counter.n)
        self.op_inc_step = self.Op(
            step_counter.increment, increment=step_counter.ph_increment)

        self.op_get_episode_cnt = self.Op(episode_counter.n)
        self.op_inc_episode_cnt = self.Op(
            episode_counter.increment,
            increment=episode_counter.ph_increment)

        self.op_initialize = self.Op(initializer)