def build_graph(self, weights):
    # Build graph
    sg_global_step = graph.GlobalStep()
    sg_update_step = graph.GlobalStep()
    sg_weights = weights

    if dppo_config.config.use_linear_schedule:
        if dppo_config.config.schedule_step == 'update':
            sg_schedule_step = sg_update_step
        elif dppo_config.config.schedule_step == 'environment':
            sg_schedule_step = sg_global_step
        else:
            assert False, 'Valid options for the schedule step are: update OR environment. ' \
                          'You provided the following option: {}'.format(dppo_config.config.schedule_step)
        sg_learning_rate = lr_schedule.Linear(sg_schedule_step, dppo_config.config)
    else:
        sg_learning_rate = dppo_config.config.initial_learning_rate

    sg_optimizer = optimizer.AdamOptimizer(sg_learning_rate,
                                           epsilon=dppo_config.config.optimizer.epsilon)
    sg_gradients = optimizer.Gradients(sg_weights, optimizer=sg_optimizer)

    sg_average_reward = graph.LinearMovingAverage(dppo_config.config.avg_in_num_batches)
    sg_initialize = graph.Initialize()

    # Weights get/set for updating the policy
    sg_get_weights_flatten = graph.GetVariablesFlatten(sg_weights)
    sg_set_weights_flatten = graph.SetVariablesFlatten(sg_weights)

    # Expose public API
    self.op_n_step = self.Op(sg_global_step.n)
    self.op_upd_step = self.Op(sg_update_step.n)
    self.op_score = self.Op(sg_average_reward.average)

    self.op_inc_global_step = self.Ops(sg_global_step.increment,
                                       increment=sg_global_step.ph_increment)
    self.op_inc_global_step_and_average_reward = self.Ops(sg_global_step.increment,
                                                          sg_average_reward.add,
                                                          increment=sg_global_step.ph_increment,
                                                          reward_sum=sg_average_reward.ph_sum,
                                                          reward_weight=sg_average_reward.ph_count)

    self.op_get_weights = self.Op(sg_weights)
    self.op_get_weights_signed = self.Ops(sg_weights, sg_update_step.n)

    self.op_apply_gradients = self.Ops(sg_gradients.apply, sg_update_step.increment,
                                       gradients=sg_gradients.ph_gradients,
                                       increment=sg_update_step.ph_increment)

    self.op_get_weights_flatten = self.Op(sg_get_weights_flatten)
    self.op_set_weights_flatten = self.Op(sg_set_weights_flatten,
                                          value=sg_set_weights_flatten.ph_value)

    # Gradient combining routines
    self.op_submit_gradients = self.Call(graph.get_gradients_apply_routine(dppo_config.config))

    self.op_initialize = self.Op(sg_initialize)
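# A minimal, self-contained sketch of the linear annealing that lr_schedule.Linear is
# assumed to implement above: the learning rate decays from initial_learning_rate to
# zero as the chosen schedule step approaches the configured maximum. The names
# `linear_learning_rate` and `max_global_step` are illustrative, not framework API.
def linear_learning_rate(step, initial_learning_rate, max_global_step):
    # Fraction of training remaining, clipped so the rate never goes negative.
    remaining = max(0.0, 1.0 - float(step) / max_global_step)
    return initial_learning_rate * remaining

# Example: with initial_learning_rate=1e-4 and max_global_step=1e7, the rate is
# 1e-4 at step 0, 5e-5 halfway through, and 0 at the end of training.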
def build_graph(self):
    sg_weights = _ManagerNetwork().weights
    sg_global_step = graph.GlobalStep()
    sg_learning_rate = fun_graph.LearningRate(sg_global_step)
    sg_optimizer = optimizer.RMSPropOptimizer(
        learning_rate=sg_learning_rate,
        decay=cfg.RMSProp.decay,
        momentum=0.0,
        epsilon=cfg.RMSProp.epsilon)
    sg_gradients = optimizer.Gradients(sg_weights, optimizer=sg_optimizer)
    sg_initialize = graph.Initialize()

    # Expose public API
    self.op_n_step = self.Op(sg_global_step.n)
    self.op_get_weights = self.Op(sg_weights)
    self.op_apply_gradients = self.Ops(
        sg_gradients.apply, sg_global_step.increment,
        gradients=sg_gradients.ph_gradients,
        increment=sg_global_step.ph_increment)
    self.op_initialize = self.Op(sg_initialize)
def build_graph(self):
    # Build graph
    sg_policy_net = PolicyNet()
    sg_n_iter = trpo_graph.NIter()
    sg_global_step = graph.GlobalStep()
    sg_value_net = ValueNet()
    sg_initialize = graph.Initialize()

    # Expose public API
    self.op_n_step = self.Op(sg_global_step.n)
    self.op_inc_step = self.Op(sg_global_step.increment, increment=sg_global_step.ph_increment)
    self.op_initialize = self.Op(sg_initialize)

    self.call_wait_for_iteration = self.Call(self.wait_for_iteration)
    self.call_send_experience = self.Call(self.send_experience)
    self.call_receive_weights = self.Call(self.receive_weights)

    self.op_turn_collect_on = sg_n_iter.op_turn_collect_on
    self.op_turn_collect_off = sg_n_iter.op_turn_collect_off
    self.op_n_iter_value = sg_n_iter.op_n_iter_value
    self.op_n_iter = sg_n_iter.op_n_iter
    self.op_next_iter = sg_n_iter.op_next_iter

    self.policy = sg_policy_net
    self.value = sg_value_net
def build_graph(self):
    # Build graph
    sg_global_step = graph.GlobalStep()
    sg_initialize = graph.Initialize()

    # Expose public API
    self.op_n_step = self.Op(sg_global_step.n)
    self.op_initialize = self.Op(sg_initialize)
def build_graph(self):
    # Build graph
    sg_global_step = graph.GlobalStep()
    sg_network = Network()
    self.actor = sg_network.actor
    self.critic = sg_network.critic

    if da3c_config.config.optimizer == 'Adam':
        sg_actor_optimizer = optimizer.AdamOptimizer(da3c_config.config.initial_learning_rate)
        sg_critic_optimizer = optimizer.AdamOptimizer(da3c_config.config.initial_learning_rate)
    else:
        sg_learning_rate = da3c_graph.LearningRate(sg_global_step,
                                                   da3c_config.config.initial_learning_rate)
        sg_actor_optimizer = optimizer.RMSPropOptimizer(
            learning_rate=sg_learning_rate,
            decay=da3c_config.config.RMSProp.decay,
            momentum=0.0,
            epsilon=da3c_config.config.RMSProp.epsilon)
        sg_critic_optimizer = optimizer.RMSPropOptimizer(
            learning_rate=sg_learning_rate,
            decay=da3c_config.config.RMSProp.decay,
            momentum=0.0,
            epsilon=da3c_config.config.RMSProp.epsilon)

    sg_actor_gradients = optimizer.Gradients(self.actor.weights, optimizer=sg_actor_optimizer)
    sg_critic_gradients = optimizer.Gradients(self.critic.weights, optimizer=sg_critic_optimizer)

    if da3c_config.config.use_icm:
        sg_icm_optimizer = optimizer.AdamOptimizer(da3c_config.config.icm.lr)
        sg_icm_weights = icm_model.ICM().weights
        sg_icm_gradients = optimizer.Gradients(sg_icm_weights, optimizer=sg_icm_optimizer)

        # Expose ICM public API
        self.op_icm_get_weights = self.Op(sg_icm_weights)
        self.op_icm_apply_gradients = self.Op(sg_icm_gradients.apply,
                                              gradients=sg_icm_gradients.ph_gradients)

    sg_initialize = graph.Initialize()

    # Expose public API
    self.op_n_step = self.Op(sg_global_step.n)
    self.op_check_weights = self.Ops(self.actor.weights.check, self.critic.weights.check)
    self.op_get_weights = self.Ops(self.actor.weights, self.critic.weights)
    self.op_apply_gradients = self.Ops(
        sg_actor_gradients.apply, sg_critic_gradients.apply, sg_global_step.increment,
        gradients=(sg_actor_gradients.ph_gradients, sg_critic_gradients.ph_gradients),
        increment=sg_global_step.ph_increment)
    self.op_initialize = self.Op(sg_initialize)
def build_graph(self):
    sg_global_step = graph.GlobalStep()
    sg_network = Network()

    sg_get_weights_flatten = graph.GetVariablesFlatten(sg_network.weights)
    sg_set_weights_flatten = graph.SetVariablesFlatten(sg_network.weights)

    if config.use_linear_schedule:
        sg_learning_rate = lr_schedule.Linear(sg_global_step, config)
    else:
        sg_learning_rate = config.initial_learning_rate

    if config.optimizer == 'Adam':
        sg_optimizer = optimizer.AdamOptimizer(sg_learning_rate)
    elif config.optimizer == 'RMSProp':
        sg_optimizer = optimizer.RMSPropOptimizer(
            learning_rate=sg_learning_rate,
            decay=config.RMSProp.decay,
            epsilon=config.RMSProp.epsilon)
    else:
        assert False, 'There are 2 valid options for optimizers: Adam | RMSProp'

    sg_gradients_apply = optimizer.Gradients(sg_network.weights, optimizer=sg_optimizer)

    sg_average_reward = graph.LinearMovingAverage(config.avg_in_num_batches)
    sg_initialize = graph.Initialize()

    # Expose public API
    self.op_n_step = self.Op(sg_global_step.n)
    self.op_score = self.Op(sg_average_reward.average)

    self.op_get_weights_signed = self.Ops(sg_network.weights, sg_global_step.n)
    self.op_assign_weights = self.Op(sg_network.weights.assign,
                                     weights=sg_network.weights.ph_weights)

    self.op_apply_gradients = self.Ops(
        sg_gradients_apply.apply, sg_global_step.increment,
        gradients=sg_gradients_apply.ph_gradients,
        increment=sg_global_step.ph_increment)
    self.op_add_rewards_to_model_score_routine = self.Ops(
        sg_average_reward.add,
        reward_sum=sg_average_reward.ph_sum,
        reward_weight=sg_average_reward.ph_count)

    self.op_get_weights_flatten = self.Op(sg_get_weights_flatten)
    self.op_set_weights_flatten = self.Op(sg_set_weights_flatten,
                                          value=sg_set_weights_flatten.ph_value)

    # Gradient combining routines
    self.op_submit_gradients = self.Call(graph.get_gradients_apply_routine(config))

    self.op_initialize = self.Op(sg_initialize)
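# Illustrative sketch of what GetVariablesFlatten / SetVariablesFlatten provide
# conceptually: packing a list of weight arrays into a single 1-D vector and unpacking
# it back. A flat view like this is convenient for algorithms that treat all parameters
# as one vector (e.g. conjugate-gradient style updates). This numpy version is an
# assumption about the behaviour, not the framework's implementation.
import numpy as np

def get_variables_flatten(weights):
    # Concatenate every weight array into one flat vector.
    return np.concatenate([w.ravel() for w in weights])

def set_variables_flatten(weights, flat_value):
    # Slice the flat vector back into the original shapes, in order.
    offset = 0
    for w in weights:
        size = w.size
        w[...] = flat_value[offset:offset + size].reshape(w.shape)
        offset += size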
def build_graph(self):
    # Build graph
    sg_global_step = graph.GlobalStep()
    sg_weights = Network().weights
    sg_optimizer = optimizer.AdamOptimizer(pg_config.config.learning_rate)
    sg_gradients = optimizer.Gradients(sg_weights, optimizer=sg_optimizer)
    sg_initialize = graph.Initialize()

    # Expose public API
    self.op_n_step = self.Op(sg_global_step.n)
    self.op_get_weights = self.Op(sg_weights)
    self.op_apply_gradients = self.Ops(sg_gradients.apply, sg_global_step.increment,
                                       gradients=sg_gradients.ph_gradients,
                                       increment=sg_global_step.ph_increment)
    self.op_initialize = self.Op(sg_initialize)
def build_graph(self):
    sg_global_step = graph.GlobalStep()
    sg_network = Network()

    if config.optimizer == 'Adam':
        sg_optimizer = optimizer.AdamOptimizer(config.initial_learning_rate)
    elif config.optimizer == 'RMSProp':
        param = {}
        if hasattr(config, 'RMSProp'):
            if hasattr(config.RMSProp, "decay"):
                param["decay"] = config.RMSProp.decay
            if hasattr(config.RMSProp, "epsilon"):
                param["epsilon"] = config.RMSProp.epsilon
        sg_optimizer = optimizer.RMSPropOptimizer(config.initial_learning_rate, **param)
    else:
        raise NotImplementedError

    sg_gradients_apply = optimizer.Gradients(sg_network.weights, optimizer=sg_optimizer)
    sg_initialize = graph.Initialize()

    # Expose public API
    self.op_n_step = self.Op(sg_global_step.n)
    self.op_get_weights = self.Op(sg_network.weights)
    self.op_assign_weights = self.Op(sg_network.weights.assign,
                                     weights=sg_network.weights.ph_weights)
    self.op_apply_gradients = self.Ops(
        sg_gradients_apply.apply, sg_global_step.increment,
        gradients=sg_gradients_apply.ph_gradients,
        n_steps=sg_global_step.ph_increment)
    self.op_initialize = self.Op(sg_initialize)
def build_graph(self):
    # Build graph
    sg_policy_net = PolicyNet()
    sg_n_iter = trpo_graph.NIter()
    sg_global_step = graph.GlobalStep()
    sg_value_net = ValueNet()
    sg_average_reward = graph.LinearMovingAverage(trpo_config.config.avg_in_num_batches)
    sg_initialize = graph.Initialize()

    # Expose public API
    self.op_n_step = self.Op(sg_global_step.n)
    self.op_inc_step = self.Op(sg_global_step.increment, increment=sg_global_step.ph_increment)
    self.op_score = self.Op(sg_average_reward.average)
    self.op_add_reward_to_model_score_routine = self.Ops(
        sg_average_reward.add,
        reward_sum=sg_average_reward.ph_sum,
        reward_weight=sg_average_reward.ph_count)

    self.call_wait_for_iteration = self.Call(self.wait_for_iteration)
    self.call_send_experience = self.Call(self.send_experience)
    self.call_receive_weights = self.Call(self.receive_weights)

    self.op_turn_collect_on = sg_n_iter.op_turn_collect_on
    self.op_turn_collect_off = sg_n_iter.op_turn_collect_off
    self.op_n_iter_value = sg_n_iter.op_n_iter_value
    self.op_n_iter = sg_n_iter.op_n_iter
    self.op_next_iter = sg_n_iter.op_next_iter

    self.policy = sg_policy_net
    self.value = sg_value_net

    self.op_initialize = self.Op(sg_initialize)
def build_graph(self):
    sg_weights = _WorkerNetwork().weights
    sg_global_step = graph.GlobalStep()
    sg_learning_rate = fun_graph.LearningRate(sg_global_step)
    sg_optimizer = optimizer.RMSPropOptimizer(
        learning_rate=sg_learning_rate,
        decay=cfg.RMSProp.decay,
        momentum=0.0,
        epsilon=cfg.RMSProp.epsilon)
    sg_gradients = optimizer.Gradients(sg_weights, optimizer=sg_optimizer)
    sg_initialize = graph.Initialize()

    # Expose public API
    self.op_n_step = self.Op(sg_global_step.n)
    self.op_get_weights = self.Op(sg_weights)
    self.op_apply_gradients = self.Ops(
        sg_gradients.apply, sg_global_step.increment,
        gradients=sg_gradients.ph_gradients,
        increment=sg_global_step.ph_increment)
    self.op_initialize = self.Op(sg_initialize)
def build_graph(self):
    # Build graph
    sg_global_step = graph.GlobalStep()
    sg_network = Network()
    sg_weights = sg_network.weights

    if da3c_config.config.use_linear_schedule:
        sg_learning_rate = lr_schedule.Linear(sg_global_step, da3c_config.config)
    else:
        sg_learning_rate = da3c_config.config.initial_learning_rate

    if da3c_config.config.optimizer == 'Adam':
        sg_optimizer = optimizer.AdamOptimizer(sg_learning_rate)
    else:
        sg_optimizer = optimizer.RMSPropOptimizer(
            learning_rate=sg_learning_rate,
            decay=da3c_config.config.RMSProp.decay,
            momentum=0.0,
            epsilon=da3c_config.config.RMSProp.epsilon)

    sg_gradients = optimizer.Gradients(sg_weights, optimizer=sg_optimizer)

    if da3c_config.config.use_icm:
        sg_icm_optimizer = optimizer.AdamOptimizer(da3c_config.config.icm.lr)
        sg_icm_weights = icm_model.ICM().weights
        sg_icm_gradients = optimizer.Gradients(sg_icm_weights, optimizer=sg_icm_optimizer)

        # Expose ICM public API
        self.op_icm_get_weights = self.Op(sg_icm_weights)
        self.op_icm_apply_gradients = self.Op(sg_icm_gradients.apply,
                                              gradients=sg_icm_gradients.ph_gradients)

    sg_average_reward = graph.LinearMovingAverage(da3c_config.config.avg_in_num_batches)
    sg_initialize = graph.Initialize()

    # Expose public API
    self.op_n_step = self.Op(sg_global_step.n)
    self.op_score = self.Op(sg_average_reward.average)
    self.op_check_weights = self.Op(sg_weights.check)
    self.op_get_weights = self.Ops(sg_weights, sg_global_step.n)

    self.op_apply_gradients = self.Ops(
        sg_gradients.apply, sg_global_step.increment,
        gradients=sg_gradients.ph_gradients,
        increment=sg_global_step.ph_increment)
    self.op_add_rewards_to_model_score_routine = self.Ops(
        sg_average_reward.add,
        reward_sum=sg_average_reward.ph_sum,
        reward_weight=sg_average_reward.ph_count)

    # Determine the gradient-applying method: fifo (by default), averaging, delay compensation
    sg_get_weights_flatten = graph.GetVariablesFlatten(sg_weights)
    sg_set_weights_flatten = graph.SetVariablesFlatten(sg_weights)

    self.op_get_weights_flatten = self.Op(sg_get_weights_flatten)
    self.op_set_weights_flatten = self.Op(sg_set_weights_flatten,
                                          value=sg_set_weights_flatten.ph_value)

    self.op_submit_gradients = self.Call(graph.get_gradients_apply_routine(da3c_config.config))

    self.op_initialize = self.Op(sg_initialize)
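# Illustrative sketch of the reward averaging assumed behind graph.LinearMovingAverage:
# it keeps the reward sums and counts from the last `avg_in_num_batches` updates and
# reports their ratio as the model score. The class below is an assumption for clarity,
# not the framework's implementation.
from collections import deque

class LinearMovingAverageSketch:
    def __init__(self, avg_in_num_batches):
        self.sums = deque(maxlen=avg_in_num_batches)
        self.counts = deque(maxlen=avg_in_num_batches)

    def add(self, reward_sum, reward_weight):
        # Mirrors op_add_rewards_to_model_score_routine(reward_sum=..., reward_weight=...).
        self.sums.append(reward_sum)
        self.counts.append(reward_weight)

    @property
    def average(self):
        # Mirrors op_score: average reward over the retained window.
        total = sum(self.counts)
        return sum(self.sums) / total if total else 0.0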
def build_graph(self):
    # Build graph
    sg_global_step = graph.GlobalStep()
    sg_episode_cnt = graph.GlobalStep()

    sg_actor_weights = ActorNetwork().weights
    sg_critic_weights = CriticNetwork().weights
    sg_actor_target_weights = ActorNetwork().weights
    sg_critic_target_weights = CriticNetwork().weights

    sg_get_weights_flatten = \
        graph.GetVariablesFlatten(graph.Variables(sg_actor_weights, sg_critic_weights))
    sg_set_weights_flatten = \
        graph.SetVariablesFlatten(graph.Variables(sg_actor_weights, sg_critic_weights))

    # Reassign weights from the actor & critic networks to their target networks
    sg_init_actor_target_weights = \
        graph.AssignWeights(sg_actor_target_weights, sg_actor_weights).op
    sg_init_critic_target_weights = \
        graph.AssignWeights(sg_critic_target_weights, sg_critic_weights).op

    sg_update_actor_target_weights = \
        graph.AssignWeights(sg_actor_target_weights, sg_actor_weights, cfg.config.tau).op
    sg_update_critic_target_weights = \
        graph.AssignWeights(sg_critic_target_weights, sg_critic_weights, cfg.config.tau).op

    sg_actor_optimizer = optimizer.AdamOptimizer(cfg.config.actor_learning_rate)
    sg_critic_optimizer = optimizer.AdamOptimizer(cfg.config.critic_learning_rate)

    sg_actor_gradients = optimizer.Gradients(sg_actor_weights, optimizer=sg_actor_optimizer)
    sg_critic_gradients = optimizer.Gradients(sg_critic_weights, optimizer=sg_critic_optimizer)

    sg_average_reward = graph.LinearMovingAverage(cfg.config.avg_in_num_batches)
    sg_initialize = graph.Initialize()

    # Expose public API
    self.op_get_weights_signed = self.Ops(sg_actor_weights, sg_actor_target_weights,
                                          sg_critic_weights, sg_critic_target_weights,
                                          sg_global_step.n)

    self.op_get_weights_flatten = self.Op(sg_get_weights_flatten)
    self.op_set_weights_flatten = self.Op(sg_set_weights_flatten,
                                          value=sg_set_weights_flatten.ph_value)

    self.op_init_target_weights = self.Ops(sg_init_actor_target_weights,
                                           sg_init_critic_target_weights)
    self.op_update_target_weights = self.Ops(sg_update_actor_target_weights,
                                             sg_update_critic_target_weights)

    self.op_apply_gradients = self.Ops(sg_actor_gradients.apply,
                                       sg_critic_gradients.apply,
                                       sg_global_step.increment,
                                       gradients=(sg_actor_gradients.ph_gradients,
                                                  sg_critic_gradients.ph_gradients),
                                       increment=sg_global_step.ph_increment)

    self.op_add_rewards_to_model_score_routine = self.Ops(sg_average_reward.add,
                                                          reward_sum=sg_average_reward.ph_sum,
                                                          reward_weight=sg_average_reward.ph_count)
    self.op_score = self.Op(sg_average_reward.average)

    self.op_n_step = self.Op(sg_global_step.n)
    self.op_inc_step = self.Op(sg_global_step.increment, increment=sg_global_step.ph_increment)

    self.op_get_episode_cnt = self.Op(sg_episode_cnt.n)
    self.op_inc_episode_cnt = self.Op(sg_episode_cnt.increment,
                                      increment=sg_episode_cnt.ph_increment)

    self.op_submit_gradients = self.Call(graph.get_gradients_apply_routine(cfg.config))

    self.op_initialize = self.Op(sg_initialize)
def build_graph(self):
    # Build graph
    sg_global_step = graph.GlobalStep()
    sg_episode_cnt = graph.GlobalStep()

    sg_actor_weights = ActorNetwork().weights
    sg_critic_weights = CriticNetwork().weights
    sg_actor_target_weights = ActorNetwork().weights
    sg_critic_target_weights = CriticNetwork().weights

    # Reassign weights from the actor & critic networks to their target networks
    sg_init_actor_target_weights = \
        graph.AssignWeights(sg_actor_target_weights, sg_actor_weights).op
    sg_init_critic_target_weights = \
        graph.AssignWeights(sg_critic_target_weights, sg_critic_weights).op

    sg_update_actor_target_weights = \
        graph.AssignWeights(sg_actor_target_weights, sg_actor_weights, cfg.config.tau).op
    sg_update_critic_target_weights = \
        graph.AssignWeights(sg_critic_target_weights, sg_critic_weights, cfg.config.tau).op

    sg_actor_optimizer = optimizer.AdamOptimizer(cfg.config.actor_learning_rate)
    sg_critic_optimizer = optimizer.AdamOptimizer(cfg.config.critic_learning_rate)

    sg_actor_gradients = optimizer.Gradients(sg_actor_weights, optimizer=sg_actor_optimizer)
    sg_critic_gradients = optimizer.Gradients(sg_critic_weights, optimizer=sg_critic_optimizer)

    sg_initialize = graph.Initialize()

    # Expose public API
    self.op_get_weights = self.Ops(sg_actor_weights, sg_actor_target_weights,
                                   sg_critic_weights, sg_critic_target_weights)

    self.op_init_target_weights = self.Ops(sg_init_actor_target_weights,
                                           sg_init_critic_target_weights)
    self.op_update_target_weights = self.Ops(sg_update_actor_target_weights,
                                             sg_update_critic_target_weights)

    self.op_apply_actor_gradients = self.Ops(sg_actor_gradients.apply,
                                             sg_global_step.increment,
                                             gradients=sg_actor_gradients.ph_gradients,
                                             increment=sg_global_step.ph_increment)
    self.op_apply_critic_gradients = self.Op(sg_critic_gradients.apply,
                                             gradients=sg_critic_gradients.ph_gradients)

    self.op_n_step = self.Op(sg_global_step.n)
    self.op_inc_step = self.Op(sg_global_step.increment, increment=sg_global_step.ph_increment)

    self.op_get_episode_cnt = self.Op(sg_episode_cnt.n)
    self.op_inc_episode_cnt = self.Op(sg_episode_cnt.increment,
                                      increment=sg_episode_cnt.ph_increment)

    self.op_initialize = self.Op(sg_initialize)
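# Illustrative sketch of the target-network updates assumed behind
# graph.AssignWeights(target, source, cfg.config.tau) in the DDPG graphs above:
# without tau the target is copied outright (op_init_target_weights); with tau the
# target drifts slowly toward the online network (op_update_target_weights).
# The convention target <- tau * source + (1 - tau) * target is an assumption here,
# and the weights are assumed to be lists of numpy-like arrays supporting
# in-place slice assignment.
def assign_weights(target_weights, source_weights, tau=None):
    for target, source in zip(target_weights, source_weights):
        if tau is None:
            target[...] = source  # hard copy, used once at initialization
        else:
            # Soft update: with a small tau (e.g. 0.001) the target lags the online
            # network, which stabilizes DDPG training.
            target[...] = tau * source + (1.0 - tau) * target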