def _create_graph(self):
    if self.reuse:
        tf.get_variable_scope().reuse_variables()
        assert tf.get_variable_scope().reuse

    worker_device = "/job:worker/task:%d" % self.index + self.device
    print("worker_device:", worker_device)

    # Place variables on the parameter servers and everything else on this worker.
    with tf.device(tf.train.replica_device_setter(worker_device=worker_device, cluster=self.cluster)):
        # Bookkeeping counters shared across workers (not trained).
        self.results_sum = tf.get_variable(name="results_sum", shape=[], trainable=False,
                                           initializer=tf.zeros_initializer())
        self.game_num = tf.get_variable(name="game_num", shape=[], trainable=False,
                                        initializer=tf.zeros_initializer())
        self.global_steps = tf.get_variable(name="global_steps", shape=[], trainable=False,
                                            initializer=tf.zeros_initializer())

        # Win-rate summary for TensorBoard.
        self.mean_win_rate = tf.summary.scalar('mean_win_rate_dis', self.results_sum / self.game_num)
        self.merged = tf.summary.merge([self.mean_win_rate])

        # Dynamics model with its own load/save paths.
        self.dynamic_net = DynamicNetwork('train', self.sess,
                                          load_path=self.dynamic_load_path,
                                          save_path=self.dynamic_save_path)

        scope = "PolicyNN"
        with tf.variable_scope(scope):
            ob_space = C._SIZE_SIMPLE_INPUT
            act_space_array = C._SIZE_MAX_ACTIONS
            # Current and old policy networks plus the PPO update graph.
            self.policy = Policy_net('policy', self.sess, ob_space, act_space_array)
            self.policy_old = Policy_net('old_policy', self.sess, ob_space, act_space_array)
            self.policy_ppo = PPOTrain('PPO', self.sess, self.policy, self.policy_old,
                                       epoch_num=P.src_epoch_num)

        # Saver restricted to the policy's trainable variables.
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
        self.policy_saver = tf.train.Saver(var_list=var_list)
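# --- Illustration (not part of the original code) -----------------------------
# A minimal, self-contained sketch of the distributed setup that _create_graph()
# above assumes: self.cluster is a tf.train.ClusterSpec and self.index /
# self.device identify this worker task.  The hosts, ports and task index below
# are hypothetical; tf.train.ClusterSpec, tf.train.Server and
# tf.train.replica_device_setter are standard TF1 APIs.
import tensorflow as tf

cluster = tf.train.ClusterSpec({
    "ps": ["localhost:2222"],                        # parameter server holding shared variables
    "worker": ["localhost:2223", "localhost:2224"],  # one task per self-play worker
})
task_index = 0
server = tf.train.Server(cluster, job_name="worker", task_index=task_index)

# Variables defined under replica_device_setter are assigned to the ps job,
# while the remaining ops stay on the local worker device.
with tf.device(tf.train.replica_device_setter(
        worker_device="/job:worker/task:%d" % task_index + "/cpu:0",
        cluster=cluster)):
    results_sum = tf.get_variable(name="results_sum", shape=[], trainable=False,
                                  initializer=tf.zeros_initializer())

print(results_sum.device)  # -> /job:ps/task:0
# A session created with tf.Session(server.target) would then share such
# variables across all worker tasks (it blocks until the ps task is running).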
def _create_graph(self):
    if self.reuse:
        tf.get_variable_scope().reuse_variables()
        assert tf.get_variable_scope().reuse

    worker_device = "/job:worker/task:%d" % self.index + self.device

    # Place variables on the parameter servers and everything else on this worker.
    with tf.device(tf.train.replica_device_setter(worker_device=worker_device, cluster=self.cluster)):
        # Bookkeeping counters shared across workers (marked non-trainable here,
        # matching the other agent's graph, so they stay out of the PPO update
        # and out of the trainable-variable collection used by the saver below).
        self.results_sum = tf.get_variable(name="results_sum", shape=[], trainable=False,
                                           initializer=tf.zeros_initializer())
        self.game_num = tf.get_variable(name="game_num", shape=[], trainable=False,
                                        initializer=tf.zeros_initializer())
        self.global_steps = tf.get_variable(name="global_steps", shape=[], trainable=False,
                                            initializer=tf.zeros_initializer())

        # Win-rate tensor and its TensorBoard summary.
        self.win_rate = self.results_sum / self.game_num
        self.mean_win_rate = tf.summary.scalar('mean_win_rate_dis', self.win_rate)
        self.merged = tf.summary.merge([self.mean_win_rate])

        mini_scope = "MiniPolicyNN"
        with tf.variable_scope(mini_scope):
            ob_space = _SIZE_MINI_INPUT
            act_space_array = _SIZE_MINI_ACTIONS
            # Current and old policy networks plus the PPO update graph.
            self.policy = Policy_net('policy', self.sess, ob_space, act_space_array)
            self.policy_old = Policy_net('old_policy', self.sess, ob_space, act_space_array)
            self.policy_ppo = PPOTrain('PPO', self.sess, self.policy, self.policy_old,
                                       lr=P.mini_lr, epoch_num=P.mini_epoch_num)

        # Saver over all trainable variables in the graph (not scoped to mini_scope).
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        self.policy_saver = tf.train.Saver(var_list=var_list)
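# --- Illustration (not part of the original code) -----------------------------
# A minimal sketch of how the policy_saver built in _create_graph() could be
# used to checkpoint and restore the policy.  The method names and the
# self.policy_model_path attribute are assumptions for illustration;
# Saver.save, Saver.restore and tf.train.latest_checkpoint are standard TF1 APIs.
def save_policy(self, step):
    # Writes a checkpoint containing only the variables in var_list above.
    self.policy_saver.save(self.sess, self.policy_model_path + "policy", global_step=step)

def restore_policy(self):
    # Loads the newest checkpoint from the (hypothetical) model directory, if any.
    ckpt = tf.train.latest_checkpoint(self.policy_model_path)
    if ckpt is not None:
        self.policy_saver.restore(self.sess, ckpt)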