示例#1
0
 def __init__(self, agent, env=None):
     """Initialise bookkeeping, rollout sizing and the scalar logger.

     Args:
         agent: Object exposing ``args`` (read here for ``max_pathlength``,
             ``timesteps_per_batch`` and ``log_path``) and ``session``
             (passed to ``Logger``).
         env: Accepted for interface compatibility; not used in this
             constructor.
     """
     self.lock = th.Lock()
     self.agent = agent

     # one and only: the parameter list is fetched once and handed to the
     # farmer that this instance owns.
     parameters = self.get_parameter_list()
     self.para_list = parameters
     self.farmer = farmer_class(parameters)

     # Episode / step counters and reward history start out empty.
     self.ep_num = self.total_steps = 0
     self.history_reward = []
     self.ep_value = {}
     # value_init() is defined outside this snippet; it runs after the
     # containers above exist and before relative_time is reset.
     self.value_init()
     self.relative_time = 0

     # Rollout count: batch timesteps divided by the episode length,
     # truncated towards zero by int().
     args = self.agent.args
     self.average_len_of_episode = args.max_pathlength
     batch_steps = args.timesteps_per_batch
     self.num_rollouts = int(batch_steps / self.average_len_of_episode)
     self.rollout_count = 0
     self.rollout_paths = []
     self.iteration = 0

     # Every logged scalar uses the same dtype, one entry per name.
     scalar_names = [
         'reward',
         'kl_div',
         'entropy',
         'surrogate_loss',
         'value_loss',
     ]
     self.log_scalar_name_list = scalar_names
     self.log_scalar_type_list = [tf.float32] * len(scalar_names)

     log_dir = args.log_path + 'train'
     self.logger = Logger(
         self.agent.session,
         log_dir,
         self.log_scalar_name_list,
         self.log_scalar_type_list,
     )
     self.write_log = self.logger.create_scalar_log_method()

     # Timestamp taken once construction has finished.
     self.start_time = time.time()
示例#2
0
 def __init__(self, agent, env=None):
     """Initialise the lock, the farmer and the episode bookkeeping.

     Args:
         agent: Object exposing ``args.max_pathlength``, which seeds
             ``average_steps``.
         env: Accepted for interface compatibility; not used in this
             constructor.
     """
     self.lock = th.Lock()
     self.agent = agent

     # one and only: the parameter list is fetched once and handed to the
     # farmer that this instance owns.
     parameters = self.get_parameter_list()
     self.para_list = parameters
     self.farmer = farmer_class(parameters)

     # Episode / step counters and reward history start out empty.
     self.ep_num = self.total_steps = 0
     self.history_reward = []
     self.ep_value = {}
     # value_init() is defined outside this snippet; it runs after the
     # containers above exist and before relative_time is reset.
     self.value_init()
     self.relative_time = 0

     max_steps = self.agent.args.max_pathlength
     self.average_steps = max_steps

     # Timestamp taken once construction has finished.
     self.start_time = time.time()
示例#3
0
 def __init__(self, agent, env=None):
     """Initialise the lock, the farmer, bookkeeping and the scalar logger.

     Args:
         agent: Object exposing ``para_list`` (indexed here with the keys
             ``"max_pathlength"`` and ``"log_path"``) and ``session``
             (passed to ``Logger``).
         env: Accepted for interface compatibility; not used in this
             constructor.
     """
     self.lock = th.Lock()
     self.agent = agent

     # one and only: the farmer is built from the agent's parameter list.
     self.farmer = farmer_class(self.agent.para_list)

     # Episode / step counters and reward history start out empty.
     self.ep_num = self.total_steps = 0
     self.history_reward = []
     self.ep_value = {}
     # value_init() is defined outside this snippet; it runs after the
     # containers above exist and before relative_time is reset.
     self.value_init()
     self.relative_time = 0

     settings = self.agent.para_list
     self.average_steps = settings["max_pathlength"]

     # Every logged scalar uses the same dtype, one entry per name.
     scalar_names = ['mean_reward', 'actor_loss', 'critic_loss']
     self.log_scalar_name_list = scalar_names
     self.log_scalar_type_list = [tf.float32] * len(scalar_names)

     log_dir = settings["log_path"] + 'train'
     self.logger = Logger(
         self.agent.session,
         log_dir,
         self.log_scalar_name_list,
         self.log_scalar_type_list,
     )
     self.write_log = self.logger.create_scalar_log_method()

     # Timestamp taken once construction has finished.
     self.start_time = time.time()
示例#4
0
 def refarm(self):
     """Discard the current farmer and install a newly constructed one.

     Per the original author's note, this is not needed most of the time.
     """
     # Unbind the old farmer first so it is released before the
     # replacement is constructed.
     delattr(self, 'farmer')
     replacement = farmer_class()
     self.farmer = replacement