def worker_test(ps, replay_buffer, opt):
    """Build the 'main' evaluation Actor and hand control to its test loop.

    The raw gym environment is wrapped so that observation noise, action
    noise, reward scaling, and a frame-stack depth of 3 are applied during
    evaluation, matching the training-time configuration in `opt`.
    """
    evaluator = Actor(opt, job="main")
    wrapped_env = Wrapper(
        gym.make(opt.env_name),
        opt.obs_noise,
        opt.act_noise,
        opt.reward_scale,
        3,
    )
    evaluator.test(ps, replay_buffer, opt, wrapped_env)
def worker_test(ps, replay_buffer, opt):
    """Periodically evaluate the current policy and persist the best weights.

    Runs forever: every ~5 seconds it pulls the latest weights from the
    parameter server `ps`, runs one test episode, logs sampling/step
    throughput read from `replay_buffer`, and asks the parameter server to
    save its weights whenever a new best episode return is observed.

    Args:
        ps: Ray actor handle for the parameter server (`pull`, `save_weights`).
        replay_buffer: Ray actor handle exposing `get_counts`.
        opt: experiment options; only `opt.env_name` is read here.
    """
    agent = Actor(opt, job="main")
    keys, weights = agent.get_weights()
    time0 = time1 = time.time()
    sample_times1, steps, size = ray.get(replay_buffer.get_counts.remote())
    # -inf rather than an arbitrary sentinel (was -1000): guarantees the
    # first evaluation always triggers a save, even when every achievable
    # return is very negative.
    max_ret = float("-inf")
    env = gym.make(opt.env_name)
    while True:
        # Sync to the newest policy before each evaluation episode.
        weights = ray.get(ps.pull.remote(keys))
        agent.set_weights(keys, weights)
        ep_ret = agent.test(env, replay_buffer)

        sample_times2, steps, size = ray.get(replay_buffer.get_counts.remote())
        time2 = time.time()
        print("test_reward:", ep_ret, "sample_times:", sample_times2,
              "steps:", steps, "buffer_size:", size)
        print('update frequency:',
              (sample_times2 - sample_times1) / (time2 - time1),
              'total time:', time2 - time0)

        if ep_ret > max_ret:
            ps.save_weights.remote()
            print("****** weights saved! ******")
            max_ret = ep_ret

        time1 = time2
        sample_times1 = sample_times2
        time.sleep(5)
def worker_test(ps, replay_buffer, opt):
    """Evaluate the 'main' Actor on a freshly constructed TradingEnv."""
    evaluator = Actor(opt, job="main")
    evaluator.test(ps, replay_buffer, opt, TradingEnv())
def worker_test(ps, node_buffer, opt):
    """Evaluation and bookkeeping loop for the distributed trainer.

    Forever: sync the latest weights from the parameter server, run 10
    test episodes on a TradingEnv, report actor/learner throughput and
    cluster resources, push metrics to TensorBoard, and periodically
    checkpoint weights (by learner steps) and buffers (by wall time).
    """
    agent = Actor(opt, job="test")
    keys = agent.get_weights()[0]

    # The trading environment lives at the repository root, two levels up
    # from this file; make it importable before constructing the env.
    ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sys.path.append(ROOT)
    from trading_env import TradingEnv, FrameStack
    test_env = TradingEnv(action_scheme_id=3, obs_dim=38)

    init_time = time.time()
    save_times = 0
    checkpoint_times = 0

    while True:
        # Keep the complete weight dict so it can be pickled to disk as-is.
        weights_all = ray.get(ps.get_weights.remote())
        weights = [weights_all[key] for key in keys]
        agent.set_weights(keys, weights)

        start_actor_step, start_learner_step, _ = get_al_status(node_buffer)
        start_time = time.time()
        ave_test_reward, ave_score = agent.test(test_env, 10)
        last_actor_step, last_learner_step, _ = get_al_status(node_buffer)

        # Throughput over the window spanned by the 10 test episodes.
        actor_step = np.sum(last_actor_step) - np.sum(start_actor_step)
        learner_step = np.sum(last_learner_step) - np.sum(start_learner_step)
        alratio = actor_step / (learner_step + 1)  # +1 avoids div-by-zero
        update_frequency = int(learner_step / (time.time() - start_time))
        total_learner_step = np.sum(last_learner_step)

        print("---------------------------------------------------")
        print("average test reward:", ave_test_reward)
        print("average test score:", ave_score)
        print(
            "frame freq:",
            np.round((last_actor_step - start_actor_step) /
                     (time.time() - start_time)))
        print("actor_steps:", np.sum(last_actor_step),
              "learner_step:", total_learner_step)
        print("actor leaner ratio: %.2f" % alratio)
        print("learner freq:", update_frequency)
        print("Ray total resources:", ray.cluster_resources())
        print("available resources:", ray.available_resources())
        print("---------------------------------------------------")

        # Suppress a meaningless ratio while the learner is warming up.
        if learner_step < 100:
            alratio = 0
        agent.write_tb(ave_test_reward, ave_score, alratio,
                       update_frequency, total_learner_step)

        total_time = time.time() - init_time

        # Snapshot weights each time another opt.save_interval learner
        # steps have accumulated.
        if total_learner_step // opt.save_interval > save_times:
            with open(
                    opt.save_dir + "/" + str(total_learner_step / 1e6) +
                    "M_" + str(ave_test_reward) + "_weights.pickle",
                    "wb") as pickle_out:
                pickle.dump(weights_all, pickle_out)
            print("****** Weights saved by time! ******")
            save_times = total_learner_step // opt.save_interval

        # Full checkpoint (all parameter servers + all buffers) every
        # opt.checkpoint_freq seconds of wall time.
        if total_time // opt.checkpoint_freq > checkpoint_times:
            print("save everything!")
            save_start_time = time.time()
            # NOTE(review): `node_ps` is not defined in this function (the
            # parameters are `ps` and `node_buffer`) — presumably a
            # module-level list of per-node parameter servers; confirm.
            ps_save_op = [
                node_ps[i].save_weights.remote()
                for i in range(opt.num_nodes)
            ]
            buffer_save_op = [
                node_buffer[node_index][i].save.remote()
                for i in range(opt.num_buffers)
                for node_index in range(opt.num_nodes)
            ]
            # NOTE(review): num_returns counts num_nodes*num_buffers buffer
            # saves but only 1 ps save, while ps_save_op has num_nodes
            # entries — only consistent when num_nodes == 1; verify intent.
            ray.wait(buffer_save_op + ps_save_op,
                     num_returns=opt.num_nodes * opt.num_buffers + 1)
            print("total time for saving :", time.time() - save_start_time)
            checkpoint_times = total_time // opt.checkpoint_freq
def worker_test(ps, replay_buffer, opt):
    """Run the 'main' Actor's test loop against a plain gym environment."""
    tester = Actor(opt, job="main")
    tester.test(ps, replay_buffer, opt, gym.make(opt.env_name))