Example #1
def worker_test(ps, replay_buffer, opt):
    agent = Actor(opt, job="main")

    # Evaluation environment: the raw gym env wrapped with observation/action
    # noise and reward scaling as configured in opt.
    test_env = Wrapper(gym.make(opt.env_name), opt.obs_noise, opt.act_noise,
                       opt.reward_scale, 3)

    agent.test(ps, replay_buffer, opt, test_env)
Example #2
def worker_test(ps, replay_buffer, opt):
    # Evaluation worker: periodically pull the latest weights from the
    # parameter server, run a test episode, report throughput statistics,
    # and save the weights whenever a new best return is reached.
    agent = Actor(opt, job="main")

    keys, weights = agent.get_weights()

    time0 = time1 = time.time()
    sample_times1, steps, size = ray.get(replay_buffer.get_counts.remote())
    max_ret = -1000

    env = gym.make(opt.env_name)

    while True:
        # Refresh the agent with the newest weights from the parameter server.
        weights = ray.get(ps.pull.remote(keys))
        agent.set_weights(keys, weights)

        # Run one evaluation episode with the current policy.
        ep_ret = agent.test(env, replay_buffer)
        sample_times2, steps, size = ray.get(replay_buffer.get_counts.remote())
        time2 = time.time()
        print("test_reward:", ep_ret, "sample_times:", sample_times2, "steps:",
              steps, "buffer_size:", size)
        print('update frequency:',
              (sample_times2 - sample_times1) / (time2 - time1), 'total time:',
              time2 - time0)

        if ep_ret > max_ret:
            ps.save_weights.remote()
            print("****** weights saved! ******")
            max_ret = ep_ret

        time1 = time2
        sample_times1 = sample_times2

        # if steps >= opt.total_epochs * opt.steps_per_epoch:
        #     exit(0)
        # if time2 - time0 > 30:
        #     exit(0)

        time.sleep(5)
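
Example #2 assumes that worker_test runs as a Ray task next to the training workers, pulling weights from a parameter-server actor and polling the shared replay buffer. A minimal launch sketch under those assumptions is shown below; ParameterServer, ReplayBuffer, worker_train and the fields of opt are illustrative placeholders, not the original project's API.

# Launch sketch (hypothetical names): run the test worker as a Ray task
# alongside a training worker. ParameterServer, ReplayBuffer and worker_train
# are placeholders assumed for illustration.
import ray
from types import SimpleNamespace

opt = SimpleNamespace(env_name="Pendulum-v1")   # assumed option fields

ray.init()

ps = ParameterServer.remote(opt)          # assumed @ray.remote actor class
replay_buffer = ReplayBuffer.remote(opt)  # assumed @ray.remote actor class

# worker_train and worker_test run concurrently as Ray tasks; worker_test is
# the function shown above.
tasks = [
    ray.remote(worker_train).remote(ps, replay_buffer, opt),
    ray.remote(worker_test).remote(ps, replay_buffer, opt),
]
ray.wait(tasks, num_returns=len(tasks))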
Example #3
def worker_test(ps, replay_buffer, opt):
    agent = Actor(opt, job="main")
    test_env = TradingEnv()
    agent.test(ps, replay_buffer, opt, test_env)
Example #4
def worker_test(ps, node_buffer, opt):
    # Evaluation worker for the multi-node setup: periodically pull weights,
    # evaluate on the trading environment, log throughput statistics to
    # TensorBoard, and write weight files and checkpoints at fixed intervals.
    agent = Actor(opt, job="test")
    keys = agent.get_weights()[0]

    # Evaluate on the custom trading environment instead of a gym env; make
    # the project root importable first.
    # test_env = gym.make(opt.env_name)
    ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sys.path.append(ROOT)
    from trading_env import TradingEnv, FrameStack
    test_env = TradingEnv(action_scheme_id=3, obs_dim=38)

    init_time = time.time()
    save_times = 0
    checkpoint_times = 0

    while True:
        # Pull the full weight dict so it can also be pickled to disk later.
        weights_all = ray.get(ps.get_weights.remote())
        weights = [weights_all[key] for key in keys]
        agent.set_weights(keys, weights)

        start_actor_step, start_learner_step, _ = get_al_status(node_buffer)
        start_time = time.time()

        ave_test_reward, ave_score = agent.test(test_env, 10)

        last_actor_step, last_learner_step, _ = get_al_status(node_buffer)
        actor_step = np.sum(last_actor_step) - np.sum(start_actor_step)
        learner_step = np.sum(last_learner_step) - np.sum(start_learner_step)
        # Environment steps per learner update; +1 avoids division by zero.
        alratio = actor_step / (learner_step + 1)
        update_frequency = int(learner_step / (time.time() - start_time))
        total_learner_step = np.sum(last_learner_step)

        print("---------------------------------------------------")
        print("average test reward:", ave_test_reward)
        print("average test score:", ave_score)
        print(
            "frame freq:",
            np.round((last_actor_step - start_actor_step) /
                     (time.time() - start_time)))
        print("actor_steps:", np.sum(last_actor_step), "learner_step:",
              total_learner_step)
        print("actor leaner ratio: %.2f" % alratio)
        print("learner freq:", update_frequency)
        print("Ray total resources:", ray.cluster_resources())
        print("available resources:", ray.available_resources())
        print("---------------------------------------------------")
        if learner_step < 100:
            # Report 0 while the learner has taken too few steps for the
            # ratio to be meaningful.
            alratio = 0
        agent.write_tb(ave_test_reward, ave_score, alratio, update_frequency,
                       total_learner_step)

        total_time = time.time() - init_time

        # Dump the raw weights to a pickle every opt.save_interval learner steps.
        if total_learner_step // opt.save_interval > save_times:
            with open(
                    opt.save_dir + "/" + str(total_learner_step / 1e6) + "M_" +
                    str(ave_test_reward) + "_weights.pickle",
                    "wb") as pickle_out:
                pickle.dump(weights_all, pickle_out)
                print("****** Weights saved by time! ******")
            save_times = total_learner_step // opt.save_interval

        # Save a full checkpoint (parameter servers and buffers) every
        # opt.checkpoint_freq seconds.
        if total_time // opt.checkpoint_freq > checkpoint_times:
            print("save everything!")
            save_start_time = time.time()

            # node_ps is not defined in this excerpt; it is assumed to be the
            # list of per-node parameter servers.
            ps_save_op = [
                node_ps[i].save_weights.remote() for i in range(opt.num_nodes)
            ]
            buffer_save_op = [
                node_buffer[node_index][i].save.remote()
                for i in range(opt.num_buffers)
                for node_index in range(opt.num_nodes)
            ]
            # Wait until every buffer save and parameter-server save finishes.
            ray.wait(buffer_save_op + ps_save_op,
                     num_returns=len(buffer_save_op) + len(ps_save_op))

            print("total time for saving :", time.time() - save_start_time)
            checkpoint_times = total_time // opt.checkpoint_freq
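
Example #4 relies on a get_al_status helper that aggregates actor-step and learner-step counters across the per-node buffers; the helper itself is not part of the excerpt. Below is a minimal sketch of what it could look like, assuming node_buffer is a nested list of Ray buffer actors that each expose a get_counts remote method as in Example #2; the name, signature and return layout are assumptions.

# Hypothetical reconstruction of get_al_status, inferred from how its return
# values are used above; not the project's actual implementation.
import numpy as np
import ray

def get_al_status(node_buffer):
    # node_buffer: list (per node) of lists (per buffer) of Ray actor handles,
    # each assumed to report (actor_steps, learner_steps, buffer_size).
    counts = ray.get([buf.get_counts.remote()
                      for buffers in node_buffer
                      for buf in buffers])
    actor_steps = np.array([c[0] for c in counts])
    learner_steps = np.array([c[1] for c in counts])
    buffer_sizes = np.array([c[2] for c in counts])
    return actor_steps, learner_steps, buffer_sizes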
Example #5
def worker_test(ps, replay_buffer, opt):
    agent = Actor(opt, job="main")
    test_env = gym.make(opt.env_name)
    agent.test(ps, replay_buffer, opt, test_env)