Example #1
    def validate(engine: Engine):
        res = validation.validation_run(env_tst, net, device=device)
        print("%d: tst: %s" % (engine.state.iteration, res))

        for key, val in res.items():
            engine.state.metrics[key + "_tst"] = val

        res = validation.validation_run(env_val, net, device=device)
        print("%d: val:%s" % (engine.state.iteration, res))

        for key, val in res.items():
            engine.state.metrics[key + "_val"] = val

        val_reward = res["episode_reward"]

        if getattr(engine.state, "best_val_reward", None) is None:
            engine.state.best_val_reward = val_reward

        if engine.state.best_val_reward < val_reward:
            print("Best validation reward updated: %.3f -> %.3f, model saved" %
                  (engine.state.best_val_reward, val_reward))

            engine.state.best_val_reward = val_reward
            path = saves_path / ("val_reward-%.3f.data" % val_reward)
            torch.save(net.state_dict(), path)
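
The handler above is written for a PyTorch Ignite Engine. A minimal sketch of how it might be attached, assuming pytorch-ignite is installed; the process_batch stub and the VALIDATION_EVERY_STEP value are illustrative assumptions, not part of the original example.

from ignite.engine import Engine, Events

VALIDATION_EVERY_STEP = 1000  # illustrative interval, not taken from the example

def process_batch(engine, batch):
    # Placeholder training step; the real one would compute and apply the DQN loss.
    return {"loss": 0.0}

engine = Engine(process_batch)
# Call validate(engine) every VALIDATION_EVERY_STEP iterations of engine.run(...).
engine.add_event_handler(Events.ITERATION_COMPLETED(every=VALIDATION_EVERY_STEP),
                         validate)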
Example #2
            if step_idx % EVAL_EVERY_STEP == 0:
                mean_val = common.calc_values_of_states(eval_states, net, device=device)
                writer.add_scalar("values_mean", mean_val, step_idx)
                if best_mean_val is None or best_mean_val < mean_val:
                    if best_mean_val is not None:
                        print("%d: Best mean value updated %.3f -> %.3f" % (step_idx, best_mean_val, mean_val))
                    best_mean_val = mean_val
                    torch.save(net.state_dict(), os.path.join(saves_path, "mean_val-%.3f.data" % mean_val))

            optimizer.zero_grad()
            batch = buffer.sample(BATCH_SIZE)
            loss_v = common.calc_loss(batch, net, tgt_net.target_model, GAMMA ** REWARD_STEPS, device=device)
            loss_v.backward()
            optimizer.step()

            if step_idx % TARGET_NET_SYNC == 0:
                tgt_net.sync()

            if step_idx % CHECKPOINT_EVERY_STEP == 0:
                idx = step_idx // CHECKPOINT_EVERY_STEP
                torch.save(net.state_dict(), os.path.join(saves_path, "checkpoint-%03d.data" % idx))

            if step_idx % VALIDATION_EVERY_STEP == 0:
                res = validation.validation_run(env_tst, net, device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_test", val, step_idx)
                res = validation.validation_run(env_val, net, device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_val", val, step_idx)
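
The discount passed to calc_loss is GAMMA ** REWARD_STEPS because the experience source stores n-step transitions whose rewards are already summed over REWARD_STEPS steps. A hedged sketch of what such a loss can look like; the unpacked batch layout and the function name are assumptions, not the library's actual calc_loss signature.

import torch
import torch.nn.functional as F

def calc_loss_sketch(states, actions, rewards, dones, next_states,
                     net, tgt_net, gamma, device="cpu"):
    # rewards already hold the discounted sum over REWARD_STEPS steps,
    # so the bootstrap term is discounted by gamma = GAMMA ** REWARD_STEPS.
    states_v = torch.as_tensor(states, dtype=torch.float32, device=device)
    next_states_v = torch.as_tensor(next_states, dtype=torch.float32, device=device)
    actions_v = torch.as_tensor(actions, dtype=torch.int64, device=device)
    rewards_v = torch.as_tensor(rewards, dtype=torch.float32, device=device)
    done_mask = torch.as_tensor(dones, dtype=torch.bool, device=device)

    # Q(s, a) for the actions actually taken.
    q_sa = net(states_v).gather(1, actions_v.unsqueeze(-1)).squeeze(-1)
    with torch.no_grad():
        # Bootstrap from the target network; terminal states contribute nothing.
        next_q = tgt_net(next_states_v).max(1)[0]
        next_q[done_mask] = 0.0
        target = rewards_v + gamma * next_q
    return F.mse_loss(q_sa, target)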
Example #3
def train_model(cuda, phase, premodel, pdays):
    """
    cuda : True / False
    phase : 1~3
    premodel: data/phase1_model.data
    pdays: integer
    """
    device = torch.device("cuda" if cuda else "cpu")
    phase = int(phase)
    if phase == 1:
        config = sconfig
    elif phase == 2:
        config = mconfig
    elif phase == 3:
        config = pconfig

    run_name = "v" + config.version + "-phase" + str(phase)
    saves_path = os.path.join("saves", run_name)
    os.makedirs(saves_path, exist_ok=True)

    save_name = ""

    writer = SummaryWriter(comment=run_name)

    prices_list, val_prices_list = data.load_prices(config.choices)

    if phase == 1:
        s_env = environ.StocksEnvS(prices_list)
        stock_env = s_env
        val_stock_env = environ.StocksEnvS(val_prices_list)
        save_name = "{}.data".format(run_name)
    elif phase == 2:
        # Load the phase 1 network graph.
        s_env = environ.StocksEnvS(prices_list)
        prenet = models.SimpleFFDQN(s_env.observation_space.shape[0],
                                    s_env.action_space.n)  #.to(device)
        models.load_model(premodel, prenet)

        # Create the phase 2 environment.
        stock_env = environ.StocksEnvM(prices_list, prenet)
        val_stock_env = environ.StocksEnvM(val_prices_list, prenet)
        save_name = "{}.data".format(run_name)
    elif phase == 3:
        predict_days = int(pdays)
        stock_env = pdenviron.PredEnv(prices_list=prices_list,
                                      predict_days=predict_days)
        val_stock_env = pdenviron.PredEnv(prices_list=val_prices_list,
                                          predict_days=predict_days)
        save_name = "{}-{}.data".format(run_name, predict_days)

    net = models.SimpleFFDQN(stock_env.observation_space.shape[0],
                             stock_env.action_space.n).to(device)
    tgt_net = ptan.agent.TargetNet(net)
    selector = ptan.actions.EpsilonGreedyActionSelector(config.epsilon_start)
    agent = ptan.agent.DQNAgent(net, selector, device=device)
    exp_source = ptan.experience.ExperienceSourceFirstLast(
        stock_env, agent, config.gamma, steps_count=config.reward_steps)
    buffer = ptan.experience.ExperienceReplayBuffer(exp_source,
                                                    config.replay_size)
    optimizer = optim.Adam(net.parameters(), lr=config.learning_rate)

    # main training loop
    step_idx = 0
    eval_states = None
    best_mean_val = None

    with common.RewardTracker(writer, np.inf,
                              group_rewards=100) as reward_tracker:
        while step_idx < config.end_step:
            step_idx += 1
            buffer.populate(1)
            selector.epsilon = max(
                config.epsilon_stop,
                config.epsilon_start - step_idx / config.epsilon_steps)

            new_rewards = exp_source.pop_rewards_steps()
            if new_rewards:
                reward_tracker.reward(new_rewards[0], step_idx,
                                      selector.epsilon)

            if len(buffer) < config.replay_initial:
                continue

            if eval_states is None:
                print("Initial buffer populated, start training")
                eval_states = buffer.sample(config.states_to_evaluate)
                eval_states = [
                    np.array(transition.state, copy=False)
                    for transition in eval_states
                ]
                eval_states = np.array(eval_states, copy=False)

            if step_idx % config.eval_every_step == 0:
                mean_val = common.calc_values_of_states(eval_states,
                                                        net,
                                                        device=device)
                writer.add_scalar("values_mean", mean_val, step_idx)
                if best_mean_val is None or best_mean_val < mean_val:
                    if best_mean_val is not None:
                        print("%d: Best mean value updated %.3f -> %.3f" %
                              (step_idx, best_mean_val, mean_val))
                    best_mean_val = mean_val
                    #torch.save(net.state_dict(), os.path.join(saves_path, "mean_val-%.3f.data" % mean_val))

            optimizer.zero_grad()
            batch = buffer.sample(config.batch_size)
            loss_v = common.calc_loss(batch,
                                      net,
                                      tgt_net.target_model,
                                      config.gamma**config.reward_steps,
                                      device=device)
            loss_v.backward()
            optimizer.step()

            if step_idx % config.target_net_sync == 0:
                tgt_net.sync()

            if step_idx % config.checkpoint_every_step == 0:
                idx = step_idx // config.checkpoint_every_step
                torch.save(
                    net.state_dict(),
                    os.path.join(saves_path, "checkpoint-%d.data" % idx))

            if step_idx % config.validation_every_step == 0:
                res = validation.validation_run(stock_env, net, device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_test", val, step_idx)
                res = validation.validation_run(val_stock_env,
                                                net,
                                                device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_val", val, step_idx)

        models.save_model(os.path.join(saves_path, save_name), net,
                          {"predict_days": predict_days})
Example #4
                                     "mean_val-%.3f.data" % mean_val))

            optimizer.zero_grad()
            batch = buffer.sample(sconfig.batch_size)
            loss_v = common.calc_loss(batch,
                                      net,
                                      tgt_net.target_model,
                                      sconfig.gamma**sconfig.reward_steps,
                                      device=device)
            loss_v.backward()
            optimizer.step()

            if step_idx % sconfig.target_net_sync == 0:
                tgt_net.sync()

            if step_idx % sconfig.checkpoint_every_step == 0:
                idx = step_idx // sconfig.checkpoint_every_step
                torch.save(
                    net.state_dict(),
                    os.path.join(saves_path, "checkpoint-%03d.data" % idx))

            if step_idx % sconfig.validation_every_step == 0:
                res = validation.validation_run(stock_env, net, device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_test", val, step_idx)
                res = validation.validation_run(val_stock_env,
                                                net,
                                                device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_val", val, step_idx)
Example #5
            if step_idx % EVAL_EVERY_STEP == 0:
                mean_val = common.calc_values_of_states(eval_states, net, cuda=args.cuda)
                writer.add_scalar("values_mean", mean_val, step_idx)
                if best_mean_val is None or best_mean_val < mean_val:
                    if best_mean_val is not None:
                        print("%d: Best mean value updated %.3f -> %.3f" % (step_idx, best_mean_val, mean_val))
                    best_mean_val = mean_val
                    torch.save(net.state_dict(), os.path.join(saves_path, "mean_val-%.3f.data" % mean_val))

            optimizer.zero_grad()
            batch = buffer.sample(BATCH_SIZE)
            loss_v = common.calc_loss(batch, net, tgt_net.target_model,
                                                      GAMMA ** REWARD_STEPS, cuda=args.cuda)
            loss_v.backward()
            optimizer.step()

            if step_idx % TARGET_NET_SYNC == 0:
                tgt_net.sync()

            if step_idx % CHECKPOINT_EVERY_STEP == 0:
                idx = step_idx // CHECKPOINT_EVERY_STEP
                torch.save(net.state_dict(), os.path.join(saves_path, "checkpoint-%03d.data" % idx))

            if step_idx % VALIDATION_EVERY_STEP == 0:
                res = validation.validation_run(env_tst, net, cuda=args.cuda)
                for key, val in res.items():
                    writer.add_scalar(key + "_test", val, step_idx)
                res = validation.validation_run(env_val, net, cuda=args.cuda)
                for key, val in res.items():
                    writer.add_scalar(key + "_val", val, step_idx)
Example #6
            if step_idx % CHECKPOINT_EVERY_STEP == 0:
                # idx = step_idx // CHECKPOINT_EVERY_STEP
                checkpoint = {
                    "obs_space": env.observation_space.shape[0],
                    "action_n": env.action_space.n,
                    "state_dict": net.state_dict()
                }
                with open(
                        os.path.join(saves_path,
                                     "checkpoint-%d.data" % step_idx),
                        "wb") as f:
                    torch.save(checkpoint, f)

            if step_idx % VALIDATION_EVERY_STEP == 0:
                net_processor.val_mode(batch_size=1)
                validation_episodes = min(int((1 / 1800) * step_idx + 100),
                                          MAX_VALIDATION_EPISODES)
                writer.add_scalar("validation_episodes", validation_episodes,
                                  step_idx)

                val_epsilon = max(
                    0, EPSILON_START - step_idx * 1.25 / EPSILON_STEPS)
                stats = validation.validation_run(env_val,
                                                  net,
                                                  episodes=validation_episodes,
                                                  epsilon=val_epsilon)
                common.valid_result_visualize(stats, writer, step_idx)

            if step_idx % WEIGHT_VISUALIZE_STEP == 0:
                net_processor.val_mode(batch_size=1)
                common.weight_visualize(net, writer)
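
A hedged sketch of restoring such a checkpoint, assuming the models module and the SimpleFFDQN(obs_size, n_actions) constructor used in Example #3; the file path is illustrative.

import torch

checkpoint = torch.load("saves/run/checkpoint-100000.data", map_location="cpu")
net = models.SimpleFFDQN(checkpoint["obs_space"], checkpoint["action_n"])
net.load_state_dict(checkpoint["state_dict"])
net.eval()  # switch to inference mode before running validation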