Example No. 1
while e < n_epochs:
    frame = 0
    loss = 0.0
    Q_max = 0.0
    env.reset()
    state_t_1, reward_t, terminal = env.observe()
    # Take actions until the episode terminates, accumulating experience
    # Once enough experience has accumulated, learn from it (experience replay)
    loops += 1

    while not terminal:
        state_t = state_t_1

        # Use a small fixed exploration rate when running a loaded model
        # before replay has started; otherwise use the agent's own rate.
        exploration = 0.1 if args.load and not start_replay else agent.exploration

        action_t, is_random = agent.select_action([state_t], exploration)
        env.execute_action(action_t)
        state_t_1, reward_t, terminal = env.observe()

        # Optimistic initial values:
        # for the first K loops, pretend every action received the same reward.
        if is_optimistic_epoch(loops, optimistic_num) and not args.load:
            action_t = optimistic_action(env, loops, optimistic)
            reward_t = 1  # identical constant reward, regardless of the action

        # store_experience reports whether enough experience has accumulated
        # for replay to begin.
        start_replay = agent.store_experience([state_t], action_t, reward_t,
                                              [state_t_1], terminal)

        if start_replay:
            do_replay_count += 1
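
The (action_t, is_random) pair and the exploration argument passed to agent.select_action point to an epsilon-greedy policy, and the comments above describe an optimistic-initial-values warm-up in which each action is credited the same reward for the first K loops. The sketch below illustrates both ideas; the function bodies and the simplified signatures (a plain n_actions count instead of env and optimistic) are assumptions, not the code behind this example.

import numpy as np

def select_action_epsilon_greedy(q_values, exploration, rng=np.random):
    # Epsilon-greedy: with probability `exploration` act randomly
    # (is_random=True), otherwise take the greedy argmax action.
    if rng.random() < exploration:
        return rng.randint(len(q_values)), True
    return int(np.argmax(q_values)), False

def is_optimistic_epoch(loop, optimistic_num):
    # Assumed rule: the optimistic warm-up covers the first `optimistic_num` loops.
    return loop <= optimistic_num

def optimistic_action(n_actions, loop):
    # Assumed rule: cycle through the actions so each one is tried
    # (and credited the same constant reward) equally often during the warm-up.
    return loop % n_actions
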
Example No. 2
# Assumes the surrounding script imports/defines `utils`, `Env`, `Agent`,
# the parsed CLI `args`, and `backend` (most likely keras.backend).
start_date = utils.format(args.start_date)
end_date = utils.format(args.end_date)

# Build the environment for the requested date range and an agent that
# loads previously trained weights.
env = Env(start_date, end_date)
agent = Agent(env.actions, len(env.columns), env.state_size, args.memory_size)
agent.load_model()

terminal = False
total_frame = 0
max_step = 0
frame = 0
state_t, reward_t, terminal = env.observe()
while not terminal:

    # exploration = 0.0: always take the greedy (highest-Q) action
    action_t, is_random = agent.select_action([state_t], 0.0)
    env.execute_action(action_t)

    state_t, reward_t, terminal = env.observe()

    frame += 1
    total_frame += 1
    if max_step < env.step:
        max_step = env.step

    print("frame: %s, total_frame: %s, terminal: %s, action: %s, reward: %s" %
          (frame, total_frame, terminal, action_t, reward_t))

# Free the Keras/TensorFlow graph and session state now that evaluation is done.
backend.clear_session()
print("max_step: %s, score: %s" % (max_step, env.score))