    env = Maze(height=10, width=10)
    MonteCarlo_brain_ = Model.Monte(greedy_rate=0.9,
                                    learning_rate=0.9,
                                    reward_decay=0.9)
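    # greedy_rate, learning_rate and reward_decay are presumably epsilon (for epsilon-greedy action
    # selection), the step size alpha and the discount factor gamma; this is assumed from the usual
    # RL naming, since the Model class itself is not shown here.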
    # Two methods are used to evaluate the algorithm:
    # the time it takes to complete the appointed number of episodes, and
    # the number of episodes it takes to converge.
    # (Convergence is judged by repeated results: the reward in 10 consecutive episodes stays above a certain value.
    # That value is obtained beforehand by running 10000 episodes with judge_method = 'sum of episodes'.)

    # When judge_method == 'repeated steps', the agent stops once judge_number consecutive episodes
    # have satisfied the convergence condition.
    # When judge_method == 'sum of episodes', the agent stops when the episode count reaches judge_number.

    # The delay_time parameter of update() slows the loop down so the agent's actions can be watched (0.01 is enough).
    # A hedged sketch of such an update() loop is shown after this example.

    T1 = time.perf_counter()
    update(judge_number=1, judge_method='sum of episodes', delay_time=0.00)
    # update(judge_number=6, judge_method='repeated steps', delay_time=0.00)
    T2 = time.perf_counter()
    print('Time spent: %s ms' % ((T2 - T1) * 1000))
    result_display.result_plot(x=plot_episode,
                               y=plot_sum_reward,
                               x_name='Episode',
                               y_name='Sum of Reward',
                               title='MonteCarlo Learning')
    print(MonteCarlo_brain_.table_result())
    print(f'episode:{max(plot_episode)}')
    print('-----------End of All-------------')
    env.mainloop()
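# The update() helper called above is not shown on this page. Below is a minimal sketch of how such
# a loop could implement the two stopping criteria described in the comments; env, brain,
# plot_episode, plot_sum_reward, the agent/environment interfaces and converge_reward are
# assumptions for illustration, not the original code.
import time

def update(judge_number, judge_method, delay_time=0.0, converge_reward=1.0):
    episode, repeated = 0, 0
    while True:
        episode += 1
        state = env.reset()                          # assumed environment interface
        sum_reward, done = 0.0, False
        while not done:
            action = brain.choose_action(state)      # assumed agent interface
            state, reward, done = env.step(action)   # assumed environment interface
            sum_reward += reward
            # (the agent's own learning update is omitted in this sketch)
            if delay_time:
                time.sleep(delay_time)               # slow the loop down to watch the agent move
        plot_episode.append(episode)
        plot_sum_reward.append(sum_reward)
        if judge_method == 'sum of episodes':
            if episode >= judge_number:              # stop after a fixed number of episodes
                break
        else:                                        # 'repeated steps'
            repeated = repeated + 1 if sum_reward >= converge_reward else 0
            if repeated >= judge_number:             # judge_number consecutive "good" episodes
                break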
Example #2
# This 'if' check matters: if the file is imported somewhere else
# rather than run directly, the code below will not be executed.
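# (When the file is imported instead, __name__ holds the module's name rather than "__main__".)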

if __name__ == "__main__":
    print('-----------Start-------------')
    env = Maze(height=10, width=10)
    Q_brain_ = Model.Qlearning(greedy_rate=0.9,
                               learning_rate=0.01,
                               reward_decay=0.9)
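    # As above, greedy_rate, learning_rate and reward_decay are presumably epsilon, alpha and gamma.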
    # Two methods are used to evaluate the algorithm:
    # the time it takes to complete the appointed number of episodes, and
    # the number of episodes it takes to converge (judged by repeated results: if the reward in 10
    # consecutive episodes is the same, we roughly consider the algorithm to have converged).
    # When judge_method == 'repeated steps', the agent stops once judge_number consecutive episodes
    # satisfy this condition; when judge_method == 'sum of episodes', it stops after judge_number episodes.
    T1 = time.perf_counter()
    update(judge_number=10, judge_method='repeated steps', delay_time=0.00)
    # update(judge_number=50, judge_method='sum of episodes', delay_time=0.00)
    T2 = time.perf_counter()
    print('Time spent: %s ms' % ((T2 - T1) * 1000))
    result_display.result_plot(x=plot_episode,
                               y=plot_sum_reward,
                               x_name='Episode',
                               y_name='Reward',
                               title='Q Learning')
    np.set_printoptions(threshold=len(Q_brain_.table_result()))
    # Otherwise large tables may be truncated when printed; the default numpy print threshold is 1000 elements.
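    # (np.set_printoptions(threshold=np.inf) would print arrays in full regardless of size.)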
    print(Q_brain_.table_result())
    print(f'episode:{max(plot_episode)}')
    print('-----------End of All-------------')
    env.mainloop()
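# result_display.result_plot() used above is also not shown on this page. A plausible
# matplotlib-based sketch of such a helper (assumed for illustration only):
import matplotlib.pyplot as plt

def result_plot(x, y, x_name, y_name, title):
    plt.plot(x, y)          # reward per episode as a simple line plot
    plt.xlabel(x_name)
    plt.ylabel(y_name)
    plt.title(title)
    plt.grid(True)
    plt.show()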