env = Maze(height=10, width=10)
MonteCarlo_brain_ = Model.Monte(greedy_rate=0.9, learning_rate=0.9, reward_decay=0.9)

# Two methods are used to evaluate the algorithm:
#   - the time it takes to complete the appointed number of episodes;
#   - the number of episodes it takes to converge.
# Convergence is judged by repeated steps: the reward in 10 consecutive episodes must exceed a
# certain value. That value is obtained beforehand by running 10000 episodes with
# judge_method='sum of episodes'.
# When judge_method == 'repeated steps', the agent stops once judge_number consecutive episodes
# satisfy the convergence condition.
# When judge_method == 'sum of episodes', the agent stops when the episode count reaches judge_number.
# The delay_time parameter of update() slows the loop so the agent's actions can be watched
# (0.01 is enough).
T1 = time.perf_counter()
update(judge_number=1, judge_method='sum of episodes', delay_time=0.00)
# update(judge_number=6, judge_method='repeated steps', delay_time=0.00)
T2 = time.perf_counter()
print('Time spent: %s ms' % ((T2 - T1) * 1000))

result_display.result_plot(x=plot_episode, y=plot_sum_reward,
                           x_name='Episode', y_name='Sum of Reward',
                           title='MonteCarlo Learning')
print(MonteCarlo_brain_.table_result())
print(f'episode:{max(plot_episode)}')
print('-----------End of All-------------')
env.mainloop()
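# For reference, a minimal sketch of what the update() driver called above might look like.
# This is an illustration only: the real update() lives elsewhere in the project, and the
# env.reset()/env.step() and agent.choose_action()/agent.learn() interfaces used below are
# assumptions, not the project's actual API (the project uses MonteCarlo_brain_/Q_brain_ as agents).
def update_sketch(judge_number, judge_method, delay_time=0.0):
    episode = 0
    repeated = 0                       # consecutive episodes meeting the convergence condition
    last_reward = None
    while True:
        episode += 1
        state = env.reset()            # assumed env interface
        sum_reward = 0
        done = False
        while not done:
            action = agent.choose_action(state)             # assumed agent interface
            next_state, reward, done = env.step(action)     # assumed env interface
            agent.learn(state, action, reward, next_state, done)
            state = next_state
            sum_reward += reward
            time.sleep(delay_time)     # slow the loop so the agent's moves can be watched
        plot_episode.append(episode)
        plot_sum_reward.append(sum_reward)

        if judge_method == 'sum of episodes':
            # Stop after a fixed number of episodes.
            if episode >= judge_number:
                break
        elif judge_method == 'repeated steps':
            # Stop once judge_number consecutive episodes return the same reward.
            repeated = repeated + 1 if sum_reward == last_reward else 0
            last_reward = sum_reward
            if repeated >= judge_number:
                break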
# This 'if' test is special: if this file is imported elsewhere instead of being run directly,
# the code below will not be executed.
if __name__ == "__main__":
    print('-----------Start-------------')
    env = Maze(height=10, width=10)
    Q_brain_ = Model.Qlearning(greedy_rate=0.9, learning_rate=0.01, reward_decay=0.9)

    # Two methods are used to evaluate the algorithm:
    #   - the time it takes to complete the appointed number of episodes;
    #   - the number of episodes it takes to converge (judged by repeated steps: if the rewards in
    #     10 consecutive episodes are the same, we roughly consider the algorithm to have converged).
    # When judge_method == 'repeated steps', the agent stops once judge_number consecutive episodes
    # satisfy the convergence condition.
    T1 = time.perf_counter()
    update(judge_number=10, judge_method='repeated steps', delay_time=0.00)
    # update(judge_number=50, judge_method='sum of episodes', delay_time=0.00)
    T2 = time.perf_counter()
    print('Time spent: %s ms' % ((T2 - T1) * 1000))

    result_display.result_plot(x=plot_episode, y=plot_sum_reward,
                               x_name='Episode', y_name='Reward',
                               title='Q Learning')
    # Raise the NumPy print threshold so the whole table is shown;
    # otherwise large tables are truncated (the default threshold is 1000).
    np.set_printoptions(threshold=len(Q_brain_.table_result()))
    print(Q_brain_.table_result())
    print(f'episode:{max(plot_episode)}')
    print('-----------End of All-------------')
    env.mainloop()
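# The Model.Qlearning class itself is not shown in this file. The sketch below illustrates what a
# standard tabular Q-learning agent with the constructor parameters used above typically does:
# epsilon-greedy action selection (greedy_rate) plus the one-step update
# Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)).
# It is an assumption-based illustration, not the project's Model.Qlearning; states are assumed
# to be hashable labels (e.g. strings) and actions are the table's column labels.
import numpy as np
import pandas as pd

class QlearningSketch:
    def __init__(self, actions=(0, 1, 2, 3), greedy_rate=0.9,
                 learning_rate=0.01, reward_decay=0.9):
        self.actions = list(actions)
        self.epsilon = greedy_rate      # probability of taking the greedy action
        self.alpha = learning_rate      # step size
        self.gamma = reward_decay       # discount factor
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def _ensure_state(self, state):
        # Add an all-zero row the first time a state is seen.
        if state not in self.q_table.index:
            self.q_table.loc[state] = [0.0] * len(self.actions)

    def choose_action(self, state):
        self._ensure_state(state)
        if np.random.rand() < self.epsilon:
            row = self.q_table.loc[state]
            # Pick among the highest-valued actions, breaking ties randomly.
            return np.random.choice(row[row == row.max()].index)
        return np.random.choice(self.actions)

    def learn(self, state, action, reward, next_state, done):
        self._ensure_state(state)
        self._ensure_state(next_state)
        q_predict = self.q_table.loc[state, action]
        # Q-learning target: r + gamma * max_a' Q(s', a'); just r at terminal states.
        q_target = reward if done else reward + self.gamma * self.q_table.loc[next_state, :].max()
        self.q_table.loc[state, action] += self.alpha * (q_target - q_predict)

    def table_result(self):
        return self.q_table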