# Save the plot that is current at this point as frame 0, titled 'initial'.
plt.title('initial')
plt.savefig(file_path + '0.png')

num_episodes = 500
k = 1  # index of the next frame to save; '0.png' is the initial plot above

# NOTE: xrange is the Python 2 lazy range; this script targets Python 2.
for i in xrange(num_episodes):
    # Start a fresh episode: the domain hands back the first (reward, state)
    # pair and the agent clears its per-episode state.
    r_t, s_t = domain.reset()
    agent.reset()
    count = 0               # number of domain steps taken this episode
    cumulative_reward = 0   # sum of the rewards returned by domain.step

    # The domain returns a 'None' state when terminating. Compare by
    # identity: '!=' would dispatch to the state's own __ne__, which for
    # array-like states may yield an elementwise result instead of a bool.
    while s_t is not None:
        # apply an action from the agent
        r_t, s_t = domain.step(agent.step(r_t, s_t))
        count += 1
        cumulative_reward += r_t

    # final update step for the agent (called with the terminal 'None' state)
    agent.step(r_t, s_t)

    # Every second episode, optionally redraw the value function and save it
    # as the next numbered frame.
    if i % 2 == 0:
        if render_value_fn:
            plt.gca().clear()
            plt.contourf(*getValueFn(valuefn))
            plt.title('episode ' + str(i))
            plt.savefig(file_path + str(k) + '.png')
            k += 1

        # print cumulative reward it took to reach the goal