def __init__(self, env, bot):
    """Build the environment, the bot and two interactions, then record a baseline.

    Args:
        env: a class (not an instance) that must subclass ``NavEnvExt``;
            it is instantiated with no arguments.
        bot: a class that must subclass ``QLBot``; it is instantiated as
            ``bot(NFQ(), 5)``.

    Raises:
        AssertionError: if either class check fails (the checks are
            skipped when Python runs with ``-O``).
    """
    # Validate both classes before anything is constructed.
    assert issubclass(env, NavEnvExt)
    assert issubclass(bot, QLBot)

    # Per-evaluation statistics, appended to by test_performance().
    self.avg_rewards, self.reward_stds = [], []

    environment = env()
    # NOTE(review): the meaning of the second argument (5) is not visible
    # here -- confirm against QLBot's constructor.
    learner = bot(NFQ(), 5)
    self.nav_env, self.bot = environment, learner

    # Both interactions wrap the very same environment and bot objects.
    self.inter = Interaction(environment, learner)
    self.inter_test = Interaction(environment, learner)

    # Record one measurement before any call to run_one_cycle().
    self.test_performance()
class QLBotTest(object):
    """Alternate interaction cycles of a Q-learning bot with evaluations.

    Attributes are plain instance fields:
      nav_env      -- instance of the ``env`` class passed to the constructor
      bot          -- instance of the ``bot`` class, built as ``bot(NFQ(), 5)``
      inter        -- Interaction driven by run_one_cycle()
      inter_test   -- Interaction queried by test_performance()
      avg_rewards  -- one average reward per evaluation
      reward_stds  -- one reward standard deviation per evaluation
    """

    def __init__(self, env, bot):
        """Build the environment, bot and interactions, then evaluate once.

        Args:
            env: a class that must subclass ``NavEnvExt``.
            bot: a class that must subclass ``QLBot``.

        Raises:
            AssertionError: if either class check fails (the checks are
                skipped when Python runs with ``-O``).
        """
        # Validate both classes before anything is constructed.
        assert issubclass(env, NavEnvExt)
        assert issubclass(bot, QLBot)

        # Statistics must exist before the first test_performance() call.
        self.avg_rewards, self.reward_stds = [], []

        environment = env()
        # NOTE(review): the meaning of the second argument (5) is not
        # visible here -- confirm against QLBot's constructor.
        learner = bot(NFQ(), 5)
        self.nav_env, self.bot = environment, learner

        # Both interactions wrap the very same environment and bot objects.
        self.inter = Interaction(environment, learner)
        self.inter_test = Interaction(environment, learner)

        self.test_performance()

    def test_performance(self):
        """Evaluate the bot once and append the result to the statistics.

        Calls ``self.inter_test.compute_avg_reward(20, 50)``, which must
        return a 2-item result (average, standard deviation).
        NOTE(review): presumably 20 and 50 are episode/step counts --
        confirm against Interaction.compute_avg_reward.
        """
        mean_reward, reward_std = self.inter_test.compute_avg_reward(20, 50)
        print("test the performance of the robot")
        self.avg_rewards.append(mean_reward)
        self.reward_stds.append(reward_std)

    def run_one_cycle(self):
        """Reset the environment, interact for 500 steps, then evaluate."""
        interaction = self.inter
        # Re-running __init__ on the existing object resets the environment
        # in place, so every holder of a reference sees the reset state.
        interaction.env.__init__()
        interaction.interact_serie(500)
        self.test_performance()
import sys
import os

# Make the parent directory importable so the project packages resolve.
sys.path.insert(0, os.path.abspath('..'))

import matplotlib.pyplot as plt
import matplotlib.animation as animation

from inter.interaction import Interaction
from environment.nav_env_ext import NavEnvExt
from bot.explore_bot import ExploreBot

# Wire an exploring bot to the extended navigation environment.
nav_env = NavEnvExt()
bot = ExploreBot()
inter = Interaction(nav_env, bot)

# Three side-by-side panels, all without axis decorations.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))
for _panel in (ax1, ax2, ax3):
    _panel.axis('off')

# Initial frames: top-down view, egocentric view, and the rendering of the
# interaction's current observation.
obs1 = nav_env.top_down_view(128)
obs2 = nav_env.egocentric_view(128)
obs3 = nav_env.show_observation(inter._observation)

im1 = ax1.imshow(obs1, origin='lower')
im2 = ax2.imshow(obs2, origin='lower')
im3 = ax3.imshow(obs3, interpolation='none', origin='lower')


def animate(*args):
    """Call ``inter.interact()`` once; positional arguments are ignored."""
    inter.interact()
import sys
import os

# Make the parent directory importable so the project packages resolve.
sys.path.insert(0, os.path.abspath('..'))

import numpy as np

from inter.interaction import Interaction
from environment.nav_env_ego import NavEnvEgo
from bot.explore_bot import ExploreBot
from bot.state_repr_learn import StateReprLearn
from plot_exp_nav import plot_states

# Collect data by letting an exploring bot interact with the environment.
nav_env = NavEnvEgo()
bot = ExploreBot()
inter = Interaction(nav_env, bot)
inter.interact_serie(5000)

# Representation learner built from the data gathered by the bot.
# NOTE(review): the meaning of 300 and 2 is not visible here -- confirm
# against StateReprLearn's constructor.
bot_srl = StateReprLearn(300, 2, bot.data)

# try to learn the representation
# bot_srl.gradient_descent(100)


def plot_x(n):
    """Plot the result after n steps of gradient descent.

    Args:
        n: value inserted into the output path
            ``figures/nav_ego_<n>_x``.
    """
    env_states = np.array(inter.env_state_his)
    target_path = "figures/nav_ego_{}_x".format(n)
    plot_states(env_states, bot_srl.states, 'x', path=target_path)