def __init__(self, env, bot):
    """Build the environment, the bot and two interactions, then test once.

    Args:
        env: a class (not an instance) that must be a subclass of
            ``NavEnvExt``; it is instantiated with no arguments.
        bot: a class that must be a subclass of ``QLBot``; it is
            instantiated as ``bot(NFQ(), 5)``.

    Raises:
        AssertionError: if either class check fails.
    """
    assert issubclass(env, NavEnvExt)
    assert issubclass(bot, QLBot)

    self.nav_env = environment = env()
    self.bot = agent = bot(NFQ(), 5)

    # Both interactions are built on the very same environment and bot
    # objects.
    self.inter = Interaction(environment, agent)
    self.inter_test = Interaction(environment, agent)

    # Histories filled by test_performance(), one entry per call.
    self.avg_rewards, self.reward_stds = [], []

    # Record a first measurement before any training cycle has run.
    self.test_performance()
class QLBotTest(object):
    """Drive a ``QLBot`` in a ``NavEnvExt`` and log its measured rewards.

    Attributes are plain instance fields:
      * ``nav_env`` / ``bot``: the instantiated environment and bot.
      * ``inter`` / ``inter_test``: two ``Interaction`` objects built on the
        same environment and bot; ``inter`` is used by ``run_one_cycle`` and
        ``inter_test`` by ``test_performance``.
      * ``avg_rewards`` / ``reward_stds``: lists that grow by one entry per
        ``test_performance`` call.
    """

    def __init__(self, env, bot):
        """Instantiate ``env`` and ``bot`` and take a first measurement.

        Args:
            env: a class that must be a subclass of ``NavEnvExt``; called
                with no arguments.
            bot: a class that must be a subclass of ``QLBot``; called as
                ``bot(NFQ(), 5)``.

        Raises:
            AssertionError: if either class check fails.
        """
        assert issubclass(env, NavEnvExt)
        assert issubclass(bot, QLBot)

        self.nav_env = environment = env()
        self.bot = agent = bot(NFQ(), 5)

        # Both interactions are built on the very same environment and bot.
        self.inter = Interaction(environment, agent)
        self.inter_test = Interaction(environment, agent)

        self.avg_rewards, self.reward_stds = [], []

        # Measure once before any training cycle has run.
        self.test_performance()

    def test_performance(self):
        """Call ``inter_test.compute_avg_reward(20, 50)`` and store the pair.

        The first element of the returned pair is appended to
        ``avg_rewards`` and the second to ``reward_stds``.
        """
        mean_reward, reward_std = self.inter_test.compute_avg_reward(20, 50)
        print("test the performance of the robot")
        self.avg_rewards.append(mean_reward)
        self.reward_stds.append(reward_std)

    def run_one_cycle(self):
        """Re-initialise the environment, interact, then measure again."""
        interaction = self.inter
        # The existing environment object is re-initialised in place by
        # calling its __init__ again (no new object is created).
        interaction.env.__init__()
        interaction.interact_serie(500)
        self.test_performance()
import sys
import os
sys.path.insert(0, os.path.abspath('..'))

import matplotlib.pyplot as plt
import matplotlib.animation as animation

from inter.interaction import Interaction
from environment.nav_env_ext import NavEnvExt
from bot.explore_bot import ExploreBot

# Build the environment, the bot, and the Interaction object that couples
# them. These are module-level globals; animate() below reads `inter`.
nav_env = NavEnvExt()
bot = ExploreBot()
inter = Interaction(nav_env, bot)

# One figure with three side-by-side panels. The axes are switched off
# because each panel only displays an image (see the imshow calls below).
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))
ax1.axis('off')
ax2.axis('off')
ax3.axis('off')

# Initial images for the three panels.
# NOTE(review): 128 is presumably the rendered image size -- confirm against
# NavEnvExt.top_down_view / egocentric_view.
obs1 = nav_env.top_down_view(128)
obs2 = nav_env.egocentric_view(128)
# Reads the private `_observation` attribute of the Interaction object.
obs3 = nav_env.show_observation(inter._observation)

# origin='lower' places array index [0, 0] at the lower-left corner of each
# panel; the third panel is additionally drawn without interpolation.
im1 = ax1.imshow(obs1, origin='lower')
im2 = ax2.imshow(obs2, origin='lower')
im3 = ax3.imshow(obs3, interpolation='none', origin='lower')


def animate(*_frame_args):
    """Call ``inter.interact()`` once; positional arguments are ignored."""
    inter.interact()
# Example #4 -- presumably a snippet separator left over from extraction
# 0
import sys
import os
sys.path.insert(0, os.path.abspath('..'))

import numpy as np

from inter.interaction import Interaction
from environment.nav_env_ego import NavEnvEgo
from bot.explore_bot import ExploreBot
from bot.state_repr_learn import StateReprLearn
from plot_exp_nav import plot_states

# Build the environment, the bot, and the Interaction object that couples
# them, then run interact_serie(5000) before any learning is set up.
nav_env = NavEnvEgo()
bot = ExploreBot()
inter = Interaction(nav_env, bot)
inter.interact_serie(5000)

# The learner is constructed from the bot's `data` attribute as it stands
# after the interaction above.
# NOTE(review): the meaning of the arguments 300 and 2 is not visible here --
# confirm against StateReprLearn.__init__.
bot_srl = StateReprLearn(300, 2, bot.data)

# try to learn the representation
# bot_srl.gradient_descent(100)


# plot the result after n steps of gradient descent
def plot_x(n):
    """Plot the recorded environment states against the learned states.

    Passes ``inter.env_state_his`` (converted to a NumPy array) and
    ``bot_srl.states`` to ``plot_states`` with the selector ``'x'``.

    Args:
        n: value inserted into the output path
            ``figures/nav_ego_<n>_x``; it is not used for anything else.
    """
    recorded_states = np.array(inter.env_state_his)
    learned_states = bot_srl.states
    target_path = "figures/nav_ego_{}_x".format(n)
    plot_states(recorded_states, learned_states, 'x', path=target_path)