# NOTE(review): whitespace-mangled residue — an entire script (two 4x4
# Gridworld instances, a shared pygame display surface, a clock, and the
# *beginning* of a plot(history) helper) has been collapsed onto the single
# line below. Because that line starts with '#', Python treats all of it as
# one comment, so nothing here executes as written.
# The plot() body is truncated mid-statement (it ends at the dangling
# `if len(durations_t) >= his:`), so the rest of the function is NOT
# recoverable from this file — restore it from version control before
# reformatting this back into runnable code.
# This variant uses torch.DoubleTensor(history) and assigns the shared
# screen to BOTH env_a and env_b; compare the near-duplicate variant on the
# next line, which uses FloatTensor and sets only env_a.screen — presumably
# one is a drifted copy of the other; confirm which version is current.
#env = Gridworld(width=6, height=6, cell_size=32, agent_pos=(0, 3), food_pos=[(0, 0), (3, 3), (4, 5), (2, 0)]) env_a = Gridworld(width=4, height=4, cell_size=32, agent_pos=(0, 0), food_pos=[(0, 3), (3, 3)]) env_b = Gridworld(width=4, height=4, cell_size=32, agent_pos=(0, 0), food_pos=[(0, 3), (3, 3)]) pg.init() screen = pg.display.set_mode( (env_a.cell_size * env_a.width, env_a.cell_size * env_a.height)) env_a.screen = screen env_b.screen = screen clock = pg.time.Clock() def plot(history): plt.figure(2) plt.clf() durations_t = torch.DoubleTensor(history) plt.title('Training...') plt.xlabel('Episode') plt.ylabel('Duration') plt.plot(durations_t.numpy(), c='lightgray', linewidth=1) his = 50 if len(durations_t) >= his:
# NOTE(review): whitespace-mangled residue — a near-duplicate of the script
# on the previous line, again collapsed onto a single line that Python
# treats entirely as a comment (it starts with '#'), so nothing executes.
# Differences from the other variant: torch.FloatTensor(history) instead of
# DoubleTensor, only env_a.screen is assigned (env_b.screen is never set —
# a latent AttributeError if env_b is ever drawn; verify against the
# caller), and the truncation point is one statement further: the plot()
# body ends at `means = durations_t.unfold(0, his, 1).mean(1).view(-1)`
# with the subsequent plotting of the rolling mean missing. The remainder
# is not recoverable from this file — restore from version control before
# reformatting into runnable code.
#env = Gridworld(width=6, height=6, cell_size=32, agent_pos=(0, 3), food_pos=[(0, 0), (3, 3), (4, 5), (2, 0)]) env_a = Gridworld(width=4, height=4, cell_size=32, agent_pos=(0, 0), food_pos=[(0, 3), (3, 3)]) env_b = Gridworld(width=4, height=4, cell_size=32, agent_pos=(0, 0), food_pos=[(0, 3), (3, 3)]) pg.init() screen = pg.display.set_mode( (env_a.cell_size * env_a.width, env_a.cell_size * env_a.height)) env_a.screen = screen clock = pg.time.Clock() def plot(history): plt.figure(2) plt.clf() durations_t = torch.FloatTensor(history) plt.title('Training...') plt.xlabel('Episode') plt.ylabel('Duration') plt.plot(durations_t.numpy(), c='lightgray', linewidth=1) his = 50 if len(durations_t) >= his: means = durations_t.unfold(0, his, 1).mean(1).view(-1)
# Smoke-test script: render ten randomly-seeded 4x4 Gridworld instances.
#
# For each seed 0..9 a fresh environment is created, reset, attached to the
# pygame display surface, and drawn once. The original source had this whole
# script collapsed onto one line (a syntax error as written) and re-ran
# pg.init() inside the loop; init is loop-invariant, so it is hoisted here.
from gw_collect import Gridworld
import pygame as pg

# Initialise pygame once, before the loop — repeated init() calls were
# redundant (every environment below shares identical geometry).
pg.init()

for seed in range(10):
    env = Gridworld(width=4, height=4, cell_size=32, seed=seed)
    env.reset()
    # Every env is 4x4 with 32-px cells, so the window size is the same each
    # iteration; set_mode is kept inside the loop, as in the original, so
    # env.screen always refers to the current display surface.
    screen = pg.display.set_mode(
        (env.cell_size * env.width, env.cell_size * env.height))
    env.screen = screen
    env.draw(screen)