from gw_collect import Gridworld

import pygame as pg
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F

from copy import deepcopy
from vicero.algorithms.qlearning import Qlearning

#env = Gridworld(width=6, height=6, cell_size=32, agent_pos=(0, 3), food_pos=[(0, 0), (3, 3), (4, 5), (2, 0)])

# two identical 4x4 gridworlds: same agent start position, same food placement
env_a = Gridworld(width=4, height=4, cell_size=32, agent_pos=(0, 0), food_pos=[(0, 3), (3, 3)])
env_b = Gridworld(width=4, height=4, cell_size=32, agent_pos=(0, 0), food_pos=[(0, 3), (3, 3)])

# pygame window sized to the grid; only env_a gets the screen attached
pg.init()
screen = pg.display.set_mode(
    (env_a.cell_size * env_a.width, env_a.cell_size * env_a.height))
env_a.screen = screen
clock = pg.time.Clock()
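# Hedged sketch (added; not part of the original sources): a plausible definition of the
# NeuralNet Q-network passed to the DQN runs below. Only the (64, 4) constructor signature
# is taken from those calls; the hidden layer size and the ReLU activation are assumptions.
# It relies on the torch.nn / torch.nn.functional imports from the header above.
class NeuralNet(nn.Module):
    def __init__(self, input_size, output_size, hidden_size=128):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # map the 64-dimensional state encoding to one Q-value per action
        x = F.relu(self.fc1(x))
        return self.fc2(x)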
if len(durations_t) >= 100:
    # overlay a moving average of the last 100 episode durations
    means = durations_t.unfold(0, 100, 1).mean(1).view(-1)
    means = torch.cat((torch.zeros(99), means))
    plt.plot(means.numpy(), c='green')
plt.pause(0.001)

# Set A: ten gridworlds (seeds 20-29) combined into a single multitask environment
print('Set A')
histories_a = []
env_list = []
for i in range(10):
    env_list.append(Gridworld(width=4, height=4, cell_size=32, seed=(20 + i)))
#for i in [8, 0, 2, 4, 7]:
#    env_list.append(Gridworld(width=4, height=4, cell_size=32, seed=i))
env = MultitaskEnvironment(env_list)

for _ in range(repetitions):
    print('x')
    dqn = DQN(env, qnet=NeuralNet(64, 4).double(), plotter=plot, render=False,
              memory_length=2000, gamma=.99, alpha=.001, epsilon_start=0.1,
            state = torch.from_numpy(state)
            # squared difference between the Q-value vectors the two policies assign to the same state
            env_diffs.append(
                torch.sum((rf_a.policy_net(state) - rf_b.policy_net(state)) ** 2).item())

        print('{}/{} mean difference: {:.4f}'.format(ne + 1, iterations, np.mean(env_diffs)))
        all_mean_diffs.append(np.mean(env_diffs))

    absolutely_all_diffs.append(all_mean_diffs)
    return all_mean_diffs[-1]


# compare two gridworlds that differ only in the agent's start position
envs = [(Gridworld(width=4, height=4, cell_size=32, agent_pos=(2, 0), food_pos=[(1, 3), (3, 3)]),
         Gridworld(width=4, height=4, cell_size=32, agent_pos=(0, 0), food_pos=[(1, 3), (3, 3)]))]

for env_pair in envs:
    print(env_diff(env_pair[0], env_pair[1], 50, 100))

for diff in absolutely_all_diffs:
    plt.plot(diff)
#plt.savefig('test.png')
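# Hedged illustration (added; not part of the original script): the quantity env_diff
# accumulates above is the summed squared difference between the Q-value vectors that two
# trained networks assign to the same state, averaged over the sampled states. The names
# policy_difference, net_a, net_b and states are illustrative stand-ins for
# rf_a.policy_net, rf_b.policy_net and the surrounding loop; torch/numpy come from the
# imports at the top of the file.
def policy_difference(net_a, net_b, states):
    diffs = []
    for state in states:
        state = torch.from_numpy(state)
        diffs.append(torch.sum((net_a(state) - net_b(state)) ** 2).item())
    return np.mean(diffs)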
if len(durations_t) >= 100:
    # overlay a moving average of the last 100 episode durations
    means = durations_t.unfold(0, 100, 1).mean(1).view(-1)
    means = torch.cat((torch.zeros(99), means))
    plt.plot(means.numpy(), c='green')
plt.pause(0.001)

histories_all = []
env_list = []

# train a DQN with a linear Q-function on ten fixed-seed gridworlds, several repetitions each
for i in [8, 0, 2, 4, 7, 22, 23, 24, 25, 51]:
    print(i)
    env = Gridworld(width=4, height=4, cell_size=32, seed=i)
    history = []
    for _ in range(repetitions):
        dqn = DQN(env, qnet=LinRegNet(64, 4).double(), plotter=None, render=False,
                  memory_length=2000, gamma=.99, alpha=.001, epsilon_start=0.1,
                  plot_durations=True)
        dqn.train(training_iterations, 4, plot=False)
        history.append(dqn.history)
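# Hedged sketch (assumption; not from the source): LinRegNet is not defined in this excerpt.
# Going by its name and the (64, 4) constructor call above, it is presumably a purely linear
# Q-function (a single nn.Linear layer), in contrast to the multi-layer NeuralNet used for
# the multitask runs. Treat this as a plausible stand-in only.
class LinRegNet(nn.Module):
    def __init__(self, input_size, output_size):
        super(LinRegNet, self).__init__()
        self.fc = nn.Linear(input_size, output_size)

    def forward(self, x):
        # one linear map from the 64-dimensional state encoding to the 4 action values
        return self.fc(x)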
def average_environment_duration_dqn(env_set, iterations_per_env):
    all_durations = []
    for env in env_set:
        all_durations.append('env={},mean_duration={}'.format(
            env, np.mean(dqn_benchmark(env, iterations_per_env))))
        print(all_durations[-1])
    return all_durations


def average_environment_duration_rein(env_set, iterations_per_env):
    all_durations = []
    for env in env_set:
        all_durations.append('env={},mean_duration={}'.format(
            env, np.mean(rein_benchmark(env, iterations_per_env))))
        print(all_durations[-1])
    return all_durations


#env = Gridworld(width=4, height=4, cell_size=32, seed=33)

# benchmark REINFORCE across ten seeded gridworlds
env_set = []
for i in range(10):
    env_set.append(Gridworld(width=4, height=4, cell_size=32, seed=i))

exp.run(average_environment_duration_rein,
        params={'env_set': env_set, 'iterations_per_env': 10},
        k=1)
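# Hedged sketch (assumption): dqn_benchmark and rein_benchmark are not defined in this
# excerpt. Judging by how their return value is averaged above, each presumably trains a
# fresh agent on the given environment and returns the recorded per-episode durations.
# A possible shape of the DQN variant, reusing only the constructor and train() calls that
# appear in the scripts above; dqn.history is assumed to hold those durations.
def dqn_benchmark(env, iterations):
    dqn = DQN(env, qnet=NeuralNet(64, 4).double(), plotter=None, render=False,
              memory_length=2000, gamma=.99, alpha=.001, epsilon_start=0.1)
    dqn.train(iterations, 4, plot=False)
    return dqn.history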