def test(args):
    """Train a randomly initialised net on one environment and report the result.

    Prints ``START(...)`` when the run begins, then either ``SOLVED(...)`` or
    ``FAILED(...)`` depending on whether the average evaluation reward reaches
    the target.

    Args:
        args: A 4-tuple ``(env_id, want_reward, generations, layers)``:
            ``env_id`` is passed to ``gym.make``; ``want_reward`` is the
            minimum average reward counted as solved; ``generations`` is
            forwarded to ``Net.train``; ``layers`` is an iterable of hidden
            layer sizes (input and output sizes are derived from the
            environment's spaces).
    """
    env_id, want_reward, generations, layers = args
    print(f'START({env_id})')
    with gym.make(env_id) as env:
        # Input width comes from the observation shape, followed by the
        # caller-supplied hidden layers.
        layers = (*env.observation_space.shape, *layers)
        # Output width: a Box action space contributes its shape; any other
        # space is expected to expose `.n` (number of discrete actions).
        if isinstance(env.action_space, Box):
            layers = (*layers, *env.action_space.shape)
        else:
            layers = (*layers, env.action_space.n)

        net = Net.random(layers)
        net.train(env, generations, render=False, print_stats=False)

        n = 10  # times to run evaluation before taking average
        reward = sum(net.evaluate(env) for _ in range(n)) / n

        if reward < want_reward:
            # Label the achieved value as the reward (the original message
            # called it "want"), mirroring the SOLVED message format.
            print(f'FAILED({env_id}): reward {reward} < want {want_reward}')
        else:
            print(f'SOLVED({env_id}) reward: {reward} >= {want_reward}')
from main import Net


def heading(msg):
    """Print ``msg`` as a banner framed by 20 '=' characters on each side."""
    bar = '=' * 20
    print(' '.join((bar, msg, bar)))


# ---------------------------------------------------------------------------
# Handcrafted check of the Net.params() -> Net.from_params() round trip:
# overwrite the first and last entries of the flat parameter vector and make
# sure they land in the first bias and the last weight respectively.
heading('Test handcrafted')

net = Net.random((3, 2, 4, 6))
p = net.params()
p[0] = 200
p[-1] = 100

weight_shapes = [w.shape for w in net.weights]
print(weight_shapes)

net = Net.from_params(p, net.layers)

bias_shapes = [b.shape for b in net.biases]
print(f'Biases {bias_shapes}\n', net.biases)
weight_shapes = [w.shape for w in net.weights]
print(f'Weights {weight_shapes}\n', net.weights)

first_bias = net.biases[0][0]
assert first_bias == 200
got = net.weights[-1][-1][-1]
assert got == 100, f'want 100 got {got}'

# ---------------------------------------------------------------------------
# Global check: shift every parameter by 20 and rebuild the net.
# (This does not verify the structure of the rebuilt net.)
heading('Test add 20')

net = Net.random((3, 2, 4, 5))
p = net.params()
p = p + 20
net = Net.from_params(p, net.layers)