def startGame(self):
    self._stepN = 0
    self._action_point = 0
    self._won = False
    if self._view is not None:
        self._view.draw(self._stepN, self._action_point)
    for self._stepN in range(1, 1000):
        step(self)  # User defined
        if self._debug:
            if self._view is not None:
                self._view.draw(self._stepN, self._action_point)
            self.printMaze()
        if self._maze.isPlayerOnGoal():
            self._won = True
            break
    if self._debug:
        if self._won:
            print("Bravo! (", self._action_point, "actions )")
        else:
            print("Failure! You exceeded the allowed number of actions")
    return (self._won, self._action_point, self._stepN)
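# startGame drives a user-supplied step(self) callback once per turn. A
# hypothetical example of such a callback; movePlayer and the "NORTH"
# argument are made up for illustration, since the real maze API is not
# shown in this snippet.
def step(game):
    game._action_point += 1         # each move is assumed to cost one action
    game._maze.movePlayer("NORTH")  # hypothetical movement method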
def _twisted_stop(self):
    """Stop the Twisted reactor and wait until it is done."""
    if self._twisted_stopped:
        return
    self.stop()
    while not self._twisted_stopped:
        main.step()
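# A hedged sketch of how the _twisted_stopped flag above could be maintained:
# a reactor shutdown trigger flips it once Twisted has fully wound down.
# addSystemEventTrigger is real Twisted API; the surrounding methods and the
# assumption that main.step() pumps the host application's event loop are
# guesses about this codebase.
from twisted.internet import reactor

def _twisted_start(self):
    self._twisted_stopped = False
    # Fires after the reactor has completed its shutdown sequence.
    reactor.addSystemEventTrigger('after', 'shutdown', self._on_twisted_stop)

def _on_twisted_stop(self):
    self._twisted_stopped = True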
def evaluate_agent(agent, env, logger):
    global skip_frame_rate
    for i in range(0, 30):
        # note: env.reset() is called twice; only the second observation is used
        env.reset()
        obs = preprocess_observation(env.reset(), img_size)
        # initial_buffer = []
        # for j in range(skip_frame_rate):
        #     initial_buffer.append(obs)
        # np.maximum(obs, obs) is a no-op (max of a frame with itself); it
        # stands in for the commented-out np.array(initial_buffer) above
        state = np.maximum(obs, obs)
        # stack four copies of the first frame as the initial state
        current_state = [state, state, state, state]
        current_state = np.asarray([current_state])
        reward = 0
        # agent actions
        done = False
        while not done:
            action = agent.get_action(current_state)
            logger.add_agent_action(action)
            obs, r, done, info = step(env, action, agent)
            # next_state = np.array(next_state)
            # slide the 4-frame window: drop the oldest frame, append the newest
            next_state = np.asarray([[
                current_state[0][1], current_state[0][2],
                current_state[0][3], obs
            ]])
            current_state = next_state
            reward += r
        logger.add_reward(reward)
    logger.save_agent_action()
    logger.save_agent_action_avg_std()
def run_agent(agent, env):
    # Same rollout as evaluate_agent, but over 100 episodes and without logging.
    global skip_frame_rate
    for i in range(0, 100):
        env.reset()
        obs = preprocess_observation(env.reset(), img_size)
        state = np.maximum(obs, obs)
        current_state = [state, state, state, state]
        current_state = np.asarray([current_state])
        reward = 0
        done = False
        while not done:
            action = agent.get_action(current_state)
            obs, r, done, info = step(env, action, agent)
            next_state = np.asarray([[
                current_state[0][1], current_state[0][2],
                current_state[0][3], obs
            ]])
            current_state = next_state
            reward += r
        print(reward)
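# Both loops above depend on preprocess_observation and img_size; a minimal
# sketch of a common Atari-style preprocessing (grayscale + resize), assuming
# img_size is something like (84, 84). The project's real preprocessing may
# differ.
import cv2
import numpy as np

img_size = (84, 84)  # assumed value

def preprocess_observation(obs, img_size):
    gray = cv2.cvtColor(obs, cv2.COLOR_RGB2GRAY)  # drop colour channels
    return cv2.resize(gray, img_size, interpolation=cv2.INTER_AREA)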
def test_step_moons():
    input = """<x=-1, y=0, z=2>
<x=2, y=-10, z=-7>
<x=4, y=-8, z=8>
<x=3, y=5, z=-1>"""
    moons = parse_input(input)
    step(moons)
    assert moons[0].position == [2, -1, 1]
    assert moons[1].position == [3, -7, -4]
    assert moons[2].position == [1, -7, 5]
    assert moons[3].position == [2, 2, 0]
    assert moons[0].velocity == [3, -1, -1]
    assert moons[1].velocity == [1, 3, 3]
    assert moons[2].velocity == [-3, 1, -3]
    assert moons[3].velocity == [-1, -3, 1]
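# A minimal sketch of the parse_input/step pair this test assumes: apply
# pairwise gravity to the velocities, then move each moon by its velocity.
# The Moon class, its attribute names, and the regex are inferred from the
# assertions above, not taken from the project's actual implementation.
import re
from itertools import combinations

class Moon:
    def __init__(self, position):
        self.position = position
        self.velocity = [0, 0, 0]

def parse_input(text):
    return [Moon([int(n) for n in re.findall(r"-?\d+", line)])
            for line in text.splitlines()]

def step(moons):
    # Gravity: on each axis, each pair's velocities drift toward each other.
    for a, b in combinations(moons, 2):
        for axis in range(3):
            if a.position[axis] < b.position[axis]:
                a.velocity[axis] += 1
                b.velocity[axis] -= 1
            elif a.position[axis] > b.position[axis]:
                a.velocity[axis] -= 1
                b.velocity[axis] += 1
    # Then every moon moves by its velocity.
    for moon in moons:
        for axis in range(3):
            moon.position[axis] += moon.velocity[axis]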
def test_v1_part1(self):
    from main import step, parseInput
    fish = parseInput(sample1)
    for i in range(0, 80):
        fish = step(fish)
    self.assertEqual(len(fish), 5934)
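# A plausible sketch of the parseInput/step pair the test imports from main;
# sample1 is assumed to be the puzzle's example string "3,4,3,1,2". This is
# the naive one-entry-per-fish representation (fine for 80 steps).
def parseInput(text):
    return [int(n) for n in text.strip().split(",")]

def step(fish):
    new_fish = []
    for timer in fish:
        if timer == 0:
            new_fish.append(6)  # the parent's timer resets to 6...
            new_fish.append(8)  # ...and it spawns a newborn at 8
        else:
            new_fish.append(timer - 1)
    return new_fish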
def test_step(digit, permutation, answer):
    assert answer == main.step(digit, permutation)
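# test_step's (digit, permutation, answer) signature suggests it is driven by
# pytest parametrization; a hedged example of the wiring (the decorator is
# standard pytest API, but the concrete cases would come from the project):
import pytest
import main

@pytest.mark.parametrize("digit,permutation,answer", [
    # (digit, permutation, expected_answer) tuples go here
])
def test_step(digit, permutation, answer):
    assert answer == main.step(digit, permutation)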
while True:
    visited = visited_count[state] if state in visited_count else 0
    epsilon = n0 / (n0 + visited)
    # count the state visit
    if state in visited_count:
        visited_count[state] += 1
    else:
        visited_count[state] = 1
    # choose action
    action = e_greedy(q, state, epsilon)
    # execute the action, then add the reward to the running return of every
    # (state, action) pair visited so far in this episode
    reward, next_state = step(state, action)
    if (state, action) not in curr_g:
        curr_g[(state, action)] = 0
    for i in curr_g.keys():
        curr_g[i] += reward
    if next_state is None:
        break
    else:
        state = next_state
# update value function
for i in curr_g.keys():
    # count the state-action visit
    if i in av_count:
        av_count[i] += 1
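# A minimal sketch of the e_greedy helper the loop above relies on: explore
# with probability epsilon, otherwise exploit the current action-value
# estimates. The ACTIONS list and the (state, action) keying of q are
# assumptions, not shown in the original loop.
import random

ACTIONS = [0, 1]  # hypothetical action set

def e_greedy(q, state, epsilon):
    if random.random() < epsilon:
        return random.choice(ACTIONS)  # explore
    return max(ACTIONS, key=lambda a: q.get((state, a), 0.0))  # exploit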
    fig.savefig(lossfname)
    plt.close()


def plot_scatter(model, in_, out_, scatterfname):
    pred = model(in_)
    fig, ax = plt.subplots()
    ax.scatter(pred.data.numpy(), out_.data.numpy())
    fig.savefig(scatterfname)
    plt.close()


for N in [50, 500, 1000, 5000]:
    for dx in [1, 10, 100, 1000, 10000, 100000]:
        t0 = time.time()
        # lossfname = "./plots/loss-{}-{}.png".format(N, dx)
        # scatterfname = "./plots/scatter-{}-{}.png".format(N, dx)
        # print("running ", N, dx)
        start = time.time()
        x, y, w = main.init_data(N, dx, dy)
        m, o = main.instantiate_model(dx, dy)
        # losses = []
        for i in range(n_iterations):
            m, o, l = main.step(x, y, m, o)
            # losses.append(l)
        t1 = time.time()
        print("{} {} {}".format(N, dx, "%.2f" % (t1 - t0)))
        # plot_loss(losses, lossfname)
        # plot_scatter(m, x, y, scatterfname)
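# The timing loop calls main.step(x, y, m, o) once per iteration; a hedged
# guess at that interface (PyTorch is assumed from the .data.numpy() calls
# above): one full gradient step on a regression loss, returning the model,
# the optimizer, and the scalar loss.
import torch
import torch.nn.functional as F

def step(x, y, model, optimizer):
    optimizer.zero_grad()
    loss = F.mse_loss(model(x), y)  # regression objective (assumed)
    loss.backward()
    optimizer.step()
    return model, optimizer, loss.item()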