def run_me(parameter):
    """Roll out one episode, driving the car with controller weights `parameter`."""
    reward_sum = 0.
    s = env.reset()
    env.render('rgb_array')
    a = np.array([0.0, 0.0, 0.0])
    counter = 0
    for _ in range(100):
        # Encode the current frame into its 32-d latent z with the VAE.
        obs = state_to_1_batch_tensor(s)
        _, _, _, z = V(obs)
        # Feed (z, a) through the MDN-RNN to update the 256-d hidden state h.
        a = torch.from_numpy(a).float()
        a = torch.reshape(a, (1, 1, 3))
        z = torch.reshape(z, (1, 1, 32))
        _, _, _, (h, c) = M(z, a)
        z = z.detach().numpy().reshape((1, 32))
        h = h.detach().numpy().reshape((1, 256))
        # The controller maps (z, h) to the next action.
        a = get_a(z, h, parameter)
        s, reward, done, _ = env.step(a)
        reward_sum += reward
        counter += 1
        print(counter)
        if done:
            break
    return reward_sum
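
# Usage sketch (an assumption, not code from this repo): run_me scores one
# flattened controller parameter vector, so it plugs directly into a
# black-box optimizer such as CMA-ES via the `cma` package. PARAM_DIM and
# optimize_controller are hypothetical names; the dimension assumes a linear
# controller from the concatenated (z, h) of size 32 + 256 to the 3 actions,
# plus one bias per action.
def optimize_controller():
    import cma

    PARAM_DIM = (32 + 256) * 3 + 3  # weights + biases (assumed controller shape)
    es = cma.CMAEvolutionStrategy(PARAM_DIM * [0.0], 0.5)
    while not es.stop():
        solutions = es.ask()
        # CMA-ES minimizes, so negate the episode reward.
        es.tell(solutions, [-run_me(p) for p in solutions])
    return es.result.xbest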
def multiple_runs(v, on):
    """Collect MAX_RUNS random rollouts of (z, action) pairs and save one .npz file."""
    env = CarRacing()
    z_set = []
    action_set = []
    for run in range(MAX_RUNS):
        zs = []
        actions = []
        state = env.reset()
        env.render()  # must have! CarRacing needs a render call to produce valid frames
        for game_time in range(MAX_GAME_TIME):
            action = generate_action()
            # Encode the frame to its 32-d latent z and record the (z, action) pair.
            obs = state_to_1_batch_tensor(state)
            _, _, _, z = v(obs)
            z = z.detach().numpy().reshape(32)
            zs.append(z)
            actions.append(action)
            state, r, done, _ = env.step(action)
            print('RUN:{},GT:{},DATA:{}'.format(run, game_time, len(actions)))
        # Store as float16 to halve the size of the saved rollouts.
        zs = np.array(zs, dtype=np.float16)
        actions = np.array(actions, dtype=np.float16)
        z_set.append(zs)
        action_set.append(actions)
    z_set = np.array(z_set)
    action_set = np.array(action_set)
    save_name = name_this + '_{}.npz'.format(on)
    np.savez_compressed(dst + '/' + save_name, action=action_set, z=z_set)
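
# Loading sketch: how the compressed rollouts written above can be read back,
# e.g. for MDN-RNN training. The keys 'action' and 'z' match the
# np.savez_compressed call in multiple_runs; load_rollouts is a hypothetical
# helper name, and the shapes assume the 3-d CarRacing action space.
def load_rollouts(path):
    data = np.load(path)
    # Shapes: (MAX_RUNS, MAX_GAME_TIME, 32) and (MAX_RUNS, MAX_GAME_TIME, 3),
    # stored as float16 above, widened back to float32 for training.
    return data['z'].astype(np.float32), data['action'].astype(np.float32)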
from pyglet.window import key

a = np.array([0.0, 0.0, 0.0])


def key_press(k, mod):
    # Reconstructed from the standard gym CarRacing demo this driver follows:
    # press sets steer/gas/brake, release (below) clears them.
    global restart
    if k == 0xff0d:  # Enter restarts the episode
        restart = True
    if k == key.LEFT:
        a[0] = -1.0
    if k == key.RIGHT:
        a[0] = +1.0
    if k == key.UP:
        a[1] = +1.0
    if k == key.DOWN:
        a[2] = +0.8


def key_release(k, mod):
    if k == key.LEFT and a[0] == -1.0:
        a[0] = 0
    if k == key.RIGHT and a[0] == +1.0:
        a[0] = 0
    if k == key.UP:
        a[1] = 0
    if k == key.DOWN:
        a[2] = 0


env = CarRacing()
env.render()
env.viewer.window.on_key_press = key_press
env.viewer.window.on_key_release = key_release
while True:
    env.reset()
    total_reward = 0.0
    steps = 0
    restart = False
    env.render()
    while True:
        s, r, done, info = env.step(a)
        total_reward += r
        if steps % 200 == 0 or done:
            print("action " + str(["{:+0.2f}".format(x) for x in a]))
            print("step {} total_reward {:+0.2f}".format(steps, total_reward))
        steps += 1
        env.render()
        # Run the frame through the VAE and display the reconstruction,
        # to eyeball how much detail survives the 32-d latent code.
        obs = state_to_1_batch_tensor(s)
        rec, _, _, _ = V(obs)
        img = rec.detach().numpy()
        img = one_batch_tensor_to_img(img)
        show_state(img)
        if done or restart:
            break
env.monitor.close()
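
# Helper sketch (assumptions, not this repo's definitions): show_state and
# one_batch_tensor_to_img are called above but defined elsewhere in the repo.
# A minimal matplotlib version could look like this, assuming the VAE emits
# a (1, 3, H, W) array with values in [0, 1].
import matplotlib.pyplot as plt


def one_batch_tensor_to_img(batch):
    # Drop the batch dim and move channels last: (1, 3, H, W) -> (H, W, 3).
    return np.transpose(batch[0], (1, 2, 0))


def show_state(img):
    plt.clf()
    plt.imshow(img)
    plt.axis('off')
    plt.pause(0.001)  # non-blocking refresh so the rollout keeps stepping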