Example #1
def run_me(parameter):
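    # Roll out up to 100 steps with the learned controller and return the
    # accumulated reward. Relies on module-level globals: env (CarRacing),
    # V (the VAE), M (the MDN-RNN), get_a (the controller) and
    # state_to_1_batch_tensor (frame preprocessor, sketched below).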
    reward_sum = 0.
    s = env.reset()
    env.render('rgb_array')
    a = np.array([0.0, 0.0, 0.0])
    counter = 0
    for _ in range(100):
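        # Encode the frame to a 32-d latent z with the VAE, then feed z and
        # the previous action through the MDN-RNN to update its hidden state.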
        obs = state_to_1_batch_tensor(s)
        _, _, _, z = V(obs)
        a = torch.from_numpy(a).float()
        a = torch.reshape(a, (1, 1, 3))
        z = torch.reshape(z, (1, 1, 32))
        _, _, _, (h, c) = M(z, a)
        z = z.detach().numpy()
        h = h.detach().numpy()
        z = z.reshape((1, 32))
        h = h.reshape((1, 256))
        # Controller: choose the next action from the latent z and hidden state h.
        a = get_a(z, h, parameter)
        s, reward, done, _ = env.step(a)
        reward_sum += reward
        counter += 1
        if done:
            break
    print(counter)
    return reward_sum
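run_me (and multiple_runs below) call a state_to_1_batch_tensor helper that is not part of this excerpt. A minimal sketch, assuming the VAE takes 64x64 RGB frames scaled to [0, 1] in NCHW layout; the OpenCV resize is likewise an assumption:

import numpy as np
import torch
import cv2


def state_to_1_batch_tensor(state):
    # state: HxWx3 uint8 frame from CarRacing (assumed).
    frame = cv2.resize(state, (64, 64), interpolation=cv2.INTER_AREA)
    frame = frame.astype(np.float32) / 255.0     # scale pixels to [0, 1]
    frame = np.transpose(frame, (2, 0, 1))       # HWC -> CHW
    return torch.from_numpy(frame).unsqueeze(0)  # batch of 1: (1, 3, 64, 64)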
def multiple_runs(v, on):
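    # Collect MAX_RUNS rollouts of random actions, encoding every frame to a
    # 32-d latent z with the pre-trained VAE `v`, and save everything as one
    # compressed .npz archive (keys 'action' and 'z') tagged with `on`.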
    env = CarRacing()
    z_set = []
    action_set = []

    for run in range(MAX_RUNS):
        zs = []
        actions = []
        state = env.reset()
        env.render() # must have!
        for game_time in range(MAX_GAME_TIME):
            # env.render()
            action = generate_action()
            obs = state_to_1_batch_tensor(state)
            _, _, _, z = v(obs)
            z = z.detach().numpy()
            z = z.reshape(32)
            zs.append(z)
            actions.append(action)
            state, r, done, _ = env.step(action)

            print('RUN:{},GT:{},DATA:{}'.format(run, game_time, len(actions)))
        # Cast to float16 to keep the saved archive small.
        zs = np.array(zs, dtype=np.float16)
        actions = np.array(actions, dtype=np.float16)

        z_set.append(zs)
        action_set.append(actions)
    z_set = np.array(z_set)
    action_set = np.array(action_set)
    save_name = name_this + '_{}.npz'.format(on)
    np.savez_compressed(dst + '/' + save_name, action=action_set, z=z_set)
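Reading one of these archives back is straightforward with np.load; the file name below just follows the pattern used above with on = 0, and the action width of 3 assumes standard CarRacing actions:

data = np.load(dst + '/' + name_this + '_0.npz')
z_set = data['z']            # (MAX_RUNS, MAX_GAME_TIME, 32) float16 latents
action_set = data['action']  # (MAX_RUNS, MAX_GAME_TIME, 3) float16 actions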
Example #3
    a = np.array([0.0, 0.0, 0.0])

    def key_press(k, mod):
        global restart
        if k == 0xff0d: restart = True  # Enter restarts the episode
        if k == key.LEFT: a[0] = -1.0
        if k == key.RIGHT: a[0] = +1.0
        if k == key.UP: a[1] = +1.0
        if k == key.DOWN: a[2] = +0.8

    def key_release(k, mod):
        if k == key.LEFT and a[0] == -1.0: a[0] = 0
        if k == key.RIGHT and a[0] == +1.0: a[0] = 0
        if k == key.UP: a[1] = 0
        if k == key.DOWN: a[2] = 0

    env = CarRacing()
    env.render()
    env.viewer.window.on_key_press = key_press
    env.viewer.window.on_key_release = key_release
    while True:
        env.reset()
        total_reward = 0.0
        steps = 0
        restart = False
        env.render()
        while True:
            s, r, done, info = env.step(a)
            total_reward += r
            steps += 1
            env.render()
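            # Run the current frame through the VAE and display its reconstruction.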
            obs = state_to_1_batch_tensor(s)
            rec, _, _, _ = V(obs)
            img = rec.detach().numpy()
            img = one_batch_tensor_to_img(img)
            show_state(img)
            if done or restart: break
    env.close()
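show_state and one_batch_tensor_to_img (presumably the inverse of the frame preprocessor above) are also not defined in this excerpt. A minimal sketch of show_state using matplotlib's non-blocking drawing; the details are assumptions:

import matplotlib.pyplot as plt


def show_state(img):
    plt.clf()          # drop the previous frame
    plt.imshow(img)
    plt.axis('off')
    plt.pause(0.001)   # draw without blocking the control loop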