Пример #1
0
    def getRandom3DInstance(self, with_velocity=True):
        """Sample one random transition from a fresh ThreeDMountainCarEnv.

        A new environment is created and reset, forced into a uniformly
        random state, and then stepped once with a uniformly random action.

        Args:
            with_velocity: When true, each velocity component is drawn
                uniformly from ``[-max_speed, max_speed]`` of its own axis;
                otherwise both components are set to 0.

        Returns:
            A list ``[state, action, next_state, reward, done]`` where
            ``state`` is ``[pos_x, pos_y, vel_x, vel_y]`` and the last three
            entries come from ``env.step(action)`` (its ``info`` value is
            discarded).
        """
        env = ThreeDMountainCarEnv()
        env.reset()

        # Each coordinate is bounded by the limits of its own axis. The
        # previous version used max_position_y for x and min_position_x for y.
        # NOTE(review): assumes the env exposes max_position_x and
        # min_position_y alongside the attributes already used - confirm.
        random_pos_x = np.random.uniform(low=env.min_position_x,
                                         high=env.max_position_x)
        random_pos_y = np.random.uniform(low=env.min_position_y,
                                         high=env.max_position_y)

        # TODO: calculate the maximum speed at this position
        if with_velocity:
            random_velocity_x = np.random.uniform(low=-env.max_speed_x,
                                                  high=env.max_speed_x)
            random_velocity_y = np.random.uniform(low=-env.max_speed_y,
                                                  high=env.max_speed_y)
        else:
            random_velocity_x = 0
            random_velocity_y = 0

        state = [
            random_pos_x, random_pos_y, random_velocity_x, random_velocity_y
        ]
        env.set_state(state)

        # randint's upper bound is exclusive, so the action is in 0..4.
        action = np.random.randint(low=0, high=5)
        next_state, reward, done, info = env.step(action)
        return [state, action, next_state, reward, done]
Пример #2
0
def main():
    """Replay the saved policy in ThreeDMountainCarEnv, episode after episode.

    The policy is loaded from ``mountaincar_model_working.pkl`` via
    ``deepq.load``. Every step renders the orthographic view, then prints the
    action the policy picks for the *new* observation followed by that
    observation. The accumulated reward is printed when an episode ends.
    The outer loop has no exit condition, so this function never returns
    normally.
    """
    # An earlier variant used gym.make("MountainCar-v0") instead.
    env = ThreeDMountainCarEnv()
    act = deepq.load("mountaincar_model_working.pkl")

    while True:
        obs = env.reset()
        episode_rew = 0
        while True:
            # env.render() was the alternative view used previously.
            env.render_orthographic()
            # obs[None] presumably adds a leading batch axis for the policy;
            # assumes obs is a NumPy array - confirm.
            chosen_action = act(obs[None])[0]
            obs, rew, done, _ = env.step(chosen_action)
            # The policy is queried again here, on the post-step observation.
            print(act(obs[None])[0])
            print(obs)
            episode_rew += rew
            if done:
                break
        print("Episode reward", episode_rew)
Пример #3
0
import gym
import numpy as np
from matplotlib import pyplot as plt
import itertools
from lib.env.threedmountain_car import ThreeDMountainCarEnv

# Smoke-run of ThreeDMountainCarEnv: apply the fixed action 0 on every step
# and draw the orthographic view until the environment reports `done`.
env = ThreeDMountainCarEnv()
state = env.reset()

for t in itertools.count():
    # A random policy would use env.action_space.sample() instead of 0.
    next_state, reward, done, info = env.step(0)

    # Alternative views noted by the original author: env.render() (tagged
    # "yellow") and env.render_y() (tagged "cyan").
    env.render_orthographic()

    if not done:
        state = next_state
    else:
        break

    # An earlier variant stopped after t == 100 and called env.close_gui().