Example #1
def main(name, size):
    model_location = ("logs/"+name+"NoFrameskip-v4_"+str(size)+"/model.pkl")
    env_name = name+"NoFrameskip-v4"
    #env = gym.make(name+"NoFrameskip-v4")

    env = make_atari(env_name)
    env = deepq.wrap_atari_dqn(env)
    act = deepq.load(model_location)
    episodeRewards = []

    for i in range(100):
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            # env.render()
            action = act(obs[None])[0]
            obs, rew, done, _ = env.step(action)

            episode_rew += rew
        print(episode_rew)
        episodeRewards.append(episode_rew)

    output = name+","+str(size)
    for r in episodeRewards:
        output+=","+str(r)
    output+="\n"
    with open("validationStats.csv", 'a') as myfile:
        myfile.write(output)
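
Note: deepq.load is the pre-2018 Baselines API used throughout these examples. Later Baselines releases removed it; a rough equivalent, a sketch assuming the refactored deepq.learn signature with network and load_path arguments, is:

import gym
from baselines import deepq

env = gym.make("CartPole-v0")
# total_timesteps=0 skips training and just restores the saved q-function.
act = deepq.learn(env, network='mlp', total_timesteps=0,
                  load_path="cartpole_model.pkl")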
Example #2
def main():
    # makedirs creates intermediate directories, and exist_ok avoids failing
    # when only part of the tree already exists.
    os.makedirs("final_models/thor/AB", exist_ok=True)
    os.makedirs("final_models/thor/separate_actions", exist_ok=True)

    env = gym.make(ENV)
    act = deepq.load(MODEL)
    action_file = open(BC_FILE, "w")
    steps = 0

    while steps < STEPS:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done and steps < STEPS:
            state_1 = cv2.resize(obs, (128, 128))

        # Mix the trained policy (75%) with random actions (25%) so the logged
        # dataset also covers states the policy alone would not visit.
        if np.random.uniform(0, 1) < .75:
                action = act(obs[None])[0]
            else:
                action = env.action_space.sample()

            obs, rew, done, _ = env.step(action)
            state_2 = cv2.resize(obs, (128, 128))

            cv2.imwrite(FILE + str(steps) + ".png",
                        np.hstack([state_1, state_2]))
            action_file.write("[" + str(action) + "]\n")
            episode_rew += rew
            steps += 1

        print(steps)
        print("Episode reward", episode_rew)

    action_file.close()
Example #3
def main(path="./models/deepq/mario_reward_1736.7.pkl"):
    step_mul = 16
    steps = 200

    FLAGS = flags.FLAGS
    flags.DEFINE_string("env", "SuperMarioBros-v0", "RL environment to train.")
    flags.DEFINE_string("algorithm", "deepq", "RL algorithm to use.")

    FLAGS(sys.argv)
    # 1. Create gym environment
    env = gym_super_mario_bros.make('SuperMarioBros-v0')
    env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)

    act = deepq.load(path)
    nstack = 4
    nh, nw, nc = env.observation_space.shape
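    # Frame-history buffer for stacked observations; unused here because the
    # update_history calls below are commented out.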
    history = np.zeros((1, nh, nw, nc * nstack), dtype=np.uint8)

    obs, done = env.reset(), False
    # history = update_history(history, obs)
    episode_rew = 0
    while not done:
        env.render()
        action = act([obs])[0]
        obs, rew, done, _ = env.step(action)
        # history = update_history(history, obs)
        episode_rew += rew
        print("action : %s reward : %s" % (action, rew))

    print("Episode reward", episode_rew)
Example #4
def main():
    rospy.init_node('movingcube_onedisk_walk_gym_predict',
                    anonymous=True,
                    log_level=rospy.WARN)
    env = gym.make("MyMovingCubeOneDiskWalkEnv-v0")

    # Get Path to saved model
    rospack = rospkg.RosPack()
    pkg_path = rospack.get_path('my_moving_cube_pkg')
    models_dir_path = os.path.join(pkg_path, "models_saved")
    assert os.path.exists(
        models_dir_path
    ), " NO models saved exists in path =" + str(models_dir_path)
    out_model_file_path = os.path.join(models_dir_path, "movingcube_model.pkl")

    act = deepq.load(out_model_file_path)

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            print(str(obs))
            # Batch the observation and step the env so the loop can terminate.
            action = act(obs[None])[0]
            print(str(action))
            obs, rew, done, _ = env.step(action)
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #5
def main():
    env = gym.make("PokerHistory-v0")

    # td = "/tmp/tmpugaszyfi"
    # model_file = os.path.join(td, "model")
    # U.load_state(model_file)
    # act.save("poker_model_test.pkl")
    act = deepq.load("poker_model.pkl")

    rL = []
    for _ in range(1000):
        obs = env.reset()
        done = False

        episode_rew = 0
        action = None
        while not done:  # may run a variable number of hands, since whether the randomly drawn state is terminal is itself random
            env.print_state()
            action = act(obs[None])[0]
            print("Action choice: ", str(action), ":", env.action_dict[action])
            obs, rew, done, _ = env.step(action)
            # import pdb; pdb.set_trace()
            episode_rew += rew  # if the hand states are random this has no meaning.
            rL.append(rew)
        print("Episode reward", episode_rew)
    print("Mean and stdev of reward is: {}, {}".format(np.mean(rL),
                                                       np.std(rL)))
Example #6
def run_dqn(output_dir, train):
    # Create the environment.
    env = gym.make('CartPole-v0')
    env = pw.CartPoleParetoWrapper(env)
    saved_file = os.path.join(output_dir, 'cartpole_dqn.pkl')
    if train:
        model = deepq.models.mlp([64])
        act = deepq.learn(
            env,
            q_func=model,
            lr=1e-3,  # Recommended: 1e-3
            max_timesteps=100000,  # Recommended: 100000
            buffer_size=50000,  # Recommended: 50000
            exploration_fraction=0.1,  # Recommended: 0.1
            exploration_final_eps=0.02,  # Recommended: 0.02
            print_freq=10,  # Recommended: 10
            callback=dqn_callback)
        # Save file.
        print('[dqn] Saving model to', saved_file)
        act.save(saved_file)
    else:
        act = deepq.load(saved_file)

    # Replay.
    obs, done = env.reset(), False
    episode_rew = 0
    while not done:
        env.render()
        obs, rew, done, _ = env.step(act(obs[None])[0])
        episode_rew += rew
    print("[dqn] Episode reward", episode_rew)
Example #7
def main():
    env = gym.make("CartPole-v0")
    act = deepq.load("cartpole_model.pkl")

    episode=0
    chain_dump=[]
    trans=[]
    while episode<=10:    # True:
        obs, done = env.reset(), False
        episode_rew = 0
        trans=[]
        while not done:
            env.render()
            r = random.uniform(0, 1)
            if r <= 0.55:
                action = act(obs[None])[0]
            else:
                action = random.randint(0, 1)
            new_obs, rew, done, _ = env.step(action)

            trans.append([obs, action, rew, new_obs])
            obs = new_obs
            episode_rew += rew
        episode += 1
        print("Episode reward", episode_rew)
        chain_dump.append(np.vstack(trans))

    with open("policy_transitions.seq", "wb") as filehandler:
        pickle.dump(chain_dump, filehandler, protocol=1)
    print('policy sequences saved:', len(chain_dump))
    print('done')
Example #8
    def main(self):
        env = gym.make("RubiksCube-v0")
        env.setScramble(self.m, self.m)
        act = deepq.load(self.path)
        total_reward = []

        for i in range(self.num_episodes):
            obs, done = env.reset(), False
            episode_rew = 0

            while not done:
                # uncomment this if you want it to render the cube
                # env.render()

                # update_eps=0 sets the exploration rate to zero, so the
                # action is the greedy (argmax) choice.
                obs, rew, done, _ = env.step(act(obs[None], update_eps=0)[0])
                episode_rew += rew
            total_reward.append(episode_rew)
            # uncomment this if you want it to render the last state
            # env.render()

            print("Episode reward: {}".format(episode_rew))
            print("scramble, action_history: {}".format(env.getlog()))
            print("-----------------------")

        print("total: {}, Solved: {}, Unsolved: {}".format(
            len(total_reward), total_reward.count(1), total_reward.count(0)))
Example #9
def test_bandits(n_arm, cost):
    env = MetaBanditEnv(n_arm, 25, cost)
    cost_i = np.abs(np.logspace(-4, -1, 7) - cost).argmin()
    filename = "data/bandit_dqn/weights/bandit_" + str(n_arm) + "_" + str(
        cost) + "_model.pkl"
    act = deepq.load(filename)
    tot_rew = 0
    dfs = []
    for _ in range(2000):
        obs, done = env.reset(), False
        temp_rew = 0
        obs_count = 0
        while not done:
            obs, rew, done, _ = env.step(act(obs[None])[0])
            temp_rew += rew
            obs_count += 1
        df = {
            'util': temp_rew,
            'observations': obs_count - 1,
            'agent': 'dqn',
            'n_arm': n_arm,
            'max_obs': 25,
            'cost': cost
        }
        tot_rew += temp_rew
        dfs.append(df)
    print(
        str(n_arm) + " arm, cost: " + str(cost) + ", reward: " +
        str(tot_rew / 2000))
    data = pd.DataFrame(dfs)
    print(data.util.mean())
    store = pd.HDFStore('data/bandit_dqn/results/dqn_results_' + str(n_arm) +
                        "_" + str(cost_i) + '.h5')
    store['data'] = data
    store.close()
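
A hypothetical driver matching the cost grid this function snaps to (np.logspace(-4, -1, 7) is taken from the cost_i computation above; n_arm=2 is an assumption):

for c in np.logspace(-4, -1, 7):
    test_bandits(2, c)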
Example #10
def main():
    env = gym.make("simplePendulum-v1")
    funame = args.filename
    act = deepq.load(funame)
    otim = os.stat(funame).st_mtime

    while True:
        mtim = os.stat(funame).st_mtime
        if mtim != otim:
            # Reload the controller when the model file changes on disk.
            act = deepq.load(funame)
            otim = mtim
            print("Loaded new controller...")
        obs, done = env.reset(), False
        episode_rew = 0
        nsteps = 0
        while nsteps < 500:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
            nsteps += 1
        print("Episode reward = ", round(episode_rew, 2))
        #print("Angle = {0:2.2f} deg, Vel = {1:2.2f} deg/s, Torque = {2:2.2f} N/m".format(180/np.pi*(obs[0]), 180/np.pi*obs[1], obs[2]))
        print("Angle = {0:2.2f} deg, Vel = {1:2.2f} deg/s".format(
            180 / np.pi * np.arccos(obs[0]), 180 / np.pi * obs[2]))
        print("  ")
Example #11
def test(env, load_path, num_episodes=1000):
    act = deepq.load(load_path + ".pkl")
    success_count = 0.0
    test_render_file = open(load_path + ".txt", "w")
    for i in range(num_episodes):
        obs, done = env.reset(), False
        episode_rew = 0.0
        while not done:
            render_string = env.render(mode='ansi') + "\n"
            test_render_file.write(render_string)
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        render_string = env.render(mode='ansi') + "\n"
        test_render_file.write(render_string)
        if (episode_rew > -env.n):
            print("Episode successful with reward ", episode_rew)
            test_render_file.write("Episode successful with reward " +
                                   str(episode_rew) + "\n")
            success_count += 1.0
        else:
            print("Episode unsuccessful with reward ", episode_rew)
            test_render_file.write("Episode unsuccessful with reward " +
                                   str(episode_rew) + "\n")
    success_rate = success_count / num_episodes
    print("Success Rate: ", success_rate)
    test_render_file.write("Success Rate: " + str(success_rate) + "\n")
    test_render_file.close()
    return success_rate
Example #12
    def __init__(self, force_mag, reg):
        self.reg = reg
        self.iters = 100
        self.T = 200
        self.trials = 1

        self.alpha = 0.1
        self.lambda_prior = list(np.ones(10))

        self.eta = 1.0
        self.inner_eta = self.eta
        self.params = {}
        self.params['T'] = self.T
        self.params['iters'] = self.iters
        self.act = deepq.load("cartpole_model_alt2.pkl")

        if self.reg:
            self.base_dir = 'data/reg_cartpole_force_mag' + str(force_mag)
        else:
            self.base_dir = 'data/cartpole_force_mag' + str(force_mag)
        self.dir = os.path.join(self.base_dir, 'dagger')
        self.prefix = 'dagger'
        self.path = os.path.join(self.dir, self.prefix)
        self.force_mag = force_mag

        self.t = .01
Example #13
def test(env, load_path, num_episodes=10000):
    act = deepq.load(load_path + ".pkl")
    success_count = 0.0
    test_render_file = open(load_path + ".txt", "w")
    for i in range(num_episodes):
        obs, done = env.reset(seed=i), False
        env_string = "Initial State: " + str(
            (env.initial_state).T) + "\nGoal State: " + str(
                (env.goal).T) + "\nMax_reward: " + str(env.reward_max) + "\n"
        print(env_string)
        test_render_file.write(env_string)
        episode_rew = 0.0
        while not done:
            render_string = env.render(mode='ansi') + "\n"
            test_render_file.write(render_string)
            obs, rew, done, _ = env.step(
                act(np.concatenate([obs, env.goal])[None])[0])
            episode_rew += rew
        render_string = env.render(mode='ansi') + "\n"
        test_render_file.write(render_string)
        if (episode_rew > -env.n):
            print("Episode successful with reward ", episode_rew)
            test_render_file.write("Episode successful with reward " +
                                   str(episode_rew) + "\n")
            success_count += 1.0
        else:
            print("Episode unsuccessful with reward ", episode_rew)
            test_render_file.write("Episode unsuccessful with reward " +
                                   str(episode_rew) + "\n")
    success_rate = success_count / num_episodes
    print("Success Rate: ", success_rate)
    test_render_file.write("Success Rate: " + str(success_rate) + "\n")
    test_render_file.close()
    return success_rate
Example #14
def enjoy(board_size):
    """enjoy trained gomoku AI play board whose size is board_size x board_size.

    Parameters
    ----------
    board_size: int
        Size of the board in one dimension; for example,
        board_size = 9 --> the board has size 9x9.

    Returns
    -------
    None
    """
    env = gym.make('Gomoku{}x{}-arena-v0'.format(board_size, board_size),
                   __val_opponent_policy)
    act = deepq.load("kaithy_cnn_to_mlp_{}_model.pkl".format(board_size))
    # Enabling layer_norm here is important for parameter space noise!

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
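            # stochastic=False picks the greedy (argmax) action instead of
            # sampling from the exploration policy.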
            obs, rew, done, _ = env.step(act(obs[None], stochastic=False)[0])
            episode_rew += rew
            env.render()
        print('Episode reward', episode_rew)
        input('Hit enter to play next match')
        print('Swap color')
        env.swap_role()
Example #15
File: RL.py  Project: RikonYu/898Project
def load(env):
    act = deepq.load(path=fname)
    try:
        with open('trail' + fname, 'rb') as f:
            env.repr.trail = pickle.load(f)
    except (OSError, pickle.UnpicklingError):
        # No saved trail for this model; proceed without it.
        pass
    return act
Example #16
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env',
                        help='environment ID',
                        default='SuperMarioBros-Nes')
    parser.add_argument('--gamestate',
                        help='game state to load',
                        default='Level1-1')
    parser.add_argument('--model',
                        help='model pickle file from ActWrapper.save',
                        default='model.pkl')
    args = parser.parse_args()

    env = retro_wrappers.make_retro(game=args.env,
                                    state=args.gamestate,
                                    max_episode_steps=None)
    env = retro_wrappers.wrap_deepmind_retro(env)
    act = deepq.load(args.model)

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            action = act(obs[None])[0]
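            # Retro environments expose a MultiBinary action space, so the
            # discrete action index is converted to a one-hot button array.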
            env_action = np.zeros(env.action_space.n)
            env_action[action] = 1
            obs, rew, done, _ = env.step(env_action)
            episode_rew += rew
        print('Episode reward', episode_rew)
Example #17
def main():
    #env = gym_super_mario_bros.make('SuperMarioBros-v0')
    env = gym_super_mario_bros.make('SuperMarioBros-1-1-v1')
    env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
    timestart = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d-%H:%M:%S')
    # env = VideoRecorderWrapper(env, PROJ_DIR + "/../video", str(timestart), 50)

    env = VideoRecorderWrapper(env, PROJ_DIR + "/../video/final", str(timestart), 1)
    env = DownsampleEnv(env, (84, 84))
    env = PenalizeDeathEnv(env, penalty=-25)
    env = FrameStackEnv(env, 4)
    # good
    #act = deepq.load(PROJ_DIR+"/../models/mario_model_2018-08-12-13:00:58.pkl")

    # better
    act = deepq.load(PROJ_DIR + "/../models/mario_model_2018-08-12-19:21:50.pkl")
    
    episode = 0
    while True:
        obs, done = env.reset(), False
        stepnr = 0
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])

            if stepnr % 20 == 0:
                plot_obs(obs)

            episode_rew += rew
            stepnr += 1
        print("Episode reward", episode_rew, episode)
        episode = episode+1
Example #18
def main():
    env = gym.make("cms-v0")
    act = deepq.load("cms_model.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #19
def main():
    env = gym_sliding_puzzle.make("SlidingPuzzle-v0")
    act = deepq.load("sliding_puzzle.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #20
def main():
    env = envs.create_env(None)
    act = deepq.load("{}_model.pkl".format(envs.VSTR))

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act([obs])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #21
def main(policy_pkl_file):
    env = gym.make('Image-Based-Navigation-2d-Map0-Goal0-v0')
    act = deepq.load(policy_pkl_file)

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #22
def main():
    env = gym.make(ENV_NAME)
    act = deepq.load(SAVE_PATH)

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #23
def main():
    env = gym.make("LunarLander-v2")
    act = deepq.load("cartpole_model.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #24
def environment():
    env = gym.make("CartPole-v0")
    act = deepq.load("CartPole_model.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #25
def main():
    env = gym.make("MountainCar-v0")
    act = deepq.load("mountaincar_model.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #26
def main():
    env = gym.make("MountainCar-v0")
    act = deepq.load("mountaincar_model.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #27
def main():
    env = gym.make("Token-v1")
    act = deepq.load("runtime_model.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
        env.close()
Example #28
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = deepq.wrap_atari_dqn(env)
    act = deepq.load("pong_model.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #29
def test0():
    env = gym.make("OptimizeGauss-v0")
    act = deepq.load("model/gauss.pkl")
    for _ in range(1000):
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
            # print(episode_rew)

    print(env.gauss(obs))
Example #30
def test():
    env = gym.make(args.env)
    act = deepq.load(os.path.join(args.log_dir, args.log_fname))
    if args.record:
        env = Monitor(env, directory=args.log_dir)
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            if not args.record:
                env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #31
def main():
    env = gym.make("GazeboModularScara3DOF-v2")
    act = deepq.load("scara_model.pkl")

    #Discrete actions
    goal_average_steps = 2
    max_number_of_steps = 20
    last_time_steps = np.ndarray(0)
    n_bins = 10

    # Number of states is huge so in order to simplify the situation
    # typically, we discretize the space to: n_bins ** number_of_features
    joint1_bins = pandas.cut([-np.pi / 2, np.pi / 2],
                             bins=n_bins,
                             retbins=True)[1][1:-1]
    joint2_bins = pandas.cut([-np.pi / 2, np.pi / 2],
                             bins=n_bins,
                             retbins=True)[1][1:-1]
    joint3_bins = pandas.cut([-np.pi / 2, np.pi / 2],
                             bins=n_bins,
                             retbins=True)[1][1:-1]

    difference_bins = abs(joint1_bins[0] - joint1_bins[1])
    action_bins = [(difference_bins, 0.0, 0.0), (-difference_bins, 0.0, 0.0),
                   (0.0, difference_bins, 0.0), (0.0, -difference_bins, 0.0),
                   (0.0, 0.0, difference_bins), (0.0, 0.0, -difference_bins),
                   (0.0, 0.0, 0.0)]
    discrete_action_space = spaces.Discrete(7)

    while True:
        obs, done = env.reset(), False
        print("obs", obs)

        episode_rew = 0
        while not done:
            env.render()
            #obs, rew, done, _ = env.step(act(obs[None])[0])
            action = act(obs[None])[0]
            print("action", action)
            print("action_bins[action]", action_bins[action])
            obs, rew, done, _ = step(env, action_bins[action], obs[:3])
            print("reward", rew)
            print("observation", obs[:3])
            episode_rew += rew
            print("accumulated_reward", episode_rew)
            print("done", done)
        print("Episode reward", episode_rew)
Example #32
def main():
    
    env = KukaGymEnv(renders=True)
    act = deepq.load("kuka_model.pkl")
    print(act)
    while True:
        obs, done = env.reset(), False
        print("===================================")        
        print("obs")
        print(obs)
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #33
def main():
    env = gym.make("Wavefollower-v0")
    act = deepq.load("wavefollower_model.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            #env.render()
            # Compute the action once so the printed action matches the one
            # actually taken.
            action = act(obs[None])[0]
            obs, rew, done, _ = env.step(action)
            #plt.scatter(obs[0],obs[1], color='b')
            plt.scatter(obs[0], obs[2], color='r')
            plt.pause(0.00001)
            episode_rew += rew
            #print("Observation = {}".format(obs))
            print("Action = {}".format(action))
        print("Episode reward", episode_rew)
Example #34
def main():
    env = gym.make('CartPoleBulletEnv-v0')
    act = deepq.load("cartpole_model.pkl")

    while True:
        obs, done = env.reset(), False
        print("obs")
        print(obs)
        print("type(obs)")
        print(type(obs))
        episode_rew = 0
        while not done:
            env.render()
            o = obs[None]
            aa = act(o)
            a = aa[0]
            obs, rew, done, _ = env.step(a)
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #35
import gym
from baselines import deepq

env = gym.make("MountainCar-v0")
act = deepq.load("mountaincar_model.pkl")

while True:
    obs, done = env.reset(), False
    episode_rew = 0
    while not done:
        env.render()
        obs, rew, done, _ = env.step(act(obs[None])[0])
        episode_rew += rew
    print("Episode reward", episode_rew)
Example #36
import gym
from baselines import deepq

env = gym.make("CartPole-v0")
act = deepq.load("models/cartpole_model_DQN_[128, 128].pkl")

while True:
    obs, done = env.reset(), False
    episode_rew = 0
    while not done:
        env.render()
        obs, rew, done, _ = env.step(act(obs[None])[0])
        episode_rew += rew
    print("Episode reward", episode_rew)