def main(name, size):
    """Run 100 evaluation episodes of a saved Atari DQN model and append a
    CSV row of per-episode rewards to validationStats.csv.

    Parameters
    ----------
    name : str
        Atari game name, e.g. "Pong" (expanded to "<name>NoFrameskip-v4").
    size : int
        Model-size tag used in the saved model's directory name.
    """
    model_location = "logs/" + name + "NoFrameskip-v4_" + str(size) + "/model.pkl"
    env_name = name + "NoFrameskip-v4"
    #env = gym.make(name+"NoFrameskip-v4")
    env = make_atari(env_name)
    env = deepq.wrap_atari_dqn(env)
    act = deepq.load(model_location)

    episode_rewards = []
    for _ in range(100):
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            # env.render()
            action = act(obs[None])[0]
            obs, rew, done, _ = env.step(action)
            episode_rew += rew
        print(episode_rew)
        episode_rewards.append(episode_rew)

    # Row format: name,size,r1,...,r100 — built with join instead of the
    # original repeated string concatenation.
    output = ",".join([name, str(size)] + [str(r) for r in episode_rewards]) + "\n"
    with open("validationStats.csv", 'a') as myfile:
        myfile.write(output)
def main():
    """Roll out a saved DQN policy, recording (state, next_state) image pairs
    and the chosen action at every step for behavioural cloning.

    Side effects: creates output directories, writes one stacked 128x128 PNG
    per step via cv2, and appends one "[action]" line per step to BC_FILE.
    """
    # exist_ok also covers the case where "thor" exists but a subdirectory
    # does not (the original only made the subdirs when "thor" was missing).
    for d in ("final_models/thor", "final_models/thor/AB",
              "final_models/thor/separate_actions"):
        os.makedirs(d, exist_ok=True)

    env = gym.make(ENV)
    act = deepq.load(MODEL)
    steps = 0
    # "with" guarantees the action file is closed even if an episode raises.
    with open(BC_FILE, "w") as action_file:
        while steps < STEPS:
            obs, done = env.reset(), False
            episode_rew = 0
            while not done and steps < STEPS:
                state_1 = cv2.resize(obs, (128, 128))
                # 75% exploit the policy, 25% explore with a random action.
                if np.random.uniform(0, 1) < .75:
                    action = act(obs[None])[0]
                else:
                    action = env.action_space.sample()
                obs, rew, done, _ = env.step(action)
                state_2 = cv2.resize(obs, (128, 128))
                cv2.imwrite(FILE + str(steps) + ".png",
                            np.hstack([state_1, state_2]))
                action_file.write("[" + str(action) + "]\n")
                episode_rew += rew
                steps += 1
                print(steps)
            print("Episode reward", episode_rew)
def main(path="./models/deepq/mario_reward_1736.7.pkl"):
    """Play one SuperMarioBros episode with a saved deepq policy, rendering
    each frame and printing the per-step action/reward."""
    step_mul = 16
    steps = 200

    FLAGS = flags.FLAGS
    flags.DEFINE_string("env", "SuperMarioBros-v0", "RL environment to train.")
    flags.DEFINE_string("algorithm", "deepq", "RL algorithm to use.")
    FLAGS(sys.argv)

    # 1. Create gym environment
    env = gym_super_mario_bros.make('SuperMarioBros-v0')
    env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
    act = deepq.load(path)

    # Frame-stack buffer (currently unused — update_history calls are
    # commented out below).
    nstack = 4
    nh, nw, nc = env.observation_space.shape
    history = np.zeros((1, nh, nw, nc * nstack), dtype=np.uint8)

    obs = env.reset()
    done = False
    # history = update_history(history, obs)
    episode_rew = 0
    while not done:
        env.render()
        action = act([obs])[0]
        obs, rew, done, _ = env.step(action)
        # history = update_history(history, obs)
        episode_rew += rew
        print("action : %s reward : %s" % (action, rew))
    print("Episode reward", episode_rew)
def main():
    """Replay the saved moving-cube DQN policy inside the ROS gym env."""
    rospy.init_node('movingcube_onedisk_walk_gym_predict',
                    anonymous=True, log_level=rospy.WARN)
    env = gym.make("MyMovingCubeOneDiskWalkEnv-v0")

    # Get Path to saved model
    rospack = rospkg.RosPack()
    pkg_path = rospack.get_path('my_moving_cube_pkg')
    models_dir_path = os.path.join(pkg_path, "models_saved")
    assert os.path.exists(models_dir_path), \
        " NO models saved exists in path =" + str(models_dir_path)
    out_model_file_path = os.path.join(models_dir_path, "movingcube_model.pkl")
    act = deepq.load(out_model_file_path)

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            print(str(obs))
            # Batch the observation as deepq act() expects (the original
            # called act(obs) bare — its TODO noted "gives some errors").
            action = act(obs[None])[0]
            print(str(action))
            # Step the environment — without this the inner loop never
            # terminated, since `done` could never change.
            obs, rew, done, _ = env.step(action)
            episode_rew += rew
        print("Episode reward", episode_rew)
def main():
    """Evaluate the saved poker policy for 1000 episodes and report the mean
    and standard deviation of per-step rewards."""
    env = gym.make("PokerHistory-v0")
    # td = "/tmp/tmpugaszyfi"
    # model_file = os.path.join(td, "model")
    # U.load_state(model_file)
    # act.save("poker_model_test.pkl")
    act = deepq.load("poker_model.pkl")
    rL = []
    for num_steps in range(1000):
        obs = env.reset()
        done = False
        episode_rew = 0
        action = None
        while not done:
            # this will spit out a random number of hands bc who knows if the
            # state is done in a random pull
            env.print_state()
            action = act(obs[None])[0]
            print("Action choice: ", str(action), ":", env.action_dict[action])
            obs, rew, done, _ = env.step(action)
            # import pdb; pdb.set_trace()
            # if the hand states are random this has no meaning.
            episode_rew += rew
            rL.append(rew)
        print("Episode reward", episode_rew)
    print("Mean and stdev of reward is: {}, {}".format(np.mean(rL), np.std(rL)))
def run_dqn(output_dir, train):
    """Train (or load) a CartPole DQN, then replay one rendered episode.

    Parameters
    ----------
    output_dir : str
        Directory holding the pickled model ``cartpole_dqn.pkl``.
    train : bool
        When True, train from scratch and save; otherwise load the saved act.
    """
    # Create the environment.
    env = gym.make('CartPole-v0')
    env = pw.CartPoleParetoWrapper(env)
    saved_file = os.path.join(output_dir, 'cartpole_dqn.pkl')

    if train:
        model = deepq.models.mlp([64])
        act = deepq.learn(
            env,
            q_func=model,
            lr=1e-3,                     # Recommended: 1e-3
            max_timesteps=100000,        # Recommended: 100000
            buffer_size=50000,           # Recommended: 50000
            exploration_fraction=0.1,    # Recommended: 0.1
            exploration_final_eps=0.02,  # Recommended: 0.02
            print_freq=10,               # Recommended: 10
            callback=dqn_callback)
        # Save file.
        print('[dqn] Saving model to', saved_file)
        act.save(saved_file)
    else:
        act = deepq.load(saved_file)

    # Replay.
    obs, done = env.reset(), False
    episode_rew = 0
    while not done:
        env.render()
        obs, rew, done, _ = env.step(act(obs[None])[0])
        episode_rew += rew
    print("[dqn] Episode reward", episode_rew)
def main():
    """Run 11 CartPole episodes mixing policy and random actions, then pickle
    all collected (obs, action, reward, next_obs) transitions.

    Writes policy_transitions.seq (pickle protocol 1) in the working dir.
    """
    env = gym.make("CartPole-v0")
    act = deepq.load("cartpole_model.pkl")
    episode = 0
    chain_dump = []
    trans = []
    while episode <= 10:  # True:
        obs, done = env.reset(), False
        episode_rew = 0
        trans = []
        while not done:
            env.render()
            # 55% policy action, 45% uniform random action.
            r = random.uniform(0, 1)
            if (r <= 0.55):
                action = act(obs[None])[0]
            else:
                action = random.randint(0, 1)
            new_obs, rew, done, _ = env.step(action)
            trans.append([obs, action, rew, new_obs])
            obs = new_obs
            episode_rew += rew
        episode = episode + 1
        print("Episode reward", episode_rew)
        chain_dump.append(np.vstack(trans))
    # "with" closes the file even on error; protocol=1 kept for compatibility.
    with open("policy_transitions.seq", "wb") as filehandler:
        pickle.dump(chain_dump, filehandler, protocol=1)
    # BUG FIX: the original referenced an undefined `replay_buffer` here,
    # raising NameError after an otherwise successful run.
    print('policy sequences saved', len(chain_dump))
    print('done')
def main(self):
    """Evaluate the saved Rubik's-cube policy over self.num_episodes episodes
    with scramble depth self.m, then print solved/unsolved totals."""
    env = gym.make("RubiksCube-v0")
    env.setScramble(self.m, self.m)
    act = deepq.load(self.path)

    total_reward = []
    for i in range(self.num_episodes):
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            # uncomment this if you want it to render the cube
            # env.render()
            obs, rew, done, _ = env.step(act(obs[None], update_eps=0)[0])
            episode_rew += rew
        total_reward.append(episode_rew)
        # uncomment this if you want it to render the last state
        # env.render()
        print("Episode reward: {}".format(episode_rew))
        print("scramble, action_history: {}".format(env.getlog()))
        print("-----------------------")
    # A final reward of 1 marks a solved cube, 0 an unsolved one.
    print("total: {}, Solved: {}, Unsolved: {}".format(
        len(total_reward), total_reward.count(1), total_reward.count(0)))
def test_bandits(n_arm, cost):
    """Evaluate a saved bandit DQN over 2000 meta-bandit episodes and store
    the per-episode results in an HDF5 file.

    Parameters
    ----------
    n_arm : int
        Number of bandit arms.
    cost : float
        Observation cost (also selects the weight file and result index).
    """
    env = MetaBanditEnv(n_arm, 25, cost)
    # Index of the closest cost on the log grid, used in the result filename.
    cost_i = np.abs(np.logspace(-4, -1, 7) - cost).argmin()
    filename = ("data/bandit_dqn/weights/bandit_" + str(n_arm) + "_" +
                str(cost) + "_model.pkl")
    act = deepq.load(filename)

    tot_rew = 0
    dfs = []
    for _ in range(2000):
        obs, done = env.reset(), False
        temp_rew = 0
        obs_count = 0
        while not done:
            obs, rew, done, _ = env.step(act(obs[None])[0])
            temp_rew += rew
            obs_count += 1
        dfs.append({
            'util': temp_rew,
            'observations': obs_count - 1,
            'agent': 'dqn',
            'n_arm': n_arm,
            'max_obs': 25,
            'cost': cost,
        })
        tot_rew += temp_rew

    print(str(n_arm) + " arm, cost: " + str(cost) + ", reward: " +
          str(tot_rew / 2000))
    data = pd.DataFrame(dfs)
    print(data.util.mean())
    store = pd.HDFStore('data/bandit_dqn/results/dqn_results_' + str(n_arm) +
                        "_" + str(cost_i) + '.h5')
    store['data'] = data
    store.close()
def main():
    """Replay the pendulum policy forever, 500 steps per episode, watching
    the model file's mtime so a retrain can be noticed between episodes."""
    env = gym.make("simplePendulum-v1")
    funame = args.filename
    act = deepq.load(funame)
    otim = os.stat(funame).st_mtime
    while True:
        mtim = os.stat(funame).st_mtime
        if mtim != otim:
            # BUG FIX: remember the new mtime — the original never updated
            # otim, so this branch fired on every episode after the first
            # file change. (Actual reload is still disabled, as before.)
            otim = mtim
            #act = None
            #act = deepq.load(funame)
            print("Loaded new controller...")
        obs, done = env.reset(), False
        episode_rew = 0
        nsteps = 0
        while nsteps < 500:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
            nsteps += 1
        print("Episode reward = ", round(episode_rew, 2))
        # obs is assumed to be (cos(theta), sin(theta), theta_dot) —
        # TODO confirm against the env's observation definition.
        print("Angle = {0:2.2f} deg, Vel = {1:2.2f} deg/s".format(
            180 / np.pi * np.arccos(obs[0]), 180 / np.pi * obs[2]))
        print(" ")
def test(env, load_path, num_episodes=1000):
    """Evaluate a saved deepq policy, logging every ansi render to
    <load_path>.txt, and return the success rate.

    An episode counts as successful when its total reward exceeds -env.n.

    Parameters
    ----------
    env : gym.Env
        Environment to evaluate in (must support render(mode='ansi')).
    load_path : str
        Path prefix for both the model (".pkl") and the log (".txt").
    num_episodes : int
        Number of evaluation episodes.
    """
    act = deepq.load(load_path + ".pkl")
    success_count = 0.0
    # "with" ensures the render log is closed even if an episode raises
    # (the original leaked the handle on error).
    with open(load_path + ".txt", "w") as test_render_file:
        for i in range(num_episodes):
            obs, done = env.reset(), False
            episode_rew = 0.0
            while not done:
                render_string = env.render(mode='ansi') + "\n"
                test_render_file.write(render_string)
                obs, rew, done, _ = env.step(act(obs[None])[0])
                episode_rew += rew
            # Log the terminal state as well.
            render_string = env.render(mode='ansi') + "\n"
            test_render_file.write(render_string)
            if (episode_rew > -env.n):
                print("Episode successful with reward ", episode_rew)
                test_render_file.write("Episode successful with reward " +
                                       str(episode_rew) + "\n")
                success_count += 1.0
            else:
                print("Episode unsuccessful with reward ", episode_rew)
                test_render_file.write("Episode unsuccessful with reward " +
                                       str(episode_rew) + "\n")
        success_rate = success_count / num_episodes
        print("Success Rate: ", success_rate)
        test_render_file.write("Success Rate: " + str(success_rate) + "\n")
    return success_rate
def __init__(self, force_mag, reg):
    """Configure a DAgger experiment driven by a pre-trained CartPole expert.

    Parameters
    ----------
    force_mag : float
        Cart force magnitude; only used to name the output directory.
    reg : bool
        Selects the regularized output directory variant.
    """
    self.reg = reg
    self.iters = 100
    self.T = 200
    self.trials = 1
    self.alpha = 0.1
    self.lambda_prior = list(np.ones(10))
    self.eta = 1.0
    self.inner_eta = self.eta
    self.params = {'T': self.T, 'iters': self.iters}
    # Expert policy for DAgger relabelling.
    self.act = deepq.load("cartpole_model_alt2.pkl")
    # Output layout: <base>/dagger/dagger...
    if self.reg:
        self.base_dir = 'data/reg_cartpole_force_mag' + str(force_mag)
    else:
        self.base_dir = 'data/cartpole_force_mag' + str(force_mag)
    self.dir = os.path.join(self.base_dir, 'dagger')
    self.prefix = 'dagger'
    self.path = os.path.join(self.dir, self.prefix)
    self.force_mag = force_mag
    self.t = .01
def test(env, load_path, num_episodes=10000):
    """Evaluate a goal-conditioned deepq policy, logging renders and episode
    outcomes to <load_path>.txt, and return the success rate.

    Each episode is reset with seed=i for reproducibility; the policy input
    is the observation concatenated with env.goal. Success means the total
    reward exceeds -env.n.
    """
    act = deepq.load(load_path + ".pkl")
    success_count = 0.0
    # "with" ensures the render log is closed even if an episode raises
    # (the original leaked the handle on error).
    with open(load_path + ".txt", "w") as test_render_file:
        for i in range(num_episodes):
            obs, done = env.reset(seed=i), False
            env_string = "Initial State: " + str(
                (env.initial_state).T) + "\nGoal State: " + str(
                    (env.goal).T) + "\nMax_reward: " + str(env.reward_max) + "\n"
            print(env_string)
            test_render_file.write(env_string)
            episode_rew = 0.0
            while not done:
                render_string = env.render(mode='ansi') + "\n"
                test_render_file.write(render_string)
                # The policy sees the observation and the goal together.
                obs, rew, done, _ = env.step(
                    act(np.concatenate([obs, env.goal])[None])[0])
                episode_rew += rew
            # Log the terminal state as well.
            render_string = env.render(mode='ansi') + "\n"
            test_render_file.write(render_string)
            if (episode_rew > -env.n):
                print("Episode successful with reward ", episode_rew)
                test_render_file.write("Episode successful with reward " +
                                       str(episode_rew) + "\n")
                success_count += 1.0
            else:
                print("Episode unsuccessful with reward ", episode_rew)
                test_render_file.write("Episode unsuccessful with reward " +
                                       str(episode_rew) + "\n")
        success_rate = success_count / num_episodes
        print("Success Rate: ", success_rate)
        test_render_file.write("Success Rate: " + str(success_rate) + "\n")
    return success_rate
def enjoy(board_size):
    """enjoy trained gomoku AI play board whose size is board_size x board_size.

    Parameters
    ----------
    board_size: int
        Size of board in one dimension, example:
        board_size = 9 --> board have size 9x9

    Returns
    -------
    None
    """
    env = gym.make('Gomoku{}x{}-arena-v0'.format(board_size, board_size),
                   __val_opponent_policy)
    act = deepq.load("kaithy_cnn_to_mlp_{}_model.pkl".format(board_size))
    # Enabling layer_norm here is import for parameter space noise!
    while True:
        obs = env.reset()
        done = False
        episode_rew = 0
        while not done:
            # Deterministic (greedy) action selection.
            obs, rew, done, _ = env.step(act(obs[None], stochastic=False)[0])
            episode_rew += rew
            env.render()
        print('Episode reward', episode_rew)
        input('Hit enter to play next match')
        print('Swap color')
        env.swap_role()
def load(env):
    """Load the saved deepq policy from the module-level `fname` and,
    best-effort, restore the pickled trail state onto env.repr.

    Returns the loaded act function.
    """
    act = deepq.load(path=fname)
    try:
        # "with" closes the handle (the original left the file open), and the
        # except list is narrowed from a bare `except:` to the failures a
        # best-effort restore should actually tolerate.
        with open('trail' + fname, 'rb') as f:
            env.repr.trail = pickle.load(f)
    except (OSError, pickle.PickleError, EOFError, AttributeError):
        # Missing/corrupt trail file (or env without .repr) is not fatal.
        pass
    return act
def main():
    """Replay a saved retro (SuperMarioBros NES) policy, converting the
    discrete action index to the env's one-hot action vector."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', help='environment ID',
                        default='SuperMarioBros-Nes')
    parser.add_argument('--gamestate', help='game state to load',
                        default='Level1-1')
    parser.add_argument('--model',
                        help='model pickle file from ActWrapper.save',
                        default='model.pkl')
    args = parser.parse_args()

    env = retro_wrappers.make_retro(game=args.env, state=args.gamestate,
                                    max_episode_steps=None)
    env = retro_wrappers.wrap_deepmind_retro(env)
    act = deepq.load(args.model)

    while True:
        obs = env.reset()
        done = False
        episode_rew = 0
        while not done:
            env.render()
            action = act(obs[None])[0]
            # retro expects a one-hot multi-binary action vector.
            env_action = np.zeros(env.action_space.n)
            env_action[action] = 1
            obs, rew, done, _ = env.step(env_action)
            episode_rew += rew
        print('Episode reward', episode_rew)
def main():
    """Replay a saved Mario policy with video recording, rendering each frame
    and plotting the stacked observation every 20 steps."""
    #env = gym_super_mario_bros.make('SuperMarioBros-v0')
    env = gym_super_mario_bros.make('SuperMarioBros-1-1-v1')
    env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
    timestart = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d-%H:%M:%S')
    # env = VideoRecorderWrapper(env, PROJ_DIR + "/../video", str(timestart), 50)
    env = VideoRecorderWrapper(env, PROJ_DIR + "/../video/final",
                               str(timestart), 1)
    env = DownsampleEnv(env, (84, 84))
    env = PenalizeDeathEnv(env, penalty=-25)
    env = FrameStackEnv(env, 4)

    # good
    #act = deepq.load(PROJ_DIR+"/../models/mario_model_2018-08-12-13:00:58.pkl")
    # better
    act = deepq.load(PROJ_DIR + "/../models/mario_model_2018-08-12-19:21:50.pkl")

    episode = 0
    while True:
        obs = env.reset()
        done = False
        stepnr = 0
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            # Visualize the stacked frames every 20 steps.
            if stepnr % 20 == 0:
                plot_obs(obs)
            episode_rew += rew
            stepnr += 1
        print("Episode reward", episode_rew, episode)
        episode = episode + 1
def main():
    """Replay the saved cms-v0 policy forever, printing each episode's
    total reward."""
    env = gym.make("cms-v0")
    act = deepq.load("cms_model.pkl")
    while True:
        obs = env.reset()
        done = False
        total = 0
        while not done:
            chosen = act(obs[None])[0]
            obs, reward, done, _ = env.step(chosen)
            total += reward
        print("Episode reward", total)
def main():
    """Replay the saved sliding-puzzle policy forever, rendering each step
    and printing each episode's total reward."""
    env = gym_sliding_puzzle.make("SlidingPuzzle-v0")
    act = deepq.load("sliding_puzzle.pkl")
    while True:
        obs = env.reset()
        done = False
        total = 0
        while not done:
            env.render()
            chosen = act(obs[None])[0]
            obs, reward, done, _ = env.step(chosen)
            total += reward
        print("Episode reward", total)
def main():
    """Replay the saved policy for the project env forever, rendering each
    step and printing each episode's total reward."""
    env = envs.create_env(None)
    act = deepq.load("{}_model.pkl".format(envs.VSTR))
    while True:
        obs = env.reset()
        done = False
        total = 0
        while not done:
            env.render()
            # Note: this act() takes a list-wrapped obs, not obs[None].
            chosen = act([obs])[0]
            obs, reward, done, _ = env.step(chosen)
            total += reward
        print("Episode reward", total)
def main(policy_pkl_file):
    """Replay a saved navigation policy forever, rendering each step.

    Parameters
    ----------
    policy_pkl_file : str
        Path to the pickled deepq act function.
    """
    env = gym.make('Image-Based-Navigation-2d-Map0-Goal0-v0')
    act = deepq.load(policy_pkl_file)
    while True:
        obs = env.reset()
        done = False
        total = 0
        while not done:
            env.render()
            chosen = act(obs[None])[0]
            obs, reward, done, _ = env.step(chosen)
            total += reward
        print("Episode reward", total)
def main():
    """Replay the policy saved at SAVE_PATH on ENV_NAME forever, rendering
    each step and printing each episode's total reward."""
    env = gym.make(ENV_NAME)
    act = deepq.load(SAVE_PATH)
    while True:
        obs = env.reset()
        done = False
        total = 0
        while not done:
            env.render()
            chosen = act(obs[None])[0]
            obs, reward, done, _ = env.step(chosen)
            total += reward
        print("Episode reward", total)
def main():
    """Replay a saved policy on LunarLander-v2 forever, rendering each step.

    NOTE(review): the loaded file is named "cartpole_model.pkl" — presumably
    a policy retrained for LunarLander under a stale filename; confirm.
    """
    env = gym.make("LunarLander-v2")
    act = deepq.load("cartpole_model.pkl")
    while True:
        obs = env.reset()
        done = False
        total = 0
        while not done:
            env.render()
            chosen = act(obs[None])[0]
            obs, reward, done, _ = env.step(chosen)
            total += reward
        print("Episode reward", total)
def enviroment():
    """Replay the saved CartPole policy forever, rendering each step and
    printing each episode's total reward."""
    env = gym.make("CartPole-v0")
    act = deepq.load("CartPole_model.pkl")
    while True:
        obs = env.reset()
        done = False
        total = 0
        while not done:
            env.render()
            chosen = act(obs[None])[0]
            obs, reward, done, _ = env.step(chosen)
            total += reward
        print("Episode reward", total)
def main():
    """Replay the saved MountainCar policy forever, rendering each step and
    printing each episode's total reward."""
    env = gym.make("MountainCar-v0")
    act = deepq.load("mountaincar_model.pkl")
    while True:
        # BUG FIX: gym's MountainCar-v0 has no reset_state(); the original
        # call raised AttributeError. Standard reset() matches the sibling
        # MountainCar script in this file.
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
def main():
    """Replay the saved MountainCar policy forever, rendering each step and
    printing each episode's total reward."""
    env = gym.make("MountainCar-v0")
    act = deepq.load("mountaincar_model.pkl")
    while True:
        obs = env.reset()
        done = False
        total = 0
        while not done:
            env.render()
            chosen = act(obs[None])[0]
            obs, reward, done, _ = env.step(chosen)
            total += reward
        print("Episode reward", total)
def main():
    """Replay the saved Token-v1 policy forever, rendering each step,
    printing each episode's total reward, and closing the env per episode."""
    env = gym.make("Token-v1")
    act = deepq.load("runtime_model.pkl")
    while True:
        obs = env.reset()
        done = False
        total = 0
        while not done:
            env.render()
            chosen = act(obs[None])[0]
            obs, reward, done, _ = env.step(chosen)
            total += reward
        print("Episode reward", total)
        # Closed after every episode in the original as well.
        env.close()
def main():
    """Replay the saved Pong policy forever on the DQN-wrapped Atari env,
    rendering each step and printing each episode's total reward."""
    env = gym.make("PongNoFrameskip-v4")
    env = deepq.wrap_atari_dqn(env)
    act = deepq.load("pong_model.pkl")
    while True:
        obs = env.reset()
        done = False
        total = 0
        while not done:
            env.render()
            chosen = act(obs[None])[0]
            obs, reward, done, _ = env.step(chosen)
            total += reward
        print("Episode reward", total)
def test0():
    """Run 1000 episodes of the Gaussian-optimization policy and print the
    env's Gaussian value at each episode's final observation."""
    env = gym.make("OptimizeGauss-v0")
    act = deepq.load("model/gauss.pkl")
    episode = 0
    for i in range(1000):
        obs = env.reset()
        done = False
        episode_rew = 0
        while not done:
            chosen = act(obs[None])[0]
            obs, reward, done, _ = env.step(chosen)
            episode_rew += reward
        # print(episode_rew)
        print(env.gauss(obs))
def test():
    """Replay the saved policy for args.env forever, optionally recording
    with a gym Monitor instead of on-screen rendering."""
    env = gym.make(args.env)
    act = deepq.load(os.path.join(args.log_dir, args.log_fname))
    if args.record:
        env = Monitor(env, directory=args.log_dir)
    while True:
        obs = env.reset()
        done = False
        total = 0
        while not done:
            # Render only when not recording to file.
            if not (args.record):
                env.render()
            chosen = act(obs[None])[0]
            obs, reward, done, _ = env.step(chosen)
            total += reward
        print("Episode reward", total)
def main():
    """Replay a saved discrete-action policy on the SCARA 3-DOF env, mapping
    each discrete action index to a small per-joint angle increment.

    Dead locals from the original (unused joint2/joint3 bins, a pandas-cut
    action_bins that was immediately overwritten, goal_average_steps,
    max_number_of_steps, last_time_steps, discrete_action_space) removed.
    """
    env = gym.make("GazeboModularScara3DOF-v2")
    act = deepq.load("scara_model.pkl")  # Discrete actions

    # Number of states is huge so in order to simplify the situation
    # typically, we discretize the space to: n_bins ** number_of_features
    n_bins = 10
    joint1_bins = pandas.cut([-np.pi / 2, np.pi / 2], bins=n_bins,
                             retbins=True)[1][1:-1]
    # Per-step joint increment = distance between adjacent bin edges.
    difference_bins = abs(joint1_bins[0] - joint1_bins[1])

    # 7 discrete actions: +/- increment on each of 3 joints, plus a no-op.
    action_bins = [(difference_bins, 0.0, 0.0), (-difference_bins, 0.0, 0.0),
                   (0.0, difference_bins, 0.0), (0.0, -difference_bins, 0.0),
                   (0.0, 0.0, difference_bins), (0.0, 0.0, -difference_bins),
                   (0.0, 0.0, 0.0)]

    while True:
        obs, done = env.reset(), False
        print("obs", obs)
        episode_rew = 0
        while not done:
            env.render()
            #obs, rew, done, _ = env.step(act(obs[None])[0])
            action = act(obs[None])[0]
            print("action", action)
            print("action_bins[action]", action_bins[action])
            # step() is a module-level helper that applies the joint
            # increments relative to the first three observation entries.
            obs, rew, done, _ = step(env, action_bins[action], obs[:3])
            print("reward", rew)
            print("observation", obs[:3])
            episode_rew += rew
            print("accumulated_reward", episode_rew)
            print("done", done)
        print("Episode reward", episode_rew)
def main():
    """Replay the saved Kuka grasping policy forever with on-screen
    rendering, printing each initial observation and episode reward."""
    env = KukaGymEnv(renders=True)
    act = deepq.load("kuka_model.pkl")
    print(act)
    while True:
        obs = env.reset()
        done = False
        print("===================================")
        print("obs")
        print(obs)
        total = 0
        while not done:
            env.render()
            chosen = act(obs[None])[0]
            obs, reward, done, _ = env.step(chosen)
            total += reward
        print("Episode reward", total)
def main():
    """Replay the saved wave-follower policy forever, live-plotting the
    trajectory and printing each taken action and episode reward."""
    env = gym.make("Wavefollower-v0")
    act = deepq.load("wavefollower_model.pkl")
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            #env.render()
            # BUG FIX: compute the action once. The original called
            # act(obs[None]) a second time AFTER stepping, so the printed
            # "Action" was the next step's action, not the one taken.
            action = act(obs[None])[0]
            obs, rew, done, _ = env.step(action)
            #plt.scatter(obs[0],obs[1], color='b')
            plt.scatter(obs[0], obs[2], color='r')
            plt.pause(0.00001)
            episode_rew += rew
            #print("Observation = {}".format(obs))
            print("Action = {}".format(action))
        print("Episode reward", episode_rew)
def main():
    """Replay the saved CartPole policy on the Bullet-physics env forever,
    printing the initial observation and its type, and each episode reward."""
    env = gym.make('CartPoleBulletEnv-v0')
    act = deepq.load("cartpole_model.pkl")
    while True:
        obs = env.reset()
        done = False
        print("obs")
        print(obs)
        print("type(obs)")
        print(type(obs))
        episode_rew = 0
        while not done:
            env.render()
            # Batched obs -> action batch -> first action.
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
import gym

from baselines import deepq

# Replay the saved MountainCar policy forever, rendering each step and
# printing each episode's total reward.
env = gym.make("MountainCar-v0")
act = deepq.load("mountaincar_model.pkl")

while True:
    obs = env.reset()
    done = False
    total = 0
    while not done:
        env.render()
        chosen = act(obs[None])[0]
        obs, reward, done, _ = env.step(chosen)
        total += reward
    print("Episode reward", total)
import gym

from baselines import deepq

# Replay the saved CartPole DQN ([128, 128] MLP) forever, rendering each
# step and printing each episode's total reward.
env = gym.make("CartPole-v0")
act = deepq.load("models/cartpole_model_DQN_[128, 128].pkl")

while True:
    obs = env.reset()
    done = False
    total = 0
    while not done:
        env.render()
        chosen = act(obs[None])[0]
        obs, reward, done, _ = env.step(chosen)
        total += reward
    print("Episode reward", total)