def setupSelfPlayZero(self):
    #r = random.randint(0, self.iterNo-1)
    r = self.iterNo - 1
    if self.ishumanFirstPlayer and self.ishumanCut:
        self.model = deepq.load_act("model/selfPlayZero/shannon_switching_train_{}_{}_{}.pkl".format(1, 1, r))
    elif self.ishumanFirstPlayer and not self.ishumanCut:
        self.model = deepq.load_act("model/selfPlayZero/shannon_switching_train_{}_{}_{}.pkl".format(1, 0, r))
    elif not self.ishumanFirstPlayer and self.ishumanCut:
        self.model = deepq.load_act("model/selfPlayZero/shannon_switching_train_{}_{}_{}.pkl".format(0, 1, r))
    else:
        self.model = deepq.load_act("model/selfPlayZero/shannon_switching_train_{}_{}_{}.pkl".format(0, 0, r))
    print("Self play Zero set up")
def main():
    # setup environment
    computerType = sys.argv[1]
    ishumanFirstPlayer = int(sys.argv[2])
    ishumanCut = int(sys.argv[3])
    fileName = sys.argv[4]
    env = gym.make('shannon_switching-v0')
    env.configureEnvironment(computerType=computerType,
                             ishumanFirstPlayer=ishumanFirstPlayer,
                             ishumanCut=ishumanCut,
                             iterNo=20,
                             epsilon=0.2)
    print("ishumanFirstPlayer ", ishumanFirstPlayer)
    print("ishumanCut", ishumanCut)
    model = deepq.load_act(fileName)

    totalIterations = 2000
    totalWins = 0
    for i in range(totalIterations):
        print(i)
        state = env.reset()
        while True:
            state, reward, isOver, __ = env.step(model.step(state)[0][0])
            if isOver != 0:
                break
        if reward == 1000:
            totalWins += 1
    print("Accuracy: ", totalWins / totalIterations)
def main():
    ##np.seterr(all='raise')
    env = gym.make("apl-v0")
    #act = deepq.learn(
    #    env,
    #    network='mlp',
    #    lr=1e-3,
    #    checkpoint_freq=None,
    #    total_timesteps=int(1e5),
    #    buffer_size=50000,
    #    exploration_fraction=0.1,
    #    exploration_final_eps=0.02,
    #    print_freq=10,
    #    load_path="./models/apl-v0-dqn-20181003-151750",
    #    callback=callback
    #)
    #timestr = time.strftime("%Y%m%d-%H%M%S")
    #act.save("./models/apl-v0-dqn-" + timestr)
    act = deepq.load_act("./models/apl-v0-dqn-20181003-152611.pickle")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
def load_agent_from_file(name):
    """Loads a pickled RL agent from file"""
    from baselines.deepq import load_act
    # needed to get the unpickling to work since the pickling is done
    # from a __name__=="__main__"
    # pylint: disable=unused-import
    from q_network import EdgeQNetwork
    act = load_act(name)
    return act
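# Usage sketch (not from the original source): assuming a registered Gym
# environment whose observations match the network the agent was trained on,
# the callable returned by load_agent_from_file can be queried for actions.
# The environment id "my-graph-env-v0" below is a placeholder.
def rollout_one_episode(agent_path, env_id="my-graph-env-v0"):
    import gym
    env = gym.make(env_id)
    act = load_agent_from_file(agent_path)
    obs, done, episode_rew = env.reset(), False, 0.0
    while not done:
        # the act function expects a batch dimension, hence obs[None]
        obs, rew, done, _ = env.step(act(obs[None])[0])
        episode_rew += rew
    return episode_rew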
def main():
    print('-*-*-*- enjoy worker -*-*-*-')
    # tf.graph().as_default()
    # tf.reset_default_graph()
    env = gym.make("CartPole-v0")
    act = deepq.load_act("model.pkl")
    max_episodes = 5
    while max_episodes > 0:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
        max_episodes = max_episodes - 1
def test(env, load_path, num_episodes=1000):
    act = deepq.load_act(load_path + ".pkl")
    # success_count=0.0
    test_render_file = open(load_path + ".txt", "w")
    best_obs = np.ones(env.n * env.m, dtype=int)
    best_episode_rew = -1 * env.n
    for i in range(num_episodes):
        obs, done = env.reset(), False
        episode_rew = 0.0
        while not done:
            render_string = env.render(mode='ansi') + "\n"
            test_render_file.write(render_string)
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        if episode_rew < best_episode_rew:
            best_episode_rew = episode_rew
            best_obs = obs
        render_string = env.render(mode='ansi') + "\n"
        test_render_file.write(render_string)
        test_render_file.write("Episode reward " + str(episode_rew) + "\n")
    test_render_file.close()

    obs_pm1 = best_obs * 2 - 1
    state_pm1 = np.reshape(obs_pm1, [env.m, env.n])
    print('State')
    print(best_obs)
    print(state_pm1)
    print()

    pairs = np.array(list(itertools.combinations(range(env.m), 2)))
    first_i = pairs[:, 0]
    second_i = pairs[:, 1]

    # Compute auto
    auto_corr_vectors = np.flip(np.fft.ifft(
        np.fft.fft(np.flip(state_pm1, axis=1)) * np.fft.fft(state_pm1)), axis=1)
    mean_sqr_side_peak_auto = np.mean(
        np.square(np.abs(auto_corr_vectors[:, 1:])))
    var_sqr_side_peak_auto = np.var(
        np.mean(np.square(np.abs(auto_corr_vectors[:, 1:])), axis=1))
    var_sqr_side_peak_auto_norm = np.var(
        np.mean(np.square(np.abs(auto_corr_vectors[:, 1:])), axis=1) / (env.n * env.n))
    print('Auto')
    print(np.real(auto_corr_vectors))
    print()

    # Compute average balance
    bal = np.mean(np.abs(np.sum(state_pm1, axis=1)))

    # Compute cross
    cross_corr_vectors = np.flip(np.fft.ifft(
        np.fft.fft(np.flip(state_pm1[first_i, :], axis=1)) * np.fft.fft(state_pm1[second_i, :])), axis=1)
    mean_sqr_side_peak_cross = np.mean(np.square(np.abs(cross_corr_vectors)))
    var_sqr_side_peak_cross = np.var(
        np.mean(np.square(np.abs(cross_corr_vectors)), axis=1))
    var_sqr_side_peak_cross_norm = np.var(
        np.mean(np.square(np.abs(cross_corr_vectors)), axis=1) / (env.n * env.n))
    print('Cross')
    print(np.real(cross_corr_vectors))
    print()

    mean_sqr_side_peak = 0.5 * mean_sqr_side_peak_auto + 0.5 * mean_sqr_side_peak_cross
    print('Mean sqr (auto):', mean_sqr_side_peak_auto)
    print('Mean sqr (cross):', mean_sqr_side_peak_cross)
    print('Mean sqr:', mean_sqr_side_peak)
    print('Var sqr (auto):', var_sqr_side_peak_auto)
    print('Var sqr (cross):', var_sqr_side_peak_cross)
    print('Mean bal:', bal)
    print()
    print('----------Normalized----------')
    print('Mean sqr (auto):', mean_sqr_side_peak_auto / (env.n * env.n))
    print('Mean sqr (cross):', mean_sqr_side_peak_cross / (env.n * env.n))
    print('Mean sqr:', mean_sqr_side_peak / (env.n * env.n))
    print('Var sqr (auto):', var_sqr_side_peak_auto_norm)
    print('Var sqr (cross):', var_sqr_side_peak_cross_norm)
    print()
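# Illustrative check (not part of the original file): the FFT expression used
# above computes circular correlation. For real-valued rows x and y of length
# n, np.flip(np.fft.ifft(np.fft.fft(np.flip(x)) * np.fft.fft(y)))[k] equals
# sum_j x[j] * y[(j - k) % n], so index 0 holds the in-phase peak and indices
# 1..n-1 hold the side peaks that the statistics above average over. The
# helper below is only for verification and is not used elsewhere.
import numpy as np

def circular_corr_direct(x, y):
    n = len(x)
    return np.array([sum(x[j] * y[(j - k) % n] for j in range(n)) for k in range(n)])

_rng = np.random.default_rng(0)
_x = _rng.choice([-1, 1], size=8)
_y = _rng.choice([-1, 1], size=8)
_fft_version = np.flip(np.fft.ifft(np.fft.fft(np.flip(_x)) * np.fft.fft(_y)))
assert np.allclose(np.real(_fft_version), circular_corr_direct(_x, _y))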
def main():
    env = gym.make(ENV)
    act = deepq.load_act(MODEL)
    steps = 0
    outfile = open(FILE, 'w')
    bcfile = open(BC_FILE, 'w')
    total_reward = 0
    episodes = 0
    while steps < 50000:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            #env.render()
            state_1 = obs
            if np.random.uniform(0, 1) <= RANDOM:
                action = act(obs[None])[0]
            else:
                action = env.action_space.sample()
            obs, rew, done, _ = env.step(action)
            state_2 = obs
            if RANDOM == DEFAULT:
                # write to AON file
                to_write = '['
                for w in state_1:
                    to_write += str(w) + ','
                to_write = to_write[:-1]
                to_write += ']'
                outfile.write(to_write)
                outfile.write(" ")
                to_write = '['
                for w in state_2:
                    to_write += str(w) + ','
                to_write = to_write[:-1]
                to_write += ']'
                outfile.write(to_write)
                outfile.write("\n")
            # write to BC file
            to_write = '['
            for w in state_1:
                to_write += str(w) + ','
            to_write = to_write[:-1]
            to_write += ']'
            bcfile.write(to_write)
            bcfile.write(" ")
            bcfile.write("[" + str(action) + "]")
            bcfile.write(" ")
            to_write = '['
            for w in state_2:
                to_write += str(w) + ','
            to_write = to_write[:-1]
            to_write += ']'
            bcfile.write(to_write)
            bcfile.write("\n")
            episode_rew += rew
            steps += 1
            print(steps)
        print("Episode reward", episode_rew)
        total_reward += episode_rew
        episodes += 1.
    print("Average reward", total_reward / episodes)
    outfile.close()
    bcfile.close()
def __init__(self):
    expert_path = data_root_path / "experts/mountaincar_deepq_custom.pickle"
    from baselines import deepq
    self.expert = deepq.load_act(expert_path)
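# Hypothetical companion method (not in the original snippet): deepq.load_act
# returns a callable that maps a batched observation to a batch of actions,
# so the loaded expert can be queried per step as sketched here. The method
# name get_expert_action is an assumption, not part of the original class.
def get_expert_action(self, obs):
    # obs is a single MountainCar observation; obs[None] adds the batch axis
    return self.expert(obs[None])[0]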