def main(arg):
    """Prepare the experiment directory layout and start training.

    Creates ``<locexp>/results`` always and ``<locexp>/pytorch_models``
    when model saving is requested, then hands off to ``train_agent``.

    Args:
        arg: parsed command-line arguments; must provide ``locexp`` (base
            experiment path), ``save_model`` (bool) and ``seed``.
    """
    path = arg.locexp
    res_path = os.path.join(path, "results")
    # exist_ok avoids the check-then-create race of the previous
    # `if not os.path.exists(...)` pattern (safe under parallel launches).
    os.makedirs(res_path, exist_ok=True)
    if arg.save_model:
        dir_model = os.path.join(path, "pytorch_models")
        os.makedirs(dir_model, exist_ok=True)
    train_agent(arg, arg.seed)
def main(args):
    """Load a JSON hyper-parameter file, build the gym environment and train.

    Args:
        args: parsed command-line arguments providing ``param`` (path to the
            JSON config) and ``mode``; in ``"args"`` mode the ``lr``,
            ``fc1_units`` and ``fc2_units`` attributes override the file.
    """
    with open(args.param, "r") as config_file:
        param = json.load(config_file)
    print("use the env {} ".format(param["env_name"]))
    print(param)
    print("Start Programm in {} mode".format(args.mode))
    env = gym.make(param["env_name"])
    # Command-line overrides take precedence over the JSON file.
    if args.mode == "args":
        param["lr"] = args.lr
        param["fc1_units"] = args.fc1_units
        param["fc2_units"] = args.fc2_units
    env = FrameStack(env, param)
    train_agent(env, param)
# --- Exploration schedule -------------------------------------------------
# Epsilon schedule with cosine annealing; t0/t1/e0/e1/decay_fun are defined
# earlier in this script (outside this fragment) — TODO confirm.
cos_ann = True
ann_cyc = 5
schedule = Schedule(t0, t1, e0, e1, decay_fun, cosine_annealing=cos_ann, annealing_cycles=ann_cyc)
# Policy: epsilon-greedy over the value function Q
policy = EpsilonGreedyPolicy(schedule=schedule, value_function=Q)
# Reward Function
reward_fun = rf_info2d_pos
# Action Pre/Post-Processing: discrete agent actions -> continuous env actions
act_fun = act_disc2cont
# --- Agent hyper-parameters ----------------------------------------------
lr = 1e-4
gamma = 0.99
doubleQ = True  # Run doubleQ-DQN sampling from Q_target and bootstraping from Q
rb = False  # replay buffer disabled; size/batch below only apply when enabled
rb_max_size = 1e6
rb_batch_size = 64
tau = 0.1  # presumably the soft-update rate for Q_target — verify in DQN
agent = DQN(policy, act_fun, Q, Q_target, state_dim, action_dim, gamma, doubleQ, reward_fun=reward_fun, replay_buffer=rb, max_buffer_size=rb_max_size, batch_size=rb_batch_size, tau=tau, lr=lr)
# --- Training -------------------------------------------------------------
show = False  # no rendering during training
train_agent(agent, desc, file_name, runs, episodes, time_steps, test_episodes, init_state, init_noise, show=show)
def train_multiple_agents(model_dir, local_pop_dir, game_path, base_port,
                          num_envs, num_steps, worker_idx, total_workers,
                          reuse_ports=True, level_path=None, time_reward=0.):
    """Train this worker's subset of the population, one agent at a time.

    For each agent, stdout/stderr are redirected to the agent's
    ``train_log.txt`` and training is retried on successively higher port
    ranges whenever a connection/stream error occurs, up to port 60000.

    Args:
        model_dir: directory holding the population's model folders.
        local_pop_dir: local population directory passed to ``train.train_agent``.
        game_path: path to the game binary/environment.
        base_port: first port of the range to use.
        num_envs: number of parallel environments per training run.
        num_steps: training steps per agent.
        worker_idx: index of this worker (selects the population subset).
        total_workers: total number of workers splitting the population.
        reuse_ports: if True every agent reuses ``base_port``; otherwise each
            agent gets its own disjoint range.
        level_path: optional level file forwarded to training.
        time_reward: time-based reward shaping value forwarded to training.

    Raises:
        The last connection-related error when the port range is exhausted.
    """
    org_stdout = sys.stdout
    org_stderr = sys.stderr
    my_pop = subset_pop(train.load_pop(model_dir), worker_idx, total_workers)
    for i, p in enumerate(my_pop):
        print("Worker", worker_idx, "is starting training of", p, "for",
              num_steps, "steps", flush=True)
        sys.stdout = open(model_dir + p + "/train_log.txt", 'a')
        sys.stderr = sys.stdout
        p_base_port = base_port if reuse_ports else base_port + (num_envs * i * 2)
        j = 0
        last_error = None
        # finally-block guarantees the log file is closed and the real
        # stdout/stderr restored even if train_agent raises something we
        # do not retry on (the original leaked the redirect in that case).
        try:
            while p_base_port + (j * num_envs * 2) < 60000:
                try:
                    train.train_agent(model_dir, local_pop_dir, p, game_path,
                                      p_base_port + (j * num_envs * 2),
                                      num_envs, num_steps,
                                      level_path=level_path,
                                      time_reward=time_reward)
                    break
                # BUG FIX: ConnectionResetError is a subclass of
                # ConnectionError, so it must be caught first — the original
                # ordering made this branch (and its message) unreachable.
                except ConnectionResetError as e2:
                    print(
                        "ConnectionResetError detected during training, trying a higher port range"
                    )
                    j += 1
                    last_error = e2
                except ConnectionError as e:
                    print(
                        "ConnectionError detected during training, trying a higher port range"
                    )
                    j += 1
                    last_error = e
                except EOFError as e3:
                    print(
                        "EOFError detected during training, trying higher port range"
                    )
                    j += 1
                    last_error = e3
                except json.decoder.JSONDecodeError as e4:
                    print(
                        "JSONDecodeError detected during training, trying higher port range"
                    )
                    j += 1
                    last_error = e4
        finally:
            # sys.stderr is the same file object, so one close suffices.
            sys.stdout.close()
            sys.stdout = org_stdout
            sys.stderr = org_stderr
        if p_base_port + (j * num_envs * 2) >= 60000:
            if last_error:
                raise last_error
            else:
                raise ValueError(
                    "So there's no last_error, but we got here...?")
        print("Worker", worker_idx, "has completed training of", p, "for",
              num_steps, "steps", flush=True)
import torch
import argparse
from train import train_agent


def _parse_args():
    """Build and parse the command-line options for DQN Atari training."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--env_id", type=str, default="PongNoFrameskip-v4")
    parser.add_argument("--frame_stack", type=int, default=4)
    parser.add_argument("--capacity", type=int, default=100000)
    parser.add_argument("--batch_size", type=int, default=64)
    parser.add_argument("--lr", type=float, default=0.00001)
    parser.add_argument("--num_frames_to_train", type=int, default=1500000)
    parser.add_argument("--warm_up", type=int, default=10000)
    parser.add_argument("--gamma", type=float, default=0.99)
    parser.add_argument("--update_target", type=int, default=1000)
    return parser.parse_args()


if __name__ == "__main__":
    # Parsing happens only when run as a script; the original parsed at
    # import time, which breaks importing this module (e.g. from tests).
    args = _parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_agent(args, device)
def example_2():
    """Run the empowerment-maximising agent in a chosen grid world and plot
    subjective/true empowerment, visit counts, value map and learning curves.
    """
    np.random.seed(1)  # fixed seed for a reproducible run
    # maze: build the world and its true transition model B
    n_step = 3
    f = WorldFactory()
    w = f.klyubin_world()#, tunnel_world()
    B = w.compute_transition()
    strategy = VisitCountFast()
    # E: true n-step empowerment per state, flattened to one value per cell
    E = strategy.compute(world=w, T=B, n_step=n_step).reshape(-1)
    initpos = [1,3] # np.random.randint(w.dims[0], size=2)
    s = w._cell_to_index(initpos)
    # for reference: the agent's model starts from an empty (wall-free) maze
    emptymaze = MazeWorld(w.height, w.width)
    T = emptymaze.compute_transition()
    n_s, n_a, _ = T.shape
    # agent
    agent = EmpMaxAgent(alpha=0.1, gamma=0.9, T=T, n_step=n_step, n_samples=1000, det=1.)
    agent.s = s  # place the agent at the chosen start cell
    # training loop (timed)
    start = time.time()
    D_emp, D_mod, steps, tau, visited = train_agent(B, E, agent, w, n_s, n_a)
    print("elapsed seconds: %0.3f" % (time.time() - start))
    # some plotting: 3x3 grid of diagnostic panels
    fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(9, 6))
    #Amap = np.array([list(w.actions.values())[i] for i in agent.action_map])
    #ax[0, 0].quiver(np.arange(w.width) + .5, np.arange(w.height) + .5, Amap[:, 1].reshape(w.height, w.width), Amap[:, 0].reshape(w.height, w.width))
    # (0,0) the agent's learned (subjective) empowerment estimate
    w.plot(fig, ax[0, 0], colorMap= agent.E.reshape(*w.dims))
    ax[0, 0].set_title('subjective empowerment')
    print(f'min = {np.min(agent.E):.2f}, max = {np.max(agent.E):.2f}')
    # (0,1) state visitation counts from training
    w.plot(fig, ax[0,1], colorMap=visited.reshape(*w.dims))
    ax[0, 1].set_title('visited')
    # (0,2) learned value map
    Vmap = agent.value_map.reshape(*w.dims)
    w.plot(fig, ax[0, 2], colorMap= Vmap)
    ax[0, 2].set_title('value map')
    print(f'min = {np.min(Vmap):.2f}, max = {np.max(Vmap):.2f}')
    # (1,1) tau trace over training — presumably a temperature/annealing
    # parameter returned by train_agent; verify there.
    ax[1, 1].set_title("tau")
    ax[1, 1].plot(tau)
    # (1,0) does high true empowerment attract visits?
    ax[1, 0].scatter(agent.E, visited.reshape(n_s))
    ax[1, 0].set_xlabel('true empowerment')
    ax[1, 0].set_ylabel('visit frequency')
    # (1,2) empowerment-map error and model disagreement on twin y-axes
    red = 'tab:red'
    ax[1, 2].plot(D_emp, color=red)
    ax[1, 2].set_xlabel('time')
    ax[1, 2].set_ylabel('MSE of empowerment map', color=red)
    ax[1, 2].tick_params(axis='y', labelcolor=red)
    ax[1, 2] = ax[1, 2].twinx()
    ax[1, 2].set_ylabel('Model disagreement', color='tab:blue')
    ax[1, 2].plot(D_mod, color='tab:blue')
    ax[1, 2].tick_params(axis='y', labelcolor='tab:blue')
    # (2,0) ground-truth empowerment for comparison with (0,0)
    w.plot(fig, ax[2, 0], colorMap= E.reshape(*w.dims))
    ax[2, 0].set_title('true empowerment')
    plt.show()