def main(args):
    """Prepare a run and hand control over to ``train``.

    Makes sure the working directories exist, creates the TensorBoard
    writer and the logger, builds and seeds the environment, instantiates
    one policy per agent and finally starts training.

    Args:
        args: Run configuration. This function reads ``central_train``,
            ``log_name``, ``seed`` and ``n_agent`` from it and forwards the
            whole object to the helpers (presumably the parsed command-line
            namespace -- confirm against the caller).

    Raises:
        NotImplementedError: If ``args.central_train`` is ``True``.
    """
    # Centralized training has not been written yet
    if args.central_train is True:
        raise NotImplementedError("todo")

    # Make sure the working directories exist
    for directory in ("./logs", "./pytorch_models"):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # TensorBoard writer first, then the logger
    tb_log_dir = './logs/tb_{0}'.format(args.log_name)
    tb_writer = SummaryWriter(tb_log_dir)
    log = set_log(args)

    # Environment
    env = make_env(args)

    # Same seed for the environment, torch and numpy
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # One policy per agent, built in index order
    agents = []
    for i_agent in range(args.n_agent):
        policy = set_policy(
            env, tb_writer, log, args, name="agent", i_agent=i_agent)
        agents.append(policy)

    # Training loop
    train(agents=agents, env=env, log=log, tb_writer=tb_writer, args=args)
def main(args):
    """Prepare a run for the student policies and start training.

    Makes sure the working directories exist, creates the TensorBoard
    writer and the logger, builds and seeds the environment, instantiates
    ``args.n_student`` student policies and calls ``train``.

    Args:
        args: Run configuration. This function reads ``log_name``, ``seed``
            and ``n_student`` from it and forwards the whole object to the
            helpers (presumably the parsed command-line namespace --
            confirm against the caller).
    """
    # Create dir
    # "./saved_model" is created together with the other directories, i.e.
    # before train() runs, so it already exists for anything that is written
    # there during training (presumably model checkpoints -- confirm in
    # train()). Creating it only after training finished would be too late.
    for directory in ("./logs", "./pytorch_models", "./saved_model"):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize policy
    student_n = [
        set_policy(env, log, args, name="student", i_agent=i_agent)
        for i_agent in range(args.n_student)
    ]

    # Start train
    train(student_n=student_n, env=env, log=log, tb_writer=tb_writer, args=args)
def main(args):
    """Prepare a two-agent run and start training.

    Makes sure the working directories exist, creates the logger and the
    TensorBoard writer, builds the environment, seeds ``random``, numpy,
    torch and the environment, creates the two agents and calls ``train``.

    Args:
        args: Run configuration. This function reads ``log_name`` and
            ``seed`` from it and forwards the whole object to the helpers
            (presumably the parsed command-line namespace -- confirm
            against the caller).
    """
    # Make sure the working directories exist
    for directory in ("./logs", "./pytorch_models"):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Logger first, then the TensorBoard writer
    log = set_log(args)
    tb_log_dir = './logs/tb_{0}'.format(args.log_name)
    tb_writer = SummaryWriter(tb_log_dir)

    # Environment
    env = make_env(args)

    # Same seed for every random source
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    env.seed(args.seed)

    # The two agents are built in order; their indices start at 1
    agent_specs = [("agent1", 1), ("agent2", 2)]
    agent1, agent2 = [
        Agent(env, log, tb_writer, args, name=agent_name, i_agent=agent_index)
        for agent_name, agent_index in agent_specs
    ]

    # Training loop
    train(agent1, agent2, env, log, tb_writer, args)
def main(args):
    """Prepare a single-agent run and either test or train the policy.

    Makes sure the working directories exist, creates the TensorBoard
    writer and the logger, builds and seeds the environment, creates the
    policy named after ``args.algorithm`` and then runs ``tester.test``
    when ``args.test`` is truthy, ``trainer.train`` otherwise.

    Args:
        args: Run configuration. This function reads ``log_name``, ``seed``,
            ``algorithm`` and ``test`` from it and forwards the whole object
            to the helpers (presumably the parsed command-line namespace --
            confirm against the caller).
    """
    # Make sure the working directories exist
    for directory in ("./logs", "./pytorch_models"):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # TensorBoard writer first, then the logger
    tb_log_dir = './logs/tb_{0}'.format(args.log_name)
    tb_writer = SummaryWriter(tb_log_dir)
    log = set_log(args)

    # Environment (this make_env also receives the logger)
    env = make_env(log, args)

    # Same seed for the environment, torch and numpy
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Policy
    agent = set_policy(env, tb_writer, log, args, name=args.algorithm)

    # Import lazily so that only the module that is actually used is loaded
    if args.test:
        from tester import test as run
    else:
        from trainer import train as run

    run(agent=agent, env=env, log=log, tb_writer=tb_writer, args=args)
def __init__(self, args):
    """Create the worker environments and the queue handed to them.

    The number of workers is one fewer than ``mp.cpu_count()`` but at
    least 1, and never more than ``args.n_traj``. One environment is
    created per worker and all of them are wrapped in a
    ``SubprocVecEnv`` that is given a fresh ``mp.Queue``.

    Args:
        args: Configuration object. This method reads ``n_traj``,
            ``env_name`` and ``n_agent`` from it and forwards the whole
            object to ``SubprocVecEnv``.
    """
    self.args = args

    # On a single-CPU host ``cpu_count() - 1`` is 0, which would produce an
    # empty list of environments; always keep at least one worker.
    cpu_workers = max(1, mp.cpu_count() - 1)

    # No more workers than args.n_traj
    self.num_workers = min(cpu_workers, args.n_traj)

    self.queue = mp.Queue()
    self.envs = SubprocVecEnv(
        envs=[
            make_env(args.env_name, args.n_agent)
            for _ in range(self.num_workers)
        ],
        queue=self.queue,
        args=args)

    # Set seed to envs (fixed value 0)
    self.envs.seed(0)
def main(args):
    """Prepare a run and train either the opponent or the modeler.

    Makes sure the working directories exist, creates the TensorBoard
    writer and the logger, builds and seeds the environment and creates
    one opponent policy plus the modeler policy. When
    ``args.train_opponent`` is truthy the opponent is trained; otherwise
    the opponent weights are loaded from ``./pytorch_models`` and the
    modeler is trained against it.

    Args:
        args: Run configuration. This function reads ``log_name``, ``seed``
            and ``train_opponent`` from it and forwards the whole object to
            the helpers (presumably the parsed command-line namespace --
            confirm against the caller).
    """
    # Make sure the working directories exist
    for directory in ("./logs", "./pytorch_models"):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # TensorBoard writer first, then the logger
    tb_log_dir = './logs/tb_{0}'.format(args.log_name)
    tb_writer = SummaryWriter(tb_log_dir)
    log = set_log(args)

    # Environment
    env = make_env(args)

    # Same seed for the environment, torch and numpy
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Exactly one opponent is created, followed by the modeler
    opponent_n = []
    for i_agent in range(1):
        opponent_n.append(set_policy(
            env, tb_writer, log, args, name="opponent", i_agent=i_agent))
    modeler = set_policy(env, tb_writer, log, args, name="modeler", i_agent=0)

    # Opponent training is a separate mode that ends the run
    if args.train_opponent:
        train_opponent(
            opponent_n=opponent_n, env=env, log=log, tb_writer=tb_writer,
            args=args)
        return

    # Modeler training uses a previously saved opponent
    for opponent in opponent_n:
        opponent.load_model(
            filename="opponent0_500", directory="./pytorch_models")

    train_modeler(
        modeler=modeler, opponent_n=opponent_n, env=env, log=log,
        tb_writer=tb_writer, args=args)
def main(args):
    """Prepare a worker/manager/teacher run and start training.

    Makes sure the working directories exist, creates the TensorBoard
    writer and the logger, builds and seeds the environment, creates the
    worker, manager and temporary-manager policies plus a single teacher,
    checks that the three groups have the same size and calls ``train``.

    Args:
        args: Run configuration. This function reads ``log_name``, ``seed``,
            ``n_worker`` and ``n_manager`` from it and forwards the whole
            object to the helpers (presumably the parsed command-line
            namespace -- confirm against the caller).

    Raises:
        AssertionError: If the number of workers differs from the number
            of managers.
    """
    # Make sure the working directories exist
    for directory in ("./logs", "./pytorch_models"):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # TensorBoard writer first, then the logger
    tb_log_dir = './logs/tb_{0}'.format(args.log_name)
    tb_writer = SummaryWriter(tb_log_dir)
    log = set_log(args)

    # Environment
    env = make_env(args)

    # Same seed for the environment, torch and numpy
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    def build_group(name, count):
        # ``count`` policies sharing one name, indexed from 0
        return [
            set_policy(env, tb_writer, log, args, name=name, i_agent=i_agent)
            for i_agent in range(count)
        ]

    # Groups are created in this order: workers, managers, temp managers
    workers = build_group("worker", args.n_worker)
    managers = build_group("manager", args.n_manager)
    temp_managers = build_group("temp_manager", args.n_manager)

    # Note that only one teacher is considered in the one box push domain
    # to transfer knowledge from agent $i$ to agent $k$ (Section 6.1)
    teacher = set_policy(env, tb_writer, log, args, name="teacher", i_agent=0)

    assert len(workers) == len(managers), "The two number must be same"
    assert len(managers) == len(temp_managers), "The two number must be same"

    # Training loop
    train(
        workers=workers,
        managers=managers,
        temp_managers=temp_managers,
        teacher=teacher,
        env=env,
        log=log,
        tb_writer=tb_writer,
        args=args)
def main(args):
    """Prepare a predator/prey run and start training.

    Makes sure the working directories exist, creates the TensorBoard
    writer and the logger, builds and seeds the environment, creates
    ``args.n_predator`` predator policies followed by ``args.n_prey`` prey
    policies and calls ``train``.

    Args:
        args: Run configuration. This function reads ``log_name``, ``seed``,
            ``n_predator`` and ``n_prey`` from it and forwards the whole
            object to the helpers (presumably the parsed command-line
            namespace -- confirm against the caller).
    """
    # Make sure the working directories exist
    for directory in ("./logs", "./pytorch_models"):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # TensorBoard writer first, then the logger
    tb_log_dir = './logs/tb_{0}'.format(args.log_name)
    tb_writer = SummaryWriter(tb_log_dir)
    log = set_log(args)

    # Environment
    env = make_env(args)

    # Same seed for the environment, torch and numpy
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Predators are created before the prey, each group indexed from 0
    predator_agents = []
    for i_agent in range(args.n_predator):
        predator_agents.append(set_policy(
            env, tb_writer, log, args, name="predator", i_agent=i_agent))

    prey_agents = []
    for i_agent in range(args.n_prey):
        prey_agents.append(set_policy(
            env, tb_writer, log, args, name="prey", i_agent=i_agent))

    # Training loop
    train(
        predator_agents=predator_agents,
        prey_agents=prey_agents,
        env=env,
        log=log,
        tb_writer=tb_writer,
        args=args)
def main(args):
    """Run the estimator chosen by ``args.estimate_option`` and save its table.

    Checks the agent count, makes sure the working directories exist,
    creates the TensorBoard writer and the logger, builds and seeds the
    environment, creates the agents, runs the selected ``train`` function
    and stores what it returns as a ``.npy`` file under ``./data``.

    The file name is ``<estimate_option>_<decay_max_timesteps>``, with
    ``_<future_max_timesteps>`` appended for the ``"ours"`` option.

    Args:
        args: Run configuration. This function reads ``n_agent``,
            ``log_name``, ``seed``, ``estimate_option``,
            ``decay_max_timesteps`` and (for ``"ours"``)
            ``future_max_timesteps`` from it and forwards the whole object
            to the helpers (presumably the parsed command-line namespace --
            confirm against the caller).

    Raises:
        AssertionError: If ``args.n_agent`` is not 2.
        ValueError: If ``args.estimate_option`` is not one of
            ``"montecarlo"``, ``"naive"`` or ``"ours"``.
    """
    # Check arguments
    assert args.n_agent == 2, "Only two agents are supported"

    # Make sure the working directories exist
    for directory in ("./logs", "./data"):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # TensorBoard writer first, then the logger
    tb_log_dir = './logs/tb_{0}'.format(args.log_name)
    tb_writer = SummaryWriter(tb_log_dir)
    log = set_log(args)

    # Environment
    env = make_env(args)

    # Same seed for the environment and numpy
    env.seed(args.seed)
    np.random.seed(args.seed)

    # Agents, indexed from 0
    agents = []
    for i_agent in range(args.n_agent):
        agents.append(Agent(
            env=env, tb_writer=tb_writer, log=log, args=args,
            name="agent", i_agent=i_agent))

    # Import only the trainer that was asked for; all of them expose ``train``
    if args.estimate_option == "montecarlo":
        from trainer.montecarlo import train
    elif args.estimate_option == "naive":
        from trainer.naive import train
    elif args.estimate_option == "ours":
        # Alternative kept from the original code: from trainer.ours import train
        from trainer.ours_mp import train
    else:
        raise ValueError("Invalid option")

    table = train(
        agents=agents, env=env, log=log, tb_writer=tb_writer, args=args)

    # Save and vis result
    name_parts = [args.estimate_option, str(args.decay_max_timesteps)]
    if args.estimate_option == "ours":
        name_parts.append(str(args.future_max_timesteps))
    save_name = "_".join(name_parts)

    np.save("./data/" + save_name + ".npy", table)