Exemplo n.º 1
0
def main(args):
    """Prepare logging, the environment and the agents, then start training.

    Args:
        args: Run configuration. ``central_train``, ``log_name``, ``seed``
            and ``n_agent`` are read here; the whole object is also passed
            on to ``set_log``, ``make_env``, ``set_policy`` and ``train``.

    Raises:
        NotImplementedError: If ``args.central_train`` is ``True``; that
            mode is still marked as "todo".
    """
    # Identity check kept as-is: only a literal ``True`` aborts the run.
    if args.central_train is True:
        raise NotImplementedError("todo")

    # Create directories. ``exist_ok=True`` removes the check-then-create
    # race that occurs when several runs start at the same time.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds (environment, torch and numpy all share args.seed)
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize policy: one policy object per agent index
    agents = [
        set_policy(env, tb_writer, log, args, name="agent", i_agent=i_agent)
        for i_agent in range(args.n_agent)
    ]

    # Start train
    train(agents=agents, env=env, log=log, tb_writer=tb_writer, args=args)
Exemplo n.º 2
0
def main(args):
    """Prepare logging, the environment and student policies, then train.

    Args:
        args: Run configuration. ``log_name``, ``seed`` and ``n_student``
            are read here; the whole object is also passed on to
            ``set_log``, ``make_env``, ``set_policy`` and ``train``.
    """
    # Create dir. ``exist_ok=True`` removes the check-then-create race that
    # occurs when several runs start at the same time.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds (environment, torch and numpy all share args.seed)
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize policy: one policy object per student index
    student_n = [
        set_policy(env, log, args, name="student", i_agent=i_agent)
        for i_agent in range(args.n_student)
    ]

    # Start train
    train(student_n=student_n,
          env=env,
          log=log,
          tb_writer=tb_writer,
          args=args)

    # This directory is only ensured once train() has returned, matching
    # the original ordering.
    os.makedirs("./saved_model", exist_ok=True)
Exemplo n.º 3
0
def main(args):
    """Prepare logging, the environment and two agents, then start training.

    Args:
        args: Run configuration. ``log_name`` and ``seed`` are read here;
            the whole object is also passed on to ``set_log``,
            ``make_env``, ``Agent`` and ``train``.
    """
    # Create directories. ``exist_ok=True`` removes the check-then-create
    # race that occurs when several runs start at the same time.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    log = set_log(args)
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))

    # Create env
    env = make_env(args)

    # Set seeds (stdlib random, numpy, torch and the env share args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    env.seed(args.seed)

    # Initialize agents (indices start at 1 here, not 0)
    agent1 = Agent(env, log, tb_writer, args, name="agent1", i_agent=1)
    agent2 = Agent(env, log, tb_writer, args, name="agent2", i_agent=2)

    # Start train
    train(agent1, agent2, env, log, tb_writer, args)
Exemplo n.º 4
0
def main(args):
    """Prepare logging, the environment and one policy, then test or train.

    Args:
        args: Run configuration. ``log_name``, ``seed``, ``algorithm`` and
            ``test`` are read here; the whole object is also passed on to
            ``set_log``, ``make_env``, ``set_policy`` and the selected
            ``test`` / ``train`` function.
    """
    # Create directories. ``exist_ok=True`` removes the check-then-create
    # race that occurs when several runs start at the same time.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env (this variant of make_env also receives the logger)
    env = make_env(log, args)

    # Set seeds (environment, torch and numpy all share args.seed)
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize policy, named after the selected algorithm
    agent = set_policy(env, tb_writer, log, args, name=args.algorithm)

    # The runner is imported lazily so only the selected module is loaded.
    if args.test:
        from tester import test
        test(agent=agent, env=env, log=log, tb_writer=tb_writer, args=args)
    else:
        from trainer import train
        train(agent=agent, env=env, log=log, tb_writer=tb_writer, args=args)
Exemplo n.º 5
0
    def __init__(self, args):
        """Build a ``SubprocVecEnv`` with one entry per worker and seed it.

        Args:
            args: Run configuration. ``n_traj``, ``env_name`` and
                ``n_agent`` are read here; the whole object is also stored
                on ``self.args`` and passed on to ``SubprocVecEnv``.
        """
        self.args = args

        # Use all cores but one, capped at args.n_traj. The inner max()
        # keeps at least one worker on a single-core host, where
        # ``cpu_count() - 1`` would otherwise produce an empty env list.
        self.num_workers = min(max(mp.cpu_count() - 1, 1), args.n_traj)

        self.queue = mp.Queue()
        env_entries = [
            make_env(args.env_name, args.n_agent)
            for _ in range(self.num_workers)
        ]
        self.envs = SubprocVecEnv(envs=env_entries,
                                  queue=self.queue,
                                  args=args)

        # Set seed to envs (fixed at 0; not taken from args)
        self.envs.seed(0)
Exemplo n.º 6
0
def main(args):
    """Prepare logging, env and policies, then train opponent or modeler.

    When ``args.train_opponent`` is truthy the opponent is trained.
    Otherwise a saved opponent model is loaded and the modeler is trained
    against it.

    Args:
        args: Run configuration. ``log_name``, ``seed`` and
            ``train_opponent`` are read here; the whole object is also
            passed on to ``set_log``, ``make_env``, ``set_policy`` and the
            training functions.
    """
    # Create dir. ``exist_ok=True`` removes the check-then-create race that
    # occurs when several runs start at the same time.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds (environment, torch and numpy all share args.seed)
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize policy (a single opponent, kept in a list)
    opponent_n = [
        set_policy(env, tb_writer, log, args, name="opponent", i_agent=i_agent)
        for i_agent in range(1)
    ]

    modeler = set_policy(env, tb_writer, log, args, name="modeler", i_agent=0)

    # Start training
    if args.train_opponent:
        train_opponent(opponent_n=opponent_n,
                       env=env,
                       log=log,
                       tb_writer=tb_writer,
                       args=args)
    else:
        # Load trained opponent model (the checkpoint name is hard-coded)
        for opponent in opponent_n:
            opponent.load_model(filename="opponent0_500",
                                directory="./pytorch_models")

        train_modeler(modeler=modeler,
                      opponent_n=opponent_n,
                      env=env,
                      log=log,
                      tb_writer=tb_writer,
                      args=args)
Exemplo n.º 7
0
def main(args):
    """Prepare logging, env, workers, managers and a teacher, then train.

    Args:
        args: Run configuration. ``log_name``, ``seed``, ``n_worker`` and
            ``n_manager`` are read here; the whole object is also passed
            on to ``set_log``, ``make_env``, ``set_policy`` and ``train``.

    Raises:
        AssertionError: If the number of workers differs from the number
            of managers.
    """
    # Create directories. ``exist_ok=True`` removes the check-then-create
    # race that occurs when several runs start at the same time.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds (environment, torch and numpy all share args.seed)
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize policy
    # Note that only one teacher is considered in the one box push domain
    # to transfer knowledge from agent $i$ to agent $k$ (Section 6.1)
    workers = [
        set_policy(env, tb_writer, log, args, name="worker", i_agent=i_agent)
        for i_agent in range(args.n_worker)]
    managers = [
        set_policy(env, tb_writer, log, args, name="manager", i_agent=i_agent)
        for i_agent in range(args.n_manager)]
    temp_managers = [
        set_policy(env, tb_writer, log, args, name="temp_manager", i_agent=i_agent)
        for i_agent in range(args.n_manager)]
    teacher = set_policy(env, tb_writer, log, args, name="teacher", i_agent=0)

    # The first check effectively requires n_worker == n_manager. The second
    # guards the two manager lists, both built from args.n_manager.
    assert len(workers) == len(managers), "The two number must be same"
    assert len(managers) == len(temp_managers), "The two number must be same"

    # Start train
    train(
        workers=workers, managers=managers,
        temp_managers=temp_managers, teacher=teacher,
        env=env, log=log, tb_writer=tb_writer, args=args)
Exemplo n.º 8
0
def main(args):
    """Prepare logging, env and predator/prey policies, then start training.

    Args:
        args: Run configuration. ``log_name``, ``seed``, ``n_predator`` and
            ``n_prey`` are read here; the whole object is also passed on
            to ``set_log``, ``make_env``, ``set_policy`` and ``train``.
    """
    # Create dir. ``exist_ok=True`` removes the check-then-create race that
    # occurs when several runs start at the same time.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds (environment, torch and numpy all share args.seed)
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize policy: one policy object per predator / prey index
    predator_agents = [
        set_policy(env, tb_writer, log, args, name="predator", i_agent=i_agent)
        for i_agent in range(args.n_predator)
    ]

    prey_agents = [
        set_policy(env, tb_writer, log, args, name="prey", i_agent=i_agent)
        for i_agent in range(args.n_prey)
    ]

    # Start training
    train(predator_agents=predator_agents,
          prey_agents=prey_agents,
          env=env,
          log=log,
          tb_writer=tb_writer,
          args=args)
Exemplo n.º 9
0
def main(args):
    """Run the selected estimator with two agents and save its result table.

    The trainer module is chosen by ``args.estimate_option``
    (``"montecarlo"``, ``"naive"`` or ``"ours"``). Whatever ``train``
    returns is written to ``./data/<save_name>.npy``.

    Args:
        args: Run configuration. ``n_agent``, ``log_name``, ``seed``,
            ``estimate_option``, ``decay_max_timesteps`` and (for
            ``"ours"``) ``future_max_timesteps`` are read here; the whole
            object is also passed on to ``set_log``, ``make_env``,
            ``Agent`` and ``train``.

    Raises:
        AssertionError: If ``args.n_agent`` is not 2.
        ValueError: If ``args.estimate_option`` is not a known option.
    """
    # Check arguments
    assert args.n_agent == 2, "Only two agents are supported"

    # Create directories. ``exist_ok=True`` removes the check-then-create
    # race that occurs when several runs start at the same time.
    for directory in ("./logs", "./data"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds
    env.seed(args.seed)
    np.random.seed(args.seed)

    # Set agents
    agents = [
        Agent(env=env,
              tb_writer=tb_writer,
              log=log,
              args=args,
              name="agent",
              i_agent=i_agent) for i_agent in range(args.n_agent)
    ]

    # Select the trainer for the requested estimator. Every variant is
    # called with the same keyword arguments, so only the import differs.
    if args.estimate_option == "montecarlo":
        from trainer.montecarlo import train
    elif args.estimate_option == "naive":
        from trainer.naive import train
    elif args.estimate_option == "ours":
        from trainer.ours_mp import train
        # from trainer.ours import train
    else:
        raise ValueError("Invalid option")

    table = train(agents=agents,
                  env=env,
                  log=log,
                  tb_writer=tb_writer,
                  args=args)

    # Save and vis result
    save_name = args.estimate_option + "_" + str(args.decay_max_timesteps)
    if args.estimate_option == "ours":
        save_name += "_" + str(args.future_max_timesteps)
    np.save("./data/" + save_name + ".npy", table)