Example #1
File: run.py  Project: rsun0/sc2rl
def run_training(args):
    step_mul = 16
    opt_eps = 1e-8

    with open(args.log_file, mode='w') as log_file:
        # Removes "Namespace" from printout
        print('Args:', str(args)[9:], file=log_file)

        env = BuildMarinesEnvironment(
            render=args.render,
            step_multiplier=step_mul,
            verbose=args.verbose,
            enable_scv_helper=(not args.no_scvs),
            enable_kill_helper=(not args.no_kill),
        )
        run_settings = RunSettings(
            num_episodes=args.episodes,
            num_epochs=args.epochs,
            batch_size=args.batch_size,
            train_every=args.train_every,
            save_every=args.save_every,
            graph_every=args.graph_every,
            averaging_window=args.window,
            graph_file=args.graph_file,
            log_file=log_file,
            verbose=args.verbose,
        )

        if args.testagent:
            agent = TestAgent()
        else:
            agent_settings = AgentSettings(
                optimizer=torch.optim.Adam,
                learning_rate=args.lr,
                lr_gamma=args.lr_gamma,
                lr_step_size=args.lr_step_size,
                opt_eps=opt_eps,
                epsilon_max=0,
                epsilon_min=0,
                epsilon_duration=0,
                verbose=args.verbose,
            )
            memory = PolicyGradientMemory(buffer_len=args.memsize,
                                          discount=args.discount,
                                          averaging_window=args.window)
            model = PolicyGradientNet(
                num_blocks=args.resblocks,
                channels=args.channels,
                force_cpu=args.cpu,
            )
            agent = PolicyGradientAgent(
                init_temp=args.init_temp,
                temp_steps=args.temp_steps,
                save_file=args.model_file,
                log_file=log_file,
                model=model,
                settings=agent_settings,
                memory=memory,
                force_cpu=args.cpu,
            )
            agent.load()

        experiment = Experiment([agent], env, run_settings)
        experiment.train()
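
run_training above reads its configuration attribute-by-attribute from args, so any object exposing the same fields works. Below is a hypothetical invocation sketch: the field names mirror the attributes the function accesses, while the values are illustrative placeholders rather than the project's defaults.

from argparse import Namespace

# Hypothetical configuration; field names match the attributes read above,
# values are placeholders for illustration only.
args = Namespace(
    log_file='train.log', graph_file='reward.png', model_file='model.pt',
    render=False, verbose=False, cpu=False,
    no_scvs=False, no_kill=False, testagent=False,
    episodes=1000, epochs=2, batch_size=32,
    train_every=256, save_every=1024, graph_every=50, window=100,
    lr=1e-4, lr_gamma=0.95, lr_step_size=1000,
    memsize=10000, discount=0.99,
    resblocks=4, channels=64,
    init_temp=1.0, temp_steps=10000,
)
run_training(args)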
Example #2
def run_training(
    opponent,
    mcts_opp,
    game_state_file,
    graph_file,
    model_save_file,
    mcts_iters,
    temp,
    tempsteps,
    lr,
    discount,
    memsize,
    num_episodes,
    num_epochs,
    batch_size,
    train_every,
    save_every,
    graph_every,
    averaging_window,
    opt_eps=1e-8,
    ucb_c=1.5,
    boardsize=8,
    inputs=20,
    render=False,
    verbose=False,
):
    env = PommermanEnvironment(
        render=render,
        num_agents=2,
        game_state_file=game_state_file,
    )

    run_settings = RunSettings(
        num_episodes=num_episodes,
        num_epochs=num_epochs,
        batch_size=batch_size,
        train_every=train_every,
        save_every=save_every,
        graph_every=graph_every,
        averaging_window=averaging_window,
        graph_file=graph_file,
        verbose=verbose,
    )

    agent_settings = AgentSettings(
        optimizer=torch.optim.Adam,
        learning_rate=lr,
        opt_eps=opt_eps,
        epsilon_max=0,
        epsilon_min=0,
        epsilon_duration=0,
        verbose=verbose,
    )

    memory = MCTSMemory(buffer_len=memsize, discount=discount)

    if mcts_opp is None:
        mcts_opp = opponent
    if mcts_opp == 'rand':
        opp = pommerman.agents.RandomAgent()
    elif mcts_opp == 'noop':
        opp = PommermanNoopAgent()
    elif mcts_opp == 'simp':
        opp = pommerman.agents.SimpleAgent()
    else:
        raise Exception('Invalid MCTS opponent type', mcts_opp)

    mcts_model = ActorCriticNet(board_size=boardsize, in_channels=inputs)
    agent1 = MCTSAgent(
        mcts_iters=mcts_iters,
        discount=discount,
        c=ucb_c,
        temp=temp,
        tempsteps=tempsteps,
        agent_id=0,
        opponent=opp,
        model_save_file=model_save_file,
        model=mcts_model,
        settings=agent_settings,
        memory=memory,
    )
    agent1.load()

    if opponent == 'rand':
        agent2 = RandomAgent()
    elif opponent == 'noop':
        agent2 = NoopAgent()
    elif opponent == 'simp':
        agent2 = SimpleAgent()
    else:
        raise Exception('Invalid opponent type', opponent)

    experiment = Experiment([agent1, agent2], env, run_settings)
    experiment.train()
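
For reference, a hypothetical call to this run_training: the keyword names come from the signature above and the opponent strings from the branches inside it; the values themselves are placeholders, not recommended settings.

run_training(
    opponent='simp',            # one of 'rand', 'noop', 'simp'
    mcts_opp='rand',
    game_state_file=None,       # placeholder; assumed to mean the default board
    graph_file='reward.png',
    model_save_file='mcts_model.pt',
    mcts_iters=100,
    temp=1.0, tempsteps=10000,
    lr=1e-4, discount=0.99, memsize=50000,
    num_episodes=1000, num_epochs=2, batch_size=32,
    train_every=256, save_every=1024, graph_every=50,
    averaging_window=100,
)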
Example #3
    def index_spatial_probs(self, spatial_probs, indices):
        # Advanced indexing: select one spatial entry per leading index
        index_tuple = torch.meshgrid(
            [torch.arange(x) for x in spatial_probs.size()[:-2]]
        ) + (indices[:, 0], indices[:, 1])
        output = spatial_probs[index_tuple]
        return output

    def entropy(self, spatial_probs, nonspatial_probs):
        c3 = self.PPO_settings['c3']
        c4 = self.PPO_settings['c4']
        eps_denom = self.PPO_settings['eps_denom']

        # p * log(p) for the first spatial map and for the nonspatial head
        prod_s = spatial_probs[:, 0, :, :] * torch.log(
            spatial_probs[:, 0, :, :] + eps_denom)
        prod_n = nonspatial_probs * torch.log(nonspatial_probs + eps_denom)

        # Entropy bonus: negative weighted mean of the summed p * log(p) terms
        ent = -c3 * torch.mean(torch.sum(prod_s, dim=(1, 2)))
        ent = ent - c4 * torch.mean(torch.sum(prod_n, dim=1))

        return ent


settings_ppo = AgentSettings(optimizer=optim.Adam,
                             learning_rate=0.00025,
                             epsilon_max=1.0,
                             epsilon_min=0.05,
                             epsilon_duration=1000000)
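
The entropy term above is the usual negative sum of p * log(p), computed separately for the spatial and nonspatial heads and weighted by c3 and c4. A minimal standalone sketch with dummy distributions (shapes and coefficient values are assumptions chosen for illustration):

import torch

batch, depth, height, width, num_actions = 4, 2, 8, 8, 6
c3, c4, eps_denom = 0.5, 1.0, 1e-6

# Dummy probabilities: spatial maps normalized over (H, W), nonspatial over actions.
spatial_logits = torch.randn(batch, depth, height, width)
spatial_probs = torch.softmax(spatial_logits.flatten(2), dim=-1).view_as(spatial_logits)
nonspatial_probs = torch.softmax(torch.randn(batch, num_actions), dim=-1)

# Same computation as entropy() above, without the surrounding class.
prod_s = spatial_probs[:, 0] * torch.log(spatial_probs[:, 0] + eps_denom)
prod_n = nonspatial_probs * torch.log(nonspatial_probs + eps_denom)
ent = -c3 * torch.mean(torch.sum(prod_s, dim=(1, 2)))
ent = ent - c4 * torch.mean(torch.sum(prod_n, dim=1))
print(ent)  # a positive scalar for non-degenerate distributions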
Example #4
File: run.py  Project: rsun0/sc2rl
def main():

    ### Change this map if you must
    map_name = "DefeatRoaches"
    render = False
    step_mul = 8


    ### Edit this to be a list of sc2_env.Agent() variables, one for each agent
    ### or bot you want, unless you are playing a minigame
    players = None


    env = FullStateActionEnvironment(map_name_=map_name,
                                     render=render,
                                     step_multiplier=step_mul,
                                     players=players)


    ### Set this to construct your desired network inheriting from BaseNetwork
    model = None

    ### Change these parameters and dicts to customize training

    lr = 1e-4
    eps_max = 0.3
    eps_min = 0.05
    eps_duration = 1e5
    history_size = 20


    num_episodes = 1000000
    num_epochs = 2
    batch_size = 32
    train_every = 2048
    save_every = 10240
    graph_every = 50
    averaging_window = 100

    """
        :param optimizer: A class from torch.optim (instantiated later)
        :param learning_rate: The learning rate for the network
        :param epsilon_max: The starting epsilon
        :param epsilon_min: The final epsilon
        :param epsilon_duration: The number of frames to reach the final epsilon
    """
    agent_settings = AgentSettings(torch.optim.Adam,
                                   lr,
                                   eps_max,
                                   eps_min,
                                   eps_duration)

    ### Unless you are changing code in interface, you shouldn't change this dict
    run_settings = RunSettings(num_episodes,
                               num_epochs,
                               batch_size,
                               train_every,
                               save_every,
                               graph_every,
                               averaging_window)

    ### Unless you are changing memory, you shouldn't change this
    memory = ReplayMemory(train_every, batch_size, hist_size=history_size)
    """
    Custom to how you want to train your agent.
    Unless you are changing base_agent and changing the training algorithm,
    or you want to tune train parameters, you should not change this dict.
    """
    train_settings = {
        "discount_factor": 0.99,
        "lambda": 0.95,
        "hist_size": history_size,
        "device": device,
        "eps_denom": 1e-6,
        "c1": 0.1,
        "c2": 0.05,
        "c3": 0.01,
        "c4": 0.01,
        "clip_param": 0.1,
        "map": map_name
    }

    """
    Constructs the agent and trains it in an experiment.
    """
    agent = BaseAgent(model, agent_settings, memory, train_settings)
    experiment = Experiment([agent], env, run_settings)
    experiment.train()
Example #5
def main():

    map_name = "DefeatRoaches"
    render = False
    step_mul = 8

    env = MinigameEnvironment(state_modifier.graph_conv_modifier,
                              map_name_=map_name,
                              render=render,
                              step_multiplier=step_mul)

    nonspatial_act_size, spatial_act_depth = env.action_space

    model = GraphConvModel(nonspatial_act_size,
                           spatial_act_depth,
                           device=device).to(device)

    lr = 5e-3
    eps_max = 0.3
    eps_min = 0.05
    eps_duration = 1e5

    num_episodes = 1000000
    num_epochs = 3
    batch_size = 32
    train_every = 1024
    save_every = 10240
    graph_every = 50
    averaging_window = 100
    """
        :param optimizer: A class from torch.optim (instantiated later)
        :param learning_rate: The learning rate for the network
        :param epsilon_max: The starting epsilon
        :param epsilon_min: The final epsilon
        :param epsilon_duration: The number of frames to reach the final epsilon
    """
    agent_settings = AgentSettings(torch.optim.Adam, lr, eps_max, eps_min,
                                   eps_duration)

    run_settings = RunSettings(num_episodes, num_epochs, batch_size,
                               train_every, save_every, graph_every,
                               averaging_window)

    memory = ReplayMemory(train_every, 8, batch_size)

    PPO_settings = {
        "discount_factor": 0.99,
        "lambda": 0.95,
        "hist_size": 8,
        "device": device,
        "eps_denom": 1e-6,
        "c1": 1.0,
        "c2": 0.5,
        "c3": 0.5,
        "c4": 1.0,
        "clip_param": 0.1
    }

    agent = PPOAgent(model, agent_settings, memory, PPO_settings)
    experiment = Experiment([agent], env, run_settings)

    experiment.train()
Example #6
File: run.py  Project: rsun0/sc2rl
def main():

    map_name = "DefeatRoaches"
    render = False
    step_mul = 8

    env = FullStateActionEnvironment(map_name_=map_name,
                                     render=render,
                                     step_multiplier=step_mul)

    state_embed = 10
    action_embed = 16

    lr = 1e-4
    opt_eps = 1e-8
    eps_max = 0.3
    eps_min = 0.05
    eps_duration = 2e4
    history_size = 5

    num_episodes = 10000000
    num_epochs = 3
    batch_size = 32
    train_every = 1024
    save_every = 10240
    graph_every = 50
    averaging_window = 100

    net_config = {
        "state_embedding_size":
        state_embed,  # number of features output by embeddings
        "action_embedding_size": action_embed,
        "down_conv_features": 128,
        "down_residual_depth": 2,
        "up_features": 32,
        "up_conv_features": 128,
        "resnet_features": 128,
        "LSTM_in_size": 64,
        "LSTM_hidden_size": 96,
        "inputs2d_size": 64,
        "inputs3d_width": 8,
        "relational_features": 32,
        "relational_depth": 3,
        "relational_heads": 3,
        "spatial_out_depth": 64,
        "channels3": 16,
        "history_size": history_size,
        "device": device
    }

    #action_space = np.zeros(full_action_space.shape)
    #action_space[[0, 3, 12, 13, 331, 332]] = 1
    action_space = np.ones(full_action_space.shape)
    model = RRLModel(net_config, device=device,
                     action_space=action_space).to(device)
    print(model)
    """
        :param optimizer: A class from torch.optim (instantiated later)
        :param learning_rate: The learning rate for the network
        :param epsilon_max: The starting epsilon
        :param epsilon_min: The final epsilon
        :param epsilon_duration: The number of frames to reach the final epsilon
    """
    agent_settings = AgentSettings(torch.optim.Adam, lr, eps_max, eps_min,
                                   eps_duration, opt_eps)

    run_settings = RunSettings(num_episodes, num_epochs, batch_size,
                               train_every, save_every, graph_every,
                               averaging_window)

    memory = ReplayMemory(train_every, batch_size, hist_size=history_size)

    train_settings = {
        "discount_factor": 0.99,
        "lambda": 0.95,
        "hist_size": history_size,
        "device": device,
        "eps_denom": 1e-5,
        "c1": 0.1,
        "c2": 0.01,
        "c3": 1.0,
        "c4": 1.0,
        "minc2": 0.01,
        "clip_param": 0.1,
        "min_clip_param": 0.01,
        "clip_decay": 10000,
        "c2_decay": 10000,
        "map": map_name,
        "history_size": history_size
    }

    agent = BaseAgent(model, agent_settings, memory, train_settings)
    #agent.load()
    experiment = Experiment([agent], env, run_settings)

    #experiment.test()
    experiment.train()