示例#1
0
def get_solution_brain_set():
    """Assemble the single-brain ``BrainSet`` for the solution agent.

    A prioritized replay memory is created, handed to ``get_agent``, and the
    resulting agent is wrapped in one vector-observation ``Brain``.

    Returns:
        A ``BrainSet`` containing that one brain.
    """
    # alpha starts at 0.6 and is reduced linearly in the scheduler step
    # (slope 0.6 / NUM_EPISODES); the scheduler is given 0 as its final value.
    alpha_schedule = ParameterScheduler(
        initial=0.6,
        lambda_fn=lambda i: 0.6 - 0.6 * i / NUM_EPISODES,
        final=0.,
    )
    # beta starts at 0.4 and is raised linearly towards a final value of 1.
    beta_schedule = ParameterScheduler(
        initial=0.4,
        final=1,
        lambda_fn=lambda i: 0.4 + 0.6 * i / NUM_EPISODES,
    )

    replay_memory = PrioritizedMemory(
        capacity=REPLAY_BUFFER_SIZE,
        state_shape=(1, STATE_SIZE),
        alpha_scheduler=alpha_schedule,
        beta_scheduler=beta_schedule,
        seed=SEED,
        continuous_actions=True,
        min_priority=MIN_PRIORITY,
    )

    solution_agent = get_agent(replay_memory)

    brain = Brain(
        brain_name=BRAIN_NAME,
        action_size=ACTION_SIZE,
        state_shape=STATE_SIZE,
        observation_type='vector',
        agents=[solution_agent],
    )
    return BrainSet(brains=[brain])
示例#2
0
def get_solution_brain_set():
    """Build the vector-observation DQN solution and its configuration.

    Starts from a deep copy of ``default_cfg``, applies the solution-specific
    overrides, then wires policy, featurizer, model, optimizer, memory and
    agent into a single ``Brain``. The model is printed as a side effect.

    Returns:
        A ``(brain_set, params)`` tuple: the ``BrainSet`` holding the one
        brain, and the parameter dict that was used to build it.
    """
    params = deepcopy(default_cfg)
    params.update({
        "MLP_FEATURES_HIDDEN": (512, ),
        "OUTPUT_FC_HIDDEN_SIZES": (128, ),
        "NUM_STACKED_FRAMES": 1,
        "MLP_FEATURES_DROPOUT": None,
        "OUTPUT_HIDDEN_DROPOUT": None,
        "DUELING": True,
    })

    policy = get_policy(ACTION_SIZE, params)

    # Featurizer layer sizes: the input width taken from the state shape,
    # followed by the configured hidden widths.
    hidden_sizes = params['MLP_FEATURES_HIDDEN']
    featurizer_layers = (VECTOR_STATE_SHAPE[1], *hidden_sizes)
    featurizer = MLP(
        featurizer_layers,
        dropout=params['MLP_FEATURES_DROPOUT'],
        activation_function=nn.ReLU(),
        output_function=nn.ReLU(),
        seed=SEED,
    )

    # The fourth positional argument is the width of the featurizer's last
    # hidden layer.
    model = DQN(
        VECTOR_STATE_SHAPE,
        ACTION_SIZE,
        featurizer,
        params['MLP_FEATURES_HIDDEN'][-1],
        seed=SEED,
        grayscale=params["GRAYSCALE"],
        num_stacked_frames=params["NUM_STACKED_FRAMES"],
        output_hidden_layer_size=params["OUTPUT_FC_HIDDEN_SIZES"],
        OUTPUT_HIDDEN_DROPOUT=params["OUTPUT_HIDDEN_DROPOUT"],
        dueling_output=params["DUELING"],
        noisy_output=params['NOISY'],
        categorical_output=params['CATEGORICAL'],
    )
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=params['INITIAL_LR'])
    memory = get_memory(VECTOR_STATE_SHAPE, params)
    solution_agent = get_agent(
        VECTOR_STATE_SHAPE, ACTION_SIZE, model, policy, memory, optimizer,
        params)

    banana_brain = Brain(
        brain_name=BRAIN_NAME,
        action_size=ACTION_SIZE,
        state_shape=VECTOR_STATE_SHAPE,
        observation_type='vector',
        agents=[solution_agent],
    )
    return BrainSet(brains=[banana_brain]), params
示例#3
0
def get_solution_brain_set():
    """Build the single-brain ``BrainSet`` for the PPO solution agent.

    The actor and the critic are MLPs with two hidden layers of width 128
    that differ only in output width (``ACTION_SIZE`` for the actor, 1 for
    the critic). Both use a Tanh output function.

    Returns:
        A ``BrainSet`` containing one vector-observation ``Brain`` with the
        PPO agent as its only agent.
    """

    def build_mlp(output_size):
        # Actor and critic share every setting except the output width.
        return MLP(
            layer_sizes=(STATE_SIZE, 128, 128, output_size),
            seed=SEED,
            output_function=torch.nn.Tanh(),
            with_batchnorm=BATCHNORM,
            output_layer_initialization_fn=init_layer_within_range,
            hidden_layer_initialization_fn=init_layer_inverse_root_fan_in,
            # LeakyReLU's first positional parameter is ``negative_slope``,
            # so ``LeakyReLU(True)`` yields slope 1 (an identity function).
            # ``inplace`` has to be passed by keyword.
            activation_function=torch.nn.LeakyReLU(inplace=True),
            dropout=DROPOUT,
        )

    def build_actor_critic():
        return PPO_Actor_Critic(
            actor_model=build_mlp(ACTION_SIZE),
            critic_model=build_mlp(1),
            action_size=ACTION_SIZE,
            continuous_actions=True,
        )

    agent = PPOAgent(
        state_size=STATE_SIZE,
        action_size=ACTION_SIZE,
        seed=SEED,
        actor_critic_factory=build_actor_critic,
        optimizer_factory=lambda params: torch.optim.Adam(
            params, lr=LR, weight_decay=WEIGHT_DECAY, eps=EPSILON),
        batch_size=BATCH_SIZE,
    )

    crawler_brain = Brain(
        brain_name=BRAIN_NAME,
        action_size=ACTION_SIZE,
        state_shape=STATE_SIZE,
        observation_type='vector',
        agents=[agent],
    )
    brain_set = BrainSet(brains=[crawler_brain])
    return brain_set
示例#4
0
        brain_name=GOALIE_BRAIN_NAME,
        action_size=GOALIE_ACTION_SIZE,
        state_shape=GOALIE_STATE_SIZE,
        observation_type='vector',
        agents=goalie_agents,
    )

    striker_brain = Brain(
        brain_name=STRIKER_BRAIN_NAME,
        action_size=STRIKER_ACTION_SIZE,
        state_shape=STRIKER_STATE_SIZE,
        observation_type='vector',
        agents=striker_agents,
    )

    brain_set = BrainSet(brains=[goalie_brain, striker_brain])

    for brain_name, brain in brain_set:
        for agent_num, agent in enumerate(brain.agents):
            agent_id = "{}_{}".format(brain_name, agent_num)
            if brain_name == 'GoalieBrain':
                action_size = GOALIE_ACTION_SIZE
                action_range = GOALIE_ACTION_DISCRETE_RANGE
            elif brain_name == 'StrikerBrain':
                action_size = STRIKER_ACTION_SIZE
                action_range = STRIKER_ACTION_DISCRETE_RANGE
            else:
                raise ValueError('f**k')

            agent.policy = IndependentMADDPGPolicy(
                brain_set=brain_set,
示例#5
0
def banana_tuning(update_params: dict):
    """Run one hyper-parameter tuning trial for the vector-state DQN agent.

    The trial configuration is a deep copy of ``default_cfg`` overlaid with
    ``update_params``. A model, optimizer, memory and agent are built from
    it, evaluated through ``simulator.get_agent_performance``, and the
    result is recorded with ``write_tuning_data``. ``TRIAL_COUNTER`` is
    incremented after a successful trial.

    Args:
        update_params: Overrides for ``default_cfg``. The values for
            ``OUTPUT_FC_HIDDEN_SIZES``, ``SUPPORT_RANGE`` and
            ``MLP_FEATURES_HIDDEN`` may be Python-literal strings (parsed
            with ``ast.literal_eval``) or already-parsed objects.

    Returns:
        The performance reported by the simulator, or ``0`` if anything in
        the trial raised (the exception is printed, not propagated).
    """
    global TRIAL_COUNTER

    def parse_literal(value):
        # ``ast.literal_eval`` rejects anything that is not a string or AST
        # node, so a value that is already a tuple/list would otherwise fail
        # the whole trial and be scored 0.
        return ast.literal_eval(value) if isinstance(value, str) else value

    params = deepcopy(default_cfg)
    params.update(update_params)
    try:
        for key in ('OUTPUT_FC_HIDDEN_SIZES', 'SUPPORT_RANGE',
                    'MLP_FEATURES_HIDDEN'):
            params[key] = parse_literal(params[key])

        policy = get_policy(ACTION_SIZE, params)

        featurizer = MLP(tuple([VECTOR_STATE_SHAPE[1]] +
                               list(params['MLP_FEATURES_HIDDEN'])),
                         dropout=params['MLP_FEATURES_DROPOUT'],
                         activation_function=nn.ReLU(True),
                         output_function=nn.ReLU(True),
                         seed=SEED)

        model = DQN(
            VECTOR_STATE_SHAPE,
            ACTION_SIZE,
            featurizer,
            params['MLP_FEATURES_HIDDEN'][-1],
            seed=SEED,
            grayscale=params["GRAYSCALE"],
            num_stacked_frames=params["NUM_STACKED_FRAMES"],
            output_hidden_layer_size=params["OUTPUT_FC_HIDDEN_SIZES"],
            OUTPUT_HIDDEN_DROPOUT=params["OUTPUT_HIDDEN_DROPOUT"],
            dueling_output=params["DUELING"],
            noisy_output=params['NOISY'],
            categorical_output=params['CATEGORICAL'],
        )

        print(model)
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=params['INITIAL_LR'])

        memory = get_memory(VECTOR_STATE_SHAPE, params)

        agent = get_agent(VECTOR_STATE_SHAPE, ACTION_SIZE, model, policy,
                          memory, optimizer, params)

        banana_brain = Brain(
            brain_name=BRAIN_NAME,
            action_size=ACTION_SIZE,
            state_shape=VECTOR_STATE_SHAPE,
            observation_type='vector',
            agents=[agent],
        )

        brain_set = BrainSet(brains=[banana_brain])

        # Run performance evaluation
        performance, info = simulator.get_agent_performance(
            brain_set=brain_set,
            n_train_episodes=params["N_EPISODES"],
            n_eval_episodes=params["N_EVAL_EPISODES"],
            max_t=params["MAX_T"],
        )
        info['input_params'] = params

        write_tuning_data(info, performance)

        TRIAL_COUNTER += 1

        print("Performance is : {}".format(performance))
        return performance
    except Exception as e:
        # Deliberate best-effort: a failed trial is reported and scored 0 so
        # the surrounding tuning run can continue.
        print(e)
        return 0
示例#6
0
def visual_banana_tuning(update_params: dict):
    """Run one hyper-parameter tuning trial for the visual (CNN) DQN agent.

    The trial configuration is a deep copy of ``default_cfg`` overlaid with
    ``update_params``. A CNN featurizer, model, optimizer, memory and agent
    are built from it, evaluated through
    ``simulator.get_agent_performance``, and the result is recorded with
    ``write_tuning_data``. ``TRIAL_COUNTER`` is incremented before the
    result is written.

    Args:
        update_params: Overrides for ``default_cfg``. ``SUPPORT_RANGE``,
            ``OUTPUT_FC_HIDDEN_SIZES`` and ``FILTERS`` may be Python-literal
            strings or already-parsed objects. ``KERNEL_SIZES`` and
            ``STRIDE_SIZES`` may be a literal string (or a sequence) whose
            elements are themselves literal strings or parsed objects.

    Returns:
        The performance reported by the simulator, or ``0`` if anything in
        the trial raised (the failure is printed, not propagated).
    """
    global TRIAL_COUNTER

    def parse_literal(value):
        # ``ast.literal_eval`` rejects anything that is not a string or AST
        # node, so a value that is already parsed would otherwise fail the
        # whole trial and be scored 0.
        return ast.literal_eval(value) if isinstance(value, str) else value

    params = deepcopy(default_cfg)
    params.update(update_params)
    try:
        params['SUPPORT_RANGE'] = parse_literal(params['SUPPORT_RANGE'])
        params['OUTPUT_FC_HIDDEN_SIZES'] = parse_literal(
            params['OUTPUT_FC_HIDDEN_SIZES'])
        params['FILTERS'] = parse_literal(params['FILTERS'])
        # Kernel/stride sizes are encoded two levels deep: the outer value
        # and each of its elements are parsed separately.
        params['KERNEL_SIZES'] = [
            parse_literal(i) for i in parse_literal(params["KERNEL_SIZES"])
        ]
        params['STRIDE_SIZES'] = [
            parse_literal(i) for i in parse_literal(params["STRIDE_SIZES"])
        ]

        policy = get_policy(ACTION_SIZE, params)
        print(params)
        featurizer = CNN(
            image_shape=IMAGE_SHAPE,
            num_stacked_frames=params["NUM_STACKED_FRAMES"],
            grayscale=params["GRAYSCALE"],
            filters=params["FILTERS"],
            kernel_sizes=params["KERNEL_SIZES"],
            stride_sizes=params["STRIDE_SIZES"],
        )

        model = VisualDQN(
            VISUAL_STATE_SHAPE,
            ACTION_SIZE,
            featurizer,
            featurizer.output_size,
            seed=SEED,
            grayscale=params["GRAYSCALE"],
            num_stacked_frames=params["NUM_STACKED_FRAMES"],
            output_hidden_layer_size=params["OUTPUT_FC_HIDDEN_SIZES"],
            OUTPUT_HIDDEN_DROPOUT=params["OUTPUT_HIDDEN_DROPOUT"],
            dueling_output=params["DUELING"],
            noisy_output=params['NOISY'],
            categorical_output=params['CATEGORICAL'],
        )

        print(model)
        optimizer = torch.optim.Adam(model.parameters(), lr=params['INITIAL_LR'])

        memory = get_memory(VISUAL_STATE_SHAPE, params)

        agent = get_agent(VISUAL_STATE_SHAPE, ACTION_SIZE, model, policy, memory, optimizer, params)

        banana_brain = Brain(
            brain_name=BRAIN_NAME,
            action_size=ACTION_SIZE,
            state_shape=VISUAL_STATE_SHAPE,
            observation_type='visual',
            agents=[agent],
            preprocess_state_fn=get_preprocess_state_fn(params)
        )

        brain_set = BrainSet(brains=[banana_brain])

        # Run performance evaluation
        performance, info = simulator.get_agent_performance(
            brain_set=brain_set,
            n_train_episodes=params["N_EPISODES"],
            n_eval_episodes=params["N_EVAL_EPISODES"],
            max_t=params["MAX_T"],
        )
        info['input_params'] = params

        TRIAL_COUNTER += 1

        write_tuning_data(info, performance)

        print(f"Performance is : {performance}")
        return performance
    except Exception as e:
        # Failures can occur due to invalid CNN sizes; such a trial is
        # reported and scored 0 so the tuning run can continue.
        print("FAILURE IN HYPERPARAMETER TUNING::: {}, {}".format(e, sys.exc_info()))
        return 0
示例#7
0
def get_solution_brain_set():
    """Build the ``BrainSet`` holding two MAPPO agents for ``TennisBrain``.

    Both agents are configured identically apart from their ``agent_id``
    (``TennisBrain_0`` / ``TennisBrain_1``). Each one receives its own slice
    maps, which pick an agent's columns out of a joint state tensor
    (24 columns per agent) and a joint action tensor (2 columns per agent).

    Returns:
        A ``BrainSet`` with a single vector-observation ``Brain`` containing
        both agents.
    """

    def make_actor():
        return MLP(
            layer_sizes=(STATE_SIZE, 256, 128, ACTION_SIZE),
            seed=SEED,
            # output_function=BoundVectorNorm(),
            output_function=torch.nn.Tanh(),
            with_batchnorm=BATCHNORM,
            activation_function=torch.nn.ReLU(True),
            hidden_layer_initialization_fn=init_layer_inverse_root_fan_in,
            output_layer_initialization_fn=get_init_layer_within_rage(
                limit_range=(-3e-4, 3e-4)),
            dropout=DROPOUT,
        )

    def make_critic():
        # The featurizer input is STATE_SIZE * 2 + ACTION_SIZE wide; the
        # output module input is the 256 features plus ACTION_SIZE more.
        featurizer = MLP(
            layer_sizes=(STATE_SIZE * 2 + ACTION_SIZE, 256),
            with_batchnorm=BATCHNORM,
            dropout=DROPOUT,
            seed=SEED,
            output_function=torch.nn.ReLU(),
        )
        head = MLP(
            layer_sizes=(256 + ACTION_SIZE, 128, 1),
            with_batchnorm=BATCHNORM,
            dropout=DROPOUT,
            seed=SEED,
            output_layer_initialization_fn=get_init_layer_within_rage(
                limit_range=(-3e-4, 3e-4)),
            activation_function=torch.nn.ReLU(True),
        )
        return MACritic(state_featurizer=featurizer, output_module=head)

    def make_actor_critic():
        return MAPPO_Actor_Critic(
            actor_model=make_actor(),
            critic_model=make_critic(),
            action_size=ACTION_SIZE,
            continuous_actions=True,
        )

    def make_optimizer(params):
        return torch.optim.AdamW(
            params, lr=LR, weight_decay=WEIGHT_DECAY, eps=EPSILON)

    tennis_agents = []
    for agent_index in range(2):
        # Fresh slice maps and schedulers are created for every agent.
        state_slices = {
            "TennisBrain_0": lambda t: t[:, 0:24],
            "TennisBrain_1": lambda t: t[:, 24:48]
        }
        action_slices = {
            "TennisBrain_0": lambda t: t[:, 0:2],
            "TennisBrain_1": lambda t: t[:, 2:4]
        }
        # beta is held constant at 0.01.
        beta_schedule = ParameterScheduler(
            initial=0.01, lambda_fn=lambda i: 0.01, final=0.01)
        # The std scale decays geometrically from 0.8, with 0.2 given as the
        # final value.
        std_scale_schedule = ParameterScheduler(
            initial=0.8, lambda_fn=lambda i: 0.8 * 0.999**i, final=0.2)

        tennis_agents.append(
            MAPPOAgent(
                agent_id="TennisBrain_{}".format(agent_index),
                state_size=STATE_SIZE,
                action_size=ACTION_SIZE,
                map_agent_to_state_slice=state_slices,
                map_agent_to_action_slice=action_slices,
                actor_critic_factory=make_actor_critic,
                optimizer_factory=make_optimizer,
                continuous_action_range_clip=(-1, 1),
                batch_size=256,
                min_batches_for_training=16,
                num_learning_updates=10,
                beta_scheduler=beta_schedule,
                std_scale_scheduler=std_scale_schedule,
                seed=SEED,
            ))

    tennis_brain = Brain(
        brain_name="TennisBrain",
        action_size=ACTION_SIZE,
        state_shape=STATE_SIZE,
        observation_type='vector',
        agents=tennis_agents,
    )
    return BrainSet(brains=[tennis_brain])
示例#8
0
def get_solution_brain_set():
    """Build the visual-observation DQN solution and its configuration.

    Starts from a deep copy of ``default_cfg``, applies the solution's
    hyper-parameter overrides, prints the resulting configuration as JSON,
    then wires a CNN featurizer, ``VisualDQN`` model, Adam optimizer, memory
    and agent into a single ``Brain``. The model is printed as well.

    Returns:
        A ``(brain_set, params)`` tuple: the ``BrainSet`` holding the one
        brain, and the parameter dict that was used to build it.
    """
    # Define the solution hyper parameters
    params = deepcopy(default_cfg)
    params.update({
        "INITIAL_LR": 5e-4,
        "NUM_STACKED_FRAMES": 4,
        "OUTPUT_HIDDEN_DROPOUT": 0.1,
        "DUELING": True,
        "NOISY": True,
        "BATCH_SIZE": 64,
        "N_FILTERS": (64, 128, 128),
        "EPS_DECAY_FACTOR": 0.995,
        "KERNEL_SIZES": [(1, 8, 8), (1, 4, 4), (4, 3, 3)],
        "STRIDE_SIZES": [(1, 4, 4), (1, 2, 2), (1, 3, 3)],
        "OUTPUT_FC_HIDDEN_SIZES": (1024, ),
        "WARMUP_STEPS": 10000,
    })

    params_json = json.dumps(params, indent=2)
    print("Params are: {}".format(params_json))

    policy = get_policy(ACTION_SIZE, params)

    # The CNN is given the state shape without its leading dimension.
    cnn_featurizer = CNN(
        image_shape=VISUAL_STATE_SHAPE[1:],
        num_stacked_frames=params["NUM_STACKED_FRAMES"],
        grayscale=params["GRAYSCALE"],
        nfilters=params["N_FILTERS"],
        kernel_sizes=params["KERNEL_SIZES"],
        stride_sizes=params["STRIDE_SIZES"],
    )

    model = VisualDQN(
        VISUAL_STATE_SHAPE,
        ACTION_SIZE,
        cnn_featurizer,
        cnn_featurizer.output_size,
        seed=SEED,
        grayscale=params["GRAYSCALE"],
        num_stacked_frames=params["NUM_STACKED_FRAMES"],
        output_hidden_layer_size=params["OUTPUT_FC_HIDDEN_SIZES"],
        OUTPUT_HIDDEN_DROPOUT=params["OUTPUT_HIDDEN_DROPOUT"],
        dueling_output=params["DUELING"],
        noisy_output=params['NOISY'],
        categorical_output=params['CATEGORICAL'],
    )
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=params['INITIAL_LR'])
    memory = get_memory(VISUAL_STATE_SHAPE, params)
    solution_agent = get_agent(
        VISUAL_STATE_SHAPE, ACTION_SIZE, model, policy, memory, optimizer,
        params)

    banana_brain = Brain(
        brain_name=BRAIN_NAME,
        action_size=ACTION_SIZE,
        state_shape=VISUAL_STATE_SHAPE,
        observation_type='visual',
        agents=[solution_agent],
        preprocess_state_fn=get_preprocess_state_fn(params),
    )
    return BrainSet(brains=[banana_brain]), params
示例#9
0
def get_solution_brain_set():
    """Build the ``BrainSet`` holding two MADDPG agents for the tennis brain.

    Two ``MADDPGAgent`` instances are created with identical settings and
    placed in one vector-observation ``Brain``. Once the ``BrainSet``
    exists, each agent's ``policy`` is replaced by an
    ``IndependentMADDPGPolicy`` that is given the brain set and the
    per-agent state/action slice maps.

    Returns:
        The ``BrainSet`` containing the tennis brain.
    """
    # NOTE(review): these two modules are constructed once, and the same
    # instances are passed to every MACritic the critic factory produces.
    # Unless MACritic copies them, every critic would share weights —
    # confirm this is intended.
    state_featurizer = MLP(
        layer_sizes=(STATE_SIZE * 2 + ACTION_SIZE, 400),
        with_batchnorm=BATCHNORM,
        activation_function=torch.nn.ReLU(True),
    )
    output_module = MLP(
        layer_sizes=(400 + ACTION_SIZE, 300, 1),
        with_batchnorm=BATCHNORM,
        activation_function=torch.nn.ReLU(True),
        output_layer_initialization_fn=get_init_layer_within_rage(
            limit_range=(-3e-4, 3e-4)),
    )

    def memory_factory():
        # alpha is lowered linearly from 0.6 (final value 0); beta is raised
        # linearly from 0.4 (final value 1).
        return PrioritizedMemory(
            capacity=BUFFER_SIZE,
            state_shape=(1, STATE_SIZE),
            alpha_scheduler=ParameterScheduler(
                initial=0.6,
                lambda_fn=lambda i: 0.6 - 0.6 * i / NUM_EPISODES,
                final=0.),
            beta_scheduler=ParameterScheduler(
                initial=0.4,
                final=1,
                lambda_fn=lambda i: 0.4 + 0.6 * i / NUM_EPISODES),
            seed=SEED,
            continuous_actions=True,
            min_priority=1e-4,
        )

    if MATD3:

        def critic_factory():
            return MATD3Critic(
                critic_model_factory=lambda: MACritic(
                    state_featurizer=state_featurizer,
                    output_module=output_module,
                    seed=SEED,
                ),
                seed=SEED)
    else:

        def critic_factory():
            return MACritic(
                state_featurizer=state_featurizer,
                output_module=output_module,
            )

    def actor_factory():
        return MLP(
            layer_sizes=(STATE_SIZE, 400, 300, ACTION_SIZE),
            with_batchnorm=BATCHNORM,
            dropout=DROPOUT,
            output_function=BoundVectorNorm(),
            output_layer_initialization_fn=init_layer_within_range,
            hidden_layer_initialization_fn=init_layer_inverse_root_fan_in,
            seed=SEED,
        )

    def critic_optimizer_factory(parameters):
        return optim.Adam(parameters, lr=CRITIC_LR, weight_decay=1.e-5)

    def actor_optimizer_factory(parameters):
        return optim.Adam(parameters, lr=ACTOR_LR)

    tennis_agents = []
    for agent_index in range(2):
        tennis_agents.append(
            MADDPGAgent(
                "TennisBrain_{}".format(agent_index),
                None,
                STATE_SIZE,
                ACTION_SIZE,
                critic_factory=critic_factory,
                actor_factory=actor_factory,
                critic_optimizer_factory=critic_optimizer_factory,
                actor_optimizer_factory=actor_optimizer_factory,
                memory_factory=memory_factory,
                seed=0,
                batch_size=BATCH_SIZE,
                homogeneous_agents=False,
            ))

    tennis_brain = Brain(
        brain_name=BRAIN_NAME,
        action_size=ACTION_SIZE,
        state_shape=STATE_SIZE,
        observation_type='vector',
        agents=tennis_agents,
    )
    brain_set = BrainSet(brains=[tennis_brain])

    def random_brain_action_factory():
        return RandomBrainAction(
            ACTION_SIZE,
            1,
            continuous_actions=True,
            continuous_action_range=(-1, 1),
        )

    def gaussian_noise_factory():
        return GaussianNoise()

    # The policy needs the finished brain set, so it can only be attached
    # after the agents and the brain have been created.
    for agent_index, agent in enumerate(tennis_agents):
        agent.policy = IndependentMADDPGPolicy(
            brain_set=brain_set,
            agent_id="TennisBrain_{}".format(agent_index),
            action_dim=ACTION_SIZE,
            # epsilon decays geometrically from 1, with 0.01 as final value.
            epsilon_scheduler=ParameterScheduler(
                initial=1, lambda_fn=lambda i: 0.99**i, final=0.01),
            random_brain_action_factory=random_brain_action_factory,
            map_agent_to_state_slice={
                "TennisBrain_0": lambda t: t[:, 0:24],
                "TennisBrain_1": lambda t: t[:, 24:48]
            },
            map_agent_to_action_slice={
                "TennisBrain_0": lambda t: t[:, 0:2],
                "TennisBrain_1": lambda t: t[:, 2:4]
            },
            matd3=MATD3,
            gaussian_noise_factory=gaussian_noise_factory,
            continuous_actions=True,
            continuous_actions_clip_range=(-1, 1),
        )

    return brain_set
示例#10
0
def _soccer_state_slices():
    """Return a fresh map from agent id to a slicer over the joint state.

    Each slicer selects a 336-column window along the second axis; the last
    one (``StrikerBrain_1``) takes everything from column 1008 onwards.
    """
    return {
        "GoalieBrain_0": lambda t: t[:, 0:336],
        "GoalieBrain_1": lambda t: t[:, 336:672],
        "StrikerBrain_0": lambda t: t[:, 672:1008],
        "StrikerBrain_1": lambda t: t[:, 1008:]
    }


def _soccer_action_slices():
    """Return a fresh map from agent id to a slicer over the joint action.

    Each slicer selects one column along the second axis.
    """
    return {
        "GoalieBrain_0": lambda t: t[:, 0:1],
        "GoalieBrain_1": lambda t: t[:, 1:2],
        "StrikerBrain_0": lambda t: t[:, 2:3],
        "StrikerBrain_1": lambda t: t[:, 3:4]
    }


def _build_soccer_mappo_agent(role, agent_id, state_size, action_range,
                              brain_action_size, params):
    """Build one MAPPO agent for the given role ('goalie' or 'striker').

    Layer sizes are read from ``params`` under the ``<role>_...`` keys when
    the actor-critic factory is invoked.
    """

    def actor_critic_factory():
        return MAPPO_Actor_Critic(
            actor_model=MLP(
                layer_sizes=params['{}_actor_layer_size'.format(role)],
                seed=SEED,
                # An explicit dim avoids the deprecated implicit-dimension
                # path; dim=-1 is what the implicit choice resolves to for
                # 1-D and 2-D inputs.
                output_function=torch.nn.Softmax(dim=-1),
                with_batchnorm=params['batchnorm'],
                # LeakyReLU's first positional parameter is
                # ``negative_slope``, so ``LeakyReLU(True)`` yields slope 1
                # (an identity function). ``inplace`` must be a keyword.
                activation_function=torch.nn.LeakyReLU(inplace=True),
                dropout=params['actor_dropout'],
            ),
            critic_model=MACritic(
                state_featurizer=MLP(
                    layer_sizes=params[
                        '{}_critic_state_featurizer_layer_size'.format(role)],
                    with_batchnorm=params['batchnorm'],
                    dropout=params['critic_dropout'],
                    seed=SEED,
                ),
                output_module=MLP(
                    layer_sizes=params[
                        '{}_critic_output_layer_size'.format(role)],
                    with_batchnorm=params['batchnorm'],
                    dropout=params['critic_dropout'],
                    seed=SEED,
                ),
            ),
            action_size=brain_action_size,
            continuous_actions=False,
            seed=SEED,
        )

    return MAPPOAgent(
        agent_id=agent_id,
        state_size=state_size,
        # The agent's action size is the number of discrete choices.
        action_size=len(range(*action_range)),
        seed=SEED,
        map_agent_to_state_slice=_soccer_state_slices(),
        map_agent_to_action_slice=_soccer_action_slices(),
        actor_critic_factory=actor_critic_factory,
        min_batches_for_training=params['minimum_training_batches'],
        num_learning_updates=params['num_ppo_epochs'],
        optimizer_factory=lambda model_params: torch.optim.AdamW(
            model_params,
            lr=params['lr'],
            weight_decay=params['weight_decay'],
            eps=params['eps'],
        ),
        continuous_actions=False,
        batch_size=params['batch_size'],
        beta_scheduler=ParameterScheduler(
            initial=0.01, lambda_fn=lambda i: 0.01, final=0.01),
        std_scale_scheduler=ParameterScheduler(
            initial=0.8, lambda_fn=lambda i: 0.8 * 0.999 ** i, final=0.2),
    )


def get_solution_brain_set():
    """Build the two-brain (goalie + striker) ``BrainSet`` for soccer.

    For each brain, the agent with index 1 is a ``DummyMADDPGAgent``; every
    other index gets a MAPPO agent whose actor-critic is printed after
    construction.

    Returns:
        A ``BrainSet`` containing the goalie brain followed by the striker
        brain.
    """
    params = {
        'striker_actor_layer_size': (STRIKER_STATE_SIZE, 256, 256, len(range(*STRIKER_ACTION_DISCRETE_RANGE))),
        'goalie_actor_layer_size': (GOALIE_STATE_SIZE, 256, 256, len(range(*GOALIE_ACTION_DISCRETE_RANGE))),
        'striker_critic_state_featurizer_layer_size': (336*4 + 3, 256),
        'striker_critic_output_layer_size': (256 + 1, 256, 1),
        'goalie_critic_state_featurizer_layer_size': (336 * 4 + 3, 256),
        'goalie_critic_output_layer_size': (256 + 1, 256, 1),
        'batchnorm': True,
        'actor_dropout': 0.1,
        'critic_dropout': 0.2,
        'lr': 5e-3,
        'weight_decay': 1e-4,
        'eps': 1e-6,
        'num_ppo_epochs': 4,
        'minimum_training_batches': 32,
        'batch_size': 1024
    }

    goalie_agents = []
    for agent_num in range(NUM_GOALIE_AGENTS):
        if agent_num == 1:
            goalie_agent = DummyMADDPGAgent(
                GOALIE_STATE_SIZE,
                len(range(*GOALIE_ACTION_DISCRETE_RANGE)),
                seed=SEED,
                map_agent_to_state_slice=_soccer_state_slices(),
                map_agent_to_action_slice=_soccer_action_slices(),
            )
        else:
            goalie_agent = _build_soccer_mappo_agent(
                role='goalie',
                agent_id='GoalieBrain_{}'.format(agent_num),
                state_size=GOALIE_STATE_SIZE,
                action_range=GOALIE_ACTION_DISCRETE_RANGE,
                brain_action_size=GOALIE_ACTION_SIZE,
                params=params,
            )
            print("Goalie is: {}".format(goalie_agent.online_actor_critic))
        goalie_agents.append(goalie_agent)

    striker_agents = []
    for agent_num in range(NUM_STRIKER_AGENTS):
        if agent_num == 1:
            # SEED is passed positionally here, exactly as before.
            striker_agent = DummyMADDPGAgent(
                STRIKER_STATE_SIZE,
                len(range(*STRIKER_ACTION_DISCRETE_RANGE)),
                SEED,
                map_agent_to_state_slice=_soccer_state_slices(),
                map_agent_to_action_slice=_soccer_action_slices(),
            )
        else:
            striker_agent = _build_soccer_mappo_agent(
                role='striker',
                agent_id='StrikerBrain_{}'.format(agent_num),
                state_size=STRIKER_STATE_SIZE,
                action_range=STRIKER_ACTION_DISCRETE_RANGE,
                brain_action_size=STRIKER_ACTION_SIZE,
                params=params,
            )
            print("Striker is: {}".format(striker_agent.online_actor_critic))
        striker_agents.append(striker_agent)

    goalie_brain = Brain(
        brain_name=GOALIE_BRAIN_NAME,
        action_size=GOALIE_ACTION_SIZE,
        state_shape=GOALIE_STATE_SIZE,
        observation_type='vector',
        agents=goalie_agents,
    )

    striker_brain = Brain(
        brain_name=STRIKER_BRAIN_NAME,
        action_size=STRIKER_ACTION_SIZE,
        state_shape=STRIKER_STATE_SIZE,
        observation_type='vector',
        agents=striker_agents,
    )

    brain_set = BrainSet(brains=[goalie_brain, striker_brain])
    return brain_set