config.hyperparameters = {
    "Policy_Gradient_Agents": {
        "learning_rate": 0.02,
        "nn_layers": 2,
        "nn_start_units": 20,
        "nn_unit_decay": 1.0,
        "final_layer_activation": "TANH",
        "learning_iterations_per_round": 10,
        "discount_rate": 0.99,
        "batch_norm": False,
        "clip_epsilon": 0.2,
        "episodes_per_learning_round": 7,
        "normalise_rewards": True,
        "gradient_clipping_norm": 5,
        "mu": 0.0,
        "theta": 0.15,
        "sigma": 0.25,
        "noise_decay_denominator": 1
    },
    "Actor_Critic_Agents": {
        "Actor": {
            "learning_rate": 0.001,
            "nn_layers": 2,
            "nn_start_units": 20,
            "nn_unit_decay": 1.0,
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.001,
            "gradient_clipping_norm": 1
        },
        "Critic": {
            "learning_rate": 0.01,
            "nn_layers": 2,
            "nn_start_units": 20,
            "nn_unit_decay": 1.0,
            "final_layer_activation": None,
            "batch_norm": False,
            "buffer_size": 100000,
            "tau": 0.001,
            "gradient_clipping_norm": 1
        },
        "batch_size": 256,
        "discount_rate": 0.99,
        "mu": 0.0,
        "theta": 0.15,
        "sigma": 0.25,  #0.22 did well before
        "noise_decay_denominator": 5,
        "update_every_n_steps": 10,
        "learning_updates_per_learning_session": 5
    }
}
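The mu, theta and sigma keys above parameterise Ornstein-Uhlenbeck exploration noise for continuous-action agents, with noise_decay_denominator controlling how quickly that noise is damped across episodes. Below is a minimal sketch of the standard OU process these values drive; the repo's own noise class may differ in detail.

import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process: temporally correlated noise added to
    continuous actions during exploration (sketch, not the repo's class)."""
    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.25):
        self.mu = mu * np.ones(size)   # long-run mean the noise reverts to
        self.theta = theta             # mean-reversion speed
        self.sigma = sigma             # scale of the random perturbation
        self.reset()

    def reset(self):
        self.state = np.copy(self.mu)

    def sample(self):
        dx = self.theta * (self.mu - self.state) \
             + self.sigma * np.random.standard_normal(self.state.shape)
        self.state = self.state + dx
        return self.state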
Example #2
config.hyperparameters = {
    "Actor_Critic_Agents": {
        "Actor": {
            "learning_rate": 0.001,
            "nn_layers": 5,
            "nn_start_units": 50,
            "nn_unit_decay": 1.0,
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.01,
            "gradient_clipping_norm": 5
        },
        "Critic": {
            "learning_rate": 0.01,
            "nn_layers": 6,
            "nn_start_units": 50,
            "nn_unit_decay": 1.0,
            "final_layer_activation": None,
            "batch_norm": False,
            "buffer_size": 30000,
            "tau": 0.01,
            "gradient_clipping_norm": 5
        },
        "batch_size": 256,
        "discount_rate": 0.9,
        "mu": 0.0,
        "theta": 0.15,
        "sigma": 0.25,  # 0.22 did well before
        "update_every_n_steps": 10,
        "learning_updates_per_learning_session": 10
    }
}
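Note that the next example specifies network shapes directly with linear_hidden_units instead of the (nn_layers, nn_start_units, nn_unit_decay) triple used so far. Assuming the triple is shorthand for a stack whose layer widths decay geometrically (an assumption, not confirmed by the source), the expansion would look like this hypothetical helper:

def expand_hidden_units(nn_layers, nn_start_units, nn_unit_decay):
    # Hypothetical helper: layer i gets nn_start_units * nn_unit_decay**i units.
    return [max(1, round(nn_start_units * nn_unit_decay ** i))
            for i in range(nn_layers)]

# With decay 1.0 every layer keeps the starting width:
# expand_hidden_units(2, 50, 1.0) -> [50, 50]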
Example #3
config.hyperparameters = {
    "Actor_Critic_Agents": {
        "Actor": {
            "learning_rate": 0.001,
            "linear_hidden_units": [50, 50],
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.01,
            "gradient_clipping_norm": 5
        },
        "Critic": {
            "learning_rate": 0.01,
            "linear_hidden_units": [50, 50, 50],
            "final_layer_activation": None,
            "batch_norm": False,
            "buffer_size": 30000,
            "tau": 0.01,
            "gradient_clipping_norm": 5
        },
        "batch_size": 256,
        "discount_rate": 0.9,
        "mu": 0.0,
        "theta": 0.15,
        "sigma": 0.25,
        "update_every_n_steps": 10,
        "learning_updates_per_learning_session": 10,
        "HER_sample_proportion": 0.8
    }
}
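HER_sample_proportion sets the share of each training batch drawn from hindsight-relabelled transitions (Hindsight Experience Replay, Andrychowicz et al., 2017). Assuming the proportion is applied per sampled batch, the values above split as follows; the variable names are illustrative, not the repo's:

batch_size = 256
her_proportion = 0.8
her_transitions = int(batch_size * her_proportion)  # 204 from the HER buffer
normal_transitions = batch_size - her_transitions   # 52 from the ordinary replay buffer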
Example #4
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 256,
        "buffer_size": 40000,
        "epsilon": 0.1,
        "epsilon_decay_rate_denominator": 200,
        "discount_rate": 0.99,
        "tau": 0.1,
        "alpha_prioritised_replay": 0.6,
        "beta_prioritised_replay": 0.4,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 1,
        "nn_layers": 3,
        "nn_start_units": 20,
        "nn_unit_decay": 1.0,
        "final_layer_activation": None,
        "batch_norm": False,
        "gradient_clipping_norm": 5
    },
    "Stochastic_Policy_Search_Agents": {
        "policy_network_type": "Linear",
        "noise_scale_start": 1e-2,
        "noise_scale_min": 1e-3,
        "noise_scale_max": 2.0,
        "noise_scale_growth_factor": 2.0,
        "stochastic_action_decision": False,
        "num_policies": 10,
        "episodes_per_policy": 1,
        "num_policies_to_keep": 5
    },
    "Policy_Gradient_Agents": {
        "learning_rate": 0.05,
        "nn_layers": 2,
        "nn_start_units": 20,
        "nn_unit_decay": 1.0,
        "final_layer_activation": "SOFTMAX",
        "learning_iterations_per_round": 10,
        "discount_rate": 0.99,
        "batch_norm": False,
        "clip_epsilon": 0.1,
        "episodes_per_learning_round": 7,
        "normalise_rewards": True,
        "gradient_clipping_norm": 5,
        "mu": 0.0, #only required for continuous action games
        "theta": 0.0, #only required for continuous action games
        "sigma": 0.0, #only required for continuous action games
        "noise_decay_denominator": 1 #only required for continuous action games
    }
}
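In the DQN block, alpha_prioritised_replay, beta_prioritised_replay and incremental_td_error correspond to the alpha, beta and epsilon of proportional prioritised experience replay (Schaul et al., 2015). A self-contained sketch of that sampling rule, not the repo's exact implementation:

import numpy as np

def prioritised_sample(td_errors, batch_size, alpha=0.6, beta=0.4, eps=1e-8):
    # P(i) is proportional to (|TD error| + eps)**alpha; eps keeps
    # zero-error transitions sampleable.
    priorities = (np.abs(td_errors) + eps) ** alpha
    probs = priorities / priorities.sum()
    idx = np.random.choice(len(td_errors), size=batch_size, p=probs)
    # Importance-sampling weights correct the sampling bias; beta is
    # typically annealed towards 1 over training.
    weights = (len(td_errors) * probs[idx]) ** (-beta)
    return idx, weights / weights.max()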