Example #1
    # Assumed imports from the enclosing module (not shown in the snippet):
    #   import torch.nn as nn
    #   from flare.env_zoo.gym_env import GymEnv
    #   GaussianPolicyModel from FLARE's model zoo (exact path assumed)
    def make_model(self, args):
        # Build a Gaussian policy whose perception net is a four-layer MLP.
        env = GymEnv(args.game)
        num_dims = env.observation_dims()["sensor"]
        action_dims = env.action_dims()["action"]

        print("Input dims: {}".format(num_dims))
        print("Action dims: {}".format(action_dims))
        print("Action space: {}".format(env.action_range()["action"]))

        hidden_size = 256
        mlp = nn.Sequential(nn.Linear(num_dims[0], hidden_size), nn.ReLU(),
                            nn.Linear(hidden_size, hidden_size), nn.ReLU(),
                            nn.Linear(hidden_size, hidden_size), nn.ReLU(),
                            nn.Linear(hidden_size, hidden_size), nn.ReLU())
        model = GaussianPolicyModel(num_dims, action_dims, perception_net=mlp)
        return model
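
For intuition, the following is a minimal, self-contained sketch of what a Gaussian policy head on top of such a perception MLP typically computes. It is generic PyTorch, not GaussianPolicyModel's actual internals, and the class name GaussianHead is hypothetical:

import torch
import torch.nn as nn

class GaussianHead(nn.Module):
    """Map perception features to a diagonal Gaussian over actions (illustrative)."""

    def __init__(self, hidden_size, action_dims):
        super(GaussianHead, self).__init__()
        self.mean = nn.Linear(hidden_size, action_dims)
        self.log_std = nn.Parameter(torch.zeros(action_dims))

    def forward(self, feats):
        mean = self.mean(feats)
        std = self.log_std.exp().expand_as(mean)
        dist = torch.distributions.Normal(mean, std)
        action = dist.sample()
        # Sum log-probs over action dimensions to get one value per sample.
        return action, dist.log_prob(action).sum(-1)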
Example #2
import torch.nn as nn  # needed for the MLP below; missing from the snippet

from flare.agent_zoo.simple_rl_agents import SimpleRNNRLAgent
from flare.framework.agent import OnlineHelper
from flare.env_zoo.gym_env import GymEnv

if __name__ == '__main__':
    """
    A demo of how to run a simple RL experiment
    """
    game = "CartPole-v0"

    num_agents = 16
    num_games = 8000

    env = GymEnv(game)
    state_shape = env.observation_dims()["sensor"]
    num_actions = env.action_dims()["action"]

    # 1. Spawn one agent for each instance of the environment.
    #    An agent's behavior depends on the algorithm being used. Since we
    #    are using an RNN-based actor-critic, the proper agent type is
    #    SimpleRNNRLAgent.
    reward_shaping_f = lambda x: x / 100.0  # scale rewards to stabilize training
    agents = []
    for _ in range(num_agents):
        agent = SimpleRNNRLAgent(num_games, reward_shaping_f=reward_shaping_f)
        agent.set_env(GymEnv, game_name=game)
        agents.append(agent)

    # 2. Construct the network and specify the algorithm.
    #    Here we use a small MLP and apply the Actor-Critic algorithm
    hidden_size = 128
    # The source snippet is truncated here; the closing layers below are an
    # assumed completion following the MLP pattern in Example #1:
    mlp = nn.Sequential(nn.Linear(state_shape[0], hidden_size), nn.ReLU(),
                        nn.Linear(hidden_size, hidden_size), nn.ReLU())
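
The Actor-Critic update referred to above, written out as a generic PyTorch loss. This is a sketch of the standard advantage actor-critic objective, not FLARE's SimpleAC implementation, and all names are illustrative:

import torch

def actor_critic_loss(log_probs, values, returns):
    """Standard advantage actor-critic loss (illustrative).

    log_probs: log pi(a_t | s_t) of the actions taken, shape [T]
    values:    critic estimates V(s_t), shape [T]
    returns:   (bootstrapped) empirical returns R_t, shape [T]
    """
    advantages = returns - values.detach()  # keep critic gradients out of the actor term
    policy_loss = -(log_probs * advantages).mean()
    value_loss = (returns - values).pow(2).mean()
    return policy_loss + 0.5 * value_loss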
Example #3
    # (This snippet begins mid-class: ActionNoiseAgent wraps an agent with
    # exploration noise. GymEnv, OUNoise, DDPG and ContinuousDeterministicModel
    # are imported from FLARE elsewhere in the original module; the exact
    # import paths are not shown here.)
    def _reset_env(self):
        # Reset the exploration noise at the start of every episode.
        self.action_noise.reset()
        return super(ActionNoiseAgent, self)._reset_env()


if __name__ == '__main__':
    """
    A demo of how to run a simple RL experiment
    """
    game = "Pendulum-v0"

    num_agents = 1
    num_games = 10000

    env = GymEnv(game)
    state_shape = env.observation_dims()["sensor"]
    action_dims = env.action_dims()["action"]

    # 1. Spawn one agent for each instance of the environment.
    #    An agent's behavior depends on the algorithm being used. Since we
    #    are using DDPG, the proper agent type is ActionNoiseAgent, which adds
    #    exploration noise to the deterministic policy's actions.
    agents = []
    for _ in range(num_agents):
        agent = ActionNoiseAgent(num_games, OUNoise(action_dims))
        agent.set_env(GymEnv, game)
        agents.append(agent)
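    # OUNoise above is an Ornstein-Uhlenbeck process that produces temporally
    # correlated exploration noise. A minimal sketch of such a process
    # (illustrative; FLARE's OUNoise may differ in parameters and API,
    # and this sketch assumes numpy as np):
    #
    #   class OUNoiseSketch(object):
    #       def __init__(self, dims, mu=0.0, theta=0.15, sigma=0.2):
    #           self.mu, self.theta, self.sigma = mu, theta, sigma
    #           self.state = np.ones(dims) * mu
    #
    #       def reset(self):
    #           self.state[:] = self.mu
    #
    #       def noise(self):
    #           dx = self.theta * (self.mu - self.state) \
    #                + self.sigma * np.random.randn(*self.state.shape)
    #           self.state += dx
    #           return self.state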

    # 2. Specify the algorithm: DDPG over a deterministic continuous-action
    #    model. The source snippet is truncated after update_weight; the
    #    closing parenthesis is an assumed completion, and further arguments
    #    may follow in the original.
    alg = DDPG(
        model=ContinuousDeterministicModel(input_dims=state_shape[0],
                                           action_dims=action_dims),
        update_ref_interval=1,
        update_weight=0.001)
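
By its name and standard DDPG conventions, update_weight plays the role of the soft-update coefficient (tau) for the target networks, while update_ref_interval controls how often they are refreshed. A generic sketch of that Polyak soft update, illustrative rather than FLARE's actual implementation:

import torch

def soft_update(target_net, source_net, tau=0.001):
    """Polyak-average source parameters into the target network (illustrative)."""
    with torch.no_grad():
        for t_param, s_param in zip(target_net.parameters(),
                                    source_net.parameters()):
            t_param.mul_(1.0 - tau).add_(tau * s_param)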