def init_v_agent(state_shape, action_shape, action_max, batch_size):
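    # NAF components: mu = mean-action network, P = advantage-matrix entries, V = state value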
    mu_model = Seq_Network([state_shape, 16, 16, action_shape], nn.ReLU(), nn.Tanh())
    p_model = Seq_Network([state_shape, 16, 16, action_shape ** 2], nn.ReLU())
    v_model = Seq_Network([state_shape, 16, 16, 1], nn.ReLU())
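    # Ornstein-Uhlenbeck exploration noise with a linearly decaying threshold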
    noise = OUNoise(1, threshold=action_max, threshold_min=0.001, threshold_decrease=0.001 * action_max)
    agent = NAFAgent(mu_model, p_model, v_model, noise, state_shape, action_shape, action_max, batch_size, gamma=1)
    return agent
Example #2
def init_agent(state_shape, action_shape, u_max, v_max, batch_size):
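    # Two NAF sub-agents (one per control) plus a centralized state-value network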
    u_model = init_u_agent(state_shape, action_shape, u_max)
    v_model = init_v_agent(state_shape, action_shape, v_max)
    v_network = Seq_Network([state_shape, 16, 16, 1], nn.ReLU())
    noise = OUNoise(1, threshold=1, threshold_min=0.002, threshold_decrease=0.002)
    agent = models.centrilized_naf.CentralizedNafAgent(u_model, v_model, v_network, noise, state_shape, action_shape,
                                                       u_max, v_max,
                                                       batch_size)
    return agent
Example #3
def init_agent(state_shape, action_shape, u_max, v_max, batch_size):
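    # Same centralized setup, with a wider value network and a double NAF agent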
    u_model = init_u_agent(state_shape, action_shape, u_max)
    v_model = init_v_agent(state_shape, action_shape, v_max)
    v_network = Seq_Network([state_shape, 50, 50, 1], nn.ReLU())
    noise = OUNoise(2,
                    threshold=1,
                    threshold_min=0.002,
                    threshold_decrease=0.003)
    agent = CentralizedDoubleNafAgent(u_model, v_model, v_network, noise,
                                      state_shape, action_shape, u_max, v_max,
                                      batch_size)
    return agent
Example #4
def init_u_agent(state_shape, action_shape, action_max, batch_size):
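    # mu, P and V networks for the u-control double NAF agent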
    mu_model = Seq_Network([state_shape, 32, 32, action_shape], nn.ReLU(),
                           nn.Tanh())
    p_model = Seq_Network([state_shape, 32, 32, action_shape**2], nn.ReLU())
    v_model = Seq_Network([state_shape, 32, 32, 1], nn.ReLU())
    noise = OUNoise(1,
                    threshold=1,
                    threshold_min=0.001,
                    threshold_decrease=0.003)
    agent = DoubleNAFAgent(mu_model,
                           p_model,
                           v_model,
                           noise,
                           state_shape,
                           action_shape,
                           action_max,
                           batch_size,
                           gamma=0.999)
    return agent
import torch.nn as nn

from problems.nonlinear_problem.optimal_agent import OptimalAgent
from utilities.noises import OUNoise
from utilities.sequentialNetwork import Seq_Network
# NonlinearProblem and SimpleNaf are defined elsewhere in this project

env = NonlinearProblem()
state_shape = 2
action_shape = 1
episodes_n = 250

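# Three-hidden-layer networks for the NAF components: mean action (mu),
# advantage-matrix entries (P), and state value (V)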
mu_model = Seq_Network([state_shape, 100, 100, 100, action_shape],
                       nn.Sigmoid())
p_model = Seq_Network([state_shape, 100, 100, 100, action_shape**2],
                      nn.Sigmoid())
v_model = Seq_Network([state_shape, 100, 100, 100, 1], nn.Sigmoid())
noise = OUNoise(action_shape,
                threshold=1,
                threshold_min=0.001,
                threshold_decrease=0.004)
batch_size = 200
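# Note: p_model is built above but not passed to SimpleNaf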
agent = SimpleNaf(mu_model, v_model, noise, state_shape, action_shape,
                  batch_size, 1)


def play_and_learn(env):
    total_reward = 0
    state = env.reset()
    done = False
    step = 0
    while not done:
        action = agent.get_action(state)
        next_state, reward, done, _ = env.step(action)
        total_reward += reward