def init_v_agent(state_shape, action_shape, action_max, batch_size):
    # NAF decomposition networks: policy mean mu(s), advantage-matrix entries P(s) and state value V(s).
    mu_model = Seq_Network([state_shape, 16, 16, action_shape], nn.ReLU(), nn.Tanh())
    p_model = Seq_Network([state_shape, 16, 16, action_shape ** 2], nn.ReLU())
    v_model = Seq_Network([state_shape, 16, 16, 1], nn.ReLU())
    # Ornstein-Uhlenbeck exploration noise with a decaying threshold.
    noise = OUNoise(1, threshold=action_max, threshold_min=0.001, threshold_decrease=0.001 * action_max)
    agent = NAFAgent(mu_model, p_model, v_model, noise, state_shape, action_shape, action_max,
                     batch_size, gamma=1)
    return agent
def init_agent(state_shape, action_shape, u_max, v_max, batch_size):
    # Build the per-player NAF agents and a shared state-value network,
    # then wrap them in a centralized NAF agent.
    u_model = init_u_agent(state_shape, action_shape, u_max)
    v_model = init_v_agent(state_shape, action_shape, v_max)
    v_network = Seq_Network([state_shape, 16, 16, 1], nn.ReLU())
    noise = OUNoise(1, threshold=1, threshold_min=0.002, threshold_decrease=0.002)
    agent = models.centrilized_naf.CentralizedNafAgent(u_model, v_model, v_network, noise, state_shape,
                                                       action_shape, u_max, v_max, batch_size)
    return agent
def init_agent(state_shape, action_shape, u_max, v_max, batch_size):
    # Centralized double-NAF variant with a wider shared state-value network.
    u_model = init_u_agent(state_shape, action_shape, u_max)
    v_model = init_v_agent(state_shape, action_shape, v_max)
    v_network = Seq_Network([state_shape, 50, 50, 1], nn.ReLU())
    noise = OUNoise(2, threshold=1, threshold_min=0.002, threshold_decrease=0.003)
    agent = CentralizedDoubleNafAgent(u_model, v_model, v_network, noise, state_shape, action_shape,
                                      u_max, v_max, batch_size)
    return agent
def init_u_agent(state_shape, action_shape, action_max, batch_size):
    # Double-NAF agent for the u action; discounted updates with gamma = 0.999.
    mu_model = Seq_Network([state_shape, 32, 32, action_shape], nn.ReLU(), nn.Tanh())
    p_model = Seq_Network([state_shape, 32, 32, action_shape ** 2], nn.ReLU())
    v_model = Seq_Network([state_shape, 32, 32, 1], nn.ReLU())
    noise = OUNoise(1, threshold=1, threshold_min=0.001, threshold_decrease=0.003)
    agent = DoubleNAFAgent(mu_model, p_model, v_model, noise, state_shape, action_shape, action_max,
                           batch_size, gamma=0.999)
    return agent
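# Usage sketch (illustrative only; the state/action shapes, control bounds and batch size below
# are assumptions, not values taken from the functions above):
#
#     agent = init_agent(state_shape=2, action_shape=1, u_max=1.0, v_max=0.5, batch_size=128)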
import torch.nn as nn

from problems.nonlinear_problem.optimal_agent import OptimalAgent
from utilities.noises import OUNoise
from utilities.sequentialNetwork import Seq_Network
# The import paths for NonlinearProblem and SimpleNaf are assumed; adjust them to the actual repo layout.
from problems.nonlinear_problem.nonlinear_problem import NonlinearProblem
from models.simple_naf import SimpleNaf

env = NonlinearProblem()
state_shape = 2
action_shape = 1
episodes_n = 250

mu_model = Seq_Network([state_shape, 100, 100, 100, action_shape], nn.Sigmoid())
p_model = Seq_Network([state_shape, 100, 100, 100, action_shape ** 2], nn.Sigmoid())
v_model = Seq_Network([state_shape, 100, 100, 100, 1], nn.Sigmoid())
noise = OUNoise(action_shape, threshold=1, threshold_min=0.001, threshold_decrease=0.004)
batch_size = 200
agent = SimpleNaf(mu_model, v_model, noise, state_shape, action_shape, batch_size, 1)


def play_and_learn(env):
    # Roll out one episode, letting the agent act until the environment signals done.
    total_reward = 0
    state = env.reset()
    done = False
    step = 0
    while not done:
        action = agent.get_action(state)
        next_state, reward, done, _ = env.step(action)
        total_reward += reward
        state = next_state
        step += 1
    return total_reward
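
# A minimal training-loop sketch (not part of the original script): it assumes play_and_learn
# returns the accumulated episode reward and simply repeats it for episodes_n episodes.
rewards = []
for episode in range(episodes_n):
    total_reward = play_and_learn(env)
    rewards.append(total_reward)
    print('episode %d: total reward %.3f' % (episode, total_reward))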