def __init__(self, state_size, action_size, random_seed, memory, hyper_param=None):
    """Initialize an Agent object that shares an external replay buffer.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        random_seed (int): random seed
        memory: externally supplied replay buffer (shared across agents)
        hyper_param (HyperParam): optional hyperparameter bundle; a default
            configuration is built when omitted
    """
    # Default hyperparameters are applied only when the caller did not
    # supply a bundle, so caller-provided values are never overwritten.
    if hyper_param is None:
        hyper_param = HyperParam()
        hyper_param.actor_fc1 = 128
        hyper_param.actor_fc2 = 128
        hyper_param.critic_fc1 = 128
        hyper_param.critic_fc2 = 128
        hyper_param.lr_actor = LR_ACTOR
        hyper_param.lr_critic = LR_CRITIC
        hyper_param.tau = TAU

    self.hyper_param = hyper_param
    self.state_size = state_size
    self.action_size = action_size
    # BUG FIX: random.seed() returns None, so the original
    # `self.seed = random.seed(random_seed)` always stored None.
    # Seed the global RNG and record the actual seed value.
    random.seed(random_seed)
    self.seed = random_seed

    # Actor Network (w/ Target Network)
    self.actor_local = Actor(state_size, action_size, random_seed,
                             hyper_param.actor_fc1, hyper_param.actor_fc2).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed,
                              hyper_param.actor_fc1, hyper_param.actor_fc2).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                      lr=hyper_param.lr_actor)

    # Critic Network (w/ Target Network)
    self.critic_local = Critic(state_size, action_size, random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    # BUG FIX: previously used the module-level LR_CRITIC constant here,
    # silently ignoring hyper_param.lr_critic when a custom bundle was
    # supplied (the actor optimizer already honored hyper_param.lr_actor).
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                       lr=hyper_param.lr_critic,
                                       weight_decay=WEIGHT_DECAY)

    # Ornstein-Uhlenbeck noise process for exploration.
    self.noise = OUNoise(action_size, random_seed)

    # Shared replay buffer injected by the caller (not owned by this agent).
    self.memory = memory
def __init__(self, state_size, action_size, random_seed):
    """Initialize a basic DDPG Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        random_seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    # BUG FIX: random.seed() returns None, so the original
    # `self.seed = random.seed(random_seed)` always stored None.
    # Seed the global RNG and record the actual seed value.
    random.seed(random_seed)
    self.seed = random_seed

    # Actor Network (w/ Target Network)
    self.actor_local = Actor(state_size, action_size, random_seed).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic Network (w/ Target Network)
    self.critic_local = Critic(state_size, action_size, random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC)

    # Ornstein-Uhlenbeck noise process for exploration.
    self.noise = OUNoise(action_size, random_seed)

    # Replay memory owned by this agent.
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
def __init__(self, state_size, action_size, num_agents, random_seed, hyper_param=None):
    """Initialize a multi-agent Agent object.

    Params
    ======
        state_size (int): dimension of each state (per agent)
        action_size (int): dimension of each action (per agent)
        num_agents (int): number of agents; the critic input spans the
            concatenated states and actions of all of them
        random_seed (int): random seed
        hyper_param (HyperParam): optional hyperparameter bundle; a default
            configuration is built when omitted
    """
    self.state_size = state_size
    self.action_size = action_size
    # BUG FIX: random.seed() returns None, so the original
    # `self.seed = random.seed(random_seed)` always stored None.
    # Seed the global RNG and record the actual seed value.
    random.seed(random_seed)
    self.seed = random_seed
    self.num_agents = num_agents

    # Default hyperparameters are applied only when the caller did not
    # supply a bundle, so caller-provided values are never overwritten.
    if hyper_param is None:
        hyper_param = HyperParam()
        hyper_param.epsilon = True
        hyper_param.epsilon_decay = EXPLORE_EXPLOIT_DECAY
        hyper_param.epsilon_spaced_init = 100
        hyper_param.epsilon_spaced_decay = 1.5
        hyper_param.actor_fc1 = 128
        hyper_param.actor_fc2 = 128
        hyper_param.critic_fc1 = 128
        hyper_param.critic_fc2 = 128
        hyper_param.lr_actor = 1e-3
        hyper_param.lr_critic = 1e-3
        hyper_param.tau = 1e-4
        hyper_param.batch_size = 128
        hyper_param.n_learn_updates = 10
        hyper_param.n_time_steps = 20

    self.hyper_param = hyper_param
    self.device = device

    # Replay memory owned by this agent; batch size comes from hyper_param.
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE,
                               hyper_param.batch_size, random_seed)

    # Actor Network (w/ Target Network): acts on this agent's own state.
    self.actor_local = Actor(state_size, action_size, random_seed,
                             hyper_param.actor_fc1, hyper_param.actor_fc2).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed,
                              hyper_param.actor_fc1, hyper_param.actor_fc2).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                      lr=hyper_param.lr_actor)

    # Critic Network (w/ Target Network): input dimensions are scaled by
    # num_agents, i.e. the critic evaluates the joint state/action.
    self.critic_local = Critic(state_size * num_agents,
                               action_size * num_agents, random_seed).to(device)
    self.critic_target = Critic(state_size * num_agents,
                                action_size * num_agents, random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                       lr=hyper_param.lr_critic,
                                       weight_decay=WEIGHT_DECAY)

    # Exploration schedule tied to the replay memory contents.
    # NOTE(review): magic constants (0.5, 0, 0.0002, 0.12) — presumably
    # start/end/decay-rate/threshold; confirm against the decay class
    # and consider naming them.
    self.epsilon = PositiveMemoriesFactorExplorationDecay(
        0.5, 0, 0.0002, 0.12, self.memory)

    # Ornstein-Uhlenbeck noise process for exploration.
    self.noise = OUNoise(action_size, random_seed)
    self.train_mode = True
    # Loss histories kept for diagnostics/plotting.
    self.actor_loss = []
    self.critic_loss = []
def __init__(self, state_size, action_size, num_agents, random_seed, hyper_param=None):
    """Initialize a multi-agent Agent object (hard-update variant).

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        num_agents (int): number of agents
        random_seed (int): random seed
        hyper_param (HyperParam): optional hyperparameter bundle; a default
            configuration is built when omitted
    """
    self.state_size = state_size
    self.action_size = action_size
    # BUG FIX: random.seed() returns None, so the original
    # `self.seed = random.seed(random_seed)` always stored None.
    # Seed the global RNG and record the actual seed value.
    random.seed(random_seed)
    self.seed = random_seed
    self.num_agents = num_agents

    # Default hyperparameters are applied only when the caller did not
    # supply a bundle, so caller-provided values are never overwritten.
    if hyper_param is None:
        hyper_param = HyperParam()
        hyper_param.epsilon = False
        hyper_param.actor_fc1 = 128
        hyper_param.actor_fc2 = 128
        hyper_param.critic_fc1 = 128
        hyper_param.critic_fc2 = 128
        hyper_param.lr_actor = 1e-3
        hyper_param.lr_critic = 1e-3
        hyper_param.eps_actor = 1e-7
        hyper_param.eps_critic = 1e-7
        hyper_param.tau = 1e-4
        hyper_param.buffer_size = int(1e6)
        hyper_param.batch_size = 128
        hyper_param.n_learn_updates = 10
        hyper_param.n_time_steps = 20
        hyper_param.gamma = 0.99

    self.hyper_param = hyper_param
    self.device = device

    # Replay memory owned by this agent; buffer and batch sizes come
    # from hyper_param.
    self.memory = ReplayBuffer(action_size, self.hyper_param.buffer_size,
                               self.hyper_param.batch_size, random_seed)

    # Actor Network (w/ Target Network)
    self.actor_local = Actor(state_size, action_size, random_seed,
                             hyper_param.actor_fc1, hyper_param.actor_fc2).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed,
                              hyper_param.actor_fc1, hyper_param.actor_fc2).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                      lr=hyper_param.lr_actor,
                                      eps=hyper_param.eps_actor)

    # Critic Network (w/ Target Network)
    self.critic_local = Critic(state_size, action_size, random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                       lr=hyper_param.lr_critic,
                                       eps=hyper_param.eps_critic)

    # Start the target networks as exact copies of the local networks.
    self.hard_update(self.actor_target, self.actor_local)
    self.hard_update(self.critic_target, self.critic_local)

    # Ornstein-Uhlenbeck noise process for exploration, centered at 0.
    self.noise = OUNoise(action_size, random_seed, mu=0.0)
    self.train_mode = True
    # Loss histories kept for diagnostics/plotting.
    self.actor_loss = []
    self.critic_loss = []
    self.orig_actions = [[0.0, 0.0],
                         [0.0, 0.0]]

    # Optional exploration schedule: hyper_param.epsilon doubles as the
    # enable flag, and hyper_param.epsilon_model builds the schedule from
    # the replay memory.
    if hyper_param.epsilon:
        self.epsilon = hyper_param.epsilon_model(self.memory)