def __init__(self, args, state_dim, action_dim, action_lim, ram):
    """
    :param args: hyperparameter namespace (provides learning_rate)
    :param state_dim: Dimensions of state (int)
    :param action_dim: Dimension of action (int)
    :param action_lim: Used to limit action in [-action_lim, action_lim]
    :param ram: replay memory buffer object
    :return:
    """
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.action_lim = action_lim
    self.ram = ram
    self.iter = 0
    self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)
    self.args = args

    self.actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.target_actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), self.args.learning_rate)

    self.critic = model.Critic(self.state_dim, self.action_dim)
    self.target_critic = model.Critic(self.state_dim, self.action_dim)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), self.args.learning_rate)

    # Start the target networks as exact copies of the online networks.
    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)
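# Nearly every snippet here calls utils.hard_update(target, source) to
# initialise the target networks. A minimal sketch of what such a helper
# presumably does, assuming the (target, source) argument order used in
# these calls:
def hard_update(target, source):
    """Copy every parameter of `source` into `target` in place."""
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(param.data)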
def __init__(self, state_num, action_num, buffer_size=2000, batch_size=64, gamma=0.99,
             device=torch.device('cpu'), hidden_unit=16, lr=1e-3):
    self.device = device

    # Create the actor-critic networks and their target copies.
    self.actor = Actor(state_num, action_num, hidden_unit=hidden_unit).to(device)
    self.target_actor = Actor(state_num, action_num, hidden_unit=hidden_unit).to(device)
    self.critic = Critic(state_num, action_num, hidden_unit=hidden_unit).to(device)
    self.target_critic = Critic(state_num, action_num, hidden_unit=hidden_unit).to(device)
    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)

    # Optimizers.
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=lr)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr=lr)

    # Remaining hyperparameters and the replay buffer.
    self.buffer_size = buffer_size
    self.batch_size = batch_size
    self.gamma = gamma
    self.memory_buffer = deque(maxlen=buffer_size)
    self.noise = utils.OrnsteinUhlenbeckActionNoise(action_num)
    # self.action_range = action_range
    self.critic_loss_F = nn.MSELoss()
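# The deque-based memory_buffer above implies uniform minibatch sampling.
# A hypothetical sketch of how a batch might be drawn from it; the
# (state, action, reward, next_state, done) tuple layout is an assumption:
import random

import numpy as np
import torch


def sample_batch(memory_buffer, batch_size, device):
    """Draw batch_size transitions uniformly at random, as float tensors."""
    batch = random.sample(memory_buffer, batch_size)
    states, actions, rewards, next_states, dones = zip(*batch)
    return tuple(
        torch.as_tensor(np.asarray(x), dtype=torch.float32, device=device)
        for x in (states, actions, rewards, next_states, dones)
    )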
def __init__(self, settings):
    # Merge user-supplied settings over the module-level defaults.
    self.settings = init_settings.copy()
    self.settings.update(settings)

    self.state_dim = self.settings["state_dim"]
    self.action_dim = self.settings["action_dim"]
    self.action_lim = self.settings["action_lim"]
    self.noise = utils.OrnsteinUhlenbeckActionNoise(self.settings["action_dim"])
    self.buffer = MemoryBuffer(self.settings["buffer_length"])
    self.batch_size = self.settings["batch_size"]
    self.gamma = self.settings["reward_decay"]
    self.tau = self.settings["TAU"]
    self.lr = self.settings["learning_rate"]

    # Networks are passed in via settings; targets are deep copies.
    self.actor = self.settings["Actor"]
    self.critic = self.settings["Critic"]
    self.target_actor = copy.deepcopy(self.actor)
    self.target_critic = copy.deepcopy(self.critic)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), self.lr)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), self.lr)
    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)
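# self.tau above is the Polyak coefficient for the slow target updates that
# complement hard_update during training. A minimal sketch of the usual
# soft_update helper; the exact signature in utils is an assumption:
def soft_update(target, source, tau):
    """Blend source into target: theta_t <- tau * theta_s + (1 - tau) * theta_t."""
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)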
def __init__(self, game):
    super(Agent, self).__init__()
    self.env_name = game.env_name
    self.id = -1

    # Main network: a stack of linear layers sized from the game config.
    self.net = nn.ModuleList()
    input_size = game.input_size
    for size in game.layers:
        self.net.append(nn.Linear(input_size, size))
        input_size = size
    self.output = nn.Linear(input_size, game.output_size)

    # Action noise.
    self.noise = utils.OrnsteinUhlenbeckActionNoise(game.output_size)
    self.env = None
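# A hypothetical forward pass matching the ModuleList layout above; the
# choice of ReLU hidden activations and a tanh-squashed output is an
# assumption, not taken from the original source:
def forward(self, x):
    for layer in self.net:
        x = torch.relu(layer(x))
    # Squash to [-1, 1] for a bounded continuous action.
    return torch.tanh(self.output(x))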
def __init__(self, agent, ram):
    self.ram = ram
    self.iter = 0
    self.agent = agent
    self.noise = utils.OrnsteinUhlenbeckActionNoise(self.agent["action_dim"])

    # Networks are supplied pre-built via the agent dict.
    self.actor = self.agent["actor"]
    self.target_actor = self.agent["target_actor"]
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), LEARNING_RATE)

    self.critic = self.agent["critic"]
    self.target_critic = self.agent["target_critic"]
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), LEARNING_RATE)
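# A hypothetical construction of the agent dict this trainer expects; the
# Trainer name and the model.Actor/model.Critic classes stand in for
# whatever the surrounding project actually defines:
agent = {
    "action_dim": action_dim,
    "actor": model.Actor(state_dim, action_dim),
    "target_actor": model.Actor(state_dim, action_dim),
    "critic": model.Critic(state_dim, action_dim),
    "target_critic": model.Critic(state_dim, action_dim),
}
trainer = Trainer(agent, ram)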
def __init__(self, state_dim, action_dim, action_lim, ram): """Special method for object initialisation. :param state_dim: Dimensions of state. :type state_dim: int. :param action_dim: Dimension of action. :type action_dim: int. :param action_lim: Used to limit action in [-action_lim, action_lim]. :type action_lim: float. :param ram: replay memory buffer object. :type ram: buffer. """ # Set the parameters. self.state_dim = state_dim self.action_dim = action_dim self.action_lim = action_lim self.ram = ram self.iter = 0 # Set the noise function. self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim) # Set the actor. self.actor = model.Actor(self.state_dim, self.action_dim, self.action_lim) self.target_actor = model.Actor(self.state_dim, self.action_dim, self.action_lim) self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), LEARNING_RATE) # Set the critic. self.critic = model.Critic(self.state_dim, self.action_dim) self.target_critic = model.Critic(self.state_dim, self.action_dim) self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), LEARNING_RATE) # Update the actor and critic networks self.hard_update(self.target_actor, self.actor) self.hard_update(self.target_critic, self.critic) return
def __init__(self, obs_space, action_space, ram, writer, device, args):
    """
    :param obs_space: observation space (gym-style Box); shape[0] is the state dimension
    :param action_space: action space (gym-style Box); shape[0] is the action dimension
    :param ram: replay memory buffer object
    :param writer: summary writer for logging
    :param device: torch device for the networks
    :param args: hyperparameter namespace
    :return:
    """
    self.state_dim = obs_space.shape[0]
    self.action_dim = action_space.shape[0]
    self.action_high = action_space.high
    self.action_low = action_space.low
    self.ram = ram
    self.iter = 1
    self.steps = 0
    self.gamma = args.gamma
    self.batch_size = args.batch_size
    self.tau = args.tau
    self.decay_rate = args.decay_rate
    self.eps_start = args.eps_start
    self.eps_end = args.eps_end
    self.eps_decay = args.eps_decay
    self.start_step = args.start_learning
    self.device = device
    self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)
    self.writer = writer
    self.args = args

    # Initialise the combined actor-critic networks and sync the target.
    target_net = DDPG(obs_space.shape, self.action_dim, args).to(device)
    learn_net = DDPG(obs_space.shape, self.action_dim, args).to(device)
    utils.hard_update(target_net, learn_net)
    self.AC = learn_net
    self.AC_T = target_net
    self.actor_optimizer = torch.optim.Adam(self.AC.actor.policyNet.parameters(), args.lr_a)
    self.critic_optimizer = torch.optim.Adam(self.AC.critic.parameters(), args.lr_c)

    # Convenience aliases into the combined networks.
    self.actor = self.AC.actor
    self.target_actor = self.AC_T.actor
    self.critic = self.AC.critic
    self.target_critic = self.AC_T.critic
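# eps_start/eps_end/eps_decay above suggest an exponentially annealed
# exploration schedule. A sketch of the usual form; whether this exact
# formula is what the original code computes is an assumption:
import math


def epsilon_by_step(steps, eps_start, eps_end, eps_decay):
    # Anneal from eps_start toward eps_end as the step count grows.
    return eps_end + (eps_start - eps_end) * math.exp(-steps / eps_decay)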
def __init__(self, state_dim, action_dim, ram):
    """Initialize actor and critic networks."""
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.ram = ram
    self.iter = 0
    self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

    self.actor = model.Actor(self.state_dim, self.action_dim)
    self.target_actor = model.Actor(self.state_dim, self.action_dim)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), LEARNING_RATE)

    self.critic = model.Critic(self.state_dim, self.action_dim)
    self.target_critic = model.Critic(self.state_dim, self.action_dim)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), LEARNING_RATE)

    # Copy parameters to target networks.
    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)
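# A minimal sketch of how such a trainer typically picks an exploratory
# action: deterministic actor output plus OU noise. The method name and
# the unscaled noise addition are assumptions:
def get_exploration_action(self, state):
    state = torch.as_tensor(state, dtype=torch.float32)
    action = self.actor(state).detach().numpy()
    return action + self.noise.sample()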
def __init__(self, state_dim, action_dim, action_lim, ram): """ :param state_dim: Dimensions of state (int) :param action_dim: Dimension of action (int) :param action_lim: Used to limit action in [-action_lim,action_lim] :param ram: replay memory buffer object :return: """ self.state_dim = state_dim self.action_dim = action_dim self.action_lim = action_lim self.ram = ram self.iter = 0 self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim) self.actor = model.Actor(self.state_dim, self.action_dim, self.action_lim) self.target_actor = model.Actor(self.state_dim, self.action_dim, self.action_lim) self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), LEARNING_RATE, weight_decay=1e-5) self.critic = model.Critic(self.state_dim, self.action_dim) self.target_critic = model.Critic(self.state_dim, self.action_dim) self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), LEARNING_RATE * 10, weight_decay=1e-5) if (USEGPU): self.target_actor = self.target_actor.cuda() self.actor = self.actor.cuda() self.target_critic = self.target_critic.cuda() self.critic = self.critic.cuda() utils.hard_update(self.target_actor, self.actor) utils.hard_update(self.target_critic, self.critic)
def __init__(self, state_dim, action_dim, ram, LR_actor, LR_critic, gamma,
             tau, batchsize, expl_rate, version):
    """
    :param state_dim: Dimensions of state (int)
    :param action_dim: Dimension of action (int)
    :param ram: replay memory buffer object
    :param LR_actor: actor learning rate
    :param LR_critic: critic learning rate
    :param gamma: reward discount factor
    :param tau: target-network soft-update coefficient
    :param batchsize: minibatch size
    :param expl_rate: exploration noise scale
    :param version: unused in this constructor
    :return:
    """
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.LR_actor = LR_actor
    self.LR_critic = LR_critic
    self.gamma = gamma
    self.tau = tau
    self.ram = ram
    self.batchsize = batchsize
    self.iter = 0
    # OU noise with mu=0, theta=0.15 and sigma set by the exploration rate.
    self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim, 0, 0.15, expl_rate)
    # Actions are normalised to [-1, 1], so the limit is fixed rather than passed in.
    self.action_lim = 1.0

    self.actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.target_actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), self.LR_actor)

    self.critic = model.Critic(self.state_dim, self.action_dim)
    self.target_critic = model.Critic(self.state_dim, self.action_dim)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), self.LR_critic)

    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)
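# The four-argument call above pins the noise signature down as
# (action_dim, mu, theta, sigma). A minimal sketch of the standard
# Ornstein-Uhlenbeck process used for DDPG exploration, assuming that
# signature:
import numpy as np


class OrnsteinUhlenbeckActionNoise:
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2):
        self.action_dim = action_dim
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.X = np.ones(self.action_dim) * self.mu

    def reset(self):
        # Restart the process at its mean.
        self.X = np.ones(self.action_dim) * self.mu

    def sample(self):
        # Mean-reverting drift plus Gaussian perturbation.
        dx = self.theta * (self.mu - self.X) + self.sigma * np.random.randn(self.action_dim)
        self.X = self.X + dx
        return self.X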