def __init__(self, args, state_dim, action_dim, action_lim, ram):
    """
    :param args: parsed arguments/config object (provides learning_rate)
    :param state_dim: Dimensions of state (int)
    :param action_dim: Dimension of action (int)
    :param action_lim: Used to limit action in [-action_lim, action_lim]
    :param ram: replay memory buffer object
    :return:
    """
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.action_lim = action_lim
    self.ram = ram
    self.iter = 0
    self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)
    self.args = args

    self.actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.target_actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), self.args.learning_rate)

    self.critic = model.Critic(self.state_dim, self.action_dim)
    self.target_critic = model.Critic(self.state_dim, self.action_dim)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), self.args.learning_rate)

    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)
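# Several snippets here call utils.hard_update / utils.soft_update without
# showing them. A minimal sketch of what such helpers typically look like;
# the (target, source) argument order is inferred from the calls above, and
# the bodies are assumptions, not the original utils module:

def hard_update(target, source):
    """Copy the source network's weights into the target verbatim."""
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(param.data)


def soft_update(target, source, tau):
    """Polyak average: target <- tau * source + (1 - tau) * target."""
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)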
def __init__(self, hp):
    """Initialize an Agent object.

    Params
    ======
        hp: hyper parameters
    """
    self.hp = hp

    # Actor Network (w/ Target Network)
    self.actor_local = model.Actor(self.hp.state_size, self.hp.action_size, self.hp.random_seed).to(device)
    self.actor_target = model.Actor(self.hp.state_size, self.hp.action_size, self.hp.random_seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=self.hp.lr_actor)

    # Critic Network (w/ Target Network)
    self.critic_local = model.Critic(self.hp.state_size, self.hp.action_size, self.hp.random_seed).to(device)
    self.critic_target = model.Critic(self.hp.state_size, self.hp.action_size, self.hp.random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=self.hp.lr_critic,
                                       weight_decay=self.hp.weight_decay)

    # A soft update with tau=1 is a hard copy: targets start identical to the locals.
    self.soft_update(self.critic_local, self.critic_target, 1)
    self.soft_update(self.actor_local, self.actor_target, 1)

    # Noise process
    self.noise = ounoise.OUNoise(self.hp.action_size, self.hp.random_seed)
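# The OUNoise / OrnsteinUhlenbeckActionNoise classes used throughout these
# snippets are not shown. A minimal sketch of the standard Ornstein-Uhlenbeck
# process with the (size, seed) constructor used above; the theta/sigma
# defaults are assumptions, and the real classes may differ:

import copy
import numpy as np


class OUNoise:
    def __init__(self, size, seed, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.rng = np.random.default_rng(seed)
        self.reset()

    def reset(self):
        """Restart the process at its mean."""
        self.state = copy.copy(self.mu)

    def sample(self):
        """Mean-reverting step: dx = theta * (mu - x) + sigma * N(0, 1)."""
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * self.rng.standard_normal(x.shape)
        self.state = x + dx
        return self.state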
def __init__(self, state_dim, action_dim, action_lim, ram, device='cpu'):
    """
    :param state_dim: Dimensions of state (int)
    :param action_dim: Dimension of action (int)
    :param action_lim: Used to limit action in [-action_lim, action_lim]
    :param ram: replay memory buffer object
    :param device: torch device for the networks, e.g. 'cpu' or 'cuda'
    :return:
    """
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.action_lim = action_lim
    self.ram = ram
    self.iter = 0
    self.device = device
    # self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

    self.actor = model.Actor(self.state_dim, self.action_dim, self.action_lim).to(device)
    self.target_actor = model.Actor(self.state_dim, self.action_dim, self.action_lim).to(device)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), LEARNING_RATE)

    self.critic = model.Critic(self.state_dim, self.action_dim).to(device)
    self.target_critic = model.Critic(self.state_dim, self.action_dim).to(device)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), LEARNING_RATE)

    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)
def __init__(self, config, state_size, action_size, num_agents, seed):
    """Initialize an Agent object.

    Params
    ======
        config: hyperparameter/configuration object
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        num_agents (int): number of parallel agents
        seed (int): random seed
    """
    self.config = config
    self.state_size = state_size
    self.action_size = action_size
    self.num_agents = num_agents
    random.seed(seed)  # random.seed() returns None, so don't store its result
    self.seed = seed

    # Initialize the Actor and Critic Networks
    self.actor = model.Actor(state_size, action_size, seed).to(self.config.device)
    self.actor_target = model.Actor(state_size, action_size, seed).to(self.config.device)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), self.config.LR_actor)

    self.critic = model.Critic(state_size, action_size, seed).to(self.config.device)
    self.critic_target = model.Critic(state_size, action_size, seed).to(self.config.device)
    self.critic_optimizer = torch.optim.Adam(
        self.critic.parameters(), self.config.LR_critic,
        weight_decay=self.config.weight_decay)

    # Initialize the random-noise-process for action-noise
    self.is_training = True
    self.randomer = OUNoise((self.num_agents, self.action_size), seed)

    # Hard update the target networks to have the same parameters as the local networks
    for target_param, param in zip(self.actor_target.parameters(), self.actor.parameters()):
        target_param.data.copy_(param.data)
    for target_param, param in zip(self.critic_target.parameters(), self.critic.parameters()):
        target_param.data.copy_(param.data)

    # Initialize replay-buffer
    self.memory = ReplayBuffer(self.config.BUFFER_SIZE, self.config.BATCH_SIZE, seed, self.config.device)
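# The ReplayBuffer referenced above is not shown (and its signature varies
# across these snippets). A minimal sketch of a uniform-sampling buffer with
# the (buffer_size, batch_size, seed, device) constructor used here; field
# names and tensor conversions are assumptions:

import random
from collections import deque, namedtuple

import numpy as np
import torch

Experience = namedtuple("Experience", ["state", "action", "reward", "next_state", "done"])


class ReplayBuffer:
    def __init__(self, buffer_size, batch_size, seed, device):
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.device = device
        random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        self.memory.append(Experience(state, action, reward, next_state, done))

    def sample(self):
        """Draw a uniform minibatch and stack it into device tensors."""
        batch = random.sample(self.memory, k=self.batch_size)
        states = torch.from_numpy(np.vstack([e.state for e in batch])).float().to(self.device)
        actions = torch.from_numpy(np.vstack([e.action for e in batch])).float().to(self.device)
        rewards = torch.from_numpy(np.vstack([e.reward for e in batch])).float().to(self.device)
        next_states = torch.from_numpy(np.vstack([e.next_state for e in batch])).float().to(self.device)
        dones = torch.from_numpy(np.vstack([e.done for e in batch]).astype(np.uint8)).float().to(self.device)
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.memory)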
def __init__(self, state_size, action_size, random_seed, num_envs=1, checkpt_folder="checkpt"):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        random_seed (int): random seed
        num_envs (int): number of parallel environments
        checkpt_folder (str): directory for saved checkpoints
    """
    self.state_size = state_size
    self.num_envs = num_envs
    self.action_size = action_size
    random.seed(random_seed)  # random.seed() returns None, so don't store its result
    self.seed = random_seed
    self.CHECKPOINT_FOLDER = checkpt_folder

    # Actor Network (w/ Target Network)
    self.actor_local = model.Actor(state_size, action_size, random_seed).to(DEVICE)
    self.actor_target = model.Actor(state_size, action_size, random_seed).to(DEVICE)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic Network (w/ Target Network)
    self.critic_local = model.Critic(state_size, action_size, random_seed).to(DEVICE)
    self.critic_target = model.Critic(state_size, action_size, random_seed).to(DEVICE)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

    # Optional checkpoint restore, currently disabled:
    # if os.path.isfile(self.CHECKPOINT_FOLDER + 'checkpoint_actor.pth') and \
    #         os.path.isfile(self.CHECKPOINT_FOLDER + 'checkpoint_critic.pth'):
    #     self.actor_local.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_actor.pth'))
    #     self.actor_target.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_actor.pth'))
    #     self.critic_local.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_critic.pth'))
    #     self.critic_target.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_critic.pth'))

    # Noise process
    self.noise = OUNoise((num_envs, action_size), random_seed)

    # Replay memory
    self.memory = ReplayBuffer(DEVICE, action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
def __init__(self, model_source, img_size=299):
    # Load the checkpoint: vocabulary dict, training settings, and weights.
    model_source = torch.load(model_source)
    self.word2idx = model_source["dict"]
    self.idx2word = {v: k for k, v in self.word2idx.items()}

    args = model_source["settings"]
    # Positional args per model.Actor's signature elsewhere in this collection:
    # (vocab_size, dec_hsz, rnn_layers, batch_size, max_len, dropout, use_cuda)
    actor = model.Actor(args.vocab_size, args.dec_hsz, args.rnn_layers,
                        2, args.max_len, args.dropout, True)
    actor.load_state_dict(model_source["model"])
    actor = actor.cuda()
    self.actor = actor.eval()

    self._encode = transforms.Compose([
        transforms.Resize(img_size),
        transforms.CenterCrop(img_size),
        transforms.ToTensor()
    ])
    self.max_len = args.max_len
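# Hedged usage sketch for the loader above: torchvision transforms expect a
# PIL image, and the model expects a batched CUDA tensor. The class name
# `Translator`, both file paths, and the decode step are assumptions (the
# Actor's generation API is not shown):

from PIL import Image

translator = Translator("model.pt", img_size=299)  # hypothetical checkpoint path
img = Image.open("example.jpg").convert("RGB")     # hypothetical image
x = translator._encode(img).unsqueeze(0).cuda()    # shape (1, 3, 299, 299)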
def __init__(self, state_dim, action_dim, action_lim, ram):
    """Special method for object initialisation.

    :param state_dim: Dimensions of state.
    :type state_dim: int.
    :param action_dim: Dimension of action.
    :type action_dim: int.
    :param action_lim: Used to limit action in [-action_lim, action_lim].
    :type action_lim: float.
    :param ram: replay memory buffer object.
    :type ram: buffer.
    """
    # Set the parameters.
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.action_lim = action_lim
    self.ram = ram
    self.iter = 0

    # Set the noise function.
    self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

    # Set the actor.
    self.actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.target_actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), LEARNING_RATE)

    # Set the critic.
    self.critic = model.Critic(self.state_dim, self.action_dim)
    self.target_critic = model.Critic(self.state_dim, self.action_dim)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), LEARNING_RATE)

    # Update the actor and critic networks.
    self.hard_update(self.target_actor, self.actor)
    self.hard_update(self.target_critic, self.critic)
    return
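# The action-selection method that usually accompanies an initialiser like the
# one above, as a sketch: it would live in the same class, and the method name
# and the choice to scale OU noise by action_lim are assumptions:

def get_exploration_action(self, state):
    """Forward the actor, then add OU noise scaled into the action range."""
    state = torch.from_numpy(np.float32(state))
    action = self.actor(state).detach()
    new_action = action.numpy() + self.noise.sample() * self.action_lim
    return new_action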
def rl_graph(sess, phrl):
    # Policy network: logits over N_ACTION discrete actions.
    Actor = model.Actor()
    Y_score = Actor.build(phrl['states_rl'], N_ACTION, phrl['is_training_rl'])
    Y_prob = tf.nn.softmax(Y_score)

    # REINFORCE-style loss: -log pi(a|s), weighted by the observed values.
    neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=Y_score, labels=phrl['actions_rl'])
    loss_op = tf.reduce_mean(neg_log_prob * phrl['values_rl'])

    # update_op = tf.train.MomentumOptimizer(LR, MOMENTUM).minimize(loss_op, var_list=Actor.vars)
    update_op = tf.train.AdamOptimizer(1e-3).minimize(loss_op, var_list=Actor.vars)
    return loss_op, Y_prob, update_op, Actor.vars
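# Hedged usage sketch for rl_graph: the phrl placeholder dict is not shown,
# so the placeholder shapes/dtypes, STATE_DIM, and the batch arrays are
# assumptions (TF1 graph mode):

import tensorflow as tf

phrl = {
    'states_rl': tf.placeholder(tf.float32, [None, STATE_DIM]),  # STATE_DIM assumed
    'actions_rl': tf.placeholder(tf.int32, [None]),
    'values_rl': tf.placeholder(tf.float32, [None]),
    'is_training_rl': tf.placeholder(tf.bool, []),
}

sess = tf.Session()
loss_op, Y_prob, update_op, actor_vars = rl_graph(sess, phrl)
sess.run(tf.global_variables_initializer())

# One policy-gradient update: larger values_rl push up the log-probability
# of the corresponding actions.
_, loss = sess.run([update_op, loss_op], feed_dict={
    phrl['states_rl']: states,    # assumed batch arrays
    phrl['actions_rl']: actions,
    phrl['values_rl']: returns,
    phrl['is_training_rl']: True,
})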
def __init__(self, state_dim, action_dim, ram):
    """Initialize actor and critic networks."""
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.ram = ram
    self.iter = 0
    self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

    self.actor = model.Actor(self.state_dim, self.action_dim)
    self.target_actor = model.Actor(self.state_dim, self.action_dim)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), LEARNING_RATE)

    self.critic = model.Critic(self.state_dim, self.action_dim)
    self.target_critic = model.Critic(self.state_dim, self.action_dim)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), LEARNING_RATE)

    # copy parameters to target networks
    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)
def __init__(self, env, state_vector_size, action_num, action_limit, ram):
    """
    :param env: Gym environment
    :param state_vector_size: Dimensions of state (int)
    :param action_num: Dimension of action (int)
    :param action_limit: Used to limit action in [-action_limit, action_limit]
    :param ram: replay memory buffer object
    :return:
    """
    self.env = env
    self.state_dim = state_vector_size
    self.action_dim = action_num
    self.action_lim = action_limit
    self.ram = ram
    self.iter = 0
    self.noise = OrnsteinUhlenbeckActionNoise(self.action_dim)

    self.actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.target_actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), LEARNING_RATE)

    self.critic = model.Critic(self.state_dim, self.action_dim)
    self.target_critic = model.Critic(self.state_dim, self.action_dim)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), LEARNING_RATE)

    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)

    # Also kept under their original names, for callers that expect them.
    self.state_vector_size = state_vector_size
    self.action_num = action_num
    self.action_limit = action_limit
    self.controller = DQNAgent(env, state_vector_size, action_num, action_limit)
def __init__(self, state_dim, action_dim, ram, LR_actor, LR_critic,
             gamma, tau, batchsize, expl_rate, version):
    """
    :param state_dim: Dimensions of state (int)
    :param action_dim: Dimension of action (int)
    :param ram: replay memory buffer object
    :param LR_actor: actor learning rate
    :param LR_critic: critic learning rate
    :param gamma: discount factor
    :param tau: soft-update rate for the target networks
    :param batchsize: minibatch size sampled from the replay buffer
    :param expl_rate: exploration rate (OU noise scale)
    :param version: version identifier (unused here)
    :return:
    """
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.LR_actor = LR_actor
    self.LR_critic = LR_critic
    self.gamma = gamma
    self.tau = tau
    self.ram = ram
    self.batchsize = batchsize
    self.iter = 0
    self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim, 0, 0.15, expl_rate)
    self.action_lim = 1.0

    self.actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.target_actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), self.LR_actor)

    self.critic = model.Critic(self.state_dim, self.action_dim)
    self.target_critic = model.Critic(self.state_dim, self.action_dim)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), self.LR_critic)

    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)
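# A hedged sketch of the update step these hyperparameters feed into: the
# standard DDPG critic/actor losses plus a Polyak target update. It would live
# in the same class; ram.sample returning batched tensors, the output shapes,
# and utils.soft_update are assumptions:

def optimize(self):
    s1, a1, r1, s2 = self.ram.sample(self.batchsize)

    # Critic: regress Q(s1, a1) onto y = r + gamma * Q'(s2, pi'(s2)).
    with torch.no_grad():
        a2 = self.target_actor(s2)
        y_expected = r1 + self.gamma * torch.squeeze(self.target_critic(s2, a2))
    y_predicted = torch.squeeze(self.critic(s1, a1))
    critic_loss = torch.nn.functional.mse_loss(y_predicted, y_expected)
    self.critic_optimizer.zero_grad()
    critic_loss.backward()
    self.critic_optimizer.step()

    # Actor: ascend the critic's estimate of Q(s, pi(s)).
    actor_loss = -self.critic(s1, self.actor(s1)).mean()
    self.actor_optimizer.zero_grad()
    actor_loss.backward()
    self.actor_optimizer.step()

    # Track the targets slowly.
    utils.soft_update(self.target_actor, self.actor, self.tau)
    utils.soft_update(self.target_critic, self.critic, self.tau)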
def build_net(self):
    '''Build actor, critic, target_actor, target_critic networks.'''
    # actor
    self.actor = model.Actor(self.state_size, self.hidden_size, self.action_size,
                             self.n_layers, output_activation=self.output_activation)
    self.target_actor = deepcopy(self.actor)
    self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=self.lr)

    # critic
    self.critic = model.QNet(self.state_size, self.hidden_size, 1,
                             self.n_layers, self.action_size, output_activation=None)
    self.target_critic = deepcopy(self.critic)
    self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=self.lr)

    # replay memory
    self.replay_memory = util.ReplayMemory(1000000)
    self.transition = collections.namedtuple(
        "transition", ["state", "action", "next_state", "reward"])

    if self.ounoise:
        self.noise = OUNoise(mu=np.zeros(self.action_size), sigma=0.1)
    self.epsilon = 1.
            # (snippet begins mid-statement: the tail of a print call that
            # reports the episode at which the environment was solved)
                  .format(i_episode, np.mean(scores_deque)))
            agent.checkpoint(checkpt)
            break
    return scores, mean_scores_window


# In[19]:

config = Config(seed=6)
config.num_agents = len(env_info.agents)
config.state_size = state_size
config.action_size = action_size
config.actor_fn = lambda: model.Actor(config.state_size, config.action_size, 128, 128)
config.actor_opt_fn = lambda params: optim.Adam(params, lr=1e-3)
config.critic_fn = lambda: model.Critic(config.state_size, config.action_size, 1, 128, 128)
config.critic_opt_fn = lambda params: optim.Adam(params, lr=2e-3)
config.replay_fn = lambda: Replay(
    config.action_size, buffer_size=int(1e6), batch_size=128)
config.noise_fn = lambda: OUNoise(
    config.action_size, mu=0., theta=0.15, sigma=0.1, seed=config.seed)
config.discount = 0.99
config.target_mix = 3e-3
config.max_episodes = 3000
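# The factory-style config above defers construction to whoever consumes it.
# A sketch of how a consumer (e.g. the agent's __init__) would instantiate
# everything from the hooks; the assumption is that the agent calls each
# *_fn itself rather than receiving built objects:

actor = config.actor_fn()
actor_opt = config.actor_opt_fn(actor.parameters())
critic = config.critic_fn()
critic_opt = config.critic_opt_fn(critic.parameters())
replay = config.replay_fn()
noise = config.noise_fn()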
db.create_all()

with open('data/movies.csv', 'r', encoding='utf-8-sig') as movies_file:
    reader = DictReader(movies_file)
    for row in reader:
        new_movie = model.Movie(name=row['name'], year=row['year'])
        actors = row['actors'].split(';')
        for actor in actors:
            print(actor)
            existing_actor = model.Actor.query.filter_by(name=actor).first()
            if existing_actor:
                existing_actor.movies.append(new_movie)
                new_movie.actors.append(existing_actor)
            else:
                new_actor = model.Actor(name=actor)
                new_actor.movies.append(new_movie)
                new_movie.actors.append(new_actor)
                db.session.add(new_actor)
        db.session.add(new_movie)

with open('data/songs.csv', 'r', encoding='utf-8-sig') as songs_file:
    reader = DictReader(songs_file)
    for row in reader:
        new_song = model.Song(name=row['name'])
        # add artists
        artists = row['artists'].split(";")
        for artist_name in artists:
            print(artist_name)
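# The Movie/Actor models behind the seeder above are not shown; the loader
# implies a many-to-many relationship with `movies`/`actors` collections on
# each side. A minimal Flask-SQLAlchemy sketch consistent with that usage;
# table and column details are assumptions (Song is omitted for brevity).
# Note: with back_populates, appending on one side maintains the other, so
# the loader's double append suggests the original models may be defined
# differently:

actor_movie = db.Table(
    'actor_movie',
    db.Column('actor_id', db.Integer, db.ForeignKey('actor.id'), primary_key=True),
    db.Column('movie_id', db.Integer, db.ForeignKey('movie.id'), primary_key=True),
)


class Actor(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(128), unique=True, nullable=False)
    movies = db.relationship('Movie', secondary=actor_movie, back_populates='actors')


class Movie(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(256), nullable=False)
    year = db.Column(db.String(8))
    actors = db.relationship('Actor', secondary=actor_movie, back_populates='movies')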
    # (snippet begins mid-call: the tail of the validation data-loader constructor)
    data['valid']['imgs'],
    data['valid']['captions'],
    args.max_len,
    batch_size=args.batch_size,
    is_cuda=use_cuda,
    evaluation=True)

# ##############################################################################
# Build model
# ##############################################################################
import model
from const import PAD
from optim import Optim

encode = model.Encode(use_cuda)

actor = model.Actor(args.vocab_size, args.dec_hsz, args.rnn_layers,
                    args.batch_size, args.max_len, args.dropout, use_cuda)

critic = model.Critic(args.vocab_size, args.dec_hsz, args.rnn_layers,
                      args.batch_size, args.max_len, args.dropout, use_cuda)

optim_pre_A = Optim(actor.parameters(), args.pre_lr, True)
optim_pre_C = Optim(critic.parameters(), args.pre_lr, True)
optim_A = Optim(actor.parameters(), args.lr, False, args.new_lr)
optim_C = Optim(critic.parameters(), args.lr, False, args.new_lr)

criterion_A = torch.nn.CrossEntropyLoss(ignore_index=PAD)
criterion_C = torch.nn.MSELoss()

if use_cuda:
    actor = actor.cuda()
def update_target(self, source, target):
    # Polyak average over the flattened parameter vectors:
    # target <- tau * source + (1 - tau) * target
    new_target_param = parameters_to_vector(source.parameters()) * self.tau + \
        (1 - self.tau) * parameters_to_vector(target.parameters())
    vector_to_parameters(new_target_param, target.parameters())
    return target


if __name__ == '__main__':
    env = gym.make("CartPole-v0")
    global state_size, action_size
    state_size = int(np.product(env.observation_space.shape))
    action_size = int(env.action_space.n)
    num_episode = 800

    critic = model.Critic(state_size, action_size)
    actor = model.Actor(state_size, action_size)
    # actor.eval()
    # critic.eval()

    # target network
    target_critic = deepcopy(critic)
    target_actor = deepcopy(actor)

    ddpg = DDPG(env, actor, critic, target_actor, target_critic,
                num_episode, replay_memory,  # (call continues; snippet truncated)
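# update_target above flattens every parameter into one vector so the Polyak
# average is a single expression; it is equivalent to the per-tensor
# soft-update loops used elsewhere in this collection. A hedged usage sketch,
# assuming the DDPG instance exposes the method and has tau set:

target_critic = ddpg.update_target(critic, target_critic)
target_actor = ddpg.update_target(actor, target_actor)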
def build_net(self):
    '''Build the networks based on the parameters given.

    Actor Network:
        * gaussian: the network outputs a mean and a log standard deviation,
          so the output size should be the action size
        * deterministic: the network only outputs a mean with action size,
          so set discrete=True and the log standard deviation is None
    Value Network:
        * outputs the estimated value based on the replay buffer
    Q Network:
        * outputs the Q value
    Target Network:
        * used to update the Q network
    '''
    # build net
    if self.policy_type == "gaussian":
        self.actor = model.Net(self.state_size, self.hidden_size, self.action_size,
                               self.n_layers, output_activation=self.output_activation,
                               discrete=self.discrete)
        # automated entropy adjustment for maximum entropy RL
        self.entropy_target = -self.action_size
        self.log_alpha = torch.zeros(1, requires_grad=True)
        self.alpha_optimizer = optim.Adam([self.log_alpha], lr=self.lr)
        # target q network
        self.target_q_net = model.QNet(self.state_size, self.hidden_size, 1,
                                       self.n_layers, self.action_size, output_activation=None)
    elif self.policy_type == "deterministic":
        self.actor = model.Net(self.state_size, self.hidden_size, self.action_size,
                               self.n_layers, output_activation=self.output_activation,
                               discrete=self.discrete)
        self.value_net = model.Actor(self.state_size, self.hidden_size, 1,
                                     self.n_layers, output_activation=None)
        self.target_value_net = deepcopy(self.value_net)
        self.value_optimizer = optim.Adam(self.value_net.parameters(), lr=self.lr)

    if self.duel_q_net:
        # duel Qnet
        self.q1_net = model.QNet(self.state_size, self.hidden_size, 1,
                                 self.n_layers, self.action_size, output_activation=None)
        self.q2_net = deepcopy(self.q1_net)
    else:
        self.q_net = model.QNet(self.state_size, self.hidden_size, 1,
                                self.n_layers, self.action_size, output_activation=None)

    # optimizer
    self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=self.lr)
    if self.duel_q_net:
        self.q1_optimizer = optim.Adam(self.q1_net.parameters(), lr=self.lr)
        self.q2_optimizer = optim.Adam(self.q2_net.parameters(), lr=self.lr)
    else:
        self.q_optimizer = optim.Adam(self.q_net.parameters(), lr=self.lr)

    # loss function
    self.value_loss_fn = nn.MSELoss()
    if self.duel_q_net:
        self.q1_loss_fn = nn.MSELoss()
        self.q2_loss_fn = nn.MSELoss()
    else:
        self.q_loss_fn = nn.MSELoss()

    # replay memory
    self.replay_memory = util.ReplayMemory(1000000)
    self.transition = collections.namedtuple(
        "transition", ["state", "action", "next_state", "reward"])

    # entropy target (set again unconditionally, covering both policy types)
    self.entropy_target = -self.action_size
    self.noise = OUNoise(mu=np.zeros(self.action_size), sigma=0.1)
    self.epsilon = 1.
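# A hedged sketch of how log_alpha and entropy_target are typically used: the
# standard SAC automated-entropy-adjustment update. `log_prob` (the gaussian
# actor's log-probability of its sampled action) is an assumption here, since
# the sampling code is not shown:

alpha = self.log_alpha.exp()
alpha_loss = -(self.log_alpha * (log_prob + self.entropy_target).detach()).mean()
self.alpha_optimizer.zero_grad()
alpha_loss.backward()
self.alpha_optimizer.step()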
# Main function
if __name__ == '__main__':
    # set unity environment path (file_name)
    env = UnityEnvironment(file_name=config.env_name)
    # env = UnityEnvironment(file_name=config.env_name, worker_id=np.random.randint(100000))

    # setting brain for unity
    default_brain = env.brain_names[0]
    brain = env.brains[default_brain]

    train_mode = config.train_mode
    device = config.device

    actor = model.Actor(config.action_size, "main").to(device)
    target_actor = model.Actor(config.action_size, "target").to(device)
    critic = model.Critic(config.action_size, "main").to(device)
    target_critic = model.Critic(config.action_size, "target").to(device)

    optimizer_actor = optim.Adam(actor.parameters(), lr=config.actor_lr)
    optimizer_critic = optim.Adam(critic.parameters(), lr=config.critic_lr)

    algorithm = "_DDPG"
    agent = agent.DDPGAgent(actor, critic, target_actor, target_critic,
                            optimizer_actor, optimizer_critic, device, algorithm)

    # Initialize target networks
    agent.hard_update_target()
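# A hedged sketch of the interaction loop that usually follows this setup,
# using the old ml-agents (v0.x) UnityEnvironment API implied by
# env.brain_names / env.brains above; the agent method names are assumptions:

    env_info = env.reset(train_mode=train_mode)[default_brain]
    state = env_info.vector_observations[0]
    done = False
    while not done:
        action = agent.get_action(state)                # assumed method
        env_info = env.step(action)[default_brain]
        next_state = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]
        agent.append_sample(state, action, reward, next_state, done)  # assumed
        state = next_state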