def __init__(self, state_dim, action_dim, args):
    """Set up the Double DQN agent: replay buffer, optimizer, hyperparameters, and online/target Q-networks."""
    self.buffer = Buffer(args.buffer_size)
    self.optimizer = tf.keras.optimizers.Adam(args.learning_rate)

    self.state_dim = state_dim
    self.action_dim = action_dim

    # Training hyperparameters, taken verbatim from the parsed args.
    self.batch_size = args.batch_size
    self.gamma = args.gamma
    self.lr = args.learning_rate
    self.epsilon = args.epsilon
    self.training_start = args.training_start
    self.training_step = args.training_step
    self.current_step = 0
    self.copy_iter = args.copy_iter  # interval (in steps) between target-network syncs

    # Online and target Q-networks; the target starts as an exact copy of the online net.
    self.network = Policy_network(self.state_dim, self.action_dim, args.hidden_dim)
    self.target_network = Policy_network(self.state_dim, self.action_dim, args.hidden_dim)
    copy_weight(self.network, self.target_network)

    self.network_list = {'Network': self.network, 'Target_Network': self.target_network}
    self.name = 'Double DQN'
def __init__(self, state_dim, action_dim, args):
    """Set up the TD3 agent: replay buffer, per-network optimizers, hyperparameters,
    and actor/twin-critic networks with their target copies."""
    self.buffer = Buffer(args.buffer_size)

    self.actor_optimizer = tf.keras.optimizers.Adam(args.actor_lr)
    self.critic1_optimizer = tf.keras.optimizers.Adam(args.critic_lr)
    self.critic2_optimizer = tf.keras.optimizers.Adam(args.critic_lr)

    self.state_dim = state_dim
    self.action_dim = action_dim

    # Training hyperparameters, taken verbatim from the parsed args.
    self.batch_size = args.batch_size
    self.gamma = args.gamma
    self.tau = args.tau  # soft-update coefficient for the target networks
    self.actor_lr = args.actor_lr
    self.critic_lr = args.critic_lr
    self.policy_delay = args.policy_delay  # actor updated once per this many critic updates
    self.actor_noise = args.actor_noise
    self.target_noise = args.target_noise
    self.noise_clip = args.noise_clip
    self.training_start = args.training_start
    self.training_step = args.training_step
    self.current_step = 0

    # Actor, twin critics, and their target copies (targets start as exact copies).
    self.actor = Policy_network(self.state_dim, self.action_dim, args.hidden_dim)
    self.target_actor = Policy_network(self.state_dim, self.action_dim, args.hidden_dim)
    self.critic1 = Q_network(self.state_dim, self.action_dim, args.hidden_dim)
    self.target_critic1 = Q_network(self.state_dim, self.action_dim, args.hidden_dim)
    self.critic2 = Q_network(self.state_dim, self.action_dim, args.hidden_dim)
    self.target_critic2 = Q_network(self.state_dim, self.action_dim, args.hidden_dim)

    copy_weight(self.actor, self.target_actor)
    copy_weight(self.critic1, self.target_critic1)
    copy_weight(self.critic2, self.target_critic2)

    # Register the target actor too, matching DDPG's network_list; the original
    # omitted it, which would exclude target-actor weights from save/load.
    self.network_list = {
        'Actor': self.actor,
        'Target_Actor': self.target_actor,
        'Critic1': self.critic1,
        'Critic2': self.critic2,
        'Target_Critic1': self.target_critic1,
        'Target_Critic2': self.target_critic2,
    }
    self.name = 'TD3'
def __init__(self, state_dim, action_dim, args):
    """Set up the VPG (vanilla policy gradient) agent: on-policy buffer, optimizers,
    and an actor (categorical or Gaussian) plus a state-value critic."""
    self.discrete = args.discrete
    self.buffer = On_Policy_Buffer(args.buffer_size)

    self.gamma = args.gamma
    self.lambda_gae = args.lambda_gae  # GAE smoothing coefficient

    self.actor_optimizer = tf.keras.optimizers.Adam(args.actor_lr)
    self.critic_optimizer = tf.keras.optimizers.Adam(args.critic_lr)

    self.state_dim = state_dim
    self.action_dim = action_dim

    # On-policy agent: trains from step 0, one update pass per rollout.
    self.training_start = 0
    self.training_step = 1

    # Categorical policy for discrete action spaces, Gaussian otherwise.
    # (Fix: idiomatic truth test instead of `== True`, PEP 8 E712.)
    if self.discrete:
        self.actor = Policy_network(self.state_dim, self.action_dim, args.hidden_dim)
    else:
        self.actor = Gaussian_Actor(self.state_dim, self.action_dim, args.hidden_dim)

    self.critic = V_network(self.state_dim)

    self.network_list = {'Actor': self.actor, 'Critic': self.critic}
    self.name = 'VPG'
def __init__(self, obs_dim, action_dim, args):
    """Set up the image-based DQN agent: replay buffer, optimizer, hyperparameters,
    pixel encoders, and online/target Q-networks operating on encoded features."""
    self.buffer = Buffer(args.buffer_size)
    self.optimizer = tf.keras.optimizers.Adam(args.learning_rate)

    self.obs_dim = obs_dim
    self.action_dim = action_dim
    self.feature_dim = args.feature_dim  # dimensionality of the encoder's latent output

    # Training hyperparameters, taken verbatim from the parsed args.
    self.batch_size = args.batch_size
    self.gamma = args.gamma
    self.learning_rate = args.learning_rate
    self.epsilon = args.epsilon
    self.training_start = args.training_start
    self.training_step = args.training_step
    self.current_step = 0
    self.copy_iter = args.copy_iter  # interval (in steps) between target syncs

    # Convolutional encoder architecture.
    self.layer_num = args.layer_num
    self.filter_num = args.filter_num

    # Q-networks consume encoder features, not raw observations.
    self.network = Policy_network(self.feature_dim, self.action_dim, args.hidden_dim)
    self.target_network = Policy_network(self.feature_dim, self.action_dim, args.hidden_dim)
    self.encoder = PixelEncoder(self.obs_dim, self.feature_dim, self.layer_num, self.filter_num, 'channels_last')
    self.target_encoder = PixelEncoder(self.obs_dim, self.feature_dim, self.layer_num, self.filter_num, 'channels_last')

    copy_weight(self.network, self.target_network)
    copy_weight(self.encoder, self.target_encoder)

    # Register the encoders too, matching CURL_TD3's network_list; the original
    # omitted them, which would exclude encoder weights from save/load.
    self.network_list = {
        'Network': self.network,
        'Target_Network': self.target_network,
        'Encoder': self.encoder,
        'Target_Encoder': self.target_encoder,
    }
    self.name = 'ImageDQN'
def __init__(self, state_dim, action_dim, args):
    """Set up the DDPG agent: replay buffer, optimizers, hyperparameters,
    and actor/critic networks with their target copies."""
    self.buffer = Buffer(args.buffer_size)

    self.actor_optimizer = tf.keras.optimizers.Adam(args.actor_lr)
    self.critic_optimizer = tf.keras.optimizers.Adam(args.critic_lr)

    self.state_dim = state_dim
    self.action_dim = action_dim

    # Training hyperparameters, taken verbatim from the parsed args.
    self.batch_size = args.batch_size
    self.gamma = args.gamma
    self.tau = args.tau  # soft-update coefficient for the target networks
    self.noise_scale = args.noise_scale  # exploration noise magnitude
    self.training_start = args.training_start
    self.training_step = args.training_step
    self.current_step = 0

    # Actor/critic plus target copies; targets start as exact copies.
    self.actor = Policy_network(self.state_dim, self.action_dim, args.hidden_dim)
    self.target_actor = Policy_network(self.state_dim, self.action_dim, args.hidden_dim)
    self.critic = Q_network(self.state_dim, self.action_dim, args.hidden_dim)
    self.target_critic = Q_network(self.state_dim, self.action_dim, args.hidden_dim)

    copy_weight(self.actor, self.target_actor)
    copy_weight(self.critic, self.target_critic)

    self.network_list = {
        'Actor': self.actor,
        'Target_Actor': self.target_actor,
        'Critic': self.critic,
        'Target_Critic': self.target_critic,
    }
    self.name = 'DDPG'
def __init__(self, state_dim, action_dim, args):
    """Set up the REINFORCE agent: on-policy buffer, optimizer, and a policy
    network (categorical for discrete action spaces, Gaussian otherwise)."""
    self.buffer = On_Policy_Buffer(args.buffer_size)

    self.state_dim = state_dim
    self.action_dim = action_dim
    self.discrete = args.discrete
    self.gamma = args.gamma

    # On-policy agent: trains from step 0, one update pass per rollout.
    self.training_start = 0
    self.training_step = 1

    self.optimizer = tf.keras.optimizers.Adam(args.learning_rate)

    # Fix: read the already-stored self.discrete (the original re-read
    # args.discrete and compared `== True`, PEP 8 E712).
    if self.discrete:
        self.network = Policy_network(self.state_dim, self.action_dim, args.hidden_dim)
    else:
        self.network = Gaussian_Actor(self.state_dim, self.action_dim, args.hidden_dim)

    self.network_list = {'Network': self.network}
    self.name = 'REINFORCE'
def __init__(self, state_dim, action_dim, args):
    """Set up the PPO agent: on-policy buffer, optimizers, clipping hyperparameters,
    and an actor (categorical or Gaussian) plus a state-value critic.

    Raises:
        ValueError: if ``args.ppo_mode`` is not ``'clip'`` (the only implemented mode).
    """
    self.discrete = args.discrete
    self.buffer = On_Policy_Buffer(args.buffer_size)

    self.ppo_mode = args.ppo_mode  # mode: 'clip'
    # Fix: the original used `assert self.ppo_mode is 'clip'` — identity comparison
    # against a string literal (implementation-dependent, SyntaxWarning in CPython)
    # and assert is stripped under -O. Validate explicitly with equality.
    if self.ppo_mode != 'clip':
        raise ValueError("ppo_mode must be 'clip', got {!r}".format(self.ppo_mode))

    self.gamma = args.gamma
    self.lambda_gae = args.lambda_gae  # GAE smoothing coefficient
    self.batch_size = args.batch_size
    self.clip = args.clip  # PPO surrogate-objective clipping range

    self.actor_optimizer = tf.keras.optimizers.Adam(args.actor_lr)
    self.critic_optimizer = tf.keras.optimizers.Adam(args.critic_lr)

    self.state_dim = state_dim
    self.action_dim = action_dim

    # On-policy agent: trains from step 0.
    self.training_start = 0
    self.training_step = args.training_step

    # Categorical policy for discrete action spaces, Gaussian otherwise.
    if self.discrete:
        self.actor = Policy_network(self.state_dim, self.action_dim, args.hidden_dim,
                                    kernel_initializer='RandomUniform')
    else:
        self.actor = Gaussian_Actor(self.state_dim, self.action_dim, args.hidden_dim,
                                    kernel_initializer='RandomUniform')

    self.critic = V_network(self.state_dim)

    self.network_list = {'Actor': self.actor, 'Critic': self.critic}
    self.name = 'PPO'
def __init__(self, obs_dim, action_dim, args):
    """Set up the CURL-TD3 agent: replay buffer, hyperparameters, pixel encoders,
    CURL contrastive head, actor/twin-critic networks with target copies, and
    per-component optimizers."""
    self.buffer = Buffer(args.buffer_size)

    self.obs_dim = obs_dim
    self.action_dim = action_dim
    self.image_size = obs_dim[-1]  # assumes the last obs dimension is the image side — TODO confirm layout
    self.current_step = 0

    # Training hyperparameters, taken verbatim from the parsed args.
    self.gamma = args.gamma
    self.batch_size = args.batch_size
    self.feature_dim = args.feature_dim  # dimensionality of the encoder's latent output
    self.curl_latent_dim = args.curl_latent_dim
    self.layer_num = args.layer_num
    self.filter_num = args.filter_num
    self.tau = args.tau  # soft-update coefficient for actor/critic targets
    self.encoder_tau = args.encoder_tau  # separate soft-update coefficient for the encoder
    self.policy_delay = args.policy_delay  # actor updated once per this many critic updates
    self.actor_noise = args.actor_noise
    self.target_noise = args.target_noise
    self.noise_clip = args.noise_clip
    self.training_start = args.training_start
    self.training_step = args.training_step

    # Actor, twin critics, and pixel encoders all operate on encoder features.
    self.actor = Policy_network(self.feature_dim, self.action_dim, args.hidden_dim)
    self.target_actor = Policy_network(self.feature_dim, self.action_dim, args.hidden_dim)
    self.critic1 = Q_network(self.feature_dim, self.action_dim, args.hidden_dim)
    self.critic2 = Q_network(self.feature_dim, self.action_dim, args.hidden_dim)
    self.target_critic1 = Q_network(self.feature_dim, self.action_dim, args.hidden_dim)
    self.target_critic2 = Q_network(self.feature_dim, self.action_dim, args.hidden_dim)

    self.encoder = PixelEncoder(self.obs_dim, self.feature_dim, self.layer_num, self.filter_num)
    self.target_encoder = PixelEncoder(self.obs_dim, self.feature_dim, self.layer_num, self.filter_num)

    # Targets start as exact copies of their online counterparts.
    copy_weight(self.actor, self.target_actor)
    copy_weight(self.critic1, self.target_critic1)
    copy_weight(self.critic2, self.target_critic2)
    copy_weight(self.encoder, self.target_encoder)

    self.curl = CURL(self.feature_dim, self.curl_latent_dim)

    self.actor_optimizer = tf.keras.optimizers.Adam(args.actor_lr)
    self.critic1_optimizer = tf.keras.optimizers.Adam(args.critic_lr)
    self.critic2_optimizer = tf.keras.optimizers.Adam(args.critic_lr)
    self.encoder_optimizer = tf.keras.optimizers.Adam(args.encoder_lr)
    self.cpc_optimizer = tf.keras.optimizers.Adam(args.cpc_lr)

    # Register the target actor too, matching DDPG's network_list; the original
    # omitted it, which would exclude target-actor weights from save/load.
    self.network_list = {
        'Actor': self.actor,
        'Target_Actor': self.target_actor,
        'Critic1': self.critic1,
        'Critic2': self.critic2,
        'Target_Critic1': self.target_critic1,
        'Target_Critic2': self.target_critic2,
        'Curl': self.curl,
        'Encoder': self.encoder,
        'Target_Encoder': self.target_encoder,
    }
    self.name = 'CURL_TD3'
def __init__(self, obs_dim, action_dim, hidden_dim=512, gamma=0.99, learning_rate=0.001,
             batch_size=512, policy_delay=2, actor_noise=0.1, target_noise=0.2,
             noise_clip=0.5, buffer_size=1e6, feature_dim=50, layer_num=4, filter_num=32,
             tau=0.005, encoder_tau=0.005, bisim_coef=0.5, training_start=1000):
    """Set up the DBC-TD3 agent: replay buffer, pixel encoders, bisimulation
    dynamics/reward models, actor/twin-critic networks with target copies,
    and per-component optimizers.

    Unlike the other agents in this file, hyperparameters are keyword
    arguments rather than an `args` namespace.
    """
    # Fix: the default 1e6 is a float; cast so an integer-sized buffer
    # (e.g. a deque maxlen) accepts it, matching the int args the other agents pass.
    self.buffer = Buffer(int(buffer_size))

    self.obs_dim = obs_dim
    self.action_dim = action_dim
    self.hidden_dim = hidden_dim

    # Training hyperparameters.
    self.gamma = gamma
    self.learning_rate = learning_rate
    self.batch_size = batch_size
    self.feature_dim = feature_dim  # dimensionality of the encoder's latent output
    self.layer_num = layer_num
    self.filter_num = filter_num
    self.tau = tau  # soft-update coefficient for actor/critic targets
    self.encoder_tau = encoder_tau  # separate soft-update coefficient for the encoder
    self.bisim_coef = bisim_coef  # weight of the bisimulation metric loss
    self.policy_delay = policy_delay  # actor updated once per this many critic updates
    self.actor_noise = actor_noise
    self.target_noise = target_noise
    self.noise_clip = noise_clip
    self.training_start = training_start

    # Actor, twin critics, and their targets operate on encoder features.
    self.actor = Policy_network(feature_dim, action_dim, (hidden_dim, hidden_dim))
    self.target_actor = Policy_network(feature_dim, action_dim, (hidden_dim, hidden_dim))
    self.critic1 = Q_network(feature_dim, action_dim, (hidden_dim, hidden_dim))
    self.critic2 = Q_network(feature_dim, action_dim, (hidden_dim, hidden_dim))
    self.target_critic1 = Q_network(feature_dim, action_dim, (hidden_dim, hidden_dim))
    self.target_critic2 = Q_network(feature_dim, action_dim, (hidden_dim, hidden_dim))

    self.encoder = PixelEncoder(self.obs_dim, feature_dim, layer_num, filter_num)
    self.target_encoder = PixelEncoder(self.obs_dim, feature_dim, layer_num, filter_num)

    # Latent dynamics and reward models used by the bisimulation objective.
    self.dynamics_model = Transition_Network(feature_dim, action_dim)
    self.reward_model = Reward_Network(feature_dim)

    # Targets start as exact copies of their online counterparts.
    copy_weight(self.actor, self.target_actor)
    copy_weight(self.critic1, self.target_critic1)
    copy_weight(self.critic2, self.target_critic2)
    copy_weight(self.encoder, self.target_encoder)

    # A single shared learning rate across all optimizers.
    self.actor_optimizer = tf.keras.optimizers.Adam(learning_rate)
    self.critic1_optimizer = tf.keras.optimizers.Adam(learning_rate)
    self.critic2_optimizer = tf.keras.optimizers.Adam(learning_rate)
    self.encoder_optimizer = tf.keras.optimizers.Adam(learning_rate)
    self.dynamics_optimizer = tf.keras.optimizers.Adam(learning_rate)
    self.reward_optimizer = tf.keras.optimizers.Adam(learning_rate)

    # Added for consistency: every other agent exposes network_list for
    # saving/loading; the original omitted it entirely.
    self.network_list = {
        'Actor': self.actor,
        'Target_Actor': self.target_actor,
        'Critic1': self.critic1,
        'Critic2': self.critic2,
        'Target_Critic1': self.target_critic1,
        'Target_Critic2': self.target_critic2,
        'Encoder': self.encoder,
        'Target_Encoder': self.target_encoder,
        'Dynamics': self.dynamics_model,
        'Reward': self.reward_model,
    }
    self.name = 'DBC_TD3'