Example #1
    def __init__(self, state_dim, action_dim, args):

        self.buffer = Buffer(args.buffer_size)

        self.optimizer = tf.keras.optimizers.Adam(args.learning_rate)

        self.state_dim = state_dim
        self.action_dim = action_dim

        self.batch_size = args.batch_size
        self.gamma = args.gamma
        self.lr = args.learning_rate
        self.epsilon = args.epsilon
        self.training_start = args.training_start
        self.training_step = args.training_step
        self.current_step = 0
        self.copy_iter = args.copy_iter

        self.network = Policy_network(self.state_dim, self.action_dim,
                                      args.hidden_dim)
        self.target_network = Policy_network(self.state_dim, self.action_dim,
                                             args.hidden_dim)

        copy_weight(self.network, self.target_network)

        self.network_list = {
            'Network': self.network,
            'Target_Network': self.target_network
        }
        self.name = 'Double DQN'
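The constructor keeps a separate target network and synchronises it with the online network through `copy_weight` (and again every `copy_iter` training steps). A minimal sketch of what such a hard-copy helper could look like, assuming both arguments are built Keras models; this is an illustration, not necessarily the repository's own implementation:

    def copy_weight(network, target_network):
        # Hard copy: overwrite every weight of the target model with the
        # corresponding weight of the online model. Both models must have
        # been built (called at least once) so their variables exist.
        target_network.set_weights(network.get_weights())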
Example #2
    def __init__(self, state_dim, action_dim, args):

        self.buffer = Buffer(args.buffer_size)

        self.actor_optimizer = tf.keras.optimizers.Adam(args.actor_lr)
        self.critic1_optimizer = tf.keras.optimizers.Adam(args.critic_lr)
        self.critic2_optimizer = tf.keras.optimizers.Adam(args.critic_lr)

        self.state_dim = state_dim
        self.action_dim = action_dim

        self.batch_size = args.batch_size
        self.gamma = args.gamma
        self.tau = args.tau
        self.actor_lr = args.actor_lr
        self.critic_lr = args.critic_lr
        self.policy_delay = args.policy_delay
        self.actor_noise = args.actor_noise
        self.target_noise = args.target_noise
        self.noise_clip = args.noise_clip
        self.training_start = args.training_start
        self.training_step = args.training_step
        self.current_step = 0

        self.actor = Policy_network(self.state_dim, self.action_dim,
                                    args.hidden_dim)
        self.target_actor = Policy_network(self.state_dim, self.action_dim,
                                           args.hidden_dim)
        self.critic1 = Q_network(self.state_dim, self.action_dim,
                                 args.hidden_dim)
        self.target_critic1 = Q_network(self.state_dim, self.action_dim,
                                        args.hidden_dim)
        self.critic2 = Q_network(self.state_dim, self.action_dim,
                                 args.hidden_dim)
        self.target_critic2 = Q_network(self.state_dim, self.action_dim,
                                        args.hidden_dim)

        copy_weight(self.actor, self.target_actor)
        copy_weight(self.critic1, self.target_critic1)
        copy_weight(self.critic2, self.target_critic2)

        self.network_list = {
            'Actor': self.actor,
            'Critic1': self.critic1,
            'Critic2': self.critic2,
            'Target_Critic1': self.target_critic1,
            'Target_Critic2': self.target_critic2
        }
        self.name = 'TD3'
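Besides the hard copy at construction time, the stored `tau` suggests the targets are later updated with a Polyak (soft) average, as is standard for TD3. A hedged sketch of such an update, assuming Keras models with `get_weights`/`set_weights` (a hypothetical helper, not necessarily the one used here):

    def soft_update(network, target_network, tau):
        # Polyak averaging: theta_target <- tau * theta + (1 - tau) * theta_target
        new_weights = [
            tau * w + (1.0 - tau) * tw
            for w, tw in zip(network.get_weights(), target_network.get_weights())
        ]
        target_network.set_weights(new_weights)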
Example #3
    def __init__(self, state_dim, action_dim, args):

        self.discrete = args.discrete

        self.buffer = On_Policy_Buffer(args.buffer_size)

        self.gamma = args.gamma
        self.lambda_gae = args.lambda_gae

        self.actor_optimizer = tf.keras.optimizers.Adam(args.actor_lr)
        self.critic_optimizer = tf.keras.optimizers.Adam(args.critic_lr)

        self.state_dim = state_dim
        self.action_dim = action_dim
        self.training_start = 0
        self.training_step = 1

        if self.discrete:
            self.actor = Policy_network(self.state_dim, self.action_dim,
                                        args.hidden_dim)
        else:
            self.actor = Gaussian_Actor(self.state_dim, self.action_dim,
                                        args.hidden_dim)

        self.critic = V_network(self.state_dim)

        self.network_list = {'Actor': self.actor, 'Critic': self.critic}
        self.name = 'VPG'
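`gamma` and `lambda_gae` indicate that advantages are computed with Generalized Advantage Estimation. A self-contained sketch of GAE over a single trajectory, assuming `values` holds one bootstrap value beyond the last step (illustrative only, not the repository's code):

    import numpy as np

    def gae_advantages(rewards, values, dones, gamma, lambda_gae):
        # delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_t) - V(s_t)
        # A_t     = delta_t + gamma * lambda * (1 - done_t) * A_{t+1}
        advantages = np.zeros(len(rewards), dtype=np.float32)
        last_gae = 0.0
        for t in reversed(range(len(rewards))):
            nonterminal = 1.0 - float(dones[t])
            delta = rewards[t] + gamma * values[t + 1] * nonterminal - values[t]
            last_gae = delta + gamma * lambda_gae * nonterminal * last_gae
            advantages[t] = last_gae
        return advantages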
Example #4
    def __init__(self, obs_dim, action_dim, args):

        self.buffer = Buffer(args.buffer_size)
        self.optimizer = tf.keras.optimizers.Adam(args.learning_rate)

        self.obs_dim = obs_dim
        self.action_dim = action_dim

        self.feature_dim = args.feature_dim

        self.batch_size = args.batch_size
        self.gamma = args.gamma
        self.learning_rate = args.learning_rate
        self.epsilon = args.epsilon
        self.training_start = args.training_start
        self.training_step = args.training_step
        self.current_step = 0
        self.copy_iter = args.copy_iter

        self.layer_num = args.layer_num
        self.filter_num = args.filter_num

        self.network = Policy_network(self.feature_dim, self.action_dim,
                                      args.hidden_dim)
        self.target_network = Policy_network(self.feature_dim, self.action_dim,
                                             args.hidden_dim)

        self.encoder = PixelEncoder(self.obs_dim, self.feature_dim,
                                    self.layer_num, self.filter_num,
                                    'channels_last')
        self.target_encoder = PixelEncoder(self.obs_dim, self.feature_dim,
                                           self.layer_num, self.filter_num,
                                           'channels_last')

        copy_weight(self.network, self.target_network)
        copy_weight(self.encoder, self.target_encoder)

        self.network_list = {
            'Network': self.network,
            'Target_Network': self.target_network
        }
        self.name = 'ImageDQN'
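With `epsilon` stored and a `PixelEncoder` in front of the Q-network, action selection is presumably ε-greedy over encoded features. A hedged sketch, assuming `encoder(obs)` returns a feature batch and `network(features)` returns Q-values (both call signatures are assumptions):

    import numpy as np

    def epsilon_greedy_action(network, encoder, obs, epsilon, action_dim):
        # With probability epsilon take a random action, otherwise the greedy one.
        if np.random.rand() < epsilon:
            return np.random.randint(action_dim)
        features = encoder(np.asarray(obs, dtype=np.float32)[np.newaxis])
        q_values = network(features)
        return int(np.argmax(q_values.numpy()[0]))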
Example #5
    def __init__(self, state_dim, action_dim, args):

        self.buffer = Buffer(args.buffer_size)

        self.actor_optimizer = tf.keras.optimizers.Adam(args.actor_lr)
        self.critic_optimizer = tf.keras.optimizers.Adam(args.critic_lr)

        self.state_dim = state_dim
        self.action_dim = action_dim

        self.batch_size = args.batch_size
        self.gamma = args.gamma
        self.tau = args.tau
        self.noise_scale = args.noise_scale
        self.training_start = args.training_start
        self.training_step = args.training_step
        self.current_step = 0

        self.actor = Policy_network(self.state_dim, self.action_dim,
                                    args.hidden_dim)
        self.target_actor = Policy_network(self.state_dim, self.action_dim,
                                           args.hidden_dim)
        self.critic = Q_network(self.state_dim, self.action_dim,
                                args.hidden_dim)
        self.target_critic = Q_network(self.state_dim, self.action_dim,
                                       args.hidden_dim)

        copy_weight(self.actor, self.target_actor)
        copy_weight(self.critic, self.target_critic)

        self.network_list = {
            'Actor': self.actor,
            'Target_Actor': self.target_actor,
            'Critic': self.critic,
            'Target_Critic': self.target_critic
        }
        self.name = 'DDPG'
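`noise_scale` is the standard deviation of the Gaussian exploration noise that DDPG typically adds to its deterministic action. A minimal sketch, assuming a tanh-bounded actor whose outputs lie in [-1, 1] (both the bound and the call signature are assumptions):

    import numpy as np

    def noisy_action(actor, state, noise_scale):
        # Deterministic action plus Gaussian exploration noise, clipped to the action range.
        action = actor(np.asarray(state, dtype=np.float32)[np.newaxis]).numpy()[0]
        action = action + noise_scale * np.random.randn(*action.shape)
        return np.clip(action, -1.0, 1.0)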
Example #6
    def __init__(self, state_dim, action_dim, args):

        self.buffer = On_Policy_Buffer(args.buffer_size)

        self.state_dim = state_dim
        self.action_dim = action_dim

        self.discrete = args.discrete

        self.gamma = args.gamma
        self.training_start = 0
        self.training_step = 1
        self.optimizer = tf.keras.optimizers.Adam(args.learning_rate)

        if args.discrete:
            self.network = Policy_network(self.state_dim, self.action_dim,
                                          args.hidden_dim)
        else:
            self.network = Gaussian_Actor(self.state_dim, self.action_dim,
                                          args.hidden_dim)

        self.network_list = {'Network': self.network}
        self.name = 'REINFORCE'
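REINFORCE weights the log-probability gradient of each action by the discounted return from that step onward. A short sketch of the return computation implied by `gamma` (illustrative only):

    def discounted_returns(rewards, gamma):
        # G_t = r_t + gamma * G_{t+1}, computed backwards over one finished episode.
        returns = []
        g = 0.0
        for r in reversed(rewards):
            g = r + gamma * g
            returns.insert(0, g)
        return returns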
Example #7
    def __init__(self, state_dim, action_dim, args):

        self.discrete = args.discrete

        self.buffer = On_Policy_Buffer(args.buffer_size)

        self.ppo_mode = args.ppo_mode  # only the 'clip' variant is supported
        assert self.ppo_mode == 'clip'

        self.gamma = args.gamma
        self.lambda_gae = args.lambda_gae
        self.batch_size = args.batch_size
        self.clip = args.clip

        self.actor_optimizer = tf.keras.optimizers.Adam(args.actor_lr)
        self.critic_optimizer = tf.keras.optimizers.Adam(args.critic_lr)

        self.state_dim = state_dim
        self.action_dim = action_dim
        self.training_start = 0
        self.training_step = args.training_step

        if self.discrete:
            self.actor = Policy_network(self.state_dim,
                                        self.action_dim,
                                        args.hidden_dim,
                                        kernel_initializer='RandomUniform')
        else:
            self.actor = Gaussian_Actor(self.state_dim,
                                        self.action_dim,
                                        args.hidden_dim,
                                        kernel_initializer='RandomUniform')

        self.critic = V_network(self.state_dim)

        self.network_list = {'Actor': self.actor, 'Critic': self.critic}
        self.name = 'PPO'
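`clip` is the ε of PPO's clipped surrogate objective, the only mode this constructor accepts. A hedged sketch of that loss for a batch of log-probabilities and advantages; the function name and signature are illustrative, not taken from the repository:

    import tensorflow as tf

    def ppo_clip_loss(log_prob, old_log_prob, advantage, clip):
        # ratio r = pi(a|s) / pi_old(a|s); maximize min(r * A, clip(r, 1-eps, 1+eps) * A).
        ratio = tf.exp(log_prob - old_log_prob)
        unclipped = ratio * advantage
        clipped = tf.clip_by_value(ratio, 1.0 - clip, 1.0 + clip) * advantage
        return -tf.reduce_mean(tf.minimum(unclipped, clipped))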
Example #8
    def __init__(self, obs_dim, action_dim, args):

        self.buffer = Buffer(args.buffer_size)

        self.obs_dim = obs_dim
        self.action_dim = action_dim
        self.image_size = obs_dim[-1]

        self.current_step = 0

        self.gamma = args.gamma

        self.batch_size = args.batch_size
        self.feature_dim = args.feature_dim
        self.curl_latent_dim = args.curl_latent_dim

        self.layer_num = args.layer_num
        self.filter_num = args.filter_num
        self.tau = args.tau
        self.encoder_tau = args.encoder_tau

        self.policy_delay = args.policy_delay
        self.actor_noise = args.actor_noise
        self.target_noise = args.target_noise
        self.noise_clip = args.noise_clip

        self.training_start = args.training_start
        self.training_step = args.training_step

        self.actor = Policy_network(self.feature_dim, self.action_dim,
                                    args.hidden_dim)
        self.target_actor = Policy_network(self.feature_dim, self.action_dim,
                                           args.hidden_dim)
        self.critic1 = Q_network(self.feature_dim, self.action_dim,
                                 args.hidden_dim)
        self.critic2 = Q_network(self.feature_dim, self.action_dim,
                                 args.hidden_dim)
        self.target_critic1 = Q_network(self.feature_dim, self.action_dim,
                                        args.hidden_dim)
        self.target_critic2 = Q_network(self.feature_dim, self.action_dim,
                                        args.hidden_dim)

        self.encoder = PixelEncoder(self.obs_dim, self.feature_dim,
                                    self.layer_num, self.filter_num)
        self.target_encoder = PixelEncoder(self.obs_dim, self.feature_dim,
                                           self.layer_num, self.filter_num)

        copy_weight(self.actor, self.target_actor)
        copy_weight(self.critic1, self.target_critic1)
        copy_weight(self.critic2, self.target_critic2)
        copy_weight(self.encoder, self.target_encoder)

        self.curl = CURL(self.feature_dim, self.curl_latent_dim)

        self.actor_optimizer = tf.keras.optimizers.Adam(args.actor_lr)
        self.critic1_optimizer = tf.keras.optimizers.Adam(args.critic_lr)
        self.critic2_optimizer = tf.keras.optimizers.Adam(args.critic_lr)

        self.encoder_optimizer = tf.keras.optimizers.Adam(args.encoder_lr)
        self.cpc_optimizer = tf.keras.optimizers.Adam(args.cpc_lr)

        self.network_list = {
            'Actor': self.actor,
            'Critic1': self.critic1,
            'Critic2': self.critic2,
            'Target_Critic1': self.target_critic1,
            'Target_Critic2': self.target_critic2,
            'Curl': self.curl,
            'Encoder': self.encoder,
            'Target_Encoder': self.target_encoder
        }

        self.name = 'CURL_TD3'
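The `CURL` module implements a contrastive (InfoNCE) loss between two augmented crops of the same observation, one encoded by the online encoder and one by the target encoder. A sketch of the bilinear logits typically used for this, assuming `W` is the learnable matrix held inside the CURL head (an assumption about its internals):

    import tensorflow as tf

    def curl_logits(z_anchor, z_positive, W):
        # z_anchor:   (B, latent_dim) features of augmented view 1 (online encoder)
        # z_positive: (B, latent_dim) features of augmented view 2 (target encoder)
        # W:          (latent_dim, latent_dim) learnable bilinear matrix
        # Diagonal entries are the positive pairs, so the InfoNCE labels are tf.range(B).
        Wz = tf.matmul(W, z_positive, transpose_b=True)                # (latent_dim, B)
        logits = tf.matmul(z_anchor, Wz)                               # (B, B)
        return logits - tf.reduce_max(logits, axis=1, keepdims=True)  # numerical stability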
Example #9
    def __init__(self,
                 obs_dim,
                 action_dim,
                 hidden_dim=512,
                 gamma=0.99,
                 learning_rate=0.001,
                 batch_size=512,
                 policy_delay=2,
                 actor_noise=0.1,
                 target_noise=0.2,
                 noise_clip=0.5,
                 buffer_size=1e6,
                 feature_dim=50,
                 layer_num=4,
                 filter_num=32,
                 tau=0.005,
                 encoder_tau=0.005,
                 bisim_coef=0.5,
                 training_start=1000):

        self.buffer = Buffer(buffer_size)

        self.obs_dim = obs_dim
        self.action_dim = action_dim

        self.hidden_dim = hidden_dim
        self.gamma = gamma
        self.learning_rate = learning_rate

        self.batch_size = batch_size
        self.feature_dim = feature_dim

        self.layer_num = layer_num
        self.filter_num = filter_num
        self.tau = tau
        self.encoder_tau = encoder_tau
        self.bisim_coef = bisim_coef

        self.policy_delay = policy_delay
        self.actor_noise = actor_noise
        self.target_noise = target_noise
        self.noise_clip = noise_clip

        self.training_start = training_start

        self.actor = Policy_network(feature_dim, action_dim,
                                    (hidden_dim, hidden_dim))
        self.target_actor = Policy_network(feature_dim, action_dim,
                                           (hidden_dim, hidden_dim))
        self.critic1 = Q_network(feature_dim, action_dim,
                                 (hidden_dim, hidden_dim))
        self.critic2 = Q_network(feature_dim, action_dim,
                                 (hidden_dim, hidden_dim))
        self.target_critic1 = Q_network(feature_dim, action_dim,
                                        (hidden_dim, hidden_dim))
        self.target_critic2 = Q_network(feature_dim, action_dim,
                                        (hidden_dim, hidden_dim))

        self.encoder = PixelEncoder(self.obs_dim, feature_dim, layer_num,
                                    filter_num)
        self.target_encoder = PixelEncoder(self.obs_dim, feature_dim,
                                           layer_num, filter_num)

        self.dynamics_model = Transition_Network(feature_dim, action_dim)
        self.reward_model = Reward_Network(feature_dim)

        copy_weight(self.actor, self.target_actor)
        copy_weight(self.critic1, self.target_critic1)
        copy_weight(self.critic2, self.target_critic2)
        copy_weight(self.encoder, self.target_encoder)

        self.actor_optimizer = tf.keras.optimizers.Adam(learning_rate)
        self.critic1_optimizer = tf.keras.optimizers.Adam(learning_rate)
        self.critic2_optimizer = tf.keras.optimizers.Adam(learning_rate)

        self.encoder_optimizer = tf.keras.optimizers.Adam(learning_rate)
        self.dynamics_optimizer = tf.keras.optimizers.Adam(learning_rate)
        self.reward_optimizer = tf.keras.optimizers.Adam(learning_rate)

        self.name = 'DBC_TD3'
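DBC trains the encoder so that distances in latent space track a bisimulation metric built from the reward and dynamics models, weighted by `bisim_coef`. A heavily hedged sketch of that encoder loss, assuming the dynamics model predicts a diagonal Gaussian over the next latent state; all names and shapes here are assumptions, not the repository's code:

    import tensorflow as tf

    def bisim_encoder_loss(z, reward, next_mu, next_sigma, gamma, bisim_coef):
        # Compare each sample with a random permutation of the batch.
        perm = tf.random.shuffle(tf.range(tf.shape(z)[0]))
        z2, r2 = tf.gather(z, perm), tf.gather(reward, perm)
        mu2, sigma2 = tf.gather(next_mu, perm), tf.gather(next_sigma, perm)

        z_dist = tf.reduce_sum(tf.abs(z - z2), axis=1)   # L1 distance in latent space
        r_dist = tf.abs(reward - r2)                     # reward difference
        # 2-Wasserstein distance between the predicted next-latent Gaussians.
        w2 = tf.sqrt(tf.reduce_sum((next_mu - mu2) ** 2 + (next_sigma - sigma2) ** 2, axis=1))

        target = tf.stop_gradient(r_dist + gamma * w2)   # bisimulation target distance
        return bisim_coef * tf.reduce_mean((z_dist - target) ** 2)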