# Policy Gradient (PG) agent constructor.
def __init__(self,
             s_dim,
             visual_sources,
             visual_resolution,
             a_dim_or_list,
             is_continuous,
             lr=5.0e-4,
             epoch=5,
             hidden_units={
                 'actor_continuous': [32, 32],
                 'actor_discrete': [32, 32]
             },
             **kwargs):
    super().__init__(
        s_dim=s_dim,
        visual_sources=visual_sources,
        visual_resolution=visual_resolution,
        a_dim_or_list=a_dim_or_list,
        is_continuous=is_continuous,
        **kwargs)
    self.epoch = epoch
    self.TensorSpecs = get_TensorSpecs([self.s_dim], self.visual_dim, [self.a_counts], [1])
    self.visual_net = Nn.VisualNet('visual_net', self.visual_dim)
    if self.is_continuous:
        self.net = Nn.actor_mu(self.s_dim, self.a_counts, 'pg_net',
                               hidden_units['actor_continuous'],
                               visual_net=self.visual_net)
        # State-independent, trainable log standard deviation for the Gaussian policy.
        self.log_std = tf.Variable(
            initial_value=-0.5 * np.ones(self.a_counts, dtype=np.float32),
            trainable=True)
        self.net.tv += [self.log_std]
    else:
        self.net = Nn.actor_discrete(self.s_dim, self.a_counts, 'pg_net',
                                     hidden_units['actor_discrete'],
                                     visual_net=self.visual_net)
    # Learning rate decays linearly from `lr` to 1e-10 over `max_episode` episodes.
    self.lr = tf.keras.optimizers.schedules.PolynomialDecay(lr, self.max_episode, 1e-10, power=1.0)
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.lr(self.episode))
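# Hedged sketch (not part of the original file): how the PolynomialDecay schedule used
# above behaves. With power=1.0 it interpolates linearly from the initial learning rate
# down to 1e-10 over `decay_steps` steps; the constructor calls the schedule with the
# current episode index to obtain the rate for the Adam optimizer. The concrete values
# (lr=5e-4, 100 episodes) below are illustrative assumptions only.
import tensorflow as tf

lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=5.0e-4, decay_steps=100, end_learning_rate=1e-10, power=1.0)
print(float(lr_schedule(0)))    # 5e-4 at episode 0
print(float(lr_schedule(50)))   # ~2.5e-4 halfway through the decay
print(float(lr_schedule(100)))  # ~1e-10 at the final episode (and clamped thereafter)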
# Actor-critic (A2C-style) agent constructor: separate actor and critic networks,
# entropy coefficient `beta`, and per-network learning-rate schedules.
def __init__(self,
             s_dim,
             visual_sources,
             visual_resolution,
             a_dim_or_list,
             is_continuous,
             epoch=5,
             beta=1.0e-3,
             actor_lr=5.0e-4,
             critic_lr=1.0e-3,
             hidden_units={
                 'actor_continuous': [32, 32],
                 'actor_discrete': [32, 32],
                 'critic': [32, 32]
             },
             **kwargs):
    super().__init__(
        s_dim=s_dim,
        visual_sources=visual_sources,
        visual_resolution=visual_resolution,
        a_dim_or_list=a_dim_or_list,
        is_continuous=is_continuous,
        **kwargs)
    self.beta = beta
    self.epoch = epoch
    self.TensorSpecs = get_TensorSpecs([self.s_dim], self.visual_dim, [self.a_counts], [1])
    if self.is_continuous:
        self.actor_net = Nn.actor_mu(self.s_dim, self.visual_dim, self.a_counts, 'actor_net',
                                     hidden_units['actor_continuous'])
        # State-independent, trainable log standard deviation for the Gaussian policy.
        self.log_std = tf.Variable(
            initial_value=-0.5 * np.ones(self.a_counts, dtype=np.float32),
            trainable=True)
    else:
        self.actor_net = Nn.actor_discrete(self.s_dim, self.visual_dim, self.a_counts, 'actor_net',
                                           hidden_units['actor_discrete'])
    self.critic_net = Nn.critic_v(self.s_dim, self.visual_dim, 'critic_net', hidden_units['critic'])
    # Both learning rates decay linearly to 1e-10 over `max_episode` episodes.
    self.actor_lr = tf.keras.optimizers.schedules.PolynomialDecay(
        actor_lr, self.max_episode, 1e-10, power=1.0)
    self.critic_lr = tf.keras.optimizers.schedules.PolynomialDecay(
        critic_lr, self.max_episode, 1e-10, power=1.0)
    self.optimizer_critic = tf.keras.optimizers.Adam(
        learning_rate=self.critic_lr(self.episode))
    self.optimizer_actor = tf.keras.optimizers.Adam(
        learning_rate=self.actor_lr(self.episode))
    # Banner logged when the agent is constructed.
    self.recorder.logger.info('''
        xx xxxxx xxxxxx xxx xx xxx xxx xx xxx xx xxx xx xx x xx xx xx xx xx xxx xxx xxxxxx xx xxx xx xx xx xx xx xx xx xx x xxx xxx xxx xxxxx xxxxxx xxxxxx
    ''')
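# Hedged sketch (assumption, not the class's actual method): a trainable, state-independent
# `log_std` like the one created above is commonly combined with the actor's mean output to
# form a diagonal Gaussian policy. The helper `sample_continuous_action` is hypothetical;
# only the `mu` / `log_std` names come from the constructor.
import numpy as np
import tensorflow as tf

def sample_continuous_action(mu, log_std):
    """Sample a ~ N(mu, exp(log_std)^2) with the reparameterization trick."""
    std = tf.exp(log_std)                        # broadcast over the batch dimension
    noise = tf.random.normal(tf.shape(mu))
    return mu + std * noise

mu = tf.zeros((4, 2))                            # batch of 4 states, 2-dim action space
log_std = tf.Variable(-0.5 * np.ones(2, dtype=np.float32), trainable=True)
actions = sample_continuous_action(mu, log_std)  # shape (4, 2)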
# TRPO agent constructor: KL constraint `delta`, conjugate-gradient and backtracking
# line-search settings, GAE parameter `lambda_`, and an optionally shared visual encoder.
def __init__(self,
             s_dim,
             visual_sources,
             visual_resolution,
             a_dim_or_list,
             is_continuous,
             beta=1.0e-3,
             lr=5.0e-4,
             delta=0.01,
             lambda_=0.95,
             cg_iters=10,
             train_v_iters=10,
             damping_coeff=0.1,
             backtrack_iters=10,
             backtrack_coeff=0.8,
             share_visual_net=True,
             epsilon=0.2,
             critic_lr=1e-3,
             hidden_units={
                 'actor_continuous': [32, 32],
                 'actor_discrete': [32, 32],
                 'critic': [32, 32]
             },
             **kwargs):
    super().__init__(
        s_dim=s_dim,
        visual_sources=visual_sources,
        visual_resolution=visual_resolution,
        a_dim_or_list=a_dim_or_list,
        is_continuous=is_continuous,
        **kwargs)
    self.beta = beta
    self.delta = delta
    self.lambda_ = lambda_
    self.epsilon = epsilon
    self.cg_iters = cg_iters
    self.damping_coeff = damping_coeff
    self.backtrack_iters = backtrack_iters
    self.backtrack_coeff = backtrack_coeff
    self.train_v_iters = train_v_iters
    self.actor_TensorSpecs = get_TensorSpecs([self.s_dim], self.visual_dim, [self.a_counts], [1], [1])
    self.critic_TensorSpecs = get_TensorSpecs([self.s_dim], self.visual_dim, [1])
    self.share_visual_net = share_visual_net
    if self.share_visual_net:
        # Actor and critic share a single visual encoder.
        self.actor_visual_net = self.critic_visual_net = Nn.VisualNet('visual_net', self.visual_dim)
    else:
        self.actor_visual_net = Nn.VisualNet('actor_visual_net', self.visual_dim)
        self.critic_visual_net = Nn.VisualNet('critic_visual_net', self.visual_dim)
    if self.is_continuous:
        self.actor_net = Nn.actor_mu(self.s_dim, self.a_counts, 'actor_net',
                                     hidden_units['actor_continuous'],
                                     visual_net=self.actor_visual_net)
        self.log_std = tf.Variable(
            initial_value=-0.5 * np.ones(self.a_counts, dtype=np.float32),
            trainable=True)
        self.actor_net.tv += [self.log_std]
        self.actor_params = self.actor_net.tv
        self.Hx_TensorSpecs = (
            [tf.TensorSpec(shape=flat_concat(self.actor_params).shape, dtype=tf.float32)]
            + get_TensorSpecs([self.s_dim], self.visual_dim, [self.a_counts], [self.a_counts]))
    else:
        self.actor_net = Nn.actor_discrete(self.s_dim, self.a_counts, 'actor_net',
                                           hidden_units['actor_discrete'],
                                           visual_net=self.actor_visual_net)
        self.actor_params = self.actor_net.tv
        self.Hx_TensorSpecs = (
            [tf.TensorSpec(shape=flat_concat(self.actor_params).shape, dtype=tf.float32)]
            + get_TensorSpecs([self.s_dim], self.visual_dim, [self.a_counts]))
    self.critic_net = Nn.critic_v(self.s_dim, 'critic_net', hidden_units['critic'],
                                  visual_net=self.critic_visual_net)
    self.critic_lr = tf.keras.optimizers.schedules.PolynomialDecay(critic_lr, self.max_episode, 1e-10, power=1.0)
    self.optimizer_critic = tf.keras.optimizers.Adam(learning_rate=self.critic_lr(self.episode))
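# Hedged sketch (assumption): `flat_concat` is not defined in this snippet. TRPO
# implementations typically use a helper like the one below to flatten the actor's
# variables into a single 1-D tensor, which is why `Hx_TensorSpecs` above takes a
# TensorSpec shaped like the flat parameter vector for the Hessian-vector product.
import tensorflow as tf

def flat_concat(xs):
    """Flatten each tensor/variable in `xs` and concatenate into one 1-D tensor."""
    return tf.concat([tf.reshape(x, [-1]) for x in xs], axis=0)

v1 = tf.Variable(tf.zeros((3, 2)))
v2 = tf.Variable(tf.zeros((4,)))
print(flat_concat([v1, v2]).shape)  # (10,): 3*2 + 4 parameters in a single vector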
# PPO agent constructor: clip range `epsilon`, entropy coefficient `beta`, GAE parameter
# `lambda_`, and either a shared actor-critic network or separate actor/critic networks.
def __init__(self,
             s_dim,
             visual_sources,
             visual_resolution,
             a_dim_or_list,
             is_continuous,
             epoch=5,
             beta=1.0e-3,
             lr=5.0e-4,
             lambda_=0.95,
             epsilon=0.2,
             share_net=True,
             actor_lr=3e-4,
             critic_lr=1e-3,
             hidden_units={
                 'share': {
                     'continuous': {
                         'share': [32, 32],
                         'mu': [32, 32],
                         'v': [32, 32]
                     },
                     'discrete': {
                         'share': [32, 32],
                         'logits': [32, 32],
                         'v': [32, 32]
                     }
                 },
                 'actor_continuous': [32, 32],
                 'actor_discrete': [32, 32],
                 'critic': [32, 32]
             },
             **kwargs):
    super().__init__(
        s_dim=s_dim,
        visual_sources=visual_sources,
        visual_resolution=visual_resolution,
        a_dim_or_list=a_dim_or_list,
        is_continuous=is_continuous,
        **kwargs)
    self.beta = beta
    self.epoch = epoch
    self.lambda_ = lambda_
    self.epsilon = epsilon
    self.share_net = share_net
    if self.is_continuous:
        # State-independent, trainable log standard deviation for the Gaussian policy.
        self.log_std = tf.Variable(
            initial_value=-0.5 * np.ones(self.a_counts, dtype=np.float32),
            trainable=True)
    if self.share_net:
        # One network with a shared trunk and separate policy/value heads.
        self.TensorSpecs = get_TensorSpecs([self.s_dim], self.visual_dim, [self.a_counts], [1], [1], [1])
        self.visual_net = Nn.VisualNet('visual_net', self.visual_dim)
        if self.is_continuous:
            self.net = Nn.a_c_v_continuous(self.s_dim, self.a_counts, 'ppo_net',
                                           hidden_units['share']['continuous'],
                                           visual_net=self.visual_net)
            self.net.tv += [self.log_std]
        else:
            self.net = Nn.a_c_v_discrete(self.s_dim, self.a_counts, 'ppo_net',
                                         hidden_units['share']['discrete'],
                                         visual_net=self.visual_net)
        self.lr = tf.keras.optimizers.schedules.PolynomialDecay(
            lr, self.max_episode, 1e-10, power=1.0)
        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=self.lr(self.episode))
    else:
        # Separate actor and critic, each with its own visual encoder and optimizer.
        self.actor_TensorSpecs = get_TensorSpecs([self.s_dim], self.visual_dim, [self.a_counts], [1], [1])
        self.critic_TensorSpecs = get_TensorSpecs([self.s_dim], self.visual_dim, [1])
        self.actor_visual_net = Nn.VisualNet('actor_visual_net', self.visual_dim)
        self.critic_visual_net = Nn.VisualNet('critic_visual_net', self.visual_dim)
        if self.is_continuous:
            self.actor_net = Nn.actor_mu(self.s_dim, self.a_counts, 'actor_net',
                                         hidden_units['actor_continuous'],
                                         visual_net=self.actor_visual_net)
            self.actor_net.tv += [self.log_std]
        else:
            self.actor_net = Nn.actor_discrete(self.s_dim, self.a_counts, 'actor_net',
                                               hidden_units['actor_discrete'],
                                               visual_net=self.actor_visual_net)
        self.critic_net = Nn.critic_v(self.s_dim, 'critic_net', hidden_units['critic'],
                                      visual_net=self.critic_visual_net)
        self.actor_lr = tf.keras.optimizers.schedules.PolynomialDecay(
            actor_lr, self.max_episode, 1e-10, power=1.0)
        self.critic_lr = tf.keras.optimizers.schedules.PolynomialDecay(
            critic_lr, self.max_episode, 1e-10, power=1.0)
        self.optimizer_actor = tf.keras.optimizers.Adam(
            learning_rate=self.actor_lr(self.episode))
        self.optimizer_critic = tf.keras.optimizers.Adam(
            learning_rate=self.critic_lr(self.episode))
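# Hedged sketch (assumption, not the class's actual training code): the `epsilon` stored
# above is PPO's clip range. It parameterizes the standard clipped surrogate objective
#   L = E[ min(r * A, clip(r, 1 - epsilon, 1 + epsilon) * A) ],  r = pi_new(a|s) / pi_old(a|s),
# shown here as a hypothetical standalone helper.
import tensorflow as tf

def ppo_clip_objective(new_log_prob, old_log_prob, advantage, epsilon=0.2):
    """Return the (to-be-maximized) clipped surrogate objective, averaged over the batch."""
    ratio = tf.exp(new_log_prob - old_log_prob)          # importance ratio r
    clipped = tf.clip_by_value(ratio, 1.0 - epsilon, 1.0 + epsilon)
    return tf.reduce_mean(tf.minimum(ratio * advantage, clipped * advantage))

# Tiny illustrative call with made-up log-probabilities and advantages.
logp_new = tf.constant([-1.0, -0.8])
logp_old = tf.constant([-1.1, -0.7])
adv = tf.constant([0.5, -0.2])
print(float(ppo_clip_objective(logp_new, logp_old, adv)))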