示例#1
0
 def _build_model(self):
   """Instantiate all networks and optimizers, then run one train step.

   Every hyperparameter is read from ``self._c``. The encoder, dynamics,
   decoder and reward modules (plus the 'binary' pcont head when
   ``self._c.pcont`` is set) share the 'model' optimizer; the value head and
   the actor each get an optimizer of their own.
   """
   cfg = self._c
   # Maps the activation names used in the config to the TF functions.
   activations = {
       'elu': tf.nn.elu,
       'relu': tf.nn.relu,
       'swish': tf.nn.swish,
       'leaky_relu': tf.nn.leaky_relu,
   }
   conv_act = activations[cfg.cnn_act]  # activation for the conv modules
   dense_act = activations[cfg.dense_act]  # activation for the dense heads

   self._encode = models.ConvEncoder(cfg.cnn_depth, conv_act)
   self._dynamics = models.RSSM(
       cfg.stoch_size, cfg.deter_size, cfg.deter_size)
   self._decode = models.ConvDecoder(cfg.cnn_depth, conv_act)
   self._reward = models.DenseDecoder((), 2, cfg.num_units, act=dense_act)
   # Modules trained together by the 'model' optimizer.
   world_model = [self._encode, self._dynamics, self._decode, self._reward]
   if cfg.pcont:
     self._pcont = models.DenseDecoder(
         (), 3, cfg.num_units, 'binary', act=dense_act)
     world_model.append(self._pcont)
   self._value = models.DenseDecoder((), 3, cfg.num_units, act=dense_act)
   self._actor = models.ActionDecoder(
       self._actdim, 4, cfg.num_units, cfg.action_dist,
       init_std=cfg.action_init_std, act=dense_act)

   # functools.partial pre-binds tools.Adam together with the keyword
   # arguments shared by every optimizer; the remaining positional arguments
   # are supplied at each call below.
   make_optimizer = functools.partial(
       tools.Adam,
       wd=cfg.weight_decay,
       clip=cfg.grad_clip,
       wdpattern=cfg.weight_decay_pattern)
   self._model_opt = make_optimizer('model', world_model, cfg.model_lr)
   self._value_opt = make_optimizer('value', [self._value], cfg.value_lr)
   self._actor_opt = make_optimizer('actor', [self._actor], cfg.actor_lr)

   # Do a train step to initialize all variables, including optimizer
   # statistics. Ideally, we would use batch size zero, but that doesn't work
   # in multi-GPU mode.
   self.train(next(self._dataset))
示例#2
0
 def _build_model(self):
   """Build the networks and optimizers, then call ``self.train`` once.

   Uses ``models.LaserConvEncoder`` as the observation encoder. The encoder,
   dynamics, decoder and reward modules (plus the optional 'binary' pcont
   head) are optimized together under the name 'model'; the value head and
   the actor are optimized separately.
   """
   cfg = self._c
   activation_by_name = {
       'elu': tf.nn.elu,
       'relu': tf.nn.relu,
       'swish': tf.nn.swish,
       'leaky_relu': tf.nn.leaky_relu,
   }
   conv_activation = activation_by_name[cfg.cnn_act]
   dense_activation = activation_by_name[cfg.dense_act]

   self._encode = models.LaserConvEncoder(cfg.cnn_depth, conv_activation)
   self._dynamics = models.RSSM(
       cfg.stoch_size, cfg.deter_size, cfg.deter_size)
   self._decode = models.ConvDecoder(cfg.cnn_depth, conv_activation)
   self._reward = models.DenseDecoder(
       (), 2, cfg.num_units, act=dense_activation)
   jointly_trained = [
       self._encode, self._dynamics, self._decode, self._reward]
   if cfg.pcont:
     self._pcont = models.DenseDecoder(
         (), 3, cfg.num_units, 'binary', act=dense_activation)
     jointly_trained.append(self._pcont)
   self._value = models.DenseDecoder(
       (), 3, cfg.num_units, act=dense_activation)
   self._actor = models.ActionDecoder(
       self._actdim, 4, cfg.num_units, cfg.action_dist,
       init_std=cfg.action_init_std, act=dense_activation)

   # Keyword arguments common to all three optimizers.
   shared_adam_args = dict(
       wd=cfg.weight_decay,
       clip=cfg.grad_clip,
       wdpattern=cfg.weight_decay_pattern)
   self._model_opt = tools.Adam(
       'model', jointly_trained, cfg.model_lr, **shared_adam_args)
   self._value_opt = tools.Adam(
       'value', [self._value], cfg.value_lr, **shared_adam_args)
   self._actor_opt = tools.Adam(
       'actor', [self._actor], cfg.actor_lr, **shared_adam_args)

   # Run self.train on the first batch drawn from the dataset iterator.
   self.train(next(self._dataset))
示例#3
0
    def _build_model(self):
        """Create the networks and optimizers, then run one train step.

        The dynamics module is selected by ``self._c.model_num``:

        * ``'Dreamer'``     -> ``models.RSSM``
        * ``'ED2_Dreamer'`` -> ``models.RSSM_action_separate_with_group``,
          configured with an entry of the module-level ``group_separate``
          table.

        Raises:
            KeyError: if ``cnn_act`` / ``dense_act`` name an unknown
                activation, or the ``group_separate`` lookup fails.
            ValueError: if ``self._c.model_num`` is not one of the two
                supported names.
        """
        cfg = self._c
        acts = dict(elu=tf.nn.elu,
                    relu=tf.nn.relu,
                    swish=tf.nn.swish,
                    leaky_relu=tf.nn.leaky_relu)
        cnn_act = acts[cfg.cnn_act]
        act = acts[cfg.dense_act]
        self._encode = models.ConvEncoder(cfg.cnn_depth, cnn_act)

        if cfg.model_num == 'Dreamer':
            self._dynamics = models.RSSM(cfg.stoch_size, cfg.deter_size,
                                         cfg.deter_size)
            print("bulid RSSM")
        elif cfg.model_num == 'ED2_Dreamer':
            # The second '_'-separated field of the task name selects the
            # grouping table (presumably the environment name -- confirm
            # against the task naming scheme).
            train_environ = cfg.task.split('_')[1]
            # Look the grouping up once; it is used for both the model and
            # the log line.
            group = group_separate[train_environ][str(cfg.separate_schema)]
            self._dynamics = models.RSSM_action_separate_with_group(
                cfg.stoch_size, cfg.deter_size, cfg.deter_size, group)
            print("bulid action separate RSSM with schema{}, {}".format(
                cfg.separate_schema, group))
        else:
            # Fail here with a clear message instead of leaving
            # self._dynamics unset and failing later on the attribute read.
            raise ValueError(
                "Unsupported model_num {!r}; expected 'Dreamer' or "
                "'ED2_Dreamer'.".format(cfg.model_num))

        self._decode = models.ConvDecoder(cfg.cnn_depth, cnn_act)
        self._reward = models.DenseDecoder((), 2, cfg.num_units, act=act)
        if cfg.pcont:
            self._pcont = models.DenseDecoder((),
                                              3,
                                              cfg.num_units,
                                              'binary',
                                              act=act)
        self._value = models.DenseDecoder((), 3, cfg.num_units, act=act)
        self._actor = models.ActionDecoder(self._actdim,
                                           4,
                                           cfg.num_units,
                                           cfg.action_dist,
                                           init_std=cfg.action_init_std,
                                           act=act)
        # Modules trained together by the 'model' optimizer.
        model_modules = [
            self._encode, self._dynamics, self._decode, self._reward
        ]
        if cfg.pcont:
            model_modules.append(self._pcont)
        # Pre-bind the keyword arguments shared by all optimizers.
        Optimizer = functools.partial(tools.Adam,
                                      wd=cfg.weight_decay,
                                      clip=cfg.grad_clip,
                                      wdpattern=cfg.weight_decay_pattern)
        self._model_opt = Optimizer('model', model_modules, cfg.model_lr)
        self._value_opt = Optimizer('value', [self._value], cfg.value_lr)
        self._actor_opt = Optimizer('actor', [self._actor], cfg.actor_lr)
        # Do a train step to initialize all variables, including optimizer
        # statistics. Ideally, we would use batch size zero, but that doesn't work
        # in multi-GPU mode.
        self.train(next(self._dataset))
示例#4
0
    def _build_model(self):
        """Build the networks, their optimizers and the optional SAC helper.

        Besides the encoder, dynamics, decoder, reward, value and actor
        modules, this creates a ``models.ContrastiveObsModel`` and
        ``self._c.num_Qs`` ``models.QNetwork`` instances, each Q-network with
        its own optimizer. When ``self._c.use_sac`` is set, the actor,
        Q-networks and their optimizers are handed to
        ``soft_actor_critic.SAC``. Finally ``self.train`` is called on one
        batch from ``self._dataset``.
        """
        cfg = self._c
        activations = {
            'elu': tf.nn.elu,
            'relu': tf.nn.relu,
            'swish': tf.nn.swish,
            'leaky_relu': tf.nn.leaky_relu,
        }
        conv_act = activations[cfg.cnn_act]
        dense_act = activations[cfg.dense_act]

        self._encode = models.ConvEncoder(cfg.cnn_depth, conv_act)
        self._dynamics = models.RSSM(
            cfg.stoch_size, cfg.deter_size, cfg.deter_size)
        self._decode = models.ConvDecoder(cfg.cnn_depth, conv_act)
        self._contrastive = models.ContrastiveObsModel(
            cfg.deter_size, cfg.deter_size * 2)
        self._reward = models.DenseDecoder(
            (), 2, cfg.num_units, act=dense_act)
        # Modules trained together by the 'model' optimizer (order kept).
        jointly_trained = [
            self._encode,
            self._dynamics,
            self._contrastive,
            self._reward,
            self._decode,
        ]
        if cfg.pcont:
            self._pcont = models.DenseDecoder(
                (), 3, cfg.num_units, 'binary', act=dense_act)
            jointly_trained.append(self._pcont)
        self._value = models.DenseDecoder(
            (), 3, cfg.num_units, act=dense_act)
        self._Qs = [
            models.QNetwork(3, cfg.num_units, act=dense_act)
            for _ in range(cfg.num_Qs)
        ]
        self._actor = models.ActionDecoder(
            self._actdim, 4, cfg.num_units, cfg.action_dist,
            init_std=cfg.action_init_std, act=dense_act)

        # tools.Adam with the keyword arguments common to every optimizer.
        new_optimizer = functools.partial(
            tools.Adam,
            wd=cfg.weight_decay,
            clip=cfg.grad_clip,
            wdpattern=cfg.weight_decay_pattern)
        self._model_opt = new_optimizer(
            'model', jointly_trained, cfg.model_lr)
        self._value_opt = new_optimizer(
            'value', [self._value], cfg.value_lr)
        self._actor_opt = new_optimizer(
            'actor', [self._actor], cfg.actor_lr)
        # One optimizer per Q-network; they use the value learning rate.
        self._q_opts = [
            new_optimizer('qs', [q_network], cfg.value_lr)
            for q_network in self._Qs
        ]

        if cfg.use_sac:
            self._sac = soft_actor_critic.SAC(
                self._actor, self._Qs, self._actor_opt, self._q_opts,
                self._actspace)

        self.train(next(self._dataset))
示例#5
0
 def _build_model(self):
     """Build the networks and optimizers, then run one train step.

     In addition to the encoder, dynamics, decoder, reward, value and actor
     modules, this creates the RE3 helpers: a ``models.ConvRandEncoder`` and
     a ``models.RMS`` instance. Neither helper is passed to an optimizer
     here.
     """
     cfg = self._c
     activations = {
         "elu": tf.nn.elu,
         "relu": tf.nn.relu,
         "swish": tf.nn.swish,
         "leaky_relu": tf.nn.leaky_relu,
     }
     conv_act = activations[cfg.cnn_act]
     dense_act = activations[cfg.dense_act]

     self._encode = models.ConvEncoder(cfg.cnn_depth, conv_act)
     # RE3: random encoder and running mean/std modules.
     self._rand_encode = models.ConvRandEncoder(cfg.cnn_depth, conv_act)
     self._rms = models.RMS()
     self._dynamics = models.RSSM(
         cfg.stoch_size, cfg.deter_size, cfg.deter_size)
     self._decode = models.ConvDecoder(cfg.cnn_depth, conv_act)
     self._reward = models.DenseDecoder((), 2, cfg.num_units, act=dense_act)
     # Modules trained together by the "model" optimizer.
     world_model = [self._encode, self._dynamics, self._decode, self._reward]
     if cfg.pcont:
         self._pcont = models.DenseDecoder(
             (), 3, cfg.num_units, "binary", act=dense_act)
         world_model.append(self._pcont)
     self._value = models.DenseDecoder((), 3, cfg.num_units, act=dense_act)
     self._actor = models.ActionDecoder(
         self._actdim, 4, cfg.num_units, cfg.action_dist,
         init_std=cfg.action_init_std, act=dense_act)

     # Keyword arguments common to all three optimizers.
     shared_adam_args = dict(
         wd=cfg.weight_decay,
         clip=cfg.grad_clip,
         wdpattern=cfg.weight_decay_pattern)
     self._model_opt = tools.Adam(
         "model", world_model, cfg.model_lr, **shared_adam_args)
     self._value_opt = tools.Adam(
         "value", [self._value], cfg.value_lr, **shared_adam_args)
     self._actor_opt = tools.Adam(
         "actor", [self._actor], cfg.actor_lr, **shared_adam_args)

     # Do a train step to initialize all variables, including optimizer
     # statistics. Ideally, we would use batch size zero, but that doesn't work
     # in multi-GPU mode.
     self.train(next(self._dataset))
示例#6
0
文件: lompo.py 项目: rmrafailov/LOMPO
    def _build_model(self):
        """Create the model, the actor-critic networks and the optimizers.

        Configuration is read from ``self._c``:

        * ``proprio`` and ``im_size`` select the encoder class; ``im_size``
          also selects the decoder class (the non-"Large" variants are used
          when ``im_size == 64``).
        * ``proprio`` / ``pcont`` add the corresponding dense heads to the
          modules trained by the 'model' optimizer.

        Two Q-functions are created, each with a target network that is a
        deep copy of that same Q-function.

        Raises:
            KeyError: if ``cnn_act`` / ``dense_act`` name an unknown
                activation.
        """
        cfg = self._c
        acts = dict(elu=tf.nn.elu,
                    relu=tf.nn.relu,
                    swish=tf.nn.swish,
                    leaky_relu=tf.nn.leaky_relu)
        cnn_act = acts[cfg.cnn_act]
        act = acts[cfg.dense_act]
        # 64x64 images use the standard conv modules, anything else the
        # "Large" variants.
        small_images = cfg.im_size == 64

        # Create encoder based on environment observations.
        if cfg.proprio:
            encoder_cls = (models.ConvEncoderProprio
                           if small_images else models.ConvEncoderProprioLarge)
        else:
            encoder_cls = (models.ConvEncoder
                           if small_images else models.ConvEncoderLarge)
        self._encode = encoder_cls(cfg.cnn_depth, cnn_act)

        # RSSM model with ensembles.
        self._dynamics = models.RSSME(cfg.stoch_size,
                                      cfg.deter_size,
                                      cfg.deter_size,
                                      num_models=cfg.num_models)

        # Create decoder based on image size.
        decoder_cls = (models.ConvDecoder
                       if small_images else models.ConvDecoderLarge)
        self._decode = decoder_cls(cfg.cnn_depth,
                                   cnn_act,
                                   shape=(cfg.im_size, cfg.im_size, 3))

        if cfg.proprio:
            self._proprio = models.DenseDecoder((self._propriodim, ),
                                                3,
                                                cfg.num_units,
                                                act=act)
        if cfg.pcont:
            self._pcont = models.DenseDecoder((),
                                              3,
                                              cfg.num_units,
                                              'binary',
                                              act=act)
        self._reward = models.DenseDecoder((), 2, cfg.num_units, act=act)

        # Modules trained together by the 'model' optimizer.
        model_modules = [
            self._encode, self._dynamics, self._decode, self._reward
        ]
        if cfg.proprio:
            model_modules.append(self._proprio)
        if cfg.pcont:
            model_modules.append(self._pcont)

        # Build actor-critic networks.
        self._qf1 = models.DenseNetwork(1, 3, cfg.num_units, act=act)
        self._qf2 = models.DenseNetwork(1, 3, cfg.num_units, act=act)
        # Each target network is copied from the Q-function with the same
        # index, so target and online networks stay paired.
        self._target_qf1 = deepcopy(self._qf1)
        self._target_qf2 = deepcopy(self._qf2)
        self._qf_criterion = tf.keras.losses.Huber()
        self._actor = models.ActorNetwork(self._actdim,
                                          4,
                                          cfg.num_units,
                                          act=act)

        # Initialize optimizers; the partial pre-binds the keyword arguments
        # shared by all of them.
        Optimizer = functools.partial(tools.Adam,
                                      wd=cfg.weight_decay,
                                      clip=cfg.grad_clip,
                                      wdpattern=cfg.weight_decay_pattern)

        self._model_opt = Optimizer('model', model_modules, cfg.model_lr)
        # A single optimizer updates both Q-functions.
        self._qf_opt = Optimizer('qf', [self._qf1, self._qf2], cfg.q_lr)
        self._actor_opt = Optimizer('actor', [self._actor], cfg.actor_lr)