Example #1
File: networks.py Project: yyht/RLs
    def __init__(self, is_continuous, vector_dim, action_dim, visual_dim=[], visual_feature=128,
                 *, eta=0.2, lr=1.0e-3, beta=0.2, loss_weight=10., encoder_type='simple'):
        '''
        params:
            is_continuous: specify whether the action space is continuous (True) or discrete (False)
            vector_dim: dimension of vector state input
            action_dim: dimension of action
            visual_dim: dimension of visual state input
            visual_feature: dimension of visual feature map
            eta: weight of intrinsic reward
            lr: learning rate of the curiosity model
            beta: weighting factor balancing the forward_net loss against the inverse_dynamic_net loss
            loss_weight: weighting factor balancing the policy-gradient loss against the curiosity loss
        '''
        super().__init__()
        self.device = get_device()
        self.eta = eta
        self.beta = beta
        self.loss_weight = loss_weight
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
        self.is_continuous = is_continuous

        self.camera_num = visual_dim[0] if len(visual_dim) > 0 else 0  # guard: the default visual_dim=[] would otherwise raise IndexError
        self.use_visual = self.camera_num > 0

        self.nets = MultiCameraCNN(n=self.camera_num, feature_dim=visual_feature, activation_fn=default_activation, encoder_type=encoder_type)
        self.s_dim = vector_dim + visual_feature * self.camera_num  # the visual term is already 0 when camera_num == 0

        if self.use_visual:
            # S, S' => A
            self.inverse_dynamic_net = Sequential([
                Dense(self.s_dim * 2, default_activation),
                Dense(action_dim, 'tanh' if is_continuous else None)
            ])

        # S, A => S'
        self.forward_net = Sequential([
            Dense(self.s_dim + action_dim, default_activation),
            Dense(self.s_dim, None)
        ])
        # `I` is presumably an alias for tf.keras.Input (cf. Example #3 below).
        self.initial_weights(I(shape=vector_dim), I(shape=visual_dim), I(shape=action_dim))

        self.tv = []
        if self.use_visual:
            for net in self.nets:
                self.tv += net.trainable_variables
            self.tv += self.inverse_dynamic_net.trainable_variables
        self.tv += self.forward_net.trainable_variables
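
The forward/inverse pair above follows the standard Intrinsic Curiosity Module (ICM) layout: the forward model's prediction error supplies the intrinsic reward, scaled by eta, while beta trades the forward loss off against the inverse loss. A rough sketch of those terms under that reading (the helper and its inputs feat, feat_next, a are hypothetical placeholders, not this project's API):

import tensorflow as tf

def curiosity_terms(model, feat, feat_next, a):
    # S, A => S': the forward model's prediction error drives the intrinsic reward.
    s_next_pred = model.forward_net(tf.concat([feat, a], axis=-1))
    forward_loss = 0.5 * tf.reduce_sum(tf.square(s_next_pred - feat_next), axis=-1)
    intrinsic_reward = model.eta * forward_loss
    # S, S' => A: inverse model; squared error shown for the continuous case
    # (a discrete action space would use cross-entropy instead).
    a_pred = model.inverse_dynamic_net(tf.concat([feat, feat_next], axis=-1))
    inverse_loss = tf.reduce_sum(tf.square(a_pred - a), axis=-1)
    loss = tf.reduce_mean(model.beta * forward_loss + (1.0 - model.beta) * inverse_loss)
    return intrinsic_reward, loss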
Example #2
    def __init__(self, *args, **kwargs):
        '''
        inputs:
            a_dim: dimension of the action space
            is_continuous: whether the action space is continuous (True) or discrete (False)
            base_dir: the directory that stores data such as models, logs, and other artifacts
        '''
        super().__init__()
        base_dir = kwargs.get('base_dir')
        tf_dtype = str(kwargs.get('tf_dtype', 'float32'))  # default assumed; a missing key would otherwise become the string 'None'
        tf.random.set_seed(int(kwargs.get('seed', 0)))
        self.device = get_device()
        self.logger2file = bool(kwargs.get('logger2file', False))

        tf.keras.backend.set_floatx(tf_dtype)
        self.cp_dir, self.log_dir, self.excel_dir = [
            os.path.join(base_dir, i) for i in ['model', 'log', 'excel']
        ]
        self.global_step = tf.Variable(
            0, name="global_step", trainable=False, dtype=tf.int64
        )  # in TF 2.x this must be tf.int64, because set_step requires its argument to be tf.int64.
        self.cast = self._cast(dtype=tf_dtype)
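
The int64 constraint matters once the step variable is wired into TensorBoard logging. A minimal sketch of that wiring (the log directory and scalar name are placeholders):

import tensorflow as tf

global_step = tf.Variable(0, name="global_step", trainable=False, dtype=tf.int64)
writer = tf.summary.create_file_writer('/tmp/log')  # placeholder log directory

with writer.as_default():
    tf.summary.experimental.set_step(global_step)  # register the default step for summary ops
    tf.summary.scalar('loss', 0.5)                 # picks up the default step set above
    global_step.assign_add(1)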
Example #3
    def __init__(self,
                 name,
                 is_continuous,
                 vector_dim,
                 action_dim,
                 visual_dim=[],
                 visual_feature=128,
                 *,
                 eta=0.2,
                 lr=1.0e-3,
                 beta=0.2,
                 loss_weight=10.):
        '''
        params:
            name: name
            is_continuous: specify whether the action space is continuous (True) or discrete (False)
            vector_dim: dimension of vector state input
            action_dim: dimension of action
            visual_dim: dimension of visual state input
            visual_feature: dimension of visual feature map
            eta: weight of intrinsic reward
            lr: learning rate of the curiosity model
            beta: weighting factor balancing the forward_net loss against the inverse_dynamic_net loss
            loss_weight: weighting factor balancing the policy-gradient loss against the curiosity loss
        '''
        super().__init__(name=name)
        self.device = get_device()
        self.eta = eta
        self.beta = beta
        self.loss_weight = loss_weight
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

        vdl = len(visual_dim)
        if vdl in (3, 4):  # presumably 3 = a single (H, W, C) frame, 4 = stacked frames
            self.use_visual = True
            if vdl == 4:
                self.net = ConvLayer(Conv3D, [32, 64, 64],
                                     [[1, 8, 8], [1, 4, 4], [1, 3, 3]],
                                     [[1, 4, 4], [1, 2, 2], [1, 1, 1]],
                                     padding='valid',
                                     activation='elu')
            else:
                self.net = ConvLayer(Conv2D, [32, 64, 64],
                                     [[8, 8], [4, 4], [3, 3]],
                                     [[4, 4], [2, 2], [1, 1]],
                                     padding='valid',
                                     activation='elu')
            self.net.add(Dense(visual_feature, activation_fn))
            self.s_dim = visual_feature + vector_dim

            # S, S' => A
            self.inverse_dynamic_net = Sequential([
                Dense(self.s_dim * 2, activation_fn),
                Dense(action_dim, 'tanh' if is_continuous else None)
            ])
        else:
            self.use_visual = False
            self.net = lambda vs: vs  # identity: pass the vector state through unchanged
            self.s_dim = vector_dim

        # S, A => S'
        self.forward_net = Sequential([
            Dense(self.s_dim + action_dim, activation_fn),
            Dense(self.s_dim, None)
        ])
        self.initial_weights(tf.keras.Input(shape=visual_dim),
                             tf.keras.Input(shape=vector_dim),
                             tf.keras.Input(shape=action_dim))

        self.tv = []
        if self.use_visual:
            self.tv += self.net.trainable_variables
            self.tv += self.inverse_dynamic_net.trainable_variables
        self.tv += self.forward_net.trainable_variables
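
All three variants gather their trainable weights into self.tv, which is what a gradient update consumes together with self.optimizer. A sketch of such a step (compute_loss is a hypothetical stand-in for the curiosity loss sketched after Example #1):

import tensorflow as tf

def train_step(model, batch):
    with tf.GradientTape() as tape:
        loss = compute_loss(model, batch)  # hypothetical loss function
    grads = tape.gradient(loss, model.tv)                  # only the variables collected in self.tv
    model.optimizer.apply_gradients(zip(grads, model.tv))
    return loss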