def __init__(self, is_continuous, vector_dim, action_dim, visual_dim=[], visual_feature=128,
             *, eta=0.2, lr=1.0e-3, beta=0.2, loss_weight=10., encoder_type='simple'):
    '''
    Curiosity (ICM-style) model with multi-camera visual input support.

    params:
        is_continuous: specify whether action space is continuous(True) or discrete(False)
        vector_dim: dimension of vector state input
        action_dim: dimension of action
        visual_dim: dimensions of visual state input; visual_dim[0] is the number of cameras
                    (an empty sequence means no visual input)
        visual_feature: dimension of visual feature map
        eta: weight of intrinsic reward
        lr: the learning rate of curiosity model
        beta: weight factor of loss between inverse_dynamic_net and forward_net
        loss_weight: weight factor of loss between policy gradient and curiosity model
        encoder_type: visual encoder variant, passed through to MultiCameraCNN
    '''
    super().__init__()
    self.device = get_device()
    self.eta = eta
    self.beta = beta
    self.loss_weight = loss_weight
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    self.is_continuous = is_continuous
    # FIX: the default `visual_dim=[]` made the original `visual_dim[0]`
    # raise IndexError whenever no visual dimension was supplied.
    # An empty `visual_dim` now means "no cameras / no visual input".
    self.camera_num = visual_dim[0] if len(visual_dim) > 0 else 0
    if self.camera_num == 0:
        self.use_visual = False
    else:
        self.use_visual = True
        # One CNN branch per camera, each emitting `visual_feature` features.
        self.nets = MultiCameraCNN(n=self.camera_num,
                                   feature_dim=visual_feature,
                                   activation_fn=default_activation,
                                   encoder_type=encoder_type)
    # Encoded state size: vector part plus one visual feature block per camera.
    self.s_dim = vector_dim + (visual_feature * self.camera_num) * (self.camera_num > 0)
    if self.use_visual:
        # Inverse dynamics model: (S, S') => A. Only needed with visual input,
        # where it provides the training signal for the visual encoder.
        self.inverse_dynamic_net = Sequential([
            Dense(self.s_dim * 2, default_activation),
            Dense(action_dim, 'tanh' if is_continuous else None)
        ])
    # Forward model: (S, A) => S'. Always required — its prediction error
    # is the intrinsic reward, with or without visual input.
    self.forward_net = Sequential([
        Dense(self.s_dim + action_dim, default_activation),
        Dense(self.s_dim, None)
    ])
    self.initial_weights(I(shape=vector_dim), I(shape=visual_dim), I(shape=action_dim))
    # Collect the trainable variables this module optimizes.
    self.tv = []
    if self.use_visual:
        for net in self.nets:
            self.tv += net.trainable_variables
        self.tv += self.inverse_dynamic_net.trainable_variables
    self.tv += self.forward_net.trainable_variables
def __init__(self, *args, **kwargs):
    '''
    Base initializer: pulls run configuration out of **kwargs, seeds and
    configures TensorFlow, and lays out the per-run output directories.

    inputs (via kwargs):
        a_dim: action spaces
        is_continuous: action type, whether this control problem is continuous(True) or discrete(False)
        base_dir: the directory that stores data, like model, logs, and other data
    '''
    super().__init__()
    base_dir = kwargs.get('base_dir')
    tf_dtype = str(kwargs.get('tf_dtype'))
    tf.random.set_seed(int(kwargs.get('seed', 0)))
    self.device = get_device()
    self.logger2file = bool(kwargs.get('logger2file', False))
    tf.keras.backend.set_floatx(tf_dtype)
    # Per-run output locations: checkpoints, logs, and excel records.
    self.cp_dir = os.path.join(base_dir, 'model')
    self.log_dir = os.path.join(base_dir, 'log')
    self.excel_dir = os.path.join(base_dir, 'excel')
    # In TF 2.x this must be tf.int64: set_step requires a tf.int64 argument.
    self.global_step = tf.Variable(0, name="global_step", trainable=False, dtype=tf.int64)
    self.cast = self._cast(dtype=tf_dtype)
def __init__(self, name, is_continuous, vector_dim, action_dim, visual_dim=[], visual_feature=128,
             *, eta=0.2, lr=1.0e-3, beta=0.2, loss_weight=10.):
    '''
    Curiosity (ICM-style) model for a single visual stream or pure vector input.

    params:
        name: name
        is_continuous: specify whether action space is continuous(True) or discrete(False)
        vector_dim: dimension of vector state input
        action_dim: dimension of action
        visual_dim: dimension of visual state input (len 4 => stacked frames via Conv3D,
                    len 3 => single image via Conv2D, anything else => no visual input)
        visual_feature: dimension of visual feature map
        eta: weight of intrinsic reward
        lr: the learning rate of curiosity model
        beta: weight factor of loss between inverse_dynamic_net and forward_net
        loss_weight: weight factor of loss between policy gradient and curiosity model
    '''
    super().__init__(name=name)
    self.device = get_device()
    self.eta = eta
    self.beta = beta
    self.loss_weight = loss_weight
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    vdl = len(visual_dim)  # rank of the visual observation spec
    if vdl == 4 or vdl == 3:
        self.use_visual = True
        # Rank-4 visual input => temporal stack, encode with 3D convolutions;
        # rank-3 => single image, encode with 2D convolutions.
        if vdl == 4:
            self.net = ConvLayer(Conv3D, [32, 64, 64], [[1, 8, 8], [1, 4, 4], [1, 3, 3]], [[1, 4, 4], [1, 2, 2], [1, 1, 1]], padding='valid', activation='elu')
        else:
            self.net = ConvLayer(Conv2D, [32, 64, 64], [[8, 8], [4, 4], [3, 3]], [[4, 4], [2, 2], [1, 1]], padding='valid', activation='elu')
        # Project conv output down to the visual feature dimension.
        self.net.add(Dense(visual_feature, activation_fn))
        self.s_dim = visual_feature + vector_dim
        # Inverse dynamics model: S, S' => A — trains the visual encoder,
        # hence only built when visual input is used.
        self.inverse_dynamic_net = Sequential([
            Dense(self.s_dim * 2, activation_fn),
            Dense(action_dim, 'tanh' if is_continuous else None)
        ])
    else:
        self.use_visual = False
        self.net = lambda vs: vs  # identity pass-through for vector-only states
        self.s_dim = vector_dim
    # Forward model: S, A => S' — its prediction error yields the intrinsic reward.
    self.forward_net = Sequential([
        Dense(self.s_dim + action_dim, activation_fn),
        Dense(self.s_dim, None)
    ])
    self.initial_weights(tf.keras.Input(shape=visual_dim), tf.keras.Input(shape=vector_dim), tf.keras.Input(shape=action_dim))
    # Collect the trainable variables this module optimizes.
    # NOTE(review): original file was single-line; nesting here is reconstructed —
    # inverse_dynamic_net only exists when use_visual is True, so its variables
    # must be gathered inside the guard.
    self.tv = []
    if self.use_visual:
        self.tv += self.net.trainable_variables
        self.tv += self.inverse_dynamic_net.trainable_variables
    self.tv += self.forward_net.trainable_variables