def __init__(self, feat_dim=64, rnn_units=8, *, use_rnn=False, network_type=MemoryNetworkType.LSTM):
    super().__init__()
    # NOTE: tf.keras.layers.Masking(mask_value=0.) cannot be used here; it raises
    # "ValueError: Tried to convert 'tensor' to a tensor and failed. Error: None values not supported."
    # See https://github.com/tensorflow/tensorflow/issues/31998
    self.use_rnn = use_rnn
    self.h_dim = rnn_units if use_rnn else feat_dim
    self.network_type = network_type
    if use_rnn:
        if self.network_type == MemoryNetworkType.GRU:
            self.cell_nums = 1  # GRU keeps a single hidden-state tensor
            cell = tf.keras.layers.GRUCell(rnn_units)
        elif self.network_type == MemoryNetworkType.LSTM:
            self.cell_nums = 2  # LSTM keeps hidden state and cell state
            cell = tf.keras.layers.LSTMCell(rnn_units)
        else:
            raise ValueError(f'unsupported memory network type: {network_type}')
        self.rnn_net = tf.keras.layers.RNN(cell, return_state=True, return_sequences=True)
        self(*([I(shape=(None, feat_dim))] + [I(shape=rnn_units) for _ in range(self.cell_nums)]))
    else:
        self.cell_nums = 1
        self.rnn_net = lambda x, initial_state: (x, initial_state)
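# A minimal standalone sketch (not part of the class above) of why cell_nums is 1
# for GRU and 2 for LSTM: tf.keras.layers.RNN with return_state=True returns one
# extra tensor per entry in the cell's state, so the number of initial-state
# inputs must match. Shapes here are illustrative assumptions.
import tensorflow as tf

x = tf.random.normal([4, 10, 64])  # [batch, timesteps, feat_dim]

lstm = tf.keras.layers.RNN(tf.keras.layers.LSTMCell(8),
                           return_state=True, return_sequences=True)
seq, h, c = lstm(x)   # LSTM: output sequence + 2 state tensors

gru = tf.keras.layers.RNN(tf.keras.layers.GRUCell(8),
                          return_state=True, return_sequences=True)
seq, h = gru(x)       # GRU: output sequence + 1 state tensor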
def __init__(self, vector_dim, action_dim, quantiles_idx, hidden_units):
    super().__init__()
    self.action_dim = action_dim
    self.q_net_head = mlp(hidden_units['q_net'], out_layer=False)  # input: [B, vector_dim]
    self.quantile_net = mlp(hidden_units['quantile'], out_layer=False)  # input: [N*B, quantiles_idx]
    self.q_net_tile = mlp(hidden_units['tile'], output_shape=action_dim, out_activation=None)  # input: [N*B, hidden_units['quantile'][-1]]
    self(I(shape=vector_dim), I(shape=quantiles_idx))
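# A hedged sketch of how these three sub-networks are typically combined in an
# IQN-style forward pass; the class's actual call() may differ. It assumes
# quantiles_tiled is the cosine-embedded quantile sample repeated N times along
# the batch axis, and that quantile_net's output width matches the head features.
import tensorflow as tf

def iqn_forward(q_net_head, quantile_net, q_net_tile, s, quantiles_tiled, N):
    features = q_net_head(s)               # [B, F]
    features = tf.tile(features, [N, 1])   # [N*B, F], one copy per quantile
    q_emb = quantile_net(quantiles_tiled)  # [N*B, F]
    return q_net_tile(features * q_emb)    # [N*B, action_dim]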
def __init__(self, vector_dim, action_dim, hidden_units):
    assert len(hidden_units) > 1, \
        "if you want to use this architecture of critic network, the number of layers must be greater than 1"
    super().__init__()
    self.feature_net = mlp(hidden_units[0:1])
    self.net = mlp(hidden_units[1:], output_shape=1, out_activation=None)
    self(I(shape=vector_dim), I(shape=action_dim))
def __init__(self, dim, hidden_units):
    super().__init__()
    self.rnn_type = 'lstm'
    # NOTE: tf.keras.layers.Masking(mask_value=0.) cannot be used here; it raises
    # "ValueError: Tried to convert 'tensor' to a tensor and failed. Error: None values not supported."
    # See https://github.com/tensorflow/tensorflow/issues/31998
    cell = tf.keras.layers.LSTMCell(hidden_units)
    self.lstm_net = tf.keras.layers.RNN(cell, return_state=True, return_sequences=True)
    self(I(shape=(None, dim)), I(shape=(hidden_units,)), I(shape=(hidden_units,)))
def __init__(self, vector_dims, visual_dims, vector_net_kwargs, visual_net_kwargs, encoder_net_kwargs,
             memory_net_kwargs, is_continuous, action_dim, *,
             eta=0.2, lr=1.0e-3, beta=0.2, loss_weight=10., network_type=VisualNetworkType.SIMPLE):
    '''
    params:
        is_continuous: specify whether the action space is continuous (True) or discrete (False)
        vector_dims: dimensions of the vector state input
        action_dim: dimension of the action
        visual_dims: dimensions of the visual state input
        eta: weight of the intrinsic reward
        lr: learning rate of the curiosity model
        beta: weight factor of the loss between inverse_dynamic_net and forward_net
        loss_weight: weight factor of the loss between the policy gradient and the curiosity model
    '''
    super().__init__()
    self.device = get_device()
    self.eta = eta
    self.beta = beta
    self.loss_weight = loss_weight
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    self.is_continuous = is_continuous
    self.net = DefaultRepresentationNetwork(
        name='curiosity_model',
        vec_dims=vector_dims,
        vis_dims=visual_dims,
        vector_net_kwargs=vector_net_kwargs,
        visual_net_kwargs=visual_net_kwargs,
        encoder_net_kwargs=encoder_net_kwargs,
        memory_net_kwargs=memory_net_kwargs)
    self.feat_dim = self.net.h_dim
    # inverse dynamics model: (S, S') => A
    self.inverse_dynamic_net = Sequential([
        Dense(self.feat_dim * 2, default_activation, **initKernelAndBias),
        Dense(action_dim, 'tanh' if is_continuous else None, **initKernelAndBias)
    ])
    # forward model: (S, A) => S'
    self.forward_net = Sequential([
        Dense(self.feat_dim + action_dim, default_activation, **initKernelAndBias),
        Dense(self.feat_dim, None, **initKernelAndBias)
    ])
    self.initial_weights(I(shape=self.feat_dim), I(shape=action_dim))
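# A hedged sketch of the quantities eta and beta control, following the ICM
# formulation of Pathak et al. 2017; the class's actual training step may
# differ. feat/feat_ are the encoded current/next states, a the (continuous)
# action batch.
import tensorflow as tf

def icm_losses(feat, feat_, a, forward_net, inverse_dynamic_net, eta, beta):
    # forward model: predict next-state features from (features, action)
    feat_pred = forward_net(tf.concat([feat, a], axis=-1))
    forward_loss = 0.5 * tf.reduce_sum(tf.square(feat_pred - feat_), axis=-1)
    # inverse model: recover the action from (features, next features)
    a_pred = inverse_dynamic_net(tf.concat([feat, feat_], axis=-1))
    inverse_loss = tf.reduce_mean(tf.square(a_pred - a), axis=-1)
    intrinsic_reward = eta * forward_loss  # scaled forward prediction error
    loss = beta * tf.reduce_mean(forward_loss) + (1. - beta) * tf.reduce_mean(inverse_loss)
    return intrinsic_reward, loss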
def __init__(self, is_continuous, vector_dim, action_dim, visual_dim=[], visual_feature=128, *,
             eta=0.2, lr=1.0e-3, beta=0.2, loss_weight=10., encoder_type='simple'):
    '''
    params:
        is_continuous: specify whether the action space is continuous (True) or discrete (False)
        vector_dim: dimension of the vector state input
        action_dim: dimension of the action
        visual_dim: dimension of the visual state input
        visual_feature: dimension of the visual feature map
        eta: weight of the intrinsic reward
        lr: learning rate of the curiosity model
        beta: weight factor of the loss between inverse_dynamic_net and forward_net
        loss_weight: weight factor of the loss between the policy gradient and the curiosity model
    '''
    super().__init__()
    self.device = get_device()
    self.eta = eta
    self.beta = beta
    self.loss_weight = loss_weight
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    self.is_continuous = is_continuous
    self.camera_num = visual_dim[0]
    self.use_visual = self.camera_num > 0
    self.nets = MultiCameraCNN(n=self.camera_num, feature_dim=visual_feature,
                               activation_fn=default_activation, encoder_type=encoder_type)
    self.s_dim = vector_dim + (visual_feature * self.camera_num) * (self.camera_num > 0)
    # both models must exist unconditionally: their trainable variables are
    # collected below regardless of whether visual input is used
    # inverse dynamics model: (S, S') => A
    self.inverse_dynamic_net = Sequential([
        Dense(self.s_dim * 2, default_activation),
        Dense(action_dim, 'tanh' if is_continuous else None)
    ])
    # forward model: (S, A) => S'
    self.forward_net = Sequential([
        Dense(self.s_dim + action_dim, default_activation),
        Dense(self.s_dim, None)
    ])
    self.initial_weights(I(shape=vector_dim), I(shape=visual_dim), I(shape=action_dim))
    self.tv = []
    if self.use_visual:
        for net in self.nets:
            self.tv += net.trainable_variables
    self.tv += self.inverse_dynamic_net.trainable_variables
    self.tv += self.forward_net.trainable_variables
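# A hedged usage note, not the repository's confirmed update rule: loss_weight
# is documented above as balancing the policy-gradient loss against the
# curiosity loss, analogous to lambda in Pathak et al. 2017, i.e. roughly
#     total_loss = loss_weight * policy_loss + curiosity_loss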
def __init__(self, vector_dim, visual_dim=[], visual_feature=128, encoder_type='nature'):
    super().__init__()
    self.camera_num = visual_dim[0]
    self.nets = MultiCameraCNN(n=self.camera_num, feature_dim=visual_feature,
                               activation_fn=default_activation, encoder_type=encoder_type)
    self.hdim = vector_dim + (visual_feature * self.camera_num) * (self.camera_num > 0)
    self(I(shape=vector_dim), I(shape=visual_dim))
def __init__(self, vector_dim, output_shape, head_num, hidden_units):
    super().__init__()
    self.nets = [
        mlp(hidden_units, output_shape=output_shape, out_activation=None)
        for _ in range(head_num)
    ]
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, nums, hidden_units):
    super().__init__()
    self.action_dim = action_dim
    self.nums = nums
    self.net = mlp(hidden_units, output_shape=nums * action_dim, out_activation=None)
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, atoms, hidden_units):
    super().__init__()
    self.action_dim = action_dim
    self.atoms = atoms
    self.net = mlp(hidden_units, output_shape=atoms * action_dim, out_activation='softmax')
    self(I(shape=vector_dim))
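# A hedged sketch of how a C51-style head like this is usually read out: the
# net emits a softmax over `atoms` support points per action, and Q-values are
# the expectation over that support. v_min/v_max are assumed hyperparameters,
# not attributes of this class.
import tensorflow as tf

def c51_q_values(probs, v_min, v_max, atoms):
    # probs: [B, action_dim, atoms], each row a distribution over the support
    z = tf.linspace(float(v_min), float(v_max), atoms)  # fixed value support
    return tf.reduce_sum(probs * z, axis=-1)            # [B, action_dim]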
def __init__(self, vector_dim, output_shape, hidden_units):
    super().__init__()
    self.share = mlp(hidden_units['share'], out_layer=False)
    self.logits = mlp(hidden_units['logits'], output_shape=output_shape, out_activation=None)
    self.v = mlp(hidden_units['v'], output_shape=1, out_activation=None)
    self(I(shape=vector_dim))
def __init__(self, feat_dim=64, output_dim=64, *, use_encoder=False):
    # TODO
    super().__init__()
    self.use_encoder = use_encoder
    self.h_dim = output_dim if use_encoder else feat_dim
    self.net = Dense(output_dim, default_activation, **initKernelAndBias) if use_encoder else (lambda x: x)
    self(I(shape=feat_dim))
def __init__(self, vector_dim, action_dim, atoms, hidden_units):
    super().__init__()
    self.action_dim = action_dim
    self.atoms = atoms
    self.share = mlp(hidden_units['share'], layer=Noisy, out_layer=False)
    self.v = mlp(hidden_units['v'], layer=Noisy, output_shape=atoms, out_activation=None)
    self.adv = mlp(hidden_units['adv'], layer=Noisy, output_shape=action_dim * atoms, out_activation=None)
    self(I(shape=vector_dim))
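# A hedged sketch of the standard Rainbow dueling aggregation these heads
# imply (the class's actual call() may differ): per-atom value and advantage
# logits are combined with the advantage mean subtracted, then softmaxed.
import tensorflow as tf

def dueling_atom_probs(v, adv, action_dim, atoms):
    # v: [B, atoms], adv: [B, action_dim * atoms], both pre-softmax logits
    adv = tf.reshape(adv, [-1, action_dim, atoms])
    v = tf.expand_dims(v, axis=1)  # [B, 1, atoms], broadcast over actions
    logits = v + adv - tf.reduce_mean(adv, axis=1, keepdims=True)
    return tf.nn.softmax(logits, axis=-1)  # [B, action_dim, atoms]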
def __init__(self, vector_dim, action_dim, nums, network_settings):
    super().__init__()
    self.action_dim = action_dim
    self.nums = nums
    self.net = mlp(network_settings, output_shape=nums * action_dim, out_activation=None)
    self(I(shape=vector_dim))
def __init__(self, img_dim, fc_dim):
    super().__init__()
    self.net = Sequential([
        get_visual_network_from_type(VisualNetworkType.NATURE)(),
        Dense(fc_dim, **initKernelAndBias),
        LayerNormalization()
    ])
    self(I(shape=img_dim))
def __init__(self, vector_dim, action_dim, atoms, network_settings):
    super().__init__()
    self.action_dim = action_dim
    self.atoms = atoms
    self.net = mlp(network_settings, output_shape=atoms * action_dim, out_activation='softmax')
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, head_num, network_settings):
    super().__init__()
    self.nets = [
        mlp(network_settings, output_shape=output_shape, out_activation=None)
        for _ in range(head_num)
    ]
    self(I(shape=vector_dim))
def __init__(self, img_dim, fc_dim):
    super().__init__()
    self.net = Sequential([
        NatureCNN(),
        Flatten(),
        Dense(fc_dim),
        LayerNormalization()
    ])
    self(I(shape=img_dim))
def __init__(self, vector_dim, action_dim, atoms, network_settings):
    super().__init__()
    self.action_dim = action_dim
    self.atoms = atoms
    self.net = mlp(network_settings, out_layer=False)
    self.outputs = []
    for _ in range(action_dim):
        self.outputs.append(Dense(atoms, activation='softmax'))
    self(I(shape=vector_dim))
def __init__(self, vector_dim=[], network_type=VectorNetworkType.CONCAT):
    super().__init__()
    self.nets = []
    for in_dim in vector_dim:
        self.nets.append(get_vector_network_from_type(network_type)(in_dim=in_dim))
    self.h_dim = sum([net.h_dim for net in self.nets])
    if vector_dim:
        self(*(I(shape=dim) for dim in vector_dim))
def __init__(self, vector_dim, action_dim, options_num, hidden_units, is_continuous=True):
    super().__init__()
    self.actions_num = action_dim
    self.options_num = options_num
    self.share = mlp(hidden_units['share'], out_layer=False)
    self.q = mlp(hidden_units['q'], output_shape=options_num, out_activation=None)
    self.pi = mlp(hidden_units['intra_option'], output_shape=options_num * action_dim,
                  out_activation='tanh' if is_continuous else None)
    self.beta = mlp(hidden_units['termination'], output_shape=options_num, out_activation='sigmoid')
    self(I(shape=vector_dim))
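# A hedged sketch of how an option-critic forward pass typically uses these
# heads (the class's real call() may differ): a shared trunk feeds Q-over-
# options, per-option intra-option policies, and per-option termination probs.
import tensorflow as tf

def option_critic_forward(share, q_head, pi_head, beta_head, s, options_num, action_dim):
    x = share(s)
    q = q_head(x)                                               # [B, options_num]
    pi = tf.reshape(pi_head(x), [-1, options_num, action_dim])  # per-option policy params
    beta = beta_head(x)                                         # [B, options_num] in (0, 1)
    return q, pi, beta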
def __init__(self, vector_dim, output_shape, network_settings, out_activation='tanh'):
    super().__init__()
    self.net = mlp(network_settings, output_shape=output_shape, out_activation=out_activation)
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, network_settings):
    super().__init__()
    self.share = mlp(network_settings['share'], out_layer=False)
    self.logits = mlp(network_settings['logits'], output_shape=output_shape, out_activation=None)
    self.v = mlp(network_settings['v'], output_shape=1, out_activation=None)
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, network_settings):
    super().__init__()
    self.share = mlp(network_settings['share'], out_layer=False)
    self.mu = mlp(network_settings['mu'], output_shape=output_shape, out_activation=None)
    self.log_std = mlp(network_settings['log_std'], output_shape=output_shape, out_activation='tanh')
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, hidden_units, out_activation='tanh'):
    super().__init__()
    self.net = mlp(hidden_units, output_shape=output_shape, out_activation=out_activation)
    self(I(shape=vector_dim))
def __init__(self, dim, hidden_units, use_rnn):
    super().__init__()
    self.use_rnn = use_rnn
    if use_rnn:
        self.dim = dim
        # self.masking = tf.keras.layers.Masking(mask_value=0.)  # unusable, see tensorflow#31998
        self.lstm_net = tf.keras.layers.LSTM(hidden_units, return_state=True, return_sequences=True)
        self(I(shape=(None, self.dim)))
        self.hdim = hidden_units
    else:
        self.hdim = dim
def __init__(self, vector_dim, output_shape, network_settings):
    super().__init__()
    self.soft_clip = network_settings['soft_clip']
    self.log_std_min, self.log_std_max = network_settings['log_std_bound']
    self.share = mlp(network_settings['share'], out_layer=False)
    self.mu = mlp(network_settings['mu'], output_shape=output_shape, out_activation=None)
    self.log_std = mlp(network_settings['log_std'], output_shape=output_shape, out_activation=None)
    self(I(shape=vector_dim))
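# A hedged sketch of the soft clipping that soft_clip and log_std_bound
# suggest, a common SAC-style trick (the class's actual call() may use a
# different formula): squash log_std into [log_std_min, log_std_max] with tanh
# rather than a hard clip, so gradients stay smooth at the bounds.
import tensorflow as tf

def soft_clip_log_std(log_std, log_std_min, log_std_max):
    return log_std_min + 0.5 * (log_std_max - log_std_min) * (tf.tanh(log_std) + 1.)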
def __init__(self, vector_dim, output_shape, options_num, network_settings, out_activation=None):
    super().__init__()
    self.actions_num = output_shape
    self.options_num = options_num
    self.pi = mlp(network_settings, output_shape=options_num * output_shape, out_activation=out_activation)
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, options_num, hidden_units, out_activation=None):
    super().__init__()
    self.actions_num = output_shape
    self.options_num = options_num
    self.pi = mlp(hidden_units, output_shape=options_num * output_shape, out_activation=out_activation)
    self(I(shape=vector_dim))
def __init__(self, vector_dim=[]):
    # TODO
    super().__init__()
    self.nets = []
    for _ in vector_dim:
        self.nets.append(lambda x: x)  # identity pass-through per vector input
    self.h_dim = sum(vector_dim)
    self.use_vector = self.h_dim != 0
    if vector_dim:
        self(*(I(shape=dim) for dim in vector_dim))