def __init__(self, vector_dim, action_dim, quantiles_idx, hidden_units):
    super().__init__()
    self.action_dim = action_dim
    self.q_net_head = mlp(hidden_units['q_net'], out_layer=False)  # [B, vector_dim]
    self.quantile_net = mlp(hidden_units['quantile'], out_layer=False)  # [N*B, quantiles_idx]
    self.q_net_tile = mlp(hidden_units['tile'], output_shape=action_dim, out_activation=None)  # [N*B, hidden_units['quantile'][-1]]
    self(I(shape=vector_dim), I(shape=quantiles_idx))
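# A minimal, hedged sketch of how an IQN-style head typically wires the three
# sub-networks above together (this is an assumption about the forward pass,
# not code taken from this class): the state feature from q_net_head is tiled
# N times, multiplied element-wise with the quantile embedding produced by
# quantile_net, and mapped to per-quantile Q-values by q_net_tile.
import tensorflow as tf

def iqn_forward_sketch(state_feat, quantile_emb, q_net_tile, N):
    # state_feat: [B, F] -> tile to [N*B, F] so it lines up with the embedding
    tiled_feat = tf.tile(state_feat, [N, 1])
    # quantile_emb: [N*B, F], e.g. quantile_net applied to cosine features of tau
    mixed = tiled_feat * quantile_emb  # Hadamard product, [N*B, F]
    return q_net_tile(mixed)           # per-quantile Q-values, [N*B, action_dim]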
def __init__(self, vector_dim, action_dim, atoms, hidden_units):
    super().__init__()
    self.action_dim = action_dim
    self.atoms = atoms
    self.share = mlp(hidden_units['share'], layer=Noisy, out_layer=False)
    self.v = mlp(hidden_units['v'], layer=Noisy, output_shape=atoms, out_activation=None)
    self.adv = mlp(hidden_units['adv'], layer=Noisy, output_shape=action_dim * atoms, out_activation=None)
    self(I(shape=vector_dim))
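# Hedged sketch of the dueling aggregation this noisy distributional head implies
# (an assumption about post-processing, not the original call()): reshape the value
# and advantage streams, combine them as V + A - mean(A), then softmax over atoms.
import tensorflow as tf

def dueling_c51_sketch(v, adv, action_dim, atoms):
    v = tf.reshape(v, [-1, 1, atoms])                 # [B, 1, atoms]
    adv = tf.reshape(adv, [-1, action_dim, atoms])    # [B, action_dim, atoms]
    logits = v + adv - tf.reduce_mean(adv, axis=1, keepdims=True)
    return tf.nn.softmax(logits, axis=-1)             # per-action atom probabilities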
def __init__(self, vector_dim, action_dim, options_num, hidden_units, is_continuous=True):
    super().__init__()
    self.actions_num = action_dim
    self.options_num = options_num
    self.share = mlp(hidden_units['share'], out_layer=False)
    self.q = mlp(hidden_units['q'], output_shape=options_num, out_activation=None)
    self.pi = mlp(hidden_units['intra_option'], output_shape=options_num * action_dim, out_activation='tanh' if is_continuous else None)
    self.beta = mlp(hidden_units['termination'], output_shape=options_num, out_activation='sigmoid')
    self(I(shape=vector_dim))
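# Hedged sketch (assumed usage, not this class's call()) of selecting the active
# option's intra-option policy from the flat [B, options_num * action_dim] output
# of self.pi above.
import tensorflow as tf

def select_intra_option_pi(pi_flat, options, options_num, action_dim):
    pi = tf.reshape(pi_flat, [-1, options_num, action_dim])        # [B, O, A]
    mask = tf.one_hot(options, options_num, dtype=pi.dtype)        # [B, O]
    return tf.reduce_sum(pi * tf.expand_dims(mask, -1), axis=1)    # [B, A]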
def __init__(self, vector_dim, output_shape, network_settings):
    super().__init__()
    self.share = mlp(network_settings['share'], out_layer=False)
    self.mu = mlp(network_settings['mu'], output_shape=output_shape, out_activation=None)
    self.log_std = mlp(network_settings['log_std'], output_shape=output_shape, out_activation='tanh')
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, network_settings):
    assert len(network_settings) > 1, \
        "if you want to use this architecture of critic network, the number of layers must be greater than 1"
    super().__init__()
    self.feature_net = mlp(network_settings[0:1])
    self.net = mlp(network_settings[1:], output_shape=1, out_activation=None)
    self(I(shape=vector_dim), I(shape=action_dim))
def __init__(self, vector_dim, output_shape, network_settings):
    super().__init__()
    self.share = mlp(network_settings['share'], out_layer=False)
    self.logits = mlp(network_settings['logits'], output_shape=output_shape, out_activation=None)
    self.v = mlp(network_settings['v'], output_shape=1, out_activation=None)
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, network_settings):
    super().__init__()
    self.soft_clip = network_settings['soft_clip']
    self.log_std_min, self.log_std_max = network_settings['log_std_bound']
    self.share = mlp(network_settings['share'], out_layer=False)
    self.mu = mlp(network_settings['mu'], output_shape=output_shape, out_activation=None)
    self.log_std = mlp(network_settings['log_std'], output_shape=output_shape, out_activation=None)
    self(I(shape=vector_dim))
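# Hedged sketch of how the stored log-std bounds are usually applied (an
# assumption, not the original call()): a hard clip, or, when soft_clip is
# enabled, a tanh squash that maps the raw head output smoothly into
# [log_std_min, log_std_max].
import tensorflow as tf

def bound_log_std(log_std, log_std_min, log_std_max, soft_clip):
    if soft_clip:
        return log_std_min + 0.5 * (log_std_max - log_std_min) * (tf.tanh(log_std) + 1.0)
    return tf.clip_by_value(log_std, log_std_min, log_std_max)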
def __init__(self, vector_dim, output_shape, head_num, network_settings):
    super().__init__()
    self.nets = [mlp(network_settings, output_shape=output_shape, out_activation=None) for _ in range(head_num)]
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, atoms, network_settings):
    super().__init__()
    self.action_dim = action_dim
    self.atoms = atoms
    self.net = mlp(network_settings, output_shape=atoms * action_dim, out_activation='softmax')
    self(I(shape=vector_dim))
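# Hedged sketch of the standard C51-style post-processing for the flat softmax
# output above (the support bounds v_min/v_max and the reshape are assumptions):
import tensorflow as tf

def c51_expected_q(dist_flat, action_dim, atoms, v_min=-10.0, v_max=10.0):
    z = tf.linspace(v_min, v_max, atoms)                     # fixed return support
    probs = tf.reshape(dist_flat, [-1, action_dim, atoms])   # [B, action_dim, atoms]
    return tf.reduce_sum(probs * z, axis=-1)                 # expected Q per action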
def __init__(self, vector_dim, action_dim, nums, network_settings):
    super().__init__()
    self.action_dim = action_dim
    self.nums = nums
    self.net = mlp(network_settings, output_shape=nums * action_dim, out_activation=None)
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, condition_sigma, network_settings):
    super().__init__()
    self.condition_sigma = condition_sigma
    self.log_std_min, self.log_std_max = network_settings['log_std_bound']
    self.share = mlp(network_settings['hidden_units'], out_layer=False)
    self.mu = mlp([], output_shape=output_shape, out_activation='tanh')
    if self.condition_sigma:
        self.log_std = mlp([], output_shape=output_shape, out_activation=None)
    else:
        self.log_std = tf.Variable(
            initial_value=-0.5 * tf.ones(output_shape, dtype=tf.dtypes.float32),
            trainable=True)
    self(I(shape=vector_dim))
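# Hedged sketch (assumed usage) of how the two sigma parameterisations above are
# typically consumed: a state-dependent head when condition_sigma is True,
# otherwise the shared trainable log-std variable broadcast to the batch, with
# the result clipped into [log_std_min, log_std_max].
import tensorflow as tf

def gaussian_params_sketch(feat, mu_net, log_std_head, condition_sigma,
                           log_std_min, log_std_max):
    mu = mu_net(feat)
    if condition_sigma:
        log_std = log_std_head(feat)               # state-dependent log-std head
    else:
        log_std = tf.ones_like(mu) * log_std_head  # broadcast shared tf.Variable
    return mu, tf.clip_by_value(log_std, log_std_min, log_std_max)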
def __init__(self, vector_dim, output_shape, hidden_units, is_continuous):
    super().__init__()
    self.is_continuous = is_continuous
    out_activation = 'tanh' if self.is_continuous else None
    self.net = mlp(hidden_units, act_fn='tanh', output_shape=output_shape, out_activation=out_activation, out_layer=True)
    # per-layer (in_dim, out_dim) pairs, their kernel sizes, and the total
    # parameter count (kernels + biases) of the MLP above
    self.weights_2dim = [[i, j] for i, j in zip([vector_dim] + hidden_units, hidden_units + [output_shape])]
    self.weights_nums = np.asarray(self.weights_2dim).prod(axis=-1).tolist()
    self.weights_total_nums = np.asarray(self.weights_2dim).prod(axis=-1).sum() + np.asarray(hidden_units).sum() + output_shape
    self(tf.keras.Input(shape=vector_dim))  # initialize network weights
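# Hedged sketch (an assumption about how the bookkeeping above is consumed):
# splitting one flat parameter vector of length weights_total_nums into
# per-layer kernels and biases, e.g. when this net's weights are produced by an
# outer optimiser or another network.
import numpy as np

def split_flat_weights(flat, weights_2dim):
    kernels, biases, i = [], [], 0
    for in_dim, out_dim in weights_2dim:
        kernels.append(np.reshape(flat[i:i + in_dim * out_dim], (in_dim, out_dim)))
        i += in_dim * out_dim
        biases.append(flat[i:i + out_dim])
        i += out_dim
    return kernels, biases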
def __init__(self, vector_dim, action_dim, atoms, network_settings):
    super().__init__()
    self.action_dim = action_dim
    self.atoms = atoms
    self.net = mlp(network_settings, out_layer=False)
    self.outputs = []
    for _ in range(action_dim):
        self.outputs.append(Dense(atoms, activation='softmax'))
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, network_settings, out_activation='tanh'):
    super().__init__()
    self.net = mlp(network_settings, output_shape=output_shape, out_activation=out_activation)
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, options_num, network_settings, out_activation=None):
    super().__init__()
    self.actions_num = output_shape
    self.options_num = options_num
    self.pi = mlp(network_settings, output_shape=options_num * output_shape, out_activation=out_activation)
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, options_num, network_settings, is_continuous=True):
    super().__init__()
    self.actions_num = action_dim
    self.options_num = options_num
    self.share = mlp(network_settings['share'], out_layer=False)
    self.q = mlp(network_settings['q'], output_shape=options_num, out_activation=None)
    self.pi = mlp(network_settings['intra_option'], output_shape=options_num * action_dim, out_activation='tanh' if is_continuous else None)
    self.beta = mlp(network_settings['termination'], output_shape=options_num, out_activation='sigmoid')
    self.o = mlp(network_settings['o'], output_shape=options_num, out_activation=tf.nn.log_softmax)
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, hidden_units):
    super().__init__()
    self.logits = mlp(hidden_units, output_shape=output_shape, out_activation=None)
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, hidden_units):
    super().__init__()
    self.net = mlp(hidden_units, output_shape=1, out_activation=None)
    self(I(shape=vector_dim), I(shape=action_dim))
def __init__(self, vector_dim, output_shape, head_num, hidden_units):
    super().__init__()
    self.nets = [mlp(hidden_units, output_shape=output_shape, out_activation=None) for _ in range(head_num)]
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, options_num, hidden_units, out_activation=None):
    super().__init__()
    self.actions_num = output_shape
    self.options_num = options_num
    self.pi = mlp(hidden_units, output_shape=options_num * output_shape, out_activation=out_activation)
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, hidden_units):
    super().__init__()
    self.share = mlp(hidden_units['share'], out_layer=False)
    self.logits = mlp(hidden_units['logits'], output_shape=output_shape, out_activation=None)
    self.v = mlp(hidden_units['v'], output_shape=1, out_activation=None)
    self(I(shape=vector_dim))
def __init__(self, vector_dim, network_settings):
    super().__init__()
    self.net = mlp(network_settings, output_shape=1, out_activation=None)
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, atoms, hidden_units):
    super().__init__()
    self.action_dim = action_dim
    self.atoms = atoms
    self.net = mlp(hidden_units, output_shape=atoms * action_dim, out_activation='softmax')
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, nums, hidden_units):
    super().__init__()
    self.action_dim = action_dim
    self.nums = nums
    self.net = mlp(hidden_units, output_shape=nums * action_dim, out_activation=None)
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, hidden_units):
    super().__init__()
    self.net = mlp(hidden_units, output_shape=output_shape, out_activation='tanh')
    self(I(shape=vector_dim))