def __init__(self, split_shape, d_model, use_orthogonal=True, use_ReLU=False):
    super(SelfEmbedding, self).__init__()
    self.split_shape = split_shape

    if use_orthogonal:
        if use_ReLU:
            active_func = nn.ReLU()
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('relu'))
        else:
            active_func = nn.Tanh()
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('tanh'))
    else:
        if use_ReLU:
            active_func = nn.ReLU()
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('relu'))
        else:
            active_func = nn.Tanh()
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('tanh'))

    for i in range(len(split_shape)):
        if i == (len(split_shape) - 1):
            setattr(self, 'fc_' + str(i),
                    nn.Sequential(init_(nn.Linear(split_shape[i][1], d_model)),
                                  active_func,
                                  nn.LayerNorm(d_model)))
        else:
            setattr(self, 'fc_' + str(i),
                    nn.Sequential(init_(nn.Linear(split_shape[i][1] + split_shape[-1][1], d_model)),
                                  active_func,
                                  nn.LayerNorm(d_model)))
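# NOTE: every init_(...) lambda in this file wraps an `init` helper that is not part
# of this excerpt. The sketch below shows the conventional form of such a helper
# (apply a weight initializer with an optional gain, zero the bias, return the module);
# its exact signature is an assumption, not taken from the original source.
def init(module, weight_init, bias_init, gain=1):
    # Initialize weights (with gain) and biases in place, then hand the module back
    # so it can be wrapped inline, e.g. init_(nn.Linear(d_in, d_out)).
    weight_init(module.weight.data, gain=gain)
    bias_init(module.bias.data)
    return module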
def __init__(self, num_agents, num_inputs, lstm=False, naive_recurrent=False, recurrent=False, hidden_size=64):
    super(MLPBase, self).__init__(lstm, naive_recurrent, recurrent, num_agents, hidden_size, hidden_size)

    init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0), np.sqrt(2))

    self.actor = nn.Sequential(
        init_(nn.Linear(num_inputs, hidden_size)), nn.Tanh(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.Tanh())

    self.critic = nn.Sequential(
        init_(nn.Linear(num_inputs * num_agents, hidden_size)), nn.Tanh(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.Tanh())

    self.critic_linear = init_(nn.Linear(hidden_size, 1))

    self.train()
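# Illustrative only (assumes the `init` helper sketched above): with gain np.sqrt(2),
# init_ returns a Linear layer whose weight is a scaled orthogonal matrix and whose
# bias is zero. For a square weight W, W @ W.T equals 2 * I up to floating-point error.
def _check_orthogonal_init(hidden_size=8):
    import torch
    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0), np.sqrt(2))
    layer = init_(nn.Linear(hidden_size, hidden_size))
    weight = layer.weight.detach()
    assert torch.allclose(weight @ weight.t(), 2 * torch.eye(hidden_size), atol=1e-5)
    assert torch.all(layer.bias == 0)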
def __init__(self, heads, d_model, dropout=0.0, use_orthogonal=True):
    super(MultiHeadAttention, self).__init__()

    if use_orthogonal:
        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    else:
        init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.init.constant_(x, 0))

    self.d_model = d_model
    self.d_k = d_model // heads
    self.h = heads

    self.q_linear = init_(nn.Linear(d_model, d_model))
    self.v_linear = init_(nn.Linear(d_model, d_model))
    self.k_linear = init_(nn.Linear(d_model, d_model))
    self.dropout = nn.Dropout(dropout)
    self.out = init_(nn.Linear(d_model, d_model))
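# Only the constructor is shown above. A layer with these attributes is normally
# paired with the standard multi-head scaled dot-product attention forward; the
# sketch below is the textbook formulation written against self.q_linear /
# self.k_linear / self.v_linear / self.d_k / self.h / self.out, and is an
# illustration rather than the exact forward used in this repository.
import torch

def _scaled_dot_product_attention_sketch(self, q, k, v, mask=None):
    bs = q.size(0)
    # Project, then split d_model into h heads of size d_k: (bs, h, seq_len, d_k).
    q = self.q_linear(q).view(bs, -1, self.h, self.d_k).transpose(1, 2)
    k = self.k_linear(k).view(bs, -1, self.h, self.d_k).transpose(1, 2)
    v = self.v_linear(v).view(bs, -1, self.h, self.d_k).transpose(1, 2)
    # Attention weights: softmax(Q K^T / sqrt(d_k)), optionally masked.
    scores = torch.matmul(q, k.transpose(-2, -1)) / (self.d_k ** 0.5)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    weights = self.dropout(torch.softmax(scores, dim=-1))
    # Weighted sum of values, heads re-concatenated, then the output projection.
    context = torch.matmul(weights, v).transpose(1, 2).contiguous().view(bs, -1, self.d_model)
    return self.out(context)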
def __init__(self, num_agents, inputs, lstm=False, naive_recurrent=False, recurrent=False, hidden_size=64):
    super(CNNBase, self).__init__(lstm, naive_recurrent, recurrent, num_agents, hidden_size, hidden_size)

    init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0),
                           nn.init.calculate_gain('relu'))

    num_inputs = inputs[0]
    num_image = inputs[1]

    self.actor = nn.Sequential(
        init_(nn.Conv2d(num_inputs, 32, 3, stride=1)), nn.ReLU(),
        Flatten(),
        init_(nn.Linear(32 * (num_image - 3 + 1) * (num_image - 3 + 1), hidden_size)), nn.ReLU(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU())

    self.critic = nn.Sequential(
        init_(nn.Conv2d(num_inputs * num_agents, 32, 3, stride=1)), nn.ReLU(),
        Flatten(),
        init_(nn.Linear(32 * (num_image - 3 + 1) * (num_image - 3 + 1), hidden_size)), nn.ReLU(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU())

    init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))

    self.critic_linear = init_(nn.Linear(hidden_size, 1))

    self.train()
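# Flatten is referenced above but not defined in this excerpt; the usual definition
# (collapse everything after the batch dimension) is sketched here as an assumption.
class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)

# Why 32 * (num_image - 3 + 1) * (num_image - 3 + 1): a 3x3 convolution with stride 1
# and no padding maps an H x W image to (H - 3 + 1) x (W - 3 + 1), and the conv layer
# above has 32 output channels. For example, num_image = 5 gives 32 * 3 * 3 = 288
# features entering the first Linear layer.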
def __init__(self, d_model, d_ff=512, dropout=0.0, use_orthogonal=True, use_ReLU=False):
    super(FeedForward, self).__init__()
    # d_ff defaults to 512 here (the original Transformer uses 2048).
    if use_orthogonal:
        if use_ReLU:
            active_func = nn.ReLU()
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('relu'))
        else:
            active_func = nn.Tanh()
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('tanh'))
    else:
        if use_ReLU:
            active_func = nn.ReLU()
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('relu'))
        else:
            active_func = nn.Tanh()
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('tanh'))

    self.linear_1 = nn.Sequential(init_(nn.Linear(d_model, d_ff)), active_func, nn.LayerNorm(d_ff))
    self.dropout = nn.Dropout(dropout)
    self.linear_2 = init_(nn.Linear(d_ff, d_model))
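# Only the constructor is shown; the usual forward for this position-wise block is
# linear_1 -> dropout -> linear_2. The sketch below is illustrative and may differ
# from the repository's actual forward.
def _feed_forward_sketch(self, x):
    return self.linear_2(self.dropout(self.linear_1(x)))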
def __init__(self, input_dim, hidden_size, layer_N, use_orthogonal, use_ReLU):
    super(MLPLayer, self).__init__()
    self._layer_N = layer_N

    if use_orthogonal:
        if use_ReLU:
            active_func = nn.ReLU()
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('relu'))
        else:
            active_func = nn.Tanh()
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('tanh'))
    else:
        if use_ReLU:
            active_func = nn.ReLU()
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('relu'))
        else:
            active_func = nn.Tanh()
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('tanh'))

    self.fc1 = nn.Sequential(init_(nn.Linear(input_dim, hidden_size)), active_func, nn.LayerNorm(hidden_size))
    self.fc_h = nn.Sequential(init_(nn.Linear(hidden_size, hidden_size)), active_func, nn.LayerNorm(hidden_size))
    self.fc2 = get_clones(self.fc_h, self._layer_N)
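# get_clones is used above but not defined in this excerpt; it is conventionally a
# deepcopy-based ModuleList factory, sketched here as an assumption. In forward,
# fc1 is typically applied once and the layer_N clones in fc2 are applied in sequence.
import copy

def get_clones(module, N):
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])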
def __init__(self, obs_shape, share_obs_shape, naive_recurrent=False, recurrent=False, hidden_size=64,
             recurrent_N=1, attn=False, attn_size=512, attn_N=2, attn_heads=8, dropout=0.05,
             use_average_pool=True, use_common_layer=False, use_feature_normlization=True,
             use_feature_popart=True, use_orthogonal=True, layer_N=1, use_ReLU=False):
    super(MLPBase, self).__init__(obs_shape, share_obs_shape, naive_recurrent, recurrent, hidden_size,
                                  recurrent_N, attn, attn_size, attn_N, attn_heads, dropout,
                                  use_average_pool, use_common_layer, use_orthogonal, use_ReLU)

    self._use_common_layer = use_common_layer
    self._use_feature_normlization = use_feature_normlization
    self._use_feature_popart = use_feature_popart
    self._use_orthogonal = use_orthogonal
    self._layer_N = layer_N
    self._use_ReLU = use_ReLU
    self._attn = attn

    assert (self._use_feature_normlization and self._use_feature_popart) == False, (
        "--use_feature_normlization and --use_feature_popart cannot be set True simultaneously.")

    obs_dim = obs_shape[0]
    share_obs_dim = share_obs_shape[0]

    if self._use_feature_popart:
        self.actor_norm = PopArt(obs_dim)
        self.critic_norm = PopArt(share_obs_dim)

    if self._use_feature_normlization:
        self.actor_norm = nn.LayerNorm(obs_dim)
        self.critic_norm = nn.LayerNorm(share_obs_dim)

    if self._attn:
        if use_average_pool == True:
            num_inputs_actor = attn_size + obs_shape[-1][1]
            num_inputs_critic = attn_size
        else:
            num_inputs = 0
            split_shape = obs_shape[1:]
            for i in range(len(split_shape)):
                num_inputs += split_shape[i][0]
            num_inputs_critic = 0
            split_shape_critic = share_obs_shape[1:]
            for i in range(len(split_shape_critic)):
                num_inputs_critic += split_shape_critic[i][0]
            num_inputs_actor = num_inputs * attn_size
            num_inputs_critic = num_inputs_critic * attn_size
        self.actor_attn_norm = nn.LayerNorm(num_inputs_actor)
        self.critic_attn_norm = nn.LayerNorm(num_inputs_critic)
    else:
        num_inputs_actor = obs_dim
        num_inputs_critic = share_obs_dim

    if self._use_orthogonal:
        if self._use_ReLU:
            active_func = nn.ReLU()
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('relu'))
        else:
            active_func = nn.Tanh()
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('tanh'))
    else:
        if self._use_ReLU:
            active_func = nn.ReLU()
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('relu'))
        else:
            active_func = nn.Tanh()
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('tanh'))

    self.actor = MLPLayer(num_inputs_actor, hidden_size, self._layer_N, self._use_orthogonal, self._use_ReLU)
    self.critic = MLPLayer(num_inputs_critic, hidden_size, self._layer_N, self._use_orthogonal, self._use_ReLU)

    if self._use_common_layer:
        self.actor = nn.Sequential(
            init_(nn.Linear(num_inputs_actor, hidden_size)), active_func, nn.LayerNorm(hidden_size))
        self.critic = nn.Sequential(
            init_(nn.Linear(num_inputs_critic, hidden_size)), active_func, nn.LayerNorm(hidden_size))
        self.fc_h = nn.Sequential(
            init_(nn.Linear(hidden_size, hidden_size)), active_func, nn.LayerNorm(hidden_size))
        self.common_linear = get_clones(self.fc_h, self._layer_N)

    self.actor_rnn_norm = nn.LayerNorm(hidden_size)
    self.critic_rnn_norm = nn.LayerNorm(hidden_size)

    self.critic_linear = init_(nn.Linear(hidden_size, 1))
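# PopArt is referenced above as an alternative to LayerNorm for input feature
# normalization but is not defined in this excerpt. The sketch below is a minimal
# running-mean/variance input normalizer in that spirit; it is an assumption for
# illustration only, not the repository's PopArt implementation (which also rescales
# value-head outputs).
import torch

class RunningNormSketch(nn.Module):
    def __init__(self, input_dim, eps=1e-5):
        super(RunningNormSketch, self).__init__()
        self.register_buffer('running_mean', torch.zeros(input_dim))
        self.register_buffer('running_var', torch.ones(input_dim))
        self.register_buffer('count', torch.tensor(eps))
        self.eps = eps

    def forward(self, x):
        if self.training:
            # Update running statistics with the parallel (Chan et al.) formula.
            batch_mean = x.mean(dim=0)
            batch_var = x.var(dim=0, unbiased=False)
            batch_count = x.size(0)
            delta = batch_mean - self.running_mean
            total = self.count + batch_count
            self.running_mean = self.running_mean + delta * batch_count / total
            self.running_var = (self.running_var * self.count
                                + batch_var * batch_count
                                + delta.pow(2) * self.count * batch_count / total) / total
            self.count = total
        # Standardize the input features with the running statistics.
        return (x - self.running_mean) / torch.sqrt(self.running_var + self.eps)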
def __init__(self, obs_shape, num_agents, naive_recurrent=False, recurrent=False, hidden_size=64,
             attn=False, attn_size=512, attn_N=2, attn_heads=8, dropout=0.05, use_average_pool=True,
             use_common_layer=False, use_feature_normlization=False, use_feature_popart=False,
             use_orthogonal=True, layer_N=1, use_ReLU=False):
    super(CNNBase, self).__init__(obs_shape, num_agents, naive_recurrent, recurrent, hidden_size,
                                  attn, attn_size, attn_N, attn_heads, dropout, use_average_pool,
                                  use_common_layer, use_orthogonal)

    self._use_common_layer = use_common_layer
    self._use_orthogonal = use_orthogonal

    if self._use_orthogonal:
        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0),
                               nn.init.calculate_gain('relu'))
    else:
        init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.init.constant_(x, 0),
                               gain=nn.init.calculate_gain('relu'))

    num_inputs = obs_shape[0]
    num_image = obs_shape[1]

    self.actor = nn.Sequential(
        init_(nn.Conv2d(num_inputs, 32, 3, stride=1)), nn.ReLU(),
        # init_(nn.Conv2d(32, 64, 3, stride=1)), nn.ReLU(),
        # init_(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(),
        Flatten(),
        init_(nn.Linear(32 * (num_image - 3 + 1) * (num_image - 3 + 1), hidden_size)), nn.ReLU(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU())

    self.critic = nn.Sequential(
        init_(nn.Conv2d(num_inputs * num_agents, 32, 3, stride=1)), nn.ReLU(),
        # init_(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
        # init_(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(),
        Flatten(),
        init_(nn.Linear(32 * (num_image - 3 + 1) * (num_image - 3 + 1), hidden_size)), nn.ReLU(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU())

    if self._use_common_layer:
        self.actor = nn.Sequential(
            init_(nn.Conv2d(num_inputs, 32, 3, stride=1)), nn.ReLU())
        self.critic = nn.Sequential(
            init_(nn.Conv2d(num_inputs * num_agents, 32, 3, stride=1)), nn.ReLU())
        self.common_linear = nn.Sequential(
            Flatten(),
            init_(nn.Linear(32 * (num_image - 3 + 1) * (num_image - 3 + 1), hidden_size)), nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU())

    if self._use_orthogonal:
        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    else:
        init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.init.constant_(x, 0))

    self.critic_linear = init_(nn.Linear(hidden_size, 1))
def __init__(self, obs_shape, num_agents, naive_recurrent=False, recurrent=False, hidden_size=64,
             recurrent_N=1, attn=False, attn_only_critic=False, attn_size=512, attn_N=2, attn_heads=8,
             dropout=0.05, use_average_pool=True, use_common_layer=False, use_feature_normlization=True,
             use_feature_popart=True, use_orthogonal=True, layer_N=1, use_ReLU=False, use_same_dim=False):
    super(MLPBase, self).__init__(obs_shape, num_agents, naive_recurrent, recurrent, hidden_size,
                                  recurrent_N, attn, attn_only_critic, attn_size, attn_N, attn_heads,
                                  dropout, use_average_pool, use_common_layer, use_orthogonal,
                                  use_ReLU, use_same_dim)

    self._use_common_layer = use_common_layer
    self._use_feature_normlization = use_feature_normlization
    self._use_feature_popart = use_feature_popart
    self._use_orthogonal = use_orthogonal
    self._layer_N = layer_N
    self._use_ReLU = use_ReLU
    self._use_same_dim = use_same_dim
    self._attn = attn
    self._attn_only_critic = attn_only_critic

    assert (self._use_feature_normlization and self._use_feature_popart) == False, (
        "--use_feature_normlization and --use_feature_popart cannot be set True simultaneously.")

    if 'int' not in obs_shape[0].__class__.__name__:
        # mixed obs: obs_shape is a list of per-agent observation spaces and
        # num_agents carries the index of this agent.
        all_obs_space = obs_shape
        agent_id = num_agents
        num_agents = len(all_obs_space)
        if all_obs_space[agent_id].__class__.__name__ == "Box":
            obs_shape = all_obs_space[agent_id].shape
        else:
            obs_shape = all_obs_space[agent_id]
        share_obs_dim = 0
        for obs_space in all_obs_space:
            share_obs_dim += obs_space.shape[0]
    else:
        if self._use_same_dim:
            share_obs_dim = obs_shape[0]
        else:
            share_obs_dim = obs_shape[0] * num_agents

    if self._use_feature_popart:
        self.actor_norm = PopArt(obs_shape[0])
        self.critic_norm = PopArt(share_obs_dim)

    if self._use_feature_normlization:
        self.actor_norm = nn.LayerNorm(obs_shape[0])
        self.critic_norm = nn.LayerNorm(share_obs_dim)

    if self._attn:
        if use_average_pool == True:
            num_inputs_actor = attn_size + obs_shape[-1][1]
            if self._use_same_dim:
                num_inputs_critic = attn_size + obs_shape[-1][1]
            else:
                num_inputs_critic = attn_size
        else:
            num_inputs = 0
            split_shape = obs_shape[1:]
            for i in range(len(split_shape)):
                num_inputs += split_shape[i][0]
            num_inputs_actor = num_inputs * attn_size
            if self._use_same_dim:
                num_inputs_critic = num_inputs * attn_size
            else:
                num_inputs_critic = num_agents * attn_size
        self.actor_attn_norm = nn.LayerNorm(num_inputs_actor)
        self.critic_attn_norm = nn.LayerNorm(num_inputs_critic)
    elif self._attn_only_critic:
        num_inputs_actor = obs_shape[0]
        if use_average_pool == True:
            num_inputs_critic = attn_size
        else:
            num_inputs_critic = num_agents * attn_size
        self.critic_attn_norm = nn.LayerNorm(num_inputs_critic)
    else:
        num_inputs_actor = obs_shape[0]
        num_inputs_critic = share_obs_dim

    if self._use_orthogonal:
        if self._use_ReLU:
            active_func = nn.ReLU()
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('relu'))
        else:
            active_func = nn.Tanh()
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('tanh'))
    else:
        if self._use_ReLU:
            active_func = nn.ReLU()
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('relu'))
        else:
            active_func = nn.Tanh()
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('tanh'))

    self.actor = MLPLayer(num_inputs_actor, hidden_size, self._layer_N, self._use_orthogonal, self._use_ReLU)
    self.critic = MLPLayer(num_inputs_critic, hidden_size, self._layer_N, self._use_orthogonal, self._use_ReLU)

    if self._use_common_layer:
        self.actor = nn.Sequential(
            init_(nn.Linear(num_inputs_actor, hidden_size)), active_func, nn.LayerNorm(hidden_size))
        self.critic = nn.Sequential(
            init_(nn.Linear(num_inputs_critic, hidden_size)), active_func, nn.LayerNorm(hidden_size))
        self.fc_h = nn.Sequential(
            init_(nn.Linear(hidden_size, hidden_size)), active_func, nn.LayerNorm(hidden_size))
        self.common_linear = get_clones(self.fc_h, self._layer_N)

    self.actor_rnn_norm = nn.LayerNorm(hidden_size)
    self.critic_rnn_norm = nn.LayerNorm(hidden_size)

    self.critic_linear = init_(nn.Linear(hidden_size, 1))