Example #1
    def __init__(self,
                 split_shape,
                 d_model,
                 use_orthogonal=True,
                 use_ReLU=False):
        super(SelfEmbedding, self).__init__()
        self.split_shape = split_shape

        if use_orthogonal:
            if use_ReLU:
                active_func = nn.ReLU()
                init_ = lambda m: init(m,
                                       nn.init.orthogonal_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('relu'))
            else:
                active_func = nn.Tanh()
                init_ = lambda m: init(m,
                                       nn.init.orthogonal_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('tanh'))
        else:
            if use_ReLU:
                active_func = nn.ReLU()
                init_ = lambda m: init(m,
                                       nn.init.xavier_uniform_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('relu'))
            else:
                active_func = nn.Tanh()
                init_ = lambda m: init(m,
                                       nn.init.xavier_uniform_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('tanh'))

        for i in range(len(split_shape)):
            if i == (len(split_shape) - 1):
                setattr(
                    self, 'fc_' + str(i),
                    nn.Sequential(init_(nn.Linear(split_shape[i][1], d_model)),
                                  active_func, nn.LayerNorm(d_model)))
            else:
                setattr(
                    self, 'fc_' + str(i),
                    nn.Sequential(
                        init_(
                            nn.Linear(split_shape[i][1] + split_shape[-1][1],
                                      d_model)), active_func,
                        nn.LayerNorm(d_model)))
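
A minimal sketch of the init helper that every excerpt on this page calls but none of them defines, assuming it simply applies the given weight and bias initializers in place and returns the module so that init_(nn.Linear(...)) composes directly inside nn.Sequential (inferred from the call sites, not taken from the excerpts):

    import torch.nn as nn

    def init(module, weight_init, bias_init, gain=1):
        # Apply the chosen initializers to the module's parameters in place,
        # then hand the module back so it can be wrapped where it is built.
        weight_init(module.weight.data, gain=gain)
        bias_init(module.bias.data)
        return module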
Example #2
    def __init__(self,
                 num_agents,
                 num_inputs,
                 lstm=False,
                 naive_recurrent=False,
                 recurrent=False,
                 hidden_size=64):
        super(MLPBase, self).__init__(lstm, naive_recurrent, recurrent,
                                      num_agents, hidden_size, hidden_size)

        init_ = lambda m: init(m, nn.init.orthogonal_,
                               lambda x: nn.init.constant_(x, 0), np.sqrt(2))

        self.actor = nn.Sequential(init_(nn.Linear(num_inputs, hidden_size)),
                                   nn.Tanh(),
                                   init_(nn.Linear(hidden_size, hidden_size)),
                                   nn.Tanh())

        self.critic = nn.Sequential(
            init_(nn.Linear(num_inputs * num_agents, hidden_size)), nn.Tanh(),
            init_(nn.Linear(hidden_size, hidden_size)), nn.Tanh())

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
Example #3
    def __init__(self, heads, d_model, dropout=0.0, use_orthogonal=True):
        super(MultiHeadAttention, self).__init__()
        if use_orthogonal:
            init_ = lambda m: init(m, nn.init.orthogonal_,
                                   lambda x: nn.init.constant_(x, 0))
        else:
            init_ = lambda m: init(m, nn.init.xavier_uniform_,
                                   lambda x: nn.init.constant_(x, 0))

        self.d_model = d_model
        self.d_k = d_model // heads
        self.h = heads

        self.q_linear = init_(nn.Linear(d_model, d_model))
        self.v_linear = init_(nn.Linear(d_model, d_model))
        self.k_linear = init_(nn.Linear(d_model, d_model))
        self.dropout = nn.Dropout(dropout)
        self.out = init_(nn.Linear(d_model, d_model))
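
Only the constructor is shown above; the class's forward is not part of this excerpt. For orientation, a standard scaled dot-product attention built on these projections would look roughly like the following (an illustrative sketch under that assumption, with torch and math imported; it is not the class's actual forward):

    def forward(self, q, k, v, mask=None):
        # Illustrative only: project, split into heads, attend, recombine.
        bs = q.size(0)
        q = self.q_linear(q).view(bs, -1, self.h, self.d_k).transpose(1, 2)
        k = self.k_linear(k).view(bs, -1, self.h, self.d_k).transpose(1, 2)
        v = self.v_linear(v).view(bs, -1, self.h, self.d_k).transpose(1, 2)
        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k)
        if mask is not None:
            scores = scores.masked_fill(mask == 0, float('-inf'))
        weights = self.dropout(torch.softmax(scores, dim=-1))
        out = torch.matmul(weights, v).transpose(1, 2).contiguous()
        out = out.view(bs, -1, self.d_model)
        return self.out(out)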
Example #4
    def __init__(self,
                 num_agents,
                 inputs,
                 lstm=False,
                 naive_recurrent=False,
                 recurrent=False,
                 hidden_size=64):
        super(CNNBase, self).__init__(lstm, naive_recurrent, recurrent,
                                      num_agents, hidden_size, hidden_size)

        init_ = lambda m: init(m, nn.init.orthogonal_,
                               lambda x: nn.init.constant_(x, 0),
                               nn.init.calculate_gain('relu'))

        num_inputs = inputs[0]
        num_image = inputs[1]

        self.actor = nn.Sequential(
            init_(nn.Conv2d(num_inputs, 32, 3, stride=1)), nn.ReLU(),
            Flatten(),
            init_(
                nn.Linear(32 * (num_image - 3 + 1) * (num_image - 3 + 1),
                          hidden_size)), nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU())

        self.critic = nn.Sequential(
            init_(nn.Conv2d(num_inputs * num_agents, 32, 3, stride=1)),
            nn.ReLU(),
            Flatten(),
            init_(
                nn.Linear(32 * (num_image - 3 + 1) * (num_image - 3 + 1),
                          hidden_size)),
            nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)),
            nn.ReLU(),
        )

        init_ = lambda m: init(m, nn.init.orthogonal_,
                               lambda x: nn.init.constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
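
Flatten is another helper these excerpts rely on without defining; it is assumed to be the usual batch-preserving reshape. The (num_image - 3 + 1) factor is the spatial size that remains after a 3x3 convolution with stride 1 and no padding, so the flattened feature vector has 32 * (num_image - 2) * (num_image - 2) entries, matching the first nn.Linear above:

    import torch.nn as nn

    class Flatten(nn.Module):
        # Assumed helper: collapse every dimension except the batch dimension
        # so conv feature maps can feed an nn.Linear layer.
        def forward(self, x):
            return x.view(x.size(0), -1)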
Example #5
    def __init__(self,
                 d_model,
                 d_ff=512,
                 dropout=0.0,
                 use_orthogonal=True,
                 use_ReLU=False):

        super(FeedForward, self).__init__()
        # d_ff defaults to 512 here (the original Transformer uses 2048)
        if use_orthogonal:
            if use_ReLU:
                active_func = nn.ReLU()
                init_ = lambda m: init(m,
                                       nn.init.orthogonal_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('relu'))
            else:
                active_func = nn.Tanh()
                init_ = lambda m: init(m,
                                       nn.init.orthogonal_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('tanh'))
        else:
            if use_ReLU:
                active_func = nn.ReLU()
                init_ = lambda m: init(m,
                                       nn.init.xavier_uniform_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('relu'))
            else:
                active_func = nn.Tanh()
                init_ = lambda m: init(m,
                                       nn.init.xavier_uniform_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('tanh'))

        self.linear_1 = nn.Sequential(init_(nn.Linear(d_model, d_ff)),
                                      active_func, nn.LayerNorm(d_ff))

        self.dropout = nn.Dropout(dropout)
        self.linear_2 = init_(nn.Linear(d_ff, d_model))
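
Only the constructor is shown here as well; a position-wise feed-forward block of this shape is typically applied as linear_1 -> dropout -> linear_2. A sketch of the assumed forward (not part of the excerpt):

    def forward(self, x):
        # Assumed forward: linear_1 already folds in the activation and
        # LayerNorm, so only dropout and the output projection remain.
        return self.linear_2(self.dropout(self.linear_1(x)))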
Example #6
    def __init__(self, input_dim, hidden_size, layer_N, use_orthogonal,
                 use_ReLU):
        super(MLPLayer, self).__init__()
        self._layer_N = layer_N

        if use_orthogonal:
            if use_ReLU:
                active_func = nn.ReLU()
                init_ = lambda m: init(m,
                                       nn.init.orthogonal_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('relu'))
            else:
                active_func = nn.Tanh()
                init_ = lambda m: init(m,
                                       nn.init.orthogonal_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('tanh'))
        else:
            if use_ReLU:
                active_func = nn.ReLU()
                init_ = lambda m: init(m,
                                       nn.init.xavier_uniform_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('relu'))
            else:
                active_func = nn.Tanh()
                init_ = lambda m: init(m,
                                       nn.init.xavier_uniform_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('tanh'))

        self.fc1 = nn.Sequential(init_(nn.Linear(input_dim, hidden_size)),
                                 active_func, nn.LayerNorm(hidden_size))
        self.fc_h = nn.Sequential(init_(nn.Linear(hidden_size, hidden_size)),
                                  active_func, nn.LayerNorm(hidden_size))
        self.fc2 = get_clones(self.fc_h, self._layer_N)
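
get_clones is likewise assumed rather than shown. Its conventional definition deep-copies a module N times into an nn.ModuleList, which matches how self.fc2 is built above as a stack of identical hidden blocks:

    import copy
    import torch.nn as nn

    def get_clones(module, N):
        # Assumed helper: N independent (deep-copied) instances of the module.
        return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])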
Example #7
    def __init__(self,
                 obs_shape,
                 share_obs_shape,
                 naive_recurrent=False,
                 recurrent=False,
                 hidden_size=64,
                 recurrent_N=1,
                 attn=False,
                 attn_size=512,
                 attn_N=2,
                 attn_heads=8,
                 dropout=0.05,
                 use_average_pool=True,
                 use_common_layer=False,
                 use_feature_normlization=True,
                 use_feature_popart=True,
                 use_orthogonal=True,
                 layer_N=1,
                 use_ReLU=False):
        super(MLPBase,
              self).__init__(obs_shape, share_obs_shape, naive_recurrent,
                             recurrent, hidden_size, recurrent_N, attn,
                             attn_size, attn_N, attn_heads, dropout,
                             use_average_pool, use_common_layer,
                             use_orthogonal, use_ReLU)

        self._use_common_layer = use_common_layer
        self._use_feature_normlization = use_feature_normlization
        self._use_feature_popart = use_feature_popart
        self._use_orthogonal = use_orthogonal
        self._layer_N = layer_N
        self._use_ReLU = use_ReLU
        self._attn = attn

        assert not (self._use_feature_normlization
                    and self._use_feature_popart), (
            "--use_feature_normlization and --use_feature_popart cannot be set True simultaneously."
        )

        obs_dim = obs_shape[0]
        share_obs_dim = share_obs_shape[0]

        if self._use_feature_popart:
            self.actor_norm = PopArt(obs_dim)
            self.critic_norm = PopArt(share_obs_dim)

        if self._use_feature_normlization:
            self.actor_norm = nn.LayerNorm(obs_dim)
            self.critic_norm = nn.LayerNorm(share_obs_dim)

        if self._attn:
            if use_average_pool:
                num_inputs_actor = attn_size + obs_shape[-1][1]
                num_inputs_critic = attn_size
            else:
                num_inputs = 0
                split_shape = obs_shape[1:]
                for i in range(len(split_shape)):
                    num_inputs += split_shape[i][0]
                num_inputs_critic = 0
                split_shape_critic = share_obs_shape[1:]
                for i in range(len(split_shape_critic)):
                    num_inputs_critic += split_shape_critic[i][0]
                num_inputs_actor = num_inputs * attn_size
                num_inputs_critic = num_inputs_critic * attn_size
            self.actor_attn_norm = nn.LayerNorm(num_inputs_actor)
            self.critic_attn_norm = nn.LayerNorm(num_inputs_critic)
        else:
            num_inputs_actor = obs_dim
            num_inputs_critic = share_obs_dim

        if self._use_orthogonal:
            if self._use_ReLU:
                active_func = nn.ReLU()
                init_ = lambda m: init(m,
                                       nn.init.orthogonal_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('relu'))
            else:
                active_func = nn.Tanh()
                init_ = lambda m: init(m,
                                       nn.init.orthogonal_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('tanh'))
        else:
            if self._use_ReLU:
                active_func = nn.ReLU()
                init_ = lambda m: init(m,
                                       nn.init.xavier_uniform_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('relu'))
            else:
                active_func = nn.Tanh()
                init_ = lambda m: init(m,
                                       nn.init.xavier_uniform_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('tanh'))

        self.actor = MLPLayer(num_inputs_actor, hidden_size, self._layer_N,
                              self._use_orthogonal, self._use_ReLU)
        self.critic = MLPLayer(num_inputs_critic, hidden_size, self._layer_N,
                               self._use_orthogonal, self._use_ReLU)

        if self._use_common_layer:
            self.actor = nn.Sequential(
                init_(nn.Linear(num_inputs_actor, hidden_size)), active_func,
                nn.LayerNorm(hidden_size))
            self.critic = nn.Sequential(
                init_(nn.Linear(num_inputs_critic, hidden_size)), active_func,
                nn.LayerNorm(hidden_size))
            self.fc_h = nn.Sequential(
                init_(nn.Linear(hidden_size, hidden_size)), active_func,
                nn.LayerNorm(hidden_size))
            self.common_linear = get_clones(self.fc_h, self._layer_N)

        self.actor_rnn_norm = nn.LayerNorm(hidden_size)
        self.critic_rnn_norm = nn.LayerNorm(hidden_size)

        self.critic_linear = init_(nn.Linear(hidden_size, 1))
Example #8
    def __init__(self,
                 obs_shape,
                 num_agents,
                 naive_recurrent=False,
                 recurrent=False,
                 hidden_size=64,
                 attn=False,
                 attn_size=512,
                 attn_N=2,
                 attn_heads=8,
                 dropout=0.05,
                 use_average_pool=True,
                 use_common_layer=False,
                 use_feature_normlization=False,
                 use_feature_popart=False,
                 use_orthogonal=True,
                 layer_N=1,
                 use_ReLU=False):
        super(CNNBase,
              self).__init__(obs_shape, num_agents, naive_recurrent, recurrent,
                             hidden_size, attn, attn_size, attn_N, attn_heads,
                             dropout, use_average_pool, use_common_layer,
                             use_orthogonal)

        self._use_common_layer = use_common_layer
        self._use_orthogonal = use_orthogonal

        if self._use_orthogonal:
            init_ = lambda m: init(m, nn.init.orthogonal_,
                                   lambda x: nn.init.constant_(x, 0),
                                   nn.init.calculate_gain('relu'))
        else:
            init_ = lambda m: init(m,
                                   nn.init.xavier_uniform_,
                                   lambda x: nn.init.constant_(x, 0),
                                   gain=nn.init.calculate_gain('relu'))

        num_inputs = obs_shape[0]
        num_image = obs_shape[1]

        self.actor = nn.Sequential(
            init_(nn.Conv2d(num_inputs, 32, 3, stride=1)),
            nn.ReLU(),
            #init_(nn.Conv2d(32, 64, 3, stride=1)), nn.ReLU(),
            #init_(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(),
            Flatten(),
            init_(
                nn.Linear(32 * (num_image - 3 + 1) * (num_image - 3 + 1),
                          hidden_size)),
            nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)),
            nn.ReLU())

        self.critic = nn.Sequential(
            init_(nn.Conv2d(num_inputs * num_agents, 32, 3, stride=1)),
            nn.ReLU(),
            #init_(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
            #init_(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(),
            Flatten(),
            init_(
                nn.Linear(32 * (num_image - 3 + 1) * (num_image - 3 + 1),
                          hidden_size)),
            nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)),
            nn.ReLU(),
        )

        if self._use_common_layer:
            self.actor = nn.Sequential(
                init_(nn.Conv2d(num_inputs, 32, 3, stride=1)), nn.ReLU())
            self.critic = nn.Sequential(
                init_(nn.Conv2d(num_inputs * num_agents, 32, 3, stride=1)),
                nn.ReLU())
            self.common_linear = nn.Sequential(
                Flatten(),
                init_(
                    nn.Linear(32 * (num_image - 3 + 1) * (num_image - 3 + 1),
                              hidden_size)), nn.ReLU(),
                init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU())

        if self._use_orthogonal:
            init_ = lambda m: init(m, nn.init.orthogonal_,
                                   lambda x: nn.init.constant_(x, 0))
        else:
            init_ = lambda m: init(m, nn.init.xavier_uniform_,
                                   lambda x: nn.init.constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))
Example #9
    def __init__(self,
                 obs_shape,
                 num_agents,
                 naive_recurrent=False,
                 recurrent=False,
                 hidden_size=64,
                 recurrent_N=1,
                 attn=False,
                 attn_only_critic=False,
                 attn_size=512,
                 attn_N=2,
                 attn_heads=8,
                 dropout=0.05,
                 use_average_pool=True,
                 use_common_layer=False,
                 use_feature_normlization=True,
                 use_feature_popart=True,
                 use_orthogonal=True,
                 layer_N=1,
                 use_ReLU=False,
                 use_same_dim=False):
        super(MLPBase,
              self).__init__(obs_shape, num_agents, naive_recurrent, recurrent,
                             hidden_size, recurrent_N, attn, attn_only_critic,
                             attn_size, attn_N, attn_heads, dropout,
                             use_average_pool, use_common_layer,
                             use_orthogonal, use_ReLU, use_same_dim)

        self._use_common_layer = use_common_layer
        self._use_feature_normlization = use_feature_normlization
        self._use_feature_popart = use_feature_popart
        self._use_orthogonal = use_orthogonal
        self._layer_N = layer_N
        self._use_ReLU = use_ReLU
        self._use_same_dim = use_same_dim
        self._attn = attn
        self._attn_only_critic = attn_only_critic

        assert not (self._use_feature_normlization
                    and self._use_feature_popart), (
            "--use_feature_normlization and --use_feature_popart cannot be set True simultaneously."
        )
        if 'int' not in obs_shape[0].__class__.__name__:  # mixed obs
            all_obs_space = obs_shape
            agent_id = num_agents
            num_agents = len(all_obs_space)
            if all_obs_space[agent_id].__class__.__name__ == "Box":
                obs_shape = all_obs_space[agent_id].shape
            else:
                obs_shape = all_obs_space[agent_id]
            share_obs_dim = 0
            for obs_space in all_obs_space:
                share_obs_dim += obs_space.shape[0]
        else:
            if self._use_same_dim:
                share_obs_dim = obs_shape[0]
            else:
                share_obs_dim = obs_shape[0] * num_agents

        if self._use_feature_popart:
            self.actor_norm = PopArt(obs_shape[0])
            self.critic_norm = PopArt(share_obs_dim)

        if self._use_feature_normlization:
            self.actor_norm = nn.LayerNorm(obs_shape[0])
            self.critic_norm = nn.LayerNorm(share_obs_dim)

        if self._attn:
            if use_average_pool:
                num_inputs_actor = attn_size + obs_shape[-1][1]
                if self._use_same_dim:
                    num_inputs_critic = attn_size + obs_shape[-1][1]
                else:
                    num_inputs_critic = attn_size
            else:
                num_inputs = 0
                split_shape = obs_shape[1:]
                for i in range(len(split_shape)):
                    num_inputs += split_shape[i][0]
                num_inputs_actor = num_inputs * attn_size
                if self._use_same_dim:
                    num_inputs_critic = num_inputs * attn_size
                else:
                    num_inputs_critic = num_agents * attn_size

            self.actor_attn_norm = nn.LayerNorm(num_inputs_actor)
            self.critic_attn_norm = nn.LayerNorm(num_inputs_critic)

        elif self._attn_only_critic:
            num_inputs_actor = obs_shape[0]
            if use_average_pool:
                num_inputs_critic = attn_size
            else:
                num_inputs_critic = num_agents * attn_size
            self.critic_attn_norm = nn.LayerNorm(num_inputs_critic)
        else:
            num_inputs_actor = obs_shape[0]
            num_inputs_critic = share_obs_dim

        if self._use_orthogonal:
            if self._use_ReLU:
                active_func = nn.ReLU()
                init_ = lambda m: init(m,
                                       nn.init.orthogonal_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('relu'))
            else:
                active_func = nn.Tanh()
                init_ = lambda m: init(m,
                                       nn.init.orthogonal_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('tanh'))
        else:
            if self._use_ReLU:
                active_func = nn.ReLU()
                init_ = lambda m: init(m,
                                       nn.init.xavier_uniform_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('relu'))
            else:
                active_func = nn.Tanh()
                init_ = lambda m: init(m,
                                       nn.init.xavier_uniform_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('tanh'))

        self.actor = MLPLayer(num_inputs_actor, hidden_size, self._layer_N,
                              self._use_orthogonal, self._use_ReLU)
        self.critic = MLPLayer(num_inputs_critic, hidden_size, self._layer_N,
                               self._use_orthogonal, self._use_ReLU)

        if self._use_common_layer:
            self.actor = nn.Sequential(
                init_(nn.Linear(num_inputs_actor, hidden_size)), active_func,
                nn.LayerNorm(hidden_size))
            self.critic = nn.Sequential(
                init_(nn.Linear(num_inputs_critic, hidden_size)), active_func,
                nn.LayerNorm(hidden_size))
            self.fc_h = nn.Sequential(
                init_(nn.Linear(hidden_size, hidden_size)), active_func,
                nn.LayerNorm(hidden_size))
            self.common_linear = get_clones(self.fc_h, self._layer_N)

        self.actor_rnn_norm = nn.LayerNorm(hidden_size)
        self.critic_rnn_norm = nn.LayerNorm(hidden_size)

        self.critic_linear = init_(nn.Linear(hidden_size, 1))