Exemplo n.º 1
0
    def __init__(self,
                 num_inputs,
                 vector_obs_len=0,
                 recurrent=False,
                 hidden_size=512):
        """Build the convolutional trunk (`self.main`) and the value head.

        Args:
            num_inputs: Number of input channels of the first convolution.
            vector_obs_len: Extra width added to `hidden_size` for the
                base-class input size and for the non-recurrent value head.
            recurrent: Forwarded to the base class; also selects the input
                width of the value head.
            hidden_size: Output width of the trunk's final linear layer.
        """
        super(CNNBase, self).__init__(recurrent, hidden_size + vector_obs_len,
                                      hidden_size)

        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        # The gain depends only on the nonlinearity name, so compute it once.
        relu_gain = nn.init.calculate_gain('relu')

        def init_relu(module):
            return init(module, nn.init.orthogonal_, zero_fill, relu_gain)

        def init_plain(module):
            return init(module, nn.init.orthogonal_, zero_fill)

        # The flattened size 32 * 7 * 7 is hard-coded; it matches what these
        # three convolutions produce for an 84x84 input.
        trunk = [
            init_relu(nn.Conv2d(num_inputs, 32, 8, stride=4)),
            nn.ReLU(),
            init_relu(nn.Conv2d(32, 64, 4, stride=2)),
            nn.ReLU(),
            init_relu(nn.Conv2d(64, 32, 3, stride=1)),
            nn.ReLU(),
            Flatten(),
            init_relu(nn.Linear(32 * 7 * 7, hidden_size)),
            nn.ReLU(),
        ]
        self.main = nn.Sequential(*trunk)

        # Without recurrence the value head is sized for hidden_size plus
        # vector_obs_len; with recurrence it is sized for hidden_size only.
        value_in = hidden_size if recurrent else hidden_size + vector_obs_len
        self.critic_linear = init_plain(nn.Linear(value_in, 1))

        self.train()
Exemplo n.º 2
0
    def __init__(self, num_inputs, num_outputs):
        """Create the single linear layer used by this distribution head.

        Args:
            num_inputs: Input width of the linear layer.
            num_outputs: Output width of the linear layer.
        """
        super(Bernoulli, self).__init__()

        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        # `init` receives the orthogonal initializer and a constant-zero one.
        layer = nn.Linear(num_inputs, num_outputs)
        self.linear = init(layer, nn.init.orthogonal_, zero_fill)
Exemplo n.º 3
0
    def __init__(self, num_inputs, hidden_size, num_layers, recurrent, activation):
        """Build parallel actor and critic MLP stacks plus a scalar value head.

        Args:
            num_inputs: Input width; the first layer uses `hidden_size`
                instead when `recurrent` is truthy.
            hidden_size: Width of every hidden layer.
            num_layers: Number of hidden layers per stack; must be positive.
            recurrent: Forwarded to the base-class constructor.
            activation: Module placed after every linear layer. The same
                instance is reused in every layer of both stacks.
        """
        assert num_layers > 0
        super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        def init_(module):
            return init(module, init_normc_, zero_fill)

        def hidden_block(width_in):
            return nn.Sequential(
                init_(nn.Linear(width_in, hidden_size)), activation
            )

        width = hidden_size if recurrent else num_inputs

        self.actor = nn.Sequential()
        self.critic = nn.Sequential()
        for index in range(num_layers):
            label = f"fc{index}"
            # Actor layer is created before the critic layer at each depth.
            self.actor.add_module(name=label, module=hidden_block(width))
            self.critic.add_module(name=label, module=hidden_block(width))
            width = hidden_size

        self.critic_linear = init_(nn.Linear(width, 1))

        self.train()
Exemplo n.º 4
0
    def __init__(self, num_inputs, num_outputs):
        """Create the mean layer and the `logstd` module.

        Args:
            num_inputs: Input width of the mean layer.
            num_outputs: Output width of the mean layer and length of the
                zero tensor handed to `AddBias`.
        """
        super(DiagGaussian, self).__init__()

        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        mean_layer = nn.Linear(num_inputs, num_outputs)
        self.fc_mean = init(mean_layer, init_normc_, zero_fill)

        # `logstd` wraps a zero tensor with one entry per output.
        initial_logstd = torch.zeros(num_outputs)
        self.logstd = AddBias(initial_logstd)
Exemplo n.º 5
0
    def __init__(self, num_inputs, num_outputs):
        """Create the single linear layer used by this distribution head.

        Args:
            num_inputs: Input width of the linear layer.
            num_outputs: Output width of the linear layer.
        """
        super(Categorical, self).__init__()

        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        # A gain of 0.01 is passed to `init` for this layer.
        layer = nn.Linear(num_inputs, num_outputs)
        self.linear = init(layer, nn.init.orthogonal_, zero_fill, gain=0.01)
    def __init__(self, num_obs_inputs, num_act_inputs, hidden_size=64):
        """Build the `meta_reward` and `meta_critic` networks.

        Args:
            num_obs_inputs: Width of the observation input.
            num_act_inputs: Width of the action input; added to
                `num_obs_inputs` for the first `meta_reward` layer only.
            hidden_size: Width of the two hidden layers in each network.
        """
        super(MetaMLP, self).__init__()

        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        def init_(module):
            return init(module, nn.init.orthogonal_, zero_fill, np.sqrt(2))

        reward_layers = [
            init_(nn.Linear(num_obs_inputs + num_act_inputs, hidden_size)),
            nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)),
            nn.ReLU(),
            init_(nn.Linear(hidden_size, 1)),
            nn.Tanh(),  # added tanh like in paper
        ]
        self.meta_reward = nn.Sequential(*reward_layers)

        critic_layers = [
            init_(nn.Linear(num_obs_inputs, hidden_size)),
            nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)),
            nn.ReLU(),
            init_(nn.Linear(hidden_size, 1)),
        ]
        self.meta_critic = nn.Sequential(*critic_layers)

        self.train()
Exemplo n.º 7
0
    def __init__(self, num_inputs, recurrent=False, hidden_size=64):
        """Build two-layer tanh actor and critic stacks plus a value head.

        Args:
            num_inputs: Input width; the first layer uses `hidden_size`
                instead when `recurrent` is truthy.
            recurrent: Forwarded to the base-class constructor.
            hidden_size: Width of both hidden layers in each stack.
        """
        super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        def init_(module):
            return init(module, nn.init.orthogonal_, zero_fill, np.sqrt(2))

        first_width = hidden_size if recurrent else num_inputs

        def tanh_stack():
            return nn.Sequential(
                init_(nn.Linear(first_width, hidden_size)),
                nn.Tanh(),
                init_(nn.Linear(hidden_size, hidden_size)),
                nn.Tanh(),
            )

        # The actor stack is built before the critic stack.
        self.actor = tanh_stack()
        self.critic = tanh_stack()

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
    def __init__(self, num_inputs, hidden_size=64):
        """Build two-layer ReLU actor and critic stacks plus a value head.

        Args:
            num_inputs: Input width of the first layer of each stack.
            hidden_size: Width of both hidden layers; also stored on the
                instance as `self.hidden_size`.
        """
        super(PolicyMLP, self).__init__()

        self.hidden_size = hidden_size

        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        def init_(module):
            return init(module, nn.init.orthogonal_, zero_fill, np.sqrt(2))

        def relu_stack():
            return nn.Sequential(
                init_(nn.Linear(num_inputs, hidden_size)),
                nn.ReLU(),
                init_(nn.Linear(hidden_size, hidden_size)),
                nn.ReLU(),
            )

        # The actor stack is built before the critic stack.
        self.actor = relu_stack()
        self.critic = relu_stack()

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
Exemplo n.º 9
0
    def __init__(
        self,
        hidden_size,
        num_layers,
        recurrent,
        obs_space,
        num_conv_layers,
        kernel_size,
        stride,
        activation=nn.ReLU(),
        **_,
    ):
        """Build the conv encoder, embeddings, MLP and scalar value head.

        Args:
            hidden_size: Channel count of every conv layer and width of
                every linear and embedding layer.
            num_layers: Number of MLP layers; must be positive.
            recurrent: Forwarded to the base-class constructor.
            obs_space: Either a `spaces.Dict` (converted to `Obs`) or an
                object exposing `obs`, `inventory` and `lines` attributes.
            num_conv_layers: Number of conv layers; must be positive.
            kernel_size: Kernel size of the first conv layer; clamped to the
                current feature-map size after each layer.
            stride: Stride of every conv layer.
            activation: Module appended after each conv/linear layer. The
                same instance is reused everywhere; the default instance is
                created once, when the function is defined.
            **_: Extra keyword arguments are accepted and ignored.
        """
        if isinstance(obs_space, spaces.Dict):
            obs_space = Obs(**obs_space.spaces)
        assert num_layers > 0
        H = hidden_size
        super().__init__(
            recurrent=recurrent, recurrent_input_size=H, hidden_size=hidden_size
        )
        # One preprocessed row per subtask, followed by an all-zero row.
        self.register_buffer(
            "subtasks",
            torch.tensor(
                [Env.preprocess_line(Subtask(s)) for s in subtasks()] + [[0, 0, 0, 0]]
            ),
        )
        (d, h, w) = obs_space.obs.shape
        inventory_size = obs_space.inventory.n
        line_nvec = torch.tensor(obs_space.lines.nvec)
        # Exclusive cumulative sum of the first row of nvec: [0, n0, n0+n1, ...].
        # Presumably used to shift each line component into its own index
        # range of `line_embed` -- confirm against the forward pass.
        offset = F.pad(line_nvec[0, :-1].cumsum(0), [1, 0])
        self.register_buffer("offset", offset)
        self.obs_spaces = obs_space
        self.obs_sections = get_obs_sections(self.obs_spaces)
        # Padding is derived from the initial kernel size and kept fixed even
        # when the kernel is clamped below.
        padding = (kernel_size // 2) % stride

        self.conv = nn.Sequential()
        in_size = d
        assert num_conv_layers > 0
        for i in range(num_conv_layers):
            # NOTE(review): `init_` is not defined in this method; it must be
            # provided by the enclosing module scope.
            self.conv.add_module(
                name=f"conv{i}",
                module=nn.Sequential(
                    init_(
                        nn.Conv2d(
                            in_size,
                            hidden_size,
                            kernel_size=kernel_size,
                            stride=stride,
                            padding=padding,
                        )
                    ),
                    activation,
                ),
            )
            in_size = hidden_size
            # Conv2d output size (dilation 1), tracked per dimension so that
            # non-square observations give the right flattened size.
            h = (h + (2 * padding) - (kernel_size - 1) - 1) // stride + 1
            w = (w + (2 * padding) - (kernel_size - 1) - 1) // stride + 1
            # Keep the next kernel no larger than the shrunken feature map.
            kernel_size = min(h, w, kernel_size)
        self.conv.add_module(name="flatten", module=Flatten())
        init2 = lambda m: init(m, init_normc_, lambda x: nn.init.constant_(x, 0))

        self.conv_projection = nn.Sequential(
            init2(nn.Linear(h * w * hidden_size, hidden_size)), activation
        )
        # Pass a plain int rather than a 0-dim tensor as the table size.
        self.line_embed = nn.EmbeddingBag(int(line_nvec[0].sum()), hidden_size)
        self.inventory_embed = nn.Sequential(
            init2(nn.Linear(inventory_size, hidden_size)), activation
        )

        self.mlp = nn.Sequential()
        # Both branches are currently equal because H is hidden_size.
        in_size = hidden_size if recurrent else H
        for i in range(num_layers):
            self.mlp.add_module(
                name=f"fc{i}",
                module=nn.Sequential(
                    init2(nn.Linear(in_size, hidden_size)), activation
                ),
            )
            in_size = hidden_size

        self.critic_linear = init2(nn.Linear(in_size, 1))
        self._output_size = in_size
        self.train()