Пример #1
0
    def __init__(self,
                 n_actions,
                 n_atoms,
                 v_min,
                 v_max,
                 n_input_channels=4,
                 activation=F.relu,
                 bias=0.1):
        """Create the convolution stack, the two MLP streams and ``z_values``.

        Args:
            n_actions: Number of actions; ``a_stream`` outputs
                ``n_actions * n_atoms`` values.
            n_atoms: Number of entries in ``z_values``; must be at least 2.
            v_min: First value of ``z_values``; must be below ``v_max``.
            v_max: Last value of ``z_values``.
            n_input_channels: Input channel count of the first convolution.
            activation: Callable kept on the instance as ``self.activation``.
            bias: ``initial_bias`` handed to every convolution layer.

        Raises:
            AssertionError: If ``n_atoms < 2`` or ``v_min >= v_max``.
        """
        assert n_atoms >= 2
        assert v_min < v_max

        self.n_atoms = n_atoms
        self.activation = activation
        self.n_input_channels = n_input_channels
        self.n_actions = n_actions

        super().__init__()
        # n_atoms evenly spaced float32 values from v_min to v_max.
        self.add_persistent(
            'z_values',
            np.linspace(v_min, v_max, num=n_atoms, dtype=np.float32))

        # One (in_channels, out_channels, ksize, stride) row per convolution.
        conv_specs = (
            (n_input_channels, 32, 8, 4),
            (32, 64, 4, 2),
            (64, 64, 3, 1),
        )
        # Input size both streams are built for.
        # NOTE(review): 3136 == 64 * 7 * 7, presumably the flattened conv
        # output for 84x84 inputs -- confirm against the forward pass.
        stream_in_size = 3136

        with self.init_scope():
            self.conv_layers = chainer.ChainList(*[
                L.Convolution2D(in_ch, out_ch, ksize, stride=stride,
                                initial_bias=bias)
                for in_ch, out_ch, ksize, stride in conv_specs])

            self.a_stream = MLP(stream_in_size, n_actions * n_atoms, [512])
            self.v_stream = MLP(stream_in_size, n_atoms, [512])
Пример #2
0
    def __init__(self,
                 n_dim_obs,
                 n_dim_action,
                 n_hidden_channels,
                 n_hidden_layers,
                 nonlinearity=F.relu,
                 last_wscale=1.):
        """Build ``obs_mlp`` and ``mlp`` and expose ``mlp.output``.

        Args:
            n_dim_obs: Input size of ``obs_mlp``.
            n_dim_action: Added to ``n_hidden_channels`` to form the input
                size of ``mlp``.
            n_hidden_channels: Output size of ``obs_mlp`` and width of every
                hidden layer of ``mlp``.
            n_hidden_layers: Must be at least 1; ``mlp`` receives
                ``n_hidden_layers - 1`` hidden layers.
            nonlinearity: Forwarded to ``mlp`` and stored on the instance.
            last_wscale: Forwarded to ``mlp``.

        Raises:
            AssertionError: If ``n_hidden_layers < 1``.
        """
        assert n_hidden_layers >= 1

        self.nonlinearity = nonlinearity
        self.n_hidden_channels = n_hidden_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_input_channels = n_dim_obs + n_dim_action

        super().__init__()

        # obs_mlp already accounts for one layer, hence the "- 1".
        remaining_hidden = [n_hidden_channels] * (n_hidden_layers - 1)
        with self.init_scope():
            # obs_mlp has no hidden layers, so it is not given the
            # nonlinearity.
            self.obs_mlp = MLP(
                in_size=n_dim_obs,
                out_size=n_hidden_channels,
                hidden_sizes=[],
            )
            self.mlp = MLP(
                in_size=n_hidden_channels + n_dim_action,
                out_size=1,
                hidden_sizes=remaining_hidden,
                nonlinearity=nonlinearity,
                last_wscale=last_wscale,
            )

        self.output = self.mlp.output
    def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
                 n_hidden_layers):
        """Register ``obs_mlp`` and ``mlp`` and expose ``mlp.output``.

        Args:
            n_dim_obs: Input size of ``obs_mlp``.
            n_dim_action: Added to ``n_hidden_channels`` to form the input
                size of ``mlp``.
            n_hidden_channels: Output size of ``obs_mlp`` and width of every
                hidden layer of ``mlp``.
            n_hidden_layers: ``mlp`` receives ``n_hidden_layers - 1`` hidden
                layers.
        """
        self.n_hidden_channels = n_hidden_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_input_channels = n_dim_obs + n_dim_action

        # Built in the same order as they are registered below.
        obs_mlp = MLP(in_size=n_dim_obs,
                      out_size=n_hidden_channels,
                      hidden_sizes=[])
        remaining_hidden = [n_hidden_channels] * (n_hidden_layers - 1)
        mlp = MLP(in_size=n_hidden_channels + n_dim_action,
                  out_size=1,
                  hidden_sizes=remaining_hidden)

        super().__init__(obs_mlp=obs_mlp, mlp=mlp)
        self.output = self.mlp.output
Пример #4
0
    def __init__(self, n_input_channels, n_hidden_layers,
                 n_hidden_channels, action_size,
                 min_action=None, max_action=None, bound_action=True,
                 nonlinearity=F.relu,
                 last_wscale=1.):
        """Create the MLP model and, optionally, a bounding action filter.

        Args:
            n_input_channels: Input size of the MLP.
            n_hidden_layers: Number of hidden layers of the MLP.
            n_hidden_channels: Width of every hidden layer.
            action_size: Output size of the MLP.
            min_action: Lower bound passed to ``bound_by_tanh``.
            max_action: Upper bound passed to ``bound_by_tanh``.
            bound_action: When true, an action filter calling
                ``bound_by_tanh`` is installed; otherwise the filter is
                ``None``.
            nonlinearity: Forwarded to the MLP.
            last_wscale: Forwarded to the MLP.
        """
        self.n_input_channels = n_input_channels
        self.n_hidden_channels = n_hidden_channels
        self.n_hidden_layers = n_hidden_layers
        self.action_size = action_size
        self.bound_action = bound_action
        self.min_action = min_action
        self.max_action = max_action

        def bounded(x):
            # The bounds are read from the instance when the filter runs.
            return bound_by_tanh(x, self.min_action, self.max_action)

        action_filter = bounded if self.bound_action else None

        hidden_sizes = (n_hidden_channels,) * n_hidden_layers
        model = MLP(n_input_channels,
                    action_size,
                    hidden_sizes,
                    nonlinearity=nonlinearity,
                    last_wscale=last_wscale)
        super().__init__(model=model, action_filter=action_filter)
Пример #5
0
    def __init__(self, n_actions, n_input_channels=4,
                 activation=F.relu, bias=0.1):
        """Create the convolution stack and the ``a_stream``/``v_stream`` MLPs.

        Args:
            n_actions: Output size of ``a_stream``.
            n_input_channels: Input channel count of the first convolution.
            activation: Callable kept on the instance as ``self.activation``.
            bias: ``initial_bias`` handed to every convolution layer.
        """
        self.activation = activation
        self.n_input_channels = n_input_channels
        self.n_actions = n_actions

        super().__init__()

        # One (in_channels, out_channels, ksize, stride) row per convolution.
        conv_specs = (
            (n_input_channels, 32, 8, 4),
            (32, 64, 4, 2),
            (64, 64, 3, 1),
        )
        # Input size both streams are built for.
        # NOTE(review): 3136 == 64 * 7 * 7, presumably the flattened conv
        # output for 84x84 inputs -- confirm against the forward pass.
        stream_in_size = 3136

        with self.init_scope():
            self.conv_layers = chainer.ChainList(*[
                L.Convolution2D(in_ch, out_ch, ksize, stride=stride,
                                initial_bias=bias)
                for in_ch, out_ch, ksize, stride in conv_specs])

            self.a_stream = MLP(stream_in_size, n_actions, [512])
            self.v_stream = MLP(stream_in_size, 1, [512])
Пример #6
0
    def __init__(self,
                 n_input_channels,
                 n_hidden_layers,
                 n_hidden_channels,
                 action_size,
                 min_action=None,
                 max_action=None,
                 bound_action=True):
        """Create the MLP model and, optionally, a bounding action filter.

        Args:
            n_input_channels: Input size of the MLP.
            n_hidden_layers: Number of hidden layers of the MLP.
            n_hidden_channels: Width of every hidden layer.
            action_size: Output size of the MLP.
            min_action: Lower bound passed to ``bound_by_tanh``.
            max_action: Upper bound passed to ``bound_by_tanh``.
            bound_action: When true, an action filter calling
                ``bound_by_tanh`` is installed; otherwise the filter is
                ``None``.
        """
        self.n_input_channels = n_input_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.action_size = action_size
        self.min_action = min_action
        self.max_action = max_action
        self.bound_action = bound_action

        if self.bound_action:
            # A named function instead of a lambda bound to a name; the
            # bounds are still read from the instance when the filter runs.
            def action_filter(x):
                return bound_by_tanh(x, self.min_action, self.max_action)
        else:
            action_filter = None

        super().__init__(model=MLP(n_input_channels, action_size,
                                   (n_hidden_channels, ) * n_hidden_layers),
                         action_filter=action_filter)
Пример #7
0
 def __init__(self, ndim_obs, n_actions, n_hidden_channels,
              n_hidden_layers, nonlinearity=F.relu,
              last_wscale=1.0):
     """Wrap an MLP taking ``ndim_obs`` inputs and giving ``n_actions`` outputs.

     Args:
         ndim_obs: Input size of the MLP.
         n_actions: Output size of the MLP.
         n_hidden_channels: Width of every hidden layer.
         n_hidden_layers: Number of hidden layers.
         nonlinearity: Forwarded to the MLP.
         last_wscale: Forwarded to the MLP.
     """
     hidden_sizes = [n_hidden_channels] * n_hidden_layers
     model = MLP(in_size=ndim_obs,
                 out_size=n_actions,
                 hidden_sizes=hidden_sizes,
                 nonlinearity=nonlinearity,
                 last_wscale=last_wscale)
     super().__init__(model=model)
Пример #8
0
    def __init__(self,
                 n_actions,
                 n_input_channels=4,
                 activation=F.relu,
                 bias=0.1):
        """Create and register ``conv_layers``, ``a_stream`` and ``v_stream``.

        Args:
            n_actions: Output size of ``a_stream``.
            n_input_channels: Input channel count of the first convolution.
            activation: Callable kept on the instance as ``self.activation``.
            bias: Passed as ``bias`` to every convolution layer.
        """
        self.activation = activation
        self.n_input_channels = n_input_channels
        self.n_actions = n_actions

        # One (in_channels, out_channels, ksize, stride) row per convolution.
        conv_specs = (
            (n_input_channels, 32, 8, 4),
            (32, 64, 4, 2),
            (64, 64, 3, 1),
        )
        # NOTE(review): ``bias=`` looks like the legacy Convolution2D keyword
        # (newer Chainer spells it ``initial_bias``) -- confirm the targeted
        # Chainer version before changing it.
        convs = chainer.ChainList(*[
            L.Convolution2D(in_ch, out_ch, ksize, stride=stride, bias=bias)
            for in_ch, out_ch, ksize, stride in conv_specs])

        # NOTE(review): 3136 == 64 * 7 * 7, presumably the flattened conv
        # output for 84x84 inputs -- confirm against the forward pass.
        stream_in_size = 3136
        advantage_mlp = MLP(stream_in_size, n_actions, [512])
        value_mlp = MLP(stream_in_size, 1, [512])

        super().__init__(conv_layers=convs,
                         a_stream=advantage_mlp,
                         v_stream=value_mlp)
Пример #9
0
    def __init__(self,
                 n_input_channels,
                 n_hidden_layers=0,
                 n_hidden_channels=None):
        """Wrap an MLP with a single output.

        With the default arguments the hidden-size list is empty, so the
        MLP is created without hidden layers.

        Args:
            n_input_channels: Input size of the MLP.
            n_hidden_layers: Number of hidden layers.
            n_hidden_channels: Width of every hidden layer.
        """
        self.n_hidden_channels = n_hidden_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_input_channels = n_input_channels

        hidden_sizes = [n_hidden_channels] * n_hidden_layers
        model = MLP(n_input_channels, 1, hidden_sizes)
        super().__init__(model=model)
Пример #10
0
 def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
              n_hidden_layers):
     """Create the ``fc`` MLP, the ``lstm`` layer and the ``out`` layer.

     Args:
         n_dim_obs: Summed with ``n_dim_action`` to give the input size
             of ``fc``.
         n_dim_action: See ``n_dim_obs``.
         n_hidden_channels: Output size of ``fc``, width of its hidden
             layers, and both sizes of ``lstm``.
         n_hidden_layers: Number of hidden layers inside ``fc``.
     """
     n_input_channels = n_dim_obs + n_dim_action
     self.n_input_channels = n_input_channels
     self.n_hidden_channels = n_hidden_channels
     self.n_hidden_layers = n_hidden_layers

     super().__init__()

     hidden_sizes = [n_hidden_channels] * n_hidden_layers
     with self.init_scope():
         self.fc = MLP(n_input_channels, n_hidden_channels, hidden_sizes)
         self.lstm = L.LSTM(n_hidden_channels, n_hidden_channels)
         # Single-valued output layer.
         self.out = L.Linear(n_hidden_channels, 1)
 def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
              n_hidden_layers):
     """Register the ``fc`` MLP, the ``lstm`` layer and the ``out`` layer.

     Args:
         n_dim_obs: Summed with ``n_dim_action`` to give the input size
             of ``fc``.
         n_dim_action: See ``n_dim_obs``.
         n_hidden_channels: Output size of ``fc``, width of its hidden
             layers, and both sizes of ``lstm``.
         n_hidden_layers: Number of hidden layers inside ``fc``.
     """
     n_input_channels = n_dim_obs + n_dim_action
     self.n_input_channels = n_input_channels
     self.n_hidden_channels = n_hidden_channels
     self.n_hidden_layers = n_hidden_layers

     # Built in the same order as they are registered below.
     hidden_sizes = [n_hidden_channels] * n_hidden_layers
     fc = MLP(n_input_channels, n_hidden_channels, hidden_sizes)
     lstm = L.LSTM(n_hidden_channels, n_hidden_channels)
     out = L.Linear(n_hidden_channels, 1)

     super().__init__(fc=fc, lstm=lstm, out=out)
Пример #12
0
 def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
              n_hidden_layers):
     """Register ``fc``, ``lstm`` and ``out`` and start an empty state stack.

     Args:
         n_dim_obs: Input size of ``fc``.
         n_dim_action: Output size of ``out``.
         n_hidden_channels: Output size of ``fc``, width of its hidden
             layers, and both sizes of ``lstm``.
         n_hidden_layers: Number of hidden layers inside ``fc``.
     """
     self.state_stack = []
     self.n_hidden_channels = n_hidden_channels
     self.n_hidden_layers = n_hidden_layers
     self.n_input_channels = n_dim_obs

     # Built in the same order as they are registered below.
     hidden_sizes = [n_hidden_channels] * n_hidden_layers
     fc = MLP(in_size=n_dim_obs,
              out_size=n_hidden_channels,
              hidden_sizes=hidden_sizes)
     lstm = L.LSTM(n_hidden_channels, n_hidden_channels)
     out = L.Linear(n_hidden_channels, n_dim_action)

     super().__init__(fc=fc, lstm=lstm, out=out)
Пример #13
0
    def __init__(self,
                 n_input_channels,
                 n_hidden_layers=0,
                 n_hidden_channels=None,
                 nonlinearity=F.relu,
                 last_wscale=1):
        """Wrap an MLP with a single output.

        With the default arguments the hidden-size list is empty, so the
        MLP is created without hidden layers.

        Args:
            n_input_channels: Input size of the MLP.
            n_hidden_layers: Number of hidden layers.
            n_hidden_channels: Width of every hidden layer.
            nonlinearity: Forwarded to the MLP.
            last_wscale: Forwarded to the MLP.
        """
        self.n_hidden_channels = n_hidden_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_input_channels = n_input_channels

        hidden_sizes = [n_hidden_channels] * n_hidden_layers
        model = MLP(n_input_channels,
                    1,
                    hidden_sizes,
                    nonlinearity=nonlinearity,
                    last_wscale=last_wscale)
        super().__init__(model=model)
 def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
              n_hidden_layers, nonlinearity=F.relu, last_wscale=1.):
     """Create the ``fc`` MLP, the ``lstm`` layer and the ``out`` layer.

     Args:
         n_dim_obs: Summed with ``n_dim_action`` to give the input size
             of ``fc``.
         n_dim_action: See ``n_dim_obs``.
         n_hidden_channels: Output size of ``fc``, width of its hidden
             layers, and both sizes of ``lstm``.
         n_hidden_layers: Number of hidden layers inside ``fc``.
         nonlinearity: Forwarded to ``fc`` and stored on the instance.
         last_wscale: Argument of the ``LeCunNormal`` initializer used
             for the weights of ``out``.
     """
     n_input_channels = n_dim_obs + n_dim_action
     self.n_input_channels = n_input_channels
     self.n_hidden_channels = n_hidden_channels
     self.n_hidden_layers = n_hidden_layers
     self.nonlinearity = nonlinearity

     super().__init__()

     hidden_sizes = [n_hidden_channels] * n_hidden_layers
     with self.init_scope():
         self.fc = MLP(n_input_channels,
                       n_hidden_channels,
                       hidden_sizes,
                       nonlinearity=nonlinearity)
         self.lstm = L.LSTM(n_hidden_channels, n_hidden_channels)
         self.out = L.Linear(n_hidden_channels,
                             1,
                             initialW=LeCunNormal(last_wscale))
Пример #15
0
 def __init__(self, ndim_obs, n_actions, n_atoms, v_min, v_max,
              n_hidden_channels, n_hidden_layers,
              nonlinearity=F.relu, last_wscale=1.0):
     """Build an MLP -> reshape -> softmax sequence plus its ``z_values``.

     Args:
         ndim_obs: Input size of the MLP.
         n_actions: Size of axis 1 after the reshape.
         n_atoms: Size of axis 2 after the reshape and length of
             ``z_values``; must be at least 2.
         v_min: First value of ``z_values``; must be below ``v_max``.
         v_max: Last value of ``z_values``.
         n_hidden_channels: Width of every hidden layer.
         n_hidden_layers: Number of hidden layers.
         nonlinearity: Forwarded to the MLP.
         last_wscale: Forwarded to the MLP.

     Raises:
         AssertionError: If ``n_atoms < 2`` or ``v_min >= v_max``.
     """
     assert n_atoms >= 2
     assert v_min < v_max

     def split_into_atoms(x):
         # (-1, n_actions, n_atoms): one row of atoms per action.
         return F.reshape(x, (-1, n_actions, n_atoms))

     def normalize_atoms(x):
         # Softmax along the atom axis.
         return F.softmax(x, axis=2)

     # n_atoms evenly spaced float32 values from v_min to v_max.
     z_values = np.linspace(v_min, v_max, num=n_atoms, dtype=np.float32)

     hidden_sizes = [n_hidden_channels] * n_hidden_layers
     mlp = MLP(in_size=ndim_obs,
               out_size=n_actions * n_atoms,
               hidden_sizes=hidden_sizes,
               nonlinearity=nonlinearity,
               last_wscale=last_wscale)
     model = chainerrl.links.Sequence(mlp, split_into_atoms, normalize_atoms)
     super().__init__(model=model, z_values=z_values)
Пример #16
0
    def __init__(self,
                 n_input_channels,
                 n_actions,
                 n_hidden_layers=0,
                 n_hidden_channels=None,
                 beta=1.0):
        """Wrap an MLP with ``n_actions`` outputs and pass ``beta`` upward.

        With the default arguments the hidden-size tuple is empty, so the
        MLP is created without hidden layers.

        Args:
            n_input_channels: Input size of the MLP.
            n_actions: Output size of the MLP.
            n_hidden_layers: Number of hidden layers.
            n_hidden_channels: Width of every hidden layer.
            beta: Stored on the instance and forwarded to the parent
                constructor.
        """
        self.beta = beta
        self.n_actions = n_actions
        self.n_input_channels = n_input_channels
        self.n_hidden_channels = n_hidden_channels
        self.n_hidden_layers = n_hidden_layers

        hidden_sizes = (n_hidden_channels,) * n_hidden_layers
        model = MLP(n_input_channels, n_actions, hidden_sizes)
        super().__init__(model=model, beta=self.beta)
    def __init__(self,
                 n_input_channels,
                 n_hidden_layers,
                 n_hidden_channels,
                 action_size,
                 min_action=None,
                 max_action=None,
                 bound_action=True,
                 nonlinearity=F.relu,
                 last_wscale=1.):
        """Create an fc -> lstm -> out chain and an optional action filter.

        Args:
            n_input_channels: Input size of ``fc``.
            n_hidden_layers: Number of hidden layers inside ``fc``.
            n_hidden_channels: Output size of ``fc``, width of its hidden
                layers, and both sizes of ``lstm``.
            action_size: Output size of ``out``.
            min_action: Lower bound passed to ``bound_by_tanh``.
            max_action: Upper bound passed to ``bound_by_tanh``.
            bound_action: When true, an action filter calling
                ``bound_by_tanh`` is installed; otherwise the filter is
                ``None``.
            nonlinearity: Forwarded to ``fc`` and also applied to its
                output in the forward call.
            last_wscale: Argument of the ``LeCunNormal`` initializer used
                for the weights of ``out``.
        """
        self.n_input_channels = n_input_channels
        self.n_hidden_channels = n_hidden_channels
        self.n_hidden_layers = n_hidden_layers
        self.action_size = action_size
        self.bound_action = bound_action
        self.min_action = min_action
        self.max_action = max_action

        def bounded(x):
            # The bounds are read from the instance when the filter runs.
            return bound_by_tanh(x, self.min_action, self.max_action)

        action_filter = bounded if self.bound_action else None

        hidden_sizes = (n_hidden_channels,) * n_hidden_layers
        model = chainer.Chain(
            fc=MLP(n_input_channels,
                   n_hidden_channels,
                   hidden_sizes,
                   nonlinearity=nonlinearity),
            lstm=L.LSTM(n_hidden_channels, n_hidden_channels),
            out=L.Linear(n_hidden_channels,
                         action_size,
                         initialW=LeCunNormal(last_wscale)),
        )

        def model_call(model, x):
            # fc -> nonlinearity -> lstm -> out
            return model.out(model.lstm(nonlinearity(model.fc(x))))

        super().__init__(model=model,
                         model_call=model_call,
                         action_filter=action_filter)
Пример #18
0
    def __init__(self, n_input_channels, n_actions,
                 n_hidden_layers=0, n_hidden_channels=None,
                 beta=1.0, nonlinearity=F.relu,
                 last_wscale=1.0,
                 min_prob=0.0):
        """Wrap an MLP with ``n_actions`` outputs; pass ``beta``/``min_prob`` up.

        With the default arguments the hidden-size tuple is empty, so the
        MLP is created without hidden layers.

        Args:
            n_input_channels: Input size of the MLP.
            n_actions: Output size of the MLP.
            n_hidden_layers: Number of hidden layers.
            n_hidden_channels: Width of every hidden layer.
            beta: Stored on the instance and forwarded to the parent
                constructor.
            nonlinearity: Forwarded to the MLP.
            last_wscale: Forwarded to the MLP.
            min_prob: Forwarded to the parent constructor.
        """
        self.beta = beta
        self.n_actions = n_actions
        self.n_input_channels = n_input_channels
        self.n_hidden_channels = n_hidden_channels
        self.n_hidden_layers = n_hidden_layers

        hidden_sizes = (n_hidden_channels,) * n_hidden_layers
        model = MLP(n_input_channels,
                    n_actions,
                    hidden_sizes,
                    nonlinearity=nonlinearity,
                    last_wscale=last_wscale)
        super().__init__(model=model, beta=self.beta, min_prob=min_prob)
Пример #19
0
    def __init__(self,
                 n_input_channels,
                 n_hidden_layers,
                 n_hidden_channels,
                 action_size,
                 min_action=None,
                 max_action=None,
                 bound_action=True):
        """Create an fc -> lstm -> out chain and an optional action filter.

        Args:
            n_input_channels: Input size of ``fc``.
            n_hidden_layers: Number of hidden layers inside ``fc``.
            n_hidden_channels: Output size of ``fc``, width of its hidden
                layers, and both sizes of ``lstm``.
            action_size: Output size of ``out``.
            min_action: Lower bound passed to ``bound_by_tanh``.
            max_action: Upper bound passed to ``bound_by_tanh``.
            bound_action: When true, an action filter calling
                ``bound_by_tanh`` is installed; otherwise the filter is
                ``None``.
        """
        self.n_input_channels = n_input_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.action_size = action_size
        self.min_action = min_action
        self.max_action = max_action
        self.bound_action = bound_action

        if self.bound_action:
            # A named function instead of a lambda bound to a name; the
            # bounds are still read from the instance when the filter runs.
            def action_filter(x):
                return bound_by_tanh(x, self.min_action, self.max_action)
        else:
            action_filter = None

        model = chainer.Chain(
            fc=MLP(self.n_input_channels, n_hidden_channels,
                   (self.n_hidden_channels, ) * self.n_hidden_layers),
            lstm=L.LSTM(n_hidden_channels, n_hidden_channels),
            out=L.Linear(n_hidden_channels, action_size),
        )

        def model_call(model, x):
            # fc -> relu -> lstm -> out
            h = F.relu(model.fc(x))
            h = model.lstm(h)
            h = model.out(h)
            return h

        super().__init__(model=model,
                         model_call=model_call,
                         action_filter=action_filter)
Пример #20
0
    def __init__(self,
                 n_dim_obs,
                 n_dim_action,
                 n_hidden_channels,
                 n_hidden_layers,
                 normalize_input=True):
        """Build the ``obs_mlp`` (MLPBN) and ``mlp`` and expose ``mlp.output``.

        Args:
            n_dim_obs: Input size of ``obs_mlp``.
            n_dim_action: Added to ``n_hidden_channels`` to form the input
                size of ``mlp``.
            n_hidden_channels: Output size of ``obs_mlp`` and width of every
                hidden layer of ``mlp``.
            n_hidden_layers: ``mlp`` receives ``n_hidden_layers - 1`` hidden
                layers.
            normalize_input: Forwarded to ``obs_mlp`` and stored on the
                instance.
        """
        self.normalize_input = normalize_input
        self.n_hidden_channels = n_hidden_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_input_channels = n_dim_obs + n_dim_action

        super().__init__()

        # obs_mlp already accounts for one layer, hence the "- 1".
        remaining_hidden = [n_hidden_channels] * (n_hidden_layers - 1)
        with self.init_scope():
            self.obs_mlp = MLPBN(
                in_size=n_dim_obs,
                out_size=n_hidden_channels,
                hidden_sizes=[],
                normalize_input=normalize_input,
                normalize_output=True,
            )
            self.mlp = MLP(
                in_size=n_hidden_channels + n_dim_action,
                out_size=1,
                hidden_sizes=remaining_hidden,
            )

        self.output = self.mlp.output
Пример #21
0
 def __init__(self, ndim_obs, n_actions, n_hidden_channels,
              n_hidden_layers):
     """Wrap an MLP taking ``ndim_obs`` inputs and giving ``n_actions`` outputs.

     Args:
         ndim_obs: Input size of the MLP.
         n_actions: Output size of the MLP.
         n_hidden_channels: Width of every hidden layer.
         n_hidden_layers: Number of hidden layers.
     """
     hidden_sizes = [n_hidden_channels] * n_hidden_layers
     model = MLP(in_size=ndim_obs,
                 out_size=n_actions,
                 hidden_sizes=hidden_sizes)
     super().__init__(model=model)