Exemplo n.º 1
0
    def __init__(self,
                 input_shape,
                 output_dim,
                 hidden_dim,
                 hidden_nonlinearity=LN.rectify,
                 output_nonlinearity=None,
                 output_W_init=LI.GlorotUniform(),
                 name=None,
                 input_var=None,
                 input_layer=None,
                 trunc_steps=20,
                 output_gain=1):
        """Assemble a GRU layer followed by a dense read-out.

        Two views of the same network are built: one that consumes the
        sequence input layer and one that advances a single step from a
        previous hidden state. The single-step read-out is constructed with
        the ``W`` and ``b`` of the sequence read-out.

        :param input_shape: tuple appended to the two leading ``None`` axes
            of the sequence input (and to the single ``None`` axis of the
            step input).
        :param output_dim: number of units of the dense read-out.
        :param hidden_dim: number of GRU units.
        :param hidden_nonlinearity: passed through to ``GRULayer``.
        :param output_nonlinearity: nonlinearity of the dense read-out.
        :param output_W_init: value given as ``W`` to the sequence read-out.
        :param name: accepted but not used by this constructor.
        :param input_var: variable handed to the input layer created here;
            not used when ``input_layer`` is supplied.
        :param input_layer: existing layer to use as the sequence input
            instead of creating a new ``InputLayer``.
        :param trunc_steps: passed through to ``GRULayer``.
        :param output_gain: accepted but not used by this constructor.
        """

        def to_sequence(flat_output, l_input):
            # Put back the two leading axes of the sequence input; the last
            # axis is inferred by reshape.
            leading_axes = (l_input.shape[0], l_input.shape[1])
            return flat_output.reshape(leading_axes + (-1, ))

        def to_sequence_shape(flat_output_shape, l_input_shape):
            return (l_input_shape[0], l_input_shape[1],
                    flat_output_shape[-1])

        # Only create an input layer when the caller did not provide one.
        seq_input = input_layer
        if seq_input is None:
            seq_input = L.InputLayer(shape=(None, None) + input_shape,
                                     input_var=input_var,
                                     name="input")

        step_input = L.InputLayer(shape=(None, ) + input_shape)
        step_prev_hidden = L.InputLayer(shape=(None, hidden_dim))

        gru = GRULayer(seq_input,
                       num_units=hidden_dim,
                       hidden_nonlinearity=hidden_nonlinearity,
                       hidden_init_trainable=False,
                       trunc_steps=trunc_steps)

        # The dense read-out works on a 2-D view of the GRU output; the
        # OpLayer afterwards restores the leading axes of the input.
        flat_hidden = L.ReshapeLayer(gru, shape=(-1, hidden_dim))
        flat_readout = L.DenseLayer(
            flat_hidden,
            num_units=output_dim,
            nonlinearity=output_nonlinearity,
            W=output_W_init,
        )
        seq_output = OpLayer(
            flat_readout,
            op=to_sequence,
            shape_op=to_sequence_shape,
            extras=[seq_input])

        # Single-step view: same GRU, read-out built from the W and b above.
        step_hidden = gru.get_step_layer(step_input, step_prev_hidden)
        step_output = L.DenseLayer(
            step_hidden,
            num_units=output_dim,
            nonlinearity=output_nonlinearity,
            W=flat_readout.W,
            b=flat_readout.b,
        )

        # Sequence-level handles.
        self._l_in = seq_input
        self._hid_init_param = gru.h0
        self._l_gru = gru
        self._l_out = seq_output
        # Step-level handles.
        self._l_step_input = step_input
        self._l_step_prev_hidden = step_prev_hidden
        self._l_step_hidden = step_hidden
        self._l_step_output = step_output
Exemplo n.º 2
0
    def __init__(self,
                 input_shape,
                 output_dim,
                 hidden_dim,
                 hidden_nonlinearity=NL.rectify,
                 output_nonlinearity=None,
                 name=None,
                 input_var=None,
                 output_b_init=LI.Constant(0.)):
        """Wire a GRU layer to a dense output layer.

        Builds a sequence-level graph and a single-step graph. The
        single-step output layer is constructed with the ``W`` and ``b`` of
        the sequence-level output layer.

        :param input_shape: tuple appended to the two leading ``None`` axes
            of the sequence input (and to the single ``None`` axis of the
            step input).
        :param output_dim: number of units of the dense output layer.
        :param hidden_dim: number of GRU units.
        :param hidden_nonlinearity: passed through to ``GRULayer``.
        :param output_nonlinearity: nonlinearity of the dense output layer.
        :param name: accepted but not used by this constructor.
        :param input_var: variable handed to the sequence input layer.
        :param output_b_init: value given as ``b`` to the sequence-level
            output layer.
        """

        def unflatten(flat_output, l_input):
            # Restore the two leading axes of the sequence input; reshape
            # infers the trailing axis.
            head = (l_input.shape[0], l_input.shape[1])
            return flat_output.reshape(head + (-1, ))

        def unflatten_shape(flat_output_shape, l_input_shape):
            return (l_input_shape[0], l_input_shape[1],
                    flat_output_shape[-1])

        # Input placeholders for the sequence graph and the step graph.
        seq_in = L.InputLayer(shape=(None, None) + input_shape,
                              input_var=input_var)
        step_in = L.InputLayer(shape=(None, ) + input_shape)
        prev_hidden_in = L.InputLayer(shape=(None, hidden_dim))

        recurrent = GRULayer(seq_in,
                             num_units=hidden_dim,
                             hidden_nonlinearity=hidden_nonlinearity,
                             hidden_init_trainable=False)

        # Dense layer is applied to a 2-D view of the GRU output, then the
        # result is reshaped back by the OpLayer.
        hidden_2d = L.ReshapeLayer(recurrent, shape=(-1, hidden_dim))
        dense_2d = L.DenseLayer(hidden_2d,
                                num_units=output_dim,
                                nonlinearity=output_nonlinearity,
                                b=output_b_init)
        seq_out = OpLayer(
            dense_2d,
            op=unflatten,
            shape_op=unflatten_shape,
            extras=[seq_in])

        # Step graph: one GRU transition plus a dense layer built from the
        # W and b of the sequence-level dense layer.
        step_hidden = recurrent.get_step_layer(step_in, prev_hidden_in)
        step_out = L.DenseLayer(
            step_hidden,
            num_units=output_dim,
            nonlinearity=output_nonlinearity,
            W=dense_2d.W,
            b=dense_2d.b,
        )

        self._l_in = seq_in
        self._hid_init_param = recurrent.h0
        self._l_gru = recurrent
        self._l_out = seq_out

        self._l_step_input = step_in
        self._l_step_prev_hidden = prev_hidden_in
        self._l_step_hidden = step_hidden
        self._l_step_output = step_out
Exemplo n.º 3
0
 def __init__(self, input_shape, output_dim, hidden_dim, hidden_nonlinearity=NL.tanh,
              output_nonlinearity=None, name=None, input_var=None, input_layer=None, **kwargs):
     """Build a recurrent layer followed by a dense output layer.

     The recurrent layer itself comes from ``self.create_recurrent_layer``.
     A sequence-level graph and a single-step graph are built; the
     single-step output layer is constructed with the ``W`` and ``b`` of
     the sequence-level output layer.

     :param input_shape: tuple appended to the two leading ``None`` axes
         of the sequence input (and to the single ``None`` axis of the
         step input).
     :param output_dim: number of units of the dense output layer.
     :param hidden_dim: size passed to ``create_recurrent_layer`` and used
         to flatten its output.
     :param hidden_nonlinearity: forwarded to ``create_recurrent_layer``.
     :param output_nonlinearity: nonlinearity of the dense output layer.
     :param name: accepted but not used by this constructor.
     :param input_var: variable handed to the input layer created here;
         not used when ``input_layer`` is supplied.
     :param input_layer: existing layer to use as the sequence input
         instead of creating a new ``InputLayer``.
     :param kwargs: forwarded unchanged to ``create_recurrent_layer``.
     """

     def restore_leading_axes(flat_output, l_input):
         # Reshape back to the two leading axes of the sequence input;
         # the trailing axis is inferred.
         first_two = (l_input.shape[0], l_input.shape[1])
         return flat_output.reshape(first_two + (-1,))

     def restored_shape(flat_output_shape, l_input_shape):
         return (l_input_shape[0], l_input_shape[1], flat_output_shape[-1])

     # Reuse the caller's input layer when one is given.
     sequence_in = input_layer
     if sequence_in is None:
         sequence_in = L.InputLayer(shape=(None, None) + input_shape, input_var=input_var, name="input")

     step_in = L.InputLayer(shape=(None,) + input_shape)
     step_prev_hidden = L.InputLayer(shape=(None, hidden_dim))

     recurrent = self.create_recurrent_layer(
         sequence_in, hidden_dim, hidden_nonlinearity=hidden_nonlinearity, **kwargs
     )

     # Dense layer runs on a 2-D view; the OpLayer reshapes the result back.
     recurrent_2d = L.ReshapeLayer(recurrent, shape=(-1, hidden_dim))
     dense_2d = L.DenseLayer(
         recurrent_2d,
         num_units=output_dim,
         nonlinearity=output_nonlinearity,
     )
     sequence_out = OpLayer(
         dense_2d,
         op=restore_leading_axes,
         shape_op=restored_shape,
         extras=[sequence_in]
     )

     # Single-step graph built from the same recurrent layer and the W and b
     # of the dense layer above.
     step_hidden = recurrent.get_step_layer(step_in, step_prev_hidden)
     step_out = L.DenseLayer(
         step_hidden,
         num_units=output_dim,
         nonlinearity=output_nonlinearity,
         W=dense_2d.W,
         b=dense_2d.b,
     )

     self._l_in = sequence_in
     self._hid_init_param = recurrent.h0
     self._l_recurrent = recurrent
     self._l_out = sequence_out

     self._l_step_input = step_in
     self._l_step_prev_hidden = step_prev_hidden
     self._l_step_hidden = step_hidden
     self._l_step_output = step_out
    def __init__(
            self,
            env_spec,
            hidden_dim=32,
            feature_network=None,
            state_include_action=True,
            hidden_nonlinearity=NL.tanh):
        """
        Build a categorical policy whose action probabilities come from a
        GRU network with a softmax output.

        :param env_spec: A spec for the env. Its action space must be an
            instance of ``Discrete``.
        :param hidden_dim: dimension of hidden layer
        :param feature_network: optional network; when given, its
            ``output_layer`` is reshaped and fed to the GRU network instead
            of the raw input layer.
        :param state_include_action: when true, the input dimension is the
            flat observation dimension plus the flat action dimension;
            otherwise only the observation dimension.
        :param hidden_nonlinearity: nonlinearity passed to the GRU network
        """
        assert isinstance(env_spec.action_space, Discrete)
        # At this point locals() holds only the constructor arguments.
        # NOTE(review): presumably this is how Serializable records them
        # for re-construction -- confirm against Serializable.quick_init.
        Serializable.quick_init(self, locals())
        super(CategoricalGRUPolicy, self).__init__(env_spec)

        obs_dim = env_spec.observation_space.flat_dim
        action_dim = env_spec.action_space.flat_dim

        if state_include_action:
            input_dim = obs_dim + action_dim
        else:
            input_dim = obs_dim

        l_input = L.InputLayer(
            shape=(None, None, input_dim),
            name="input"
        )

        if feature_network is None:
            # No feature extractor: the GRU network reads the input layer
            # directly.
            feature_dim = input_dim
            l_flat_feature = None
            l_feature = l_input
        else:
            # The feature network's output is reshaped so that its two
            # leading axes match those of l_input and its last axis is
            # feature_dim.
            # NOTE(review): the lambdas below name a parameter `input`,
            # shadowing the builtin inside their bodies.
            feature_dim = feature_network.output_layer.output_shape[-1]
            l_flat_feature = feature_network.output_layer
            l_feature = OpLayer(
                l_flat_feature,
                extras=[l_input],
                name="reshape_feature",
                op=lambda flat_feature, input: TT.reshape(
                    flat_feature,
                    [input.shape[0], input.shape[1], feature_dim]
                ),
                shape_op=lambda _, input_shape: (input_shape[0], input_shape[1], feature_dim)
            )

        prob_network = GRUNetwork(
            input_shape=(feature_dim,),
            input_layer=l_feature,
            output_dim=env_spec.action_space.n,
            hidden_dim=hidden_dim,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=TT.nnet.softmax,
            name="prob_network"
        )

        self.prob_network = prob_network
        self.feature_network = feature_network
        self.l_input = l_input
        self.state_include_action = state_include_action

        # Symbolic input for the single-step function; it is routed through
        # the feature network first when one is present.
        flat_input_var = TT.matrix("flat_input")
        if feature_network is None:
            feature_var = flat_input_var
        else:
            feature_var = L.get_output(l_flat_feature, {feature_network.input_layer: flat_input_var})

        # Compiled single-step function: takes the flat input and the
        # previous hidden state, returns the step output and the step
        # hidden layer output.
        self.f_step_prob = ext.compile_function(
            [
                flat_input_var,
                prob_network.step_prev_hidden_layer.input_var
            ],
            L.get_output([
                prob_network.step_output_layer,
                prob_network.step_hidden_layer
            ], {prob_network.step_input_layer: feature_var})
        )

        self.input_dim = input_dim
        self.action_dim = action_dim
        self.hidden_dim = hidden_dim

        # Per-rollout state; starts unset.
        self.prev_action = None
        self.prev_hidden = None
        self.dist = RecurrentCategorical(env_spec.action_space.n)

        # Layers handed to LasagnePowered: the GRU network output, plus the
        # feature network output when a feature network is used.
        out_layers = [prob_network.output_layer]
        if feature_network is not None:
            out_layers.append(feature_network.output_layer)

        LasagnePowered.__init__(self, out_layers)