Exemplo n.º 1
0
    def __init__(self, name, input_shape, output_dim, hidden_dim, hidden_nonlinearity=tf.nn.relu,
                 lstm_layer_cls=L.LSTMLayer,
                 output_nonlinearity=None, input_var=None, input_layer=None, forget_bias=1.0, use_peepholes=False,
                 layer_args=None):
        """Assemble an LSTM network with a dense read-out, plus a single-step twin.

        Everything is created under ``tf.variable_scope(name)``:

        * sequence graph: input -> LSTM -> flatten to ``(-1, hidden_dim)`` ->
          dense -> reshape back to the two leading dimensions of the input;
        * step graph: (step input, previous state) -> LSTM step layer -> dense
          read-out that reuses ``W`` and ``b`` of the sequence read-out.

        :param name: variable scope wrapping all layers built here.
        :param input_shape: tuple appended to the leading ``None`` dimensions
            of the input layers.
        :param output_dim: number of units of the dense read-out.
        :param hidden_dim: number of units of the LSTM layer.
        :param hidden_nonlinearity: forwarded to the LSTM layer.
        :param lstm_layer_cls: class used to construct the recurrent layer.
        :param output_nonlinearity: nonlinearity of the dense read-out.
        :param input_var: forwarded to the input layer when one is created.
        :param input_layer: existing layer used instead of a new input layer.
        :param forget_bias: forwarded to the LSTM layer.
        :param use_peepholes: forwarded to the LSTM layer.
        :param layer_args: optional extra keyword arguments for
            ``lstm_layer_cls``.
        """
        extra_lstm_kwargs = dict() if layer_args is None else layer_args

        def _restore_leading_dims(flat_output, l_input):
            # Give the flat read-out the first two dynamic dims of the input.
            dim0 = tf.shape(l_input)[0]
            dim1 = tf.shape(l_input)[1]
            return tf.reshape(flat_output, tf.stack((dim0, dim1, -1)))

        def _restored_shape(flat_output_shape, l_input_shape):
            return (l_input_shape[0], l_input_shape[1], flat_output_shape[-1])

        with tf.variable_scope(name):
            if input_layer is not None:
                seq_input = input_layer
            else:
                seq_input = L.InputLayer(
                    shape=(None, None) + input_shape,
                    input_var=input_var,
                    name="input",
                )

            step_input = L.InputLayer(shape=(None,) + input_shape, name="step_input")
            # Previous hidden state and cell state, side by side.
            step_prev_state = L.InputLayer(shape=(None, hidden_dim * 2), name="step_prev_state")

            lstm = lstm_layer_cls(
                seq_input,
                num_units=hidden_dim,
                hidden_nonlinearity=hidden_nonlinearity,
                hidden_init_trainable=False,
                name="lstm",
                forget_bias=forget_bias,
                cell_init_trainable=False,
                use_peepholes=use_peepholes,
                **extra_lstm_kwargs
            )
            lstm_flat = L.ReshapeLayer(lstm, shape=(-1, hidden_dim), name="lstm_flat")
            readout_flat = L.DenseLayer(
                lstm_flat,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                name="output_flat",
            )
            readout = L.OpLayer(
                readout_flat,
                op=_restore_leading_dims,
                shape_op=_restored_shape,
                extras=[seq_input],
                name="output",
            )

            step_state = lstm.get_step_layer(step_input, step_prev_state, name="step_state")
            # First hidden_dim columns: hidden state; the rest: cell state.
            step_hidden = L.SliceLayer(step_state, indices=slice(hidden_dim), name="step_hidden")
            step_cell = L.SliceLayer(step_state, indices=slice(hidden_dim, None), name="step_cell")
            # Share the read-out parameters with the sequence graph.
            step_readout = L.DenseLayer(
                step_hidden,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                W=readout_flat.W,
                b=readout_flat.b,
                name="step_output",
            )

            self._hidden_dim = hidden_dim
            self._l_in = seq_input
            self._l_lstm = lstm
            self._hid_init_param = lstm.h0
            self._cell_init_param = lstm.c0
            self._l_out = readout
            self._l_step_input = step_input
            self._l_step_prev_state = step_prev_state
            self._l_step_state = step_state
            self._l_step_hidden = step_hidden
            self._l_step_cell = step_cell
            self._l_step_output = step_readout
Exemplo n.º 2
0
    def __init__(self,
                 name,
                 input_shape,
                 output_dim,
                 input_var=None,
                 input_layer=None,
                 qmdp_param=None):
        """Build the filter + planner network and its single-step counterpart.

        The sequence graph is input -> ``FilterLayer`` -> flatten to
        ``(-1, hidden_dim)`` -> ``PlannerLayer`` -> reshape back to the two
        leading dimensions of the input. The step graph is obtained from the
        ``get_step_layer`` methods of the filter and planner layers.

        :param name: variable scope wrapping all layers built here.
        :param input_shape: tuple appended to the leading ``None`` dimensions
            of the input layers.
        :param output_dim: accepted for signature compatibility; it is not
            read anywhere in this constructor.
        :param input_var: forwarded to the input layer when one is created.
        :param input_layer: existing layer used instead of a new input layer.
        :param qmdp_param: mapping that must provide ``'grid_n'`` and
            ``'grid_m'``; it is also forwarded to the filter and planner
            layers. Required despite the ``None`` default.
        :raises TypeError: if ``qmdp_param`` is ``None``.
        """
        if qmdp_param is None:
            # Subscripting None would raise the same exception type, only with
            # a message that does not point at the missing argument.
            raise TypeError(
                "qmdp_param is required and must provide 'grid_n' and 'grid_m'")

        with tf.variable_scope(name):
            # Width of the recurrent state: one entry per grid cell.
            hidden_dim = qmdp_param['grid_n'] * qmdp_param['grid_m']
            if input_layer is None:
                l_in = L.InputLayer(shape=(None, None) + input_shape,
                                    input_var=input_var,
                                    name="input")
            else:
                l_in = input_layer

            l_step_input = L.InputLayer(shape=(None, ) + input_shape,
                                        name="step_input")
            l_step_prev_state = L.InputLayer(shape=(None, hidden_dim),
                                             name="step_prev_state")

            l_gru = FilterLayer(l_in, qmdp_param, name="qmdp_filter")

            l_gru_flat = L.ReshapeLayer(l_gru,
                                        shape=(-1, hidden_dim),
                                        name="gru_flat")
            l_output_flat = PlannerLayer(l_gru_flat,
                                         qmdp_param,
                                         name="output_flat")

            # Restore the two leading (dynamic) dimensions of the input on the
            # flat planner output.
            l_output = L.OpLayer(
                l_output_flat,
                op=lambda flat_output, l_input: tf.reshape(
                    flat_output,
                    tf.stack(
                        (tf.shape(l_input)[0], tf.shape(l_input)[1], -1))),
                shape_op=lambda flat_output_shape, l_input_shape:
                (l_input_shape[0], l_input_shape[1], flat_output_shape[-1]),
                extras=[l_in],
                name="output")
            l_step_state = l_gru.get_step_layer(l_step_input,
                                                l_step_prev_state,
                                                name="step_state")
            # The whole step state serves as the hidden state.
            l_step_hidden = l_step_state
            l_step_output = l_output_flat.get_step_layer(l_step_hidden,
                                                         name="step_output")

            self._l_in = l_in
            self._hid_init_param = l_gru.h0
            self._l_gru = l_gru
            self._l_output_flat = l_output_flat
            self._l_out = l_output
            self._l_step_input = l_step_input
            self._l_step_prev_state = l_step_prev_state
            self._l_step_hidden = l_step_hidden
            self._l_step_state = l_step_state
            self._l_step_output = l_step_output
            self._hidden_dim = hidden_dim
Exemplo n.º 3
0
    def __init__(self,
                 env_spec,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.relu,
                 action_merge_layer=-2,
                 output_nonlinearity=None,
                 bn=False,
                 dropout=.05):
        """Build a dense Q-value network over (observation, action) with dropout.

        Observations pass through one dense layer per entry of
        ``hidden_sizes``, each followed by a dropout layer. The action input
        is concatenated in front of the hidden layer selected by
        ``action_merge_layer``. A single-unit dense layer produces the output.
        Two functions are compiled from it: ``_f_qval`` (``deterministic=True``)
        and ``_f_qval_drop`` (``deterministic=False``).

        :param env_spec: provides ``observation_space.flat_dim`` and
            ``action_space.flat_dim`` for the two input layers.
        :param hidden_sizes: unit counts of the hidden dense layers.
        :param hidden_nonlinearity: nonlinearity of the hidden dense layers.
        :param action_merge_layer: index of the layer at which the action is
            merged; taken modulo ``len(hidden_sizes) + 1``.
        :param output_nonlinearity: nonlinearity of the output layer.
        :param bn: when true, ``batch_norm`` is applied before each hidden
            layer.
        :param dropout: second argument handed to every ``L.DropoutLayer``.
        """
        Serializable.quick_init(self, locals())

        obs_in = L.InputLayer(
            shape=(None, env_spec.observation_space.flat_dim), name="obs")
        act_in = L.InputLayer(
            shape=(None, env_spec.action_space.flat_dim), name="actions")

        total_layers = len(hidden_sizes) + 1
        # Python's % is already non-negative for a positive modulus, so a
        # single reduction normalises negative indices.
        merge_at = action_merge_layer % total_layers if total_layers > 1 else 1

        net = obs_in
        for position, width in enumerate(hidden_sizes):
            if bn:
                net = batch_norm(net)
            if position == merge_at:
                net = L.ConcatLayer([net, act_in])
            net = L.DenseLayer(
                net,
                num_units=width,
                nonlinearity=hidden_nonlinearity,
                name="h%d" % (position + 1),
            )
            net = L.DropoutLayer(net, dropout)

        # Only reachable when there are no hidden layers (merge_at == 1).
        if merge_at == total_layers:
            net = L.ConcatLayer([net, act_in])

        q_layer = L.DenseLayer(
            net,
            num_units=1,
            nonlinearity=output_nonlinearity,
            name="output",
        )

        q_deterministic = L.get_output(q_layer, deterministic=True)
        q_with_dropout = L.get_output(q_layer, deterministic=False)
        fn_inputs = [obs_in.input_var, act_in.input_var]

        self._f_qval = tensor_utils.compile_function(
            list(fn_inputs), q_deterministic)
        self._f_qval_drop = tensor_utils.compile_function(
            list(fn_inputs), q_with_dropout)
        self._output_layer = q_layer
        self._obs_layer = obs_in
        self._action_layer = act_in
        self._output_nonlinearity = output_nonlinearity

        LayersPowered.__init__(self, [q_layer])
Exemplo n.º 4
0
    def __init__(self, name, input_shape, output_dim, hidden_dim, hidden_nonlinearity=tf.nn.relu,
                 gru_layer_cls=L.GRULayer,
                 output_nonlinearity=None, input_var=None, input_layer=None, layer_args=None):
        """Assemble a GRU network with a dense read-out, plus a single-step twin.

        Everything is created under ``tf.variable_scope(name)``:

        * sequence graph: input -> GRU -> flatten to ``(-1, hidden_dim)`` ->
          dense -> reshape back to the two leading dimensions of the input;
        * step graph: (step input, previous state) -> GRU step layer -> dense
          read-out that reuses ``W`` and ``b`` of the sequence read-out.

        :param name: variable scope wrapping all layers built here.
        :param input_shape: tuple appended to the leading ``None`` dimensions
            of the input layers.
        :param output_dim: number of units of the dense read-out.
        :param hidden_dim: number of units of the GRU layer.
        :param hidden_nonlinearity: forwarded to the GRU layer.
        :param gru_layer_cls: class used to construct the recurrent layer.
        :param output_nonlinearity: nonlinearity of the dense read-out.
        :param input_var: forwarded to the input layer when one is created.
        :param input_layer: existing layer used instead of a new input layer.
        :param layer_args: optional extra keyword arguments for
            ``gru_layer_cls``.
        """
        extra_gru_kwargs = dict() if layer_args is None else layer_args

        def _restore_leading_dims(flat_output, l_input):
            # Give the flat read-out the first two dynamic dims of the input.
            dim0 = tf.shape(l_input)[0]
            dim1 = tf.shape(l_input)[1]
            return tf.reshape(flat_output, tf.stack((dim0, dim1, -1)))

        def _restored_shape(flat_output_shape, l_input_shape):
            return (l_input_shape[0], l_input_shape[1], flat_output_shape[-1])

        with tf.variable_scope(name):
            if input_layer is not None:
                seq_input = input_layer
            else:
                seq_input = L.InputLayer(
                    shape=(None, None) + input_shape,
                    input_var=input_var,
                    name="input",
                )

            step_input = L.InputLayer(shape=(None,) + input_shape, name="step_input")
            step_prev_state = L.InputLayer(shape=(None, hidden_dim), name="step_prev_state")

            gru = gru_layer_cls(
                seq_input,
                num_units=hidden_dim,
                hidden_nonlinearity=hidden_nonlinearity,
                hidden_init_trainable=False,
                name="gru",
                **extra_gru_kwargs
            )
            gru_flat = L.ReshapeLayer(gru, shape=(-1, hidden_dim), name="gru_flat")
            readout_flat = L.DenseLayer(
                gru_flat,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                name="output_flat",
            )
            readout = L.OpLayer(
                readout_flat,
                op=_restore_leading_dims,
                shape_op=_restored_shape,
                extras=[seq_input],
                name="output",
            )

            step_state = gru.get_step_layer(step_input, step_prev_state, name="step_state")
            # The step state doubles as the hidden state.
            step_hidden = step_state
            # Share the read-out parameters with the sequence graph.
            step_readout = L.DenseLayer(
                step_hidden,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                W=readout_flat.W,
                b=readout_flat.b,
                name="step_output",
            )

            self._hidden_dim = hidden_dim
            self._l_in = seq_input
            self._l_gru = gru
            self._hid_init_param = gru.h0
            self._l_out = readout
            self._l_step_input = step_input
            self._l_step_prev_state = step_prev_state
            self._l_step_state = step_state
            self._l_step_hidden = step_hidden
            self._l_step_output = step_readout
Exemplo n.º 5
0
    def __init__(self,
                 *,
                 env_spec,
                 common_network_cls,
                 common_network_args,
                 state_input_dim,
                 state_network_cls=None,
                 state_network_args=dict(),
                 action_network_cls=None,
                 action_network_args=dict()):
        """Join a state branch and an action branch and feed a common network.

        Each branch is either a network built from the given class, or a plain
        input layer when no class is supplied. The flattened state branch and
        the action branch are concatenated and passed to
        ``common_network_cls`` as its ``input_layer``.

        The ``*_args`` mappings are never modified: the extra ``input_shape``
        / ``input_layer`` entries are added to copies. This keeps the shared
        default dictionaries empty between calls and leaves the caller's
        dictionaries (which ``Serializable.quick_init`` has captured) intact.

        :param env_spec: provides ``observation_space.shape`` and
            ``action_space.flat_dim``.
        :param common_network_cls: class of the network applied to the
            concatenated branches; must expose ``output_layer``.
        :param common_network_args: keyword arguments for
            ``common_network_cls`` (``input_layer`` is supplied here).
        :param state_input_dim: width of the state input layer used when
            ``state_network_cls`` is ``None``.
        :param state_network_cls: optional class for the state branch; must
            expose ``input_layer`` and ``output_layer``.
        :param state_network_args: keyword arguments for ``state_network_cls``
            (``input_shape`` is supplied here).
        :param action_network_cls: optional class for the action branch; must
            expose ``input_layer`` and ``output_layer``.
        :param action_network_args: keyword arguments for
            ``action_network_cls`` (``input_shape`` is supplied here).
        """
        # Must run first so that locals() holds exactly the call arguments.
        Serializable.quick_init(self, locals())

        logger.log('Reconciler: {}'.format(locals()))

        self.env_spec = env_spec

        if state_network_cls is not None:
            state_kwargs = dict(state_network_args,
                                input_shape=env_spec.observation_space.shape)
            state_network = state_network_cls(**state_kwargs)
            self.state_input_layer = state_network.input_layer
            state_processed_layer = state_network.output_layer
        else:
            self.state_input_layer = L.InputLayer(shape=(None,
                                                         state_input_dim),
                                                  input_var=None,
                                                  name='input_state')
            state_processed_layer = self.state_input_layer

        if action_network_cls is not None:
            action_kwargs = dict(
                action_network_args,
                input_shape=(env_spec.action_space.flat_dim, ))
            action_network = action_network_cls(**action_kwargs)
            self.action_input_layer = action_network.input_layer
            action_processed_layer = action_network.output_layer
        else:
            self.action_input_layer = L.InputLayer(
                shape=(None, env_spec.action_space.flat_dim),
                input_var=None,
                name='input_action')
            action_processed_layer = self.action_input_layer

        concat_layer = L.concat(
            [L.flatten(state_processed_layer), action_processed_layer])

        common_kwargs = dict(common_network_args, input_layer=concat_layer)
        common_network = common_network_cls(**common_kwargs)

        self.output_layer = common_network.output_layer

        self.output_layers = [self.output_layer]
    def __init__(self,
                 policy_name,
                 env_spec,
                 latent_sampler,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.tanh,
                 prob_network=None):
        """Set up a categorical MLP policy whose input includes a latent variable.

        When no ``prob_network`` is supplied, an ``MLP`` with a softmax output
        is created under ``tf.variable_scope(policy_name)``. Its input width
        is the flat observation dimension plus ``latent_sampler.dim``. The
        parent constructor is then called with the resulting network.

        :param policy_name: variable scope, also passed to the parent as
            ``name``.
        :param env_spec: provides ``observation_space.flat_dim`` and
            ``action_space.n``.
        :param latent_sampler: stored on the instance; its ``dim`` widens the
            network input.
        :param hidden_sizes: hidden layer sizes of the default MLP.
        :param hidden_nonlinearity: hidden nonlinearity of the default MLP.
        :param prob_network: pre-built network used instead of the default.
        """
        Serializable.quick_init(self, locals())
        self.latent_sampler = latent_sampler

        with tf.variable_scope(policy_name):
            if prob_network is None:
                obs_width = env_spec.observation_space.flat_dim
                net_input = L.InputLayer(
                    shape=(None, obs_width + self.latent_sampler.dim),
                    name="input",
                )
                prob_network = MLP(
                    input_layer=net_input,
                    output_dim=env_spec.action_space.n,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=tf.nn.softmax,
                    name="prob_network",
                )
                # NOTE(review): these two attributes are assigned only when the
                # network is created here, not for a caller-supplied one.
                self._output = prob_network.output
                self._inputs = prob_network.input_var

        super(CategoricalLatentVarMLPPolicy, self).__init__(
            name=policy_name,
            env_spec=env_spec,
            prob_network=prob_network,
        )
Exemplo n.º 7
0
    def __init__(
        self,
        name,
        output_dim,
        hidden_sizes,
        hidden_nonlinearity,
        output_nonlinearity,
        hidden_W_init=L.XavierUniformInitializer(),
        hidden_b_init=tf.zeros_initializer(),
        output_W_init=L.XavierUniformInitializer(),
        output_b_init=tf.zeros_initializer(),
        input_var=None,
        input_layer=None,
        input_shape=None,
        batch_normalization=False,
        weight_normalization=False,
    ):
        """Build a stack of dense layers under ``tf.variable_scope(name)``.

        :param name: variable scope wrapping all layers built here.
        :param output_dim: number of units of the output layer.
        :param hidden_sizes: unit counts of the hidden layers, in order.
        :param hidden_nonlinearity: nonlinearity of every hidden layer.
        :param output_nonlinearity: nonlinearity of the output layer.
        :param hidden_W_init: ``W`` argument of the hidden layers.
        :param hidden_b_init: ``b`` argument of the hidden layers.
        :param output_W_init: ``W`` argument of the output layer.
        :param output_b_init: ``b`` argument of the output layer.
        :param input_var: forwarded to the input layer when one is created.
        :param input_layer: existing layer used instead of a new input layer.
        :param input_shape: tuple appended to a leading ``None``; read only
            when ``input_layer`` is ``None``.
        :param batch_normalization: when true, ``L.batch_norm`` wraps the
            input, every hidden layer and the output layer.
        :param weight_normalization: forwarded to every dense layer.
        """
        Serializable.quick_init(self, locals())

        with tf.variable_scope(name):
            net_input = input_layer
            if net_input is None:
                net_input = L.InputLayer(
                    shape=(None, ) + input_shape,
                    input_var=input_var,
                    name="input",
                )

            self._layers = [net_input]
            current = net_input
            if batch_normalization:
                current = L.batch_norm(current)

            for position, width in enumerate(hidden_sizes):
                current = L.DenseLayer(
                    current,
                    num_units=width,
                    nonlinearity=hidden_nonlinearity,
                    name="hidden_%d" % position,
                    W=hidden_W_init,
                    b=hidden_b_init,
                    weight_normalization=weight_normalization,
                )
                if batch_normalization:
                    current = L.batch_norm(current)
                self._layers.append(current)

            final = L.DenseLayer(
                current,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                name="output",
                W=output_W_init,
                b=output_b_init,
                weight_normalization=weight_normalization,
            )
            if batch_normalization:
                final = L.batch_norm(final)
            self._layers.append(final)

            self._l_in = net_input
            self._l_out = final
            self._output = L.get_output(final)

            LayersPowered.__init__(self, final)
Exemplo n.º 8
0
 def create_MLP(
     self,
     name,
     output_dim,
     hidden_sizes,
     hidden_nonlinearity,
     output_nonlinearity,
     hidden_W_init=L.XavierUniformInitializer(),
     hidden_b_init=tf.zeros_initializer,
     output_W_init=L.XavierUniformInitializer(),
     output_b_init=tf.zeros_initializer,
     input_var=None,
     input_layer=None,
     input_shape=None,
     batch_normalization=False,
     weight_normalization=False,
 ):
     """Build a stack of dense layers and return its parts.

     All layers are created under ``tf.variable_scope(name)``. With
     ``batch_normalization`` enabled, ``L.batch_norm`` wraps the input, each
     hidden layer and the output layer.

     :param name: variable scope wrapping all layers built here.
     :param output_dim: number of units of the output layer.
     :param hidden_sizes: unit counts of the hidden layers, in order.
     :param hidden_nonlinearity: nonlinearity of every hidden layer.
     :param output_nonlinearity: nonlinearity of the output layer.
     :param hidden_W_init: ``W`` argument of the hidden layers.
     :param hidden_b_init: ``b`` argument of the hidden layers.
     :param output_W_init: ``W`` argument of the output layer.
     :param output_b_init: ``b`` argument of the output layer.
     :param input_var: forwarded to the input layer when one is created.
     :param input_layer: existing layer used instead of a new input layer.
     :param input_shape: tuple appended to a leading ``None``; read only
         when ``input_layer`` is ``None``.
     :param batch_normalization: enables the ``L.batch_norm`` wrappers.
     :param weight_normalization: forwarded to every dense layer.
     :return: tuple ``(layers, input_layer, output_layer, input_var,
         output)`` where ``layers`` lists input, hidden and output layers in
         order and ``output`` is ``L.get_output`` of the output layer.
     """
     with tf.variable_scope(name):
         net_input = input_layer
         if net_input is None:
             net_input = L.InputLayer(
                 shape=(None, ) + input_shape,
                 input_var=input_var,
                 name="input",
             )

         collected = [net_input]
         current = net_input
         if batch_normalization:
             current = L.batch_norm(current)

         for position, width in enumerate(hidden_sizes):
             current = L.DenseLayer(
                 current,
                 num_units=width,
                 nonlinearity=hidden_nonlinearity,
                 name="hidden_%d" % position,
                 W=hidden_W_init,
                 b=hidden_b_init,
                 weight_normalization=weight_normalization,
             )
             if batch_normalization:
                 current = L.batch_norm(current)
             collected.append(current)

         final = L.DenseLayer(
             current,
             num_units=output_dim,
             nonlinearity=output_nonlinearity,
             name="output",
             W=output_W_init,
             b=output_b_init,
             weight_normalization=weight_normalization,
         )
         if batch_normalization:
             final = L.batch_norm(final)
         collected.append(final)

         network_output = L.get_output(final)
         return collected, net_input, final, net_input.input_var, network_output
    def make_network(self,
                     dim_input,
                     dim_output,
                     subnetwork_hidden_sizes,
                     discriminator_options=None,
                     hidden_nonlinearity=tf.nn.tanh,
                     gating_network=None,
                     l_in=None,
                     conv_filters=None,
                     conv_filter_sizes=None,
                     conv_strides=None,
                     conv_pads=None,
                     input_shape=None):
        """Combine per-option sub-networks with a gating network.

        Sub-networks are created until there are ``self.num_options`` of them,
        a gating network is created unless one is supplied, and all of them
        are fed to ``ElemwiseMultiplyReduceSoftmaxReshapeLayer``.

        :param dim_input: iterable of input dimensions; used only when
            ``l_in`` is ``None`` to create the input layer.
        :param dim_output: output dimension of each sub-network.
        :param subnetwork_hidden_sizes: hidden sizes for the sub-networks and
            the gating network.
        :param discriminator_options: existing sub-networks. A list passed by
            the caller is extended in place with the newly created ones; when
            omitted, a fresh list is used for every call.
        :param hidden_nonlinearity: hidden nonlinearity of the sub-networks.
        :param gating_network: pre-built gating network used instead of
            creating one.
        :param l_in: existing input layer used instead of creating one.
        :param conv_filters: forwarded to the sub-network / gating builders.
        :param conv_filter_sizes: forwarded to the builders.
        :param conv_strides: forwarded to the builders.
        :param conv_pads: forwarded to the builders.
        :param input_shape: forwarded to the builders.
        :return: tuple ``(l_in, output)`` of the input layer and the combining
            layer.
        """
        if discriminator_options is None:
            # A literal [] default would be shared by every call and keep the
            # sub-networks appended by earlier calls.
            discriminator_options = []

        if l_in is None:
            l_in = L.InputLayer(shape=(None, ) + tuple(dim_input))

        # The range is empty when enough options already exist.
        for i in range(len(discriminator_options), self.num_options):
            subnet = self._make_subnetwork(
                l_in,
                dim_output=dim_output,
                hidden_sizes=subnetwork_hidden_sizes,
                output_nonlinearity=None,
                hidden_nonlinearity=hidden_nonlinearity,
                name="option%d" % i,
                conv_filters=conv_filters,
                conv_filter_sizes=conv_filter_sizes,
                conv_strides=conv_strides,
                conv_pads=conv_pads,
                input_shape=input_shape)
            discriminator_options.append(subnet)

        # only apply softmax if we're doing mixtures, if sparse mixtures or options, need to apply after sparsifying
        if gating_network is None:
            gating_network = self._make_gating_network(
                l_in,
                apply_softmax=True,
                hidden_sizes=subnetwork_hidden_sizes,
                conv_filters=conv_filters,
                conv_filter_sizes=conv_filter_sizes,
                conv_strides=conv_strides,
                conv_pads=conv_pads,
                input_shape=input_shape)

        # The gating network goes last in the list of incoming layers.
        output = ElemwiseMultiplyReduceSoftmaxReshapeLayer(
            discriminator_options + [gating_network])

        return l_in, output
Exemplo n.º 10
0
    def __init__(self, name, input_shape, output_dim,
                 conv_filters, conv_filter_sizes, conv_strides, conv_pads,
                 hidden_sizes, hidden_nonlinearity, output_nonlinearity,
                 hidden_W_init=L.XavierUniformInitializer(), hidden_b_init=tf.zeros_initializer(),
                 output_W_init=L.XavierUniformInitializer(), output_b_init=tf.zeros_initializer(),
                 input_var=None, input_layer=None, batch_normalization=False, weight_normalization=False):
        """
        A network composed of several convolution layers followed by some fc layers.

        The convolution stack ends in a linear dense layer ("SL_fc") whose
        output is stored as ``self.actValues``. Three dense layers in the "PG"
        scope map that layer to the output. The same three layers are also
        applied to the trainable variable ``self.actVariable``; the result is
        stored as ``self.bcOut`` and wrapped in an input layer.

        input_shape: (width,height,channel)
            HOWEVER, network inputs are assumed flattened. This network will first unflatten the inputs and then apply the standard convolutions and so on.
            A 2-tuple gets a leading 1 prepended before unflattening.
            Not read when input_layer is given.
        output_dim: number of units of the output layer
        conv_filters: a list of numbers of convolution kernel
        conv_filter_sizes: a list of sizes (int) of the convolution kernels
        conv_strides: a list of strides (int) of the conv kernels
        conv_pads: a list of pad formats (either 'SAME' or 'VALID')
        hidden_nonlinearity: a nonlinearity from tf.nn, shared by all conv and fc layers
        hidden_sizes: a list of numbers of hidden units for all fc layers;
            entries 0, 1 and 2 are read, so at least three are required
        output_nonlinearity: nonlinearity of the output layer
        hidden_W_init / hidden_b_init: W / b arguments of the hidden fc layers
        output_W_init / output_b_init: W / b arguments of the output layer
        input_var: forwarded to the input layer when one is created
        input_layer: existing layer used as input instead of creating one
        batch_normalization: wrap the input, the conv layers, the last hidden
            fc layer and the output layer in L.batch_norm
        weight_normalization: forwarded to every conv and dense layer
        """
        Serializable.quick_init(self, locals())
        with tf.variable_scope(name):
            if input_layer is not None:
                l_in = input_layer
                l_hid = l_in
            elif len(input_shape) in (2, 3):
                # Flat input that is unflattened before the convolutions.
                l_in = L.InputLayer(shape=(None, np.prod(input_shape)), input_var=input_var, name="input")
                if len(input_shape) == 2:
                    input_shape = (1,) + input_shape
                l_hid = L.reshape(l_in, ([0],) + input_shape, name="reshape_input")
            else:
                l_in = L.InputLayer(shape=(None,) + input_shape, input_var=input_var, name="input")
                l_hid = l_in

            if batch_normalization:
                l_hid = L.batch_norm(l_hid)

            conv_specs = zip(conv_filters, conv_filter_sizes, conv_strides, conv_pads)
            for idx, (conv_filter, filter_size, stride, pad) in enumerate(conv_specs):
                l_hid = L.Conv2DLayer(
                    l_hid,
                    num_filters=conv_filter,
                    filter_size=filter_size,
                    stride=(stride, stride),
                    pad=pad,
                    nonlinearity=hidden_nonlinearity,
                    name="SL_conv_hidden_%d" % idx,
                    weight_normalization=weight_normalization,
                )
                if batch_normalization:
                    l_hid = L.batch_norm(l_hid)

            l_hid = L.flatten(l_hid, name="conv_flatten")
            # Linear layer whose activations are exposed as self.actValues.
            critical_layer = L.DenseLayer(
                l_hid,
                num_units=hidden_sizes[0],
                nonlinearity=None,
                name="SL_fc",
                W=hidden_W_init,
                b=hidden_b_init,
                weight_normalization=weight_normalization,
            )

            self.actValues = L.get_output(critical_layer)

            # Forward pass block
            with tf.variable_scope("PG"):
                fc_1 = L.DenseLayer(
                    critical_layer,
                    num_units=hidden_sizes[1],
                    nonlinearity=hidden_nonlinearity,
                    name="pgLayer_1",
                    W=hidden_W_init,
                    b=hidden_b_init,
                    weight_normalization=weight_normalization,
                )

                fc_2 = L.DenseLayer(
                    fc_1,
                    num_units=hidden_sizes[2],
                    nonlinearity=hidden_nonlinearity,
                    name="pgLayer_2",
                    W=hidden_W_init,
                    b=hidden_b_init,
                    weight_normalization=weight_normalization,
                )
                if batch_normalization:
                    # This used to reference an undefined name (`fcFor`), so
                    # enabling batch normalization raised NameError here.
                    fc_2 = L.batch_norm(fc_2)

                fcOut = L.DenseLayer(
                    fc_2,
                    num_units=output_dim,
                    nonlinearity=output_nonlinearity,
                    name="output",
                    W=output_W_init,
                    b=output_b_init,
                    weight_normalization=weight_normalization,
                )
                if batch_normalization:
                    fcOut = L.batch_norm(fcOut)

            # NOTE(review): the [10000, 32] shape is hard-coded; presumably 32
            # has to match hidden_sizes[0] — confirm against callers.
            self.actVariable = tf.Variable(initial_value = tf.zeros([ 10000, 32], dtype = tf.float32),name = "act_var1", trainable = True)

            # Push the variable through the same three "PG" layers.
            # NOTE(review): with batch_normalization on, fc_2 and fcOut are the
            # batch-norm wrappers, so this chain may bypass the wrapped dense
            # layers — verify before relying on that combination.
            bcOut = fcOut.get_output_for(fc_2.get_output_for(fc_1.get_output_for(self.actVariable)))
            self.bcOut = bcOut

            # Original note: "shape is (actVariable[0] , 2)" — unverified.
            backOutLayer = L.InputLayer(shape = (), input_var= bcOut , name="OutputLayer")

            self._l_in = l_in
            self.forwardOutLayer = fcOut
            self.backOutLayer = backOutLayer

            outLayers = [fcOut, backOutLayer]

        LayersPowered.__init__(self, outLayers)
Exemplo n.º 11
0
    def __init__(
            self,
            env_spec,
            name='qnet',
            hidden_sizes=(32, 32),
            hidden_nonlinearity=tf.nn.relu,
            action_merge_layer=-2,
            output_nonlinearity=None,
            hidden_W_init=L.XavierUniformInitializer(),
            hidden_b_init=tf.zeros_initializer,
            output_W_init=L.XavierUniformInitializer(),
            output_b_init=tf.zeros_initializer,
            bn=False):
        """Build a dense Q-value network over (observation, action).

        All layers live under ``tf.variable_scope(name)``. Observations pass
        through one dense layer per entry of ``hidden_sizes``. The action
        input is concatenated in front of the hidden layer selected by
        ``action_merge_layer``. The single-unit output is reshaped to a
        vector and compiled into ``_f_qval``.

        :param env_spec: provides ``observation_space.flat_dim`` and
            ``action_space.flat_dim`` for the two input layers.
        :param name: variable scope wrapping all layers built here.
        :param hidden_sizes: unit counts of the hidden dense layers.
        :param hidden_nonlinearity: nonlinearity of the hidden dense layers.
        :param action_merge_layer: index of the layer at which the action is
            merged; taken modulo ``len(hidden_sizes) + 1``.
        :param output_nonlinearity: nonlinearity of the output layer.
        :param hidden_W_init: ``W`` argument of the hidden layers.
        :param hidden_b_init: ``b`` argument of the hidden layers.
        :param output_W_init: ``W`` argument of the output layer.
        :param output_b_init: ``b`` argument of the output layer.
        :param bn: when true, ``L.batch_norm`` is applied before each hidden
            layer.
        """
        Serializable.quick_init(self, locals())

        with tf.variable_scope(name):
            obs_in = L.InputLayer(
                shape=(None, env_spec.observation_space.flat_dim), name="obs")
            act_in = L.InputLayer(
                shape=(None, env_spec.action_space.flat_dim), name="actions")

            total_layers = len(hidden_sizes) + 1
            # Python's % is already non-negative for a positive modulus, so a
            # single reduction normalises negative indices.
            merge_at = action_merge_layer % total_layers if total_layers > 1 else 1

            # NOTE(review): when merge_at equals len(hidden_sizes) (e.g.
            # action_merge_layer=-1 with hidden layers present) neither merge
            # site below fires and the action input stays unconnected.
            net = obs_in
            for position, width in enumerate(hidden_sizes):
                if bn:
                    net = L.batch_norm(net)
                if position == merge_at:
                    net = L.ConcatLayer([net, act_in])
                net = L.DenseLayer(
                    net,
                    num_units=width,
                    W=hidden_W_init,
                    b=hidden_b_init,
                    nonlinearity=hidden_nonlinearity,
                    name="h%d" % (position + 1),
                )

            # Only reachable when there are no hidden layers (merge_at == 1).
            if merge_at == total_layers:
                net = L.ConcatLayer([net, act_in])

            q_layer = L.DenseLayer(
                net,
                num_units=1,
                W=output_W_init,
                b=output_b_init,
                nonlinearity=output_nonlinearity,
                name="output",
            )

            # Reshape the single-unit output to a one-dimensional tensor.
            q_values = L.get_output(q_layer, deterministic=True)
            q_flat = tf.reshape(q_values, (-1,))

            self._f_qval = tensor_utils.compile_function(
                [obs_in.input_var, act_in.input_var], q_flat)
            self._output_layer = q_layer
            self._obs_layer = obs_in
            self._action_layer = act_in
            self._output_nonlinearity = output_nonlinearity

            LayersPowered.__init__(self, [q_layer])
Exemplo n.º 12
0
    def __init__(
        self,
        name,
        env_spec,
        qmdp_param,
        feature_network=None,
        state_include_action=True,
    ):
        """Set up a recurrent categorical policy backed by a ``QMDPNetwork``.

        :param name: variable scope wrapping everything built here.
        :param env_spec: A spec for the env; its action space must be
            ``Discrete`` (asserted).
        :param qmdp_param: forwarded to ``QMDPNetwork``; its ``'num_state'``
            entry is stored as ``self.hidden_dim``.
        :param feature_network: optional network whose ``output_layer`` is
            reshaped to the two leading dimensions of the input and fed to
            the probability network in place of the raw input.
        :param state_include_action: when true the input width is the flat
            observation dimension plus the flat action dimension, otherwise
            the flat observation dimension only.
        :return:
        """

        with tf.variable_scope(name):
            assert isinstance(env_spec.action_space, Discrete)
            Serializable.quick_init(self, locals())
            super(QMDPPolicy, self).__init__(env_spec)

            self.qmdp_param = qmdp_param

            obs_dim = env_spec.observation_space.flat_dim
            action_dim = env_spec.action_space.flat_dim
            input_dim = obs_dim + action_dim if state_include_action else obs_dim

            seq_input = L.InputLayer(shape=(None, None, input_dim), name="input")

            if feature_network is not None:
                feature_dim = feature_network.output_layer.output_shape[-1]
                flat_feature_layer = feature_network.output_layer

                def _unflatten_feature(flat_feature, net_input):
                    # Restore the two leading dynamic dims of the input.
                    return tf.reshape(
                        flat_feature,
                        tf.stack([
                            tf.shape(net_input)[0],
                            tf.shape(net_input)[1], feature_dim
                        ]))

                def _unflattened_shape(_, net_input_shape):
                    return (net_input_shape[0], net_input_shape[1], feature_dim)

                feature_layer = L.OpLayer(
                    flat_feature_layer,
                    extras=[seq_input],
                    name="reshape_feature",
                    op=_unflatten_feature,
                    shape_op=_unflattened_shape)
            else:
                # No feature extractor: the raw input is the feature.
                feature_dim = input_dim
                flat_feature_layer = None
                feature_layer = seq_input

            prob_network = QMDPNetwork(
                input_shape=(feature_dim, ),
                input_layer=feature_layer,
                output_dim=env_spec.action_space.n,
                qmdp_param=qmdp_param,
                name="prob_network",
            )

            self.prob_network = prob_network
            self.feature_network = feature_network
            self.l_input = seq_input
            self.state_include_action = state_include_action

            flat_input_var = tf.placeholder(
                dtype=tf.float32,
                shape=(None, input_dim),
                name="flat_input",
            )
            if feature_network is not None:
                feature_var = L.get_output(
                    flat_feature_layer,
                    {feature_network.input_layer: flat_input_var})
            else:
                feature_var = flat_input_var

            # Single-step function: (flat input, previous state) ->
            # [step output, step hidden].
            step_outputs = L.get_output(
                [prob_network.step_output_layer, prob_network.step_hidden_layer],
                {prob_network.step_input_layer: feature_var})
            self.f_step_prob = tensor_utils.compile_function(
                [flat_input_var, prob_network.step_prev_state_layer.input_var],
                step_outputs)

            # Debug function exposing internals of the planner and filter
            # layers for the same inputs.
            self.debug = tensor_utils.compile_function(
                [flat_input_var, prob_network.step_prev_state_layer.input_var],
                [
                    self.prob_network._l_output_flat.R0,
                    self.prob_network._l_gru.z_os
                ])

            self.input_dim = input_dim
            self.action_dim = action_dim
            self.hidden_dim = qmdp_param['num_state']

            self.prev_actions = None
            self.prev_hiddens = None
            self.dist = RecurrentCategorical(env_spec.action_space.n)

            powered_layers = [prob_network.output_layer]
            if feature_network is not None:
                powered_layers.append(feature_network.output_layer)

            LayersPowered.__init__(self, powered_layers)
Exemplo n.º 13
0
    def __init__(self,
                 name,
                 input_dim,
                 output_dim,
                 hidden_sizes,
                 hidden_nonlinearity,
                 output_nonlinearity,
                 vocab_size,
                 embedding_size,
                 hidden_W_init=L.xavier_init,
                 hidden_b_init=tf.zeros_initializer,
                 output_W_init=L.xavier_init,
                 output_b_init=tf.zeros_initializer,
                 has_other_input=True,
                 input_var=None,
                 input_layer=None,
                 **kwargs):
        """Build a dense network with an optionally embedded input slice.

        Every layer is created inside ``tf.variable_scope(name)``.

        :param name: variable scope under which the layers are built
        :param input_dim: width of the flat input; used for the input layer
            shape and for the slice boundaries
        :param output_dim: number of units of the final dense layer
        :param hidden_sizes: iterable of hidden layer widths, in order
        :param hidden_nonlinearity: nonlinearity of every hidden layer
        :param output_nonlinearity: nonlinearity of the output layer
        :param vocab_size: number of trailing input columns handed to the
            embedding layer when ``has_other_input`` is true
        :param embedding_size: size argument passed to
            ``MeanPoolEmbeddingLayer``
        :param hidden_W_init: ``W`` argument of the hidden dense layers
        :param hidden_b_init: ``b`` argument of the hidden dense layers
        :param output_W_init: ``W`` argument of the output dense layer
        :param output_b_init: ``b`` argument of the output dense layer
        :param has_other_input: when true the input is split into a leading
            part and a trailing ``vocab_size`` part; the trailing part goes
            through the embedding layer and both are concatenated. When
            false the input feeds the hidden layers unchanged.
        :param input_var: forwarded to the input layer that is created when
            ``input_layer`` is None
        :param input_layer: existing layer to use as the network input
        :param kwargs: accepted but not used by this constructor
        """
        Serializable.quick_init(self, locals())

        with tf.variable_scope(name):
            l_in = input_layer
            if l_in is None:
                l_in = L.InputLayer(shape=(None, input_dim),
                                    input_var=input_var,
                                    name="input")

            if not has_other_input:
                l_hidden_input = l_in
            else:
                # Column index where the embedding part of the input starts.
                split_at = input_dim - vocab_size

                l_other_in = L.SliceLayer(l_in,
                                          "slice_other",
                                          slice(0, split_at),
                                          axis=-1)
                l_emb_in = L.SliceLayer(l_in,
                                        "slice_emb",
                                        slice(split_at, input_dim),
                                        axis=-1)

                # HACK: This is cheap with small embedding matrices but will
                # not scale well. Find a better way to look up from this
                # representation and mean-pool.
                l_embs = MeanPoolEmbeddingLayer(l_emb_in, "embeddings",
                                                embedding_size)

                l_hidden_input = L.ConcatLayer([l_other_in, l_embs], "merge")

            # Chain the dense layers, always feeding the most recent one.
            l_top = l_hidden_input
            for idx, width in enumerate(hidden_sizes):
                l_top = L.DenseLayer(l_top,
                                     num_units=width,
                                     nonlinearity=hidden_nonlinearity,
                                     name="hidden_%i" % idx,
                                     W=hidden_W_init,
                                     b=hidden_b_init)

            l_out = L.DenseLayer(l_top,
                                 num_units=output_dim,
                                 nonlinearity=output_nonlinearity,
                                 name="output",
                                 W=output_W_init,
                                 b=output_b_init)

            self.input_layer = l_in
            self.input_var = l_in.input_var
            self.output_layer = l_out

            LayersPowered.__init__(self, l_out)
Exemplo n.º 14
0
    def __init__(self,
                 name,
                 input_shape,
                 output_dim,
                 conv_filters,
                 conv_filter_sizes,
                 conv_strides,
                 conv_pads,
                 hidden_sizes,
                 hidden_nonlinearity,
                 output_nonlinearity,
                 hidden_W_init=L.XavierUniformInitializer(),
                 hidden_b_init=tf.zeros_initializer(),
                 output_W_init=L.XavierUniformInitializer(),
                 output_b_init=tf.zeros_initializer(),
                 input_var=None,
                 input_layer=None,
                 batch_normalization=False,
                 weight_normalization=False):
        """
        A network composed of several convolution layers followed by some
        fully connected layers.

        input_shape: (width,height,channel)
            HOWEVER, for 2-D and 3-D shapes the network input is a flattened
            vector that is reshaped before the convolutions (a 2-D shape
            first gets a leading axis of size 1). Any other rank is used as
            the input layer shape directly. Ignored when input_layer is
            given.
        conv_filters: a list of numbers of convolution kernel
        conv_filter_sizes: a list of sizes (int) of the convolution kernels
        conv_strides: a list of strides (int) of the conv kernels
        conv_pads: a list of pad formats (either 'SAME' or 'VALID')
        hidden_nonlinearity: a nonlinearity from tf.nn, shared by all conv
            and fc layers
        hidden_sizes: a list of numbers of hidden units for all fc layers
        output_nonlinearity: nonlinearity of the output layer; when it is
            L.spatial_expected_softmax no fc layers are allowed and
            output_dim must equal twice the last entry of conv_filters
        batch_normalization: wrap the (reshaped) input and every
            conv / fc / output layer with L.batch_norm
        weight_normalization: forwarded to every conv and dense layer
        """
        Serializable.quick_init(self, locals())

        def maybe_batch_norm(layer):
            # Wrap only when batch normalization was requested.
            return L.batch_norm(layer) if batch_normalization else layer

        with tf.variable_scope(name):
            if input_layer is not None:
                l_in = l_hid = input_layer
            elif len(input_shape) in (2, 3):
                if len(input_shape) == 2:
                    # Prepend a singleton axis; the flat size is unchanged.
                    input_shape = (1, ) + input_shape
                l_in = L.InputLayer(shape=(None, np.prod(input_shape)),
                                    input_var=input_var,
                                    name="input")
                l_hid = L.reshape(l_in, ([0], ) + input_shape,
                                  name="reshape_input")
            else:
                l_in = L.InputLayer(shape=(None, ) + input_shape,
                                    input_var=input_var,
                                    name="input")
                l_hid = l_in

            l_hid = maybe_batch_norm(l_hid)

            conv_specs = zip(conv_filters, conv_filter_sizes, conv_strides,
                             conv_pads)
            for idx, (n_filters, size, stride, pad) in enumerate(conv_specs):
                l_hid = L.Conv2DLayer(
                    l_hid,
                    num_filters=n_filters,
                    filter_size=size,
                    stride=(stride, stride),
                    pad=pad,
                    nonlinearity=hidden_nonlinearity,
                    name="conv_hidden_%d" % idx,
                    weight_normalization=weight_normalization,
                )
                l_hid = maybe_batch_norm(l_hid)

            if output_nonlinearity != L.spatial_expected_softmax:
                # Regular head: flatten, fc layers, dense output.
                l_hid = L.flatten(l_hid, name="conv_flatten")
                for idx, n_units in enumerate(hidden_sizes):
                    l_hid = L.DenseLayer(
                        l_hid,
                        num_units=n_units,
                        nonlinearity=hidden_nonlinearity,
                        name="hidden_%d" % idx,
                        W=hidden_W_init,
                        b=hidden_b_init,
                        weight_normalization=weight_normalization,
                    )
                    l_hid = maybe_batch_norm(l_hid)
                l_out = L.DenseLayer(
                    l_hid,
                    num_units=output_dim,
                    nonlinearity=output_nonlinearity,
                    name="output",
                    W=output_W_init,
                    b=output_b_init,
                    weight_normalization=weight_normalization,
                )
                l_out = maybe_batch_norm(l_out)
            else:
                # Spatial softmax head sits directly on the last layer.
                assert len(hidden_sizes) == 0
                assert output_dim == conv_filters[-1] * 2
                l_hid.nonlinearity = tf.identity
                l_out = L.SpatialExpectedSoftmaxLayer(l_hid)

            self._l_in = l_in
            self._l_out = l_out
            # self._input_var = l_in.input_var

        LayersPowered.__init__(self, l_out)
Exemplo n.º 15
0
    def make_network(self,
                     dim_input,
                     dim_output,
                     subnetwork_hidden_sizes,
                     nn_input=None,
                     target=None,
                     discriminator_options=None):
        """Build the gated multi-option discriminator graph and its metrics.

        One sub-network per option is created on a shared input layer, their
        outputs are concatenated and combined by a weighted sum using the
        gating network output. Loss, optimizer and evaluation metrics are
        stored on ``self``.

        :param dim_input: indexable pair; ``dim_input[0]`` must be 1 (single
            frame) and ``dim_input[1]`` is the flat input width
        :param dim_output: width of the ``targets`` placeholder created when
            ``target`` is None
        :param subnetwork_hidden_sizes: hidden sizes passed to every option
            sub-network and to the gating network
        :param nn_input: optional input tensor; a float placeholder named
            ``nn_input`` is created when None
        :param target: optional target tensor; a float placeholder named
            ``targets`` is created when None
        :param discriminator_options: optional list of already built option
            outputs. Missing options up to ``self.num_options`` are appended
            to this list in place. When None a fresh list is used.
        :raises NotImplementedError: if ``dim_input[0] != 1``

        Sets ``input_layer``, ``gating_network_out_layer``, ``class_target``,
        ``nn_input``, ``discriminator_options``,
        ``termination_softmax_logits``, ``discrimination_logits``,
        ``termination_importance_values``, ``loss``, ``optimizer``,
        ``label_accuracy``, ``mse``, ``label_precision`` and
        ``label_recall`` on ``self``.
        """
        if dim_input[0] != 1:
            raise NotImplementedError  # only allow 1 frame

        # A mutable default list would be shared by every call (and every
        # instance), so options built by an earlier call would leak into
        # later ones. Start from a fresh list instead.
        if discriminator_options is None:
            discriminator_options = []

        # create input layer
        if nn_input is None:
            nn_input = tf.placeholder('float', [None, dim_input[1]],
                                      name='nn_input')

        if target is None:
            target = tf.placeholder('float', [None, dim_output],
                                    name='targets')

        l_in = L.InputLayer(shape=(None, ) + tuple([dim_input[1]]),
                            input_var=nn_input)
        self.input_layer = l_in

        # Build only the options that were not supplied by the caller; the
        # range is empty when enough options already exist.
        for i in range(len(discriminator_options), self.num_options):
            subnet, _ = self._make_subnetwork(
                l_in,
                dim_output=1,
                hidden_sizes=subnetwork_hidden_sizes,
                output_nonlinearity=None,
                name="option%d" % i)
            discriminator_options.append(subnet)

        # only apply softmax if we're doing mixtures, if sparse mixtures or options, need to apply after sparsifying
        gating_network, self.gating_network_out_layer = self._make_gating_network(
            l_in, apply_softmax=True, hidden_sizes=subnetwork_hidden_sizes)

        # TODO: a better formulation is to have terminations be a latent variable that somehow sums to 1
        # TODO: can we combined these mixtures in interesting ways to train each other?

        # For example, can we have one net that takes into it pixels and another that takes in states, then we backprop
        # through the whole network using information from the states during training, but then drop that part of the network
        # and in that way keep information from the state and transfer it to the image inputs.
        # NOTE: if we don't do this and you see this in our code and decide to do it, please reach out to us first.

        # Get the top K options if using optiongan
        if not self.mixtures:
            print("Using options")
            top_k = 1
            indices = tf.nn.top_k(gating_network, k=top_k).indices
            # NOTE(review): `vec` (one-hot of the top-k gates) is built but
            # never applied, because the re-normalisation below is disabled.
            vec = tf.zeros(tf.shape(gating_network))
            for rank in range(top_k):
                vec += tf.reshape(
                    tf.one_hot(indices[:, rank],
                               tf.shape(gating_network)[1]),
                    tf.shape(gating_network))
            # v = tf.cast(vec == 0, vec.dtype) * -math.inf
            # gating_network = tf.nn.softmax(vec )

        self.class_target = target
        self.nn_input = nn_input
        self.discriminator_options = discriminator_options
        self.termination_softmax_logits = gating_network

        # Gate-weighted sum of the per-option outputs, one value per row.
        combined_options = tf.concat(discriminator_options, axis=1)
        self.discrimination_logits = tf.reshape(
            tf.reduce_sum(combined_options * gating_network, axis=1), [-1, 1])

        self.termination_importance_values = tf.reduce_sum(
            self.termination_softmax_logits, axis=0)

        self.loss, self.optimizer = self.get_loss_layer(
            pred=self.discrimination_logits, target_output=target)

        # #################
        # Metrics
        # #################

        # Hard 0/1 decisions derived from the logits and the targets.
        predicted_labels = tf.round(tf.nn.sigmoid(self.discrimination_logits))
        target_labels = tf.round(self.class_target)

        # accuracy
        label_accuracy = tf.equal(predicted_labels, target_labels)
        self.label_accuracy = tf.reduce_mean(
            tf.cast(label_accuracy, tf.float32))

        # error
        self.mse = tf.reduce_mean(
            tf.nn.l2_loss(
                tf.nn.sigmoid(self.discrimination_logits) - self.class_target))

        # precision and recall
        ones = tf.ones_like(self.class_target)
        true_positives = predicted_labels * target_labels
        predicted_positives = predicted_labels
        # A false negative is a positive target that was predicted negative.
        # (The previous not-xor expression counted every row where
        # prediction and target agreed, which inflated the denominator.)
        false_negatives = tf.logical_and(
            tf.logical_not(tf.equal(predicted_labels, ones)),
            tf.equal(target_labels, ones))
        self.label_precision = tf.reduce_sum(
            tf.cast(true_positives, tf.float32)) / tf.reduce_sum(
                tf.cast(predicted_positives, tf.float32))
        self.label_recall = tf.reduce_sum(tf.cast(
            true_positives, tf.float32)) / (
                tf.reduce_sum(tf.cast(true_positives, tf.float32)) +
                tf.reduce_sum(tf.cast(false_negatives, tf.float32)))
Exemplo n.º 16
0
    def __init__(
        self,
        name,
        env_spec,
        hidden_dim=32,
        feature_network=None,
        state_include_action=True,
        hidden_nonlinearity=tf.tanh,
        weight_normalization=False,
        layer_normalization=False,
        optimizer=None,
        # these are only used when computing predictions in batch
        batch_size=None,
        n_steps=None,
        log_loss_before=True,
        log_loss_after=True,
        moments_update_rate=0.9,
    ):
        """
        Recurrent (GRU) baseline that regresses normalized returns.

        :param name: variable scope under which the networks are built
        :param env_spec: A spec for the env.
        :param hidden_dim: dimension of hidden layer
        :param feature_network: optional network applied to the flattened
            input before the recurrent network; its output layer's last
            dimension becomes the recurrent input width
        :param state_include_action: if true the input width is
            obs_dim + action_dim, otherwise obs_dim
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param weight_normalization: accepted but not used by this constructor
        :param layer_normalization: accepted but not used by this constructor
        :param optimizer: optimizer exposing ``update_opt``; defaults to
            ``TBPTTOptimizer()``
        :param batch_size: stored on the instance as ``self.batch_size``
        :param n_steps: stored on the instance as ``self.n_steps``
        :param log_loss_before: stored on the instance
        :param log_loss_after: stored on the instance
        :param moments_update_rate: weight of the new batch statistics in
            the running return mean / std update
        :return:
        """
        Serializable.quick_init(self, locals())

        self.observation_space = env_spec.observation_space
        self.action_space = env_spec.action_space

        with tf.variable_scope(name):
            super(L2RNNBaseline, self).__init__(env_spec)

            obs_dim = env_spec.observation_space.flat_dim
            action_dim = env_spec.action_space.flat_dim

            if state_include_action:
                input_dim = obs_dim + action_dim
            else:
                input_dim = obs_dim

            l_input = L.InputLayer(shape=(None, None, input_dim), name="input")

            if feature_network is None:
                feature_dim = input_dim
                l_flat_feature = None
                l_feature = l_input
            else:
                feature_dim = feature_network.output_layer.output_shape[-1]
                l_flat_feature = feature_network.output_layer
                # Restore the two leading axes of the input on the flat
                # feature output. tf.stack replaces tf.pack, which was
                # removed in TensorFlow 1.0.
                l_feature = L.OpLayer(
                    l_flat_feature,
                    extras=[l_input],
                    name="reshape_feature",
                    op=lambda flat_feature, input: tf.reshape(
                        flat_feature,
                        tf.stack([
                            tf.shape(input)[0],
                            tf.shape(input)[1], feature_dim
                        ])),
                    shape_op=lambda _, input_shape:
                    (input_shape[0], input_shape[1], feature_dim))
            prediction_network = GRUNetwork(
                input_shape=(feature_dim, ),
                input_layer=l_feature,
                output_dim=1,
                hidden_dim=hidden_dim,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=None,
                name="prediction_network")
            self.prediction_network = prediction_network
            self.feature_network = feature_network
            self.l_input = l_input
            self.state_include_action = state_include_action

            # NOTE(review): flat_input_var / feature_var are built here but
            # not referenced again in this constructor.
            flat_input_var = tf.placeholder(dtype=tf.float32,
                                            shape=(None, input_dim),
                                            name="flat_input")
            if feature_network is None:
                feature_var = flat_input_var
            else:
                feature_var = L.get_output(
                    l_flat_feature,
                    {feature_network.input_layer: flat_input_var})

            self.input_dim = input_dim
            self.action_dim = action_dim
            self.hidden_dim = hidden_dim
            self.state_dim = prediction_network.state_dim
            self.batch_size = batch_size
            self.n_steps = n_steps

            self.prev_actions = None
            self.prev_states = None

            out_layers = [prediction_network.output_layer]
            if feature_network is not None:
                out_layers.append(feature_network.output_layer)

        if optimizer is None:
            optimizer = TBPTTOptimizer()

        self.optimizer = optimizer
        self.log_loss_before = log_loss_before
        self.log_loss_after = log_loss_after
        self.moments_update_rate = moments_update_rate

        state_input_var = tf.placeholder(tf.float32,
                                         (None, prediction_network.state_dim),
                                         "state")
        recurrent_state_output = dict()

        if feature_network is not None:
            # Merge the two leading axes so the feature network sees a 2-D
            # input (tf.stack replaces the removed tf.pack).
            predict_flat_input_var = tf.reshape(
                l_input.input_var,
                tf.stack((tf.shape(l_input.input_var)[0] *
                          tf.shape(l_input.input_var)[1],
                          tf.shape(l_input.input_var)[2])))
            layer_data = {feature_network.input_layer: predict_flat_input_var}
        else:
            layer_data = dict()

        # Prediction starting from an explicitly supplied recurrent state;
        # the per-step states are collected in recurrent_state_output.
        prediction_var = L.get_output(
            prediction_network.output_layer,
            layer_data,
            recurrent_state={
                prediction_network.recurrent_layer: state_input_var
            },
            recurrent_state_output=recurrent_state_output,
        )
        # Prediction without an explicitly supplied recurrent state.
        direct_prediction_var = L.get_output(prediction_network.output_layer,
                                             layer_data)

        state_output = recurrent_state_output[
            prediction_network.recurrent_layer]
        # Reverse along axis 1 and take index 0, i.e. the last entry on
        # that axis.
        final_state = tf.reverse(state_output, [1])[:, 0, :]

        return_var = tf.placeholder(dtype=tf.float32,
                                    shape=(None, None),
                                    name="return")
        valid_var = tf.placeholder(dtype=tf.float32,
                                   shape=(None, None),
                                   name="valid")

        # Running statistics used to normalize the regression targets.
        # np.float32 replaces np.cast[...], which NumPy 2.0 removed.
        return_mean_var = tf.Variable(
            np.float32(0.),
            name="return_mean",
        )
        return_std_var = tf.Variable(
            np.float32(1.),
            name="return_std",
        )

        normalized_return_var = (return_var - return_mean_var) / return_std_var

        residue = tf.reshape(prediction_var,
                             (-1, )) - tf.reshape(normalized_return_var,
                                                  (-1, ))

        # Squared error averaged over the entries weighted by valid_var.
        loss_var = tf.reduce_sum(
            tf.square(residue) * tf.reshape(valid_var,
                                            (-1, ))) / tf.reduce_sum(valid_var)

        # Predictions are mapped back to the unnormalized return scale.
        self.f_predict = tensor_utils.compile_function(
            inputs=[l_input.input_var],
            outputs=direct_prediction_var * return_std_var + return_mean_var,
        )
        self.f_predict_stateful = tensor_utils.compile_function(
            inputs=[l_input.input_var, state_input_var],
            outputs=[
                prediction_var * return_std_var + return_mean_var, final_state
            ],
        )

        return_mean_stats = tf.reduce_sum(
            return_var * valid_var) / tf.reduce_sum(valid_var)
        return_std_stats = tf.sqrt(
            tf.reduce_sum(tf.square(return_var - return_mean_var) * valid_var)
            / tf.reduce_sum(valid_var))

        # Blend the stored statistics with the batch statistics:
        # new = (1 - rate) * old + rate * batch.
        self.f_update_stats = tensor_utils.compile_function(
            inputs=[return_var, valid_var],
            outputs=[
                tf.assign(
                    return_mean_var,
                    (1 - self.moments_update_rate) * return_mean_var + \
                    self.moments_update_rate * return_mean_stats,
                ),
                tf.assign(
                    return_std_var,
                    (1 - self.moments_update_rate) * return_std_var + \
                    self.moments_update_rate * return_std_stats,
                )
            ]
        )

        self.return_mean_var = return_mean_var
        self.return_std_var = return_std_var
        LayersPowered.__init__(self, out_layers)

        self.optimizer.update_opt(
            loss=loss_var,
            target=self,
            inputs=[l_input.input_var, return_var, valid_var],
            rnn_state_input=state_input_var,
            rnn_final_state=final_state,
            rnn_init_state=prediction_network.state_init_param,
        )
Exemplo n.º 17
0
    def _make_mlp(self):
        """Build the alternating action/proposition module network.

        Creates a single flat input layer, splits it into mask, per-action
        extra data and proposition parts, stacks one action layer plus one
        proposition layer per entry of the weight manager's
        ``hidden_sizes``, adds a final single-unit action layer and applies
        ``masked_softmax`` with the action mask. The resulting layers are
        stored as ``self._l_in`` and ``self._l_out``.
        """
        hidden_sizes = self._weight_manager.hidden_sizes
        dom_meta = self._weight_manager.dom_meta
        prob_meta = self._prob_meta

        # input vector spec:
        #
        # |<--num_acts-->|<--k*num_acts-->|<--num_props-->|
        # | action mask  |  action data   | propositions  |
        #
        # 1) `action_mask` tells us whether actions are enabled
        # 2) `action_data` is passed straight to action modules
        # 3) `propositions` tells us what is and isn't true
        #
        # Reminder: this convoluted input shape is required solely because of
        # rllab inflexible input conventions (it can only take a single vector
        # per state).

        mask_size = prob_meta.num_acts
        extra_data_dim = self._weight_manager.extra_dim
        extra_size = extra_data_dim * prob_meta.num_acts
        prop_size = prob_meta.num_props
        in_dim = mask_size + extra_size + prop_size
        l_in = L.InputLayer(shape=(None, in_dim))
        l_mask = L.OpLayer(
            l_in,
            lambda inv: inv[:, :mask_size],
            lambda s: s[:1] + (mask_size, ) + s[2:],
            name='split/mask')

        def act_extra_inner(in_vec):
            act_vecs = in_vec[:, mask_size:mask_size + extra_size]
            # unflatten
            # inner_shape = tf.TensorShape(
            #     (prob_meta.num_acts, extra_data_dim))
            # out_shape = act_vecs.shape[:1] + inner_shape
            out_shape = (-1, prob_meta.num_acts, extra_data_dim)
            return tf.reshape(act_vecs, out_shape)

        def obs_inner(in_vec):
            prop_truth = in_vec[:, mask_size + extra_size:, None]
            # 1.0 for every ordered proposition that is a goal, else 0.0
            goal_vec = [
                float(prop in prob_meta.goal_props)
                for prop in prob_meta.bound_props_ordered
            ]

            assert sum(goal_vec) == len(prob_meta.goal_props)
            assert any(goal_vec), 'there are no goals?!'
            assert not all(goal_vec), 'every proposition is a goal?!'

            # apparently this broadcasts (hooray!)
            tf_goals = tf.constant(goal_vec)[None, :, None]
            batch_size = tf.shape(prop_truth)[0]
            tf_goals_broad = tf.tile(tf_goals, (batch_size, 1, 1))
            return tf.concat([prop_truth, tf_goals_broad], axis=2)

        # The shape lambda below closes over `prop_size`, so that name must
        # not be rebound later in this method (see the hidden-layer loop).
        l_obs = L.OpLayer(
            l_in,
            obs_inner,
            lambda s: s[:1] + (prop_size, 2),
            name='split/obs')
        pred_dict = self._split_input(l_obs)
        if extra_data_dim > 0:
            l_act_extra = L.OpLayer(
                l_in,
                act_extra_inner,
                lambda s: s[:1] + (prob_meta.num_acts, extra_data_dim),
                name='split/extra')
            extra_dict = self._split_extra(l_act_extra)
        else:
            extra_dict = None

        # hidden layers
        for hid_idx, hid_sizes in enumerate(hidden_sizes):
            # Distinct names on purpose: reusing `prop_size` here would
            # silently change what the 'split/obs' shape lambda returns,
            # because closures look the name up when they are called.
            hid_act_size, hid_prop_size = hid_sizes

            act_dict = {}
            for unbound_act in dom_meta.unbound_acts:
                act_dict[unbound_act] = self._make_action_module(
                    pred_dict,
                    unbound_act,
                    hid_act_size,
                    hid_idx,
                    l_in,
                    dropout=self.dropout,
                    norm_response=self.norm_response,
                    extra_dict=extra_dict)

            pred_dict = {}
            for pred_name in dom_meta.pred_names:
                pred_dict[pred_name] = self._make_prop_module(
                    act_dict,
                    pred_name,
                    hid_prop_size,
                    hid_idx,
                    l_in,
                    dropout=self.dropout,
                    norm_response=self.norm_response)

        # final (action) layer
        finals = {}
        for unbound_act in dom_meta.unbound_acts:
            finals[unbound_act] = self._make_action_module(
                pred_dict,
                unbound_act,
                1,
                len(hidden_sizes),
                l_in,
                nonlinearity=tf.identity,
                # can't have ANY dropout in final layer!
                dropout=0.0,
                # or normalisation
                norm_response=False,
                extra_dict=extra_dict)
        l_pre_softmax = self._merge_finals(finals)
        self._l_out = L.OpLayer(
            l_pre_softmax, masked_softmax, extras=[l_mask], name='l_out')
        self._l_in = l_in
Exemplo n.º 18
0
    def __init__(
        self,
        name,
        output_dim,
        hidden_sizes,
        hidden_nonlinearity,
        hidden_W_init=L.XavierUniformInitializer(),
        hidden_b_init=tf.zeros_initializer(),
        output_W_init=L.XavierUniformInitializer(),
        output_b_init=tf.zeros_initializer(),
        input_var=None,
        input_layer=None,
        input_shape=None,
        batch_normalization=False,
        weight_normalization=False,
    ):
        """Build an MLP whose output goes through ``masked_softmax``.

        The mask is sliced from the first ``output_dim`` entries of the
        last axis of the network input.

        :param name: variable scope under which the layers are built
        :param output_dim: number of output units, and width of the mask
        :param hidden_sizes: iterable of hidden layer widths, in order
        :param hidden_nonlinearity: nonlinearity of every hidden layer
        :param hidden_W_init: ``W`` argument of the hidden dense layers
        :param hidden_b_init: ``b`` argument of the hidden dense layers
        :param output_W_init: ``W`` argument of the output dense layer
        :param output_b_init: ``b`` argument of the output dense layer
        :param input_var: forwarded to the input layer that is created when
            ``input_layer`` is None
        :param input_layer: existing layer to use as the network input
        :param input_shape: per-sample input shape (tuple); required when
            ``input_layer`` is None
        :param batch_normalization: wrap the input and every dense layer
            with ``L.batch_norm``
        :param weight_normalization: forwarded to every dense layer
        """

        Serializable.quick_init(self, locals())

        def take_mask(inputs):
            # mask assumed to occupy first output_dim elements
            return inputs[..., :output_dim]

        def mask_shape(in_shape):
            return in_shape[:-1] + (output_dim, )

        def maybe_batch_norm(layer):
            return L.batch_norm(layer) if batch_normalization else layer

        with tf.variable_scope(name):
            if input_layer is not None:
                l_in = input_layer
            else:
                assert input_shape is not None, \
                    "input_layer or input_shape must be supplied"
                l_in = L.InputLayer(shape=(None, ) + input_shape,
                                    input_var=input_var,
                                    name="input")

            # `layers` aliases self._layers so both grow together.
            self._layers = layers = [l_in]

            l_hid = maybe_batch_norm(l_in)
            for idx, width in enumerate(hidden_sizes):
                dense = L.DenseLayer(l_hid,
                                     num_units=width,
                                     nonlinearity=hidden_nonlinearity,
                                     name="hidden_%d" % idx,
                                     W=hidden_W_init,
                                     b=hidden_b_init,
                                     weight_normalization=weight_normalization)
                l_hid = maybe_batch_norm(dense)
                layers.append(l_hid)

            l_out_raw = maybe_batch_norm(
                L.DenseLayer(l_hid,
                             num_units=output_dim,
                             name="output",
                             W=output_W_init,
                             b=output_b_init,
                             weight_normalization=weight_normalization))
            layers.append(l_out_raw)

            # The mask comes from the network input, not from a hidden layer.
            mask = L.OpLayer(l_in, take_mask, shape_op=mask_shape)
            layers.append(mask)

            l_out = L.OpLayer(l_out_raw, masked_softmax, extras=[mask])
            layers.append(l_out)

            self._l_in = l_in
            self._l_out = l_out
            # self._input_var = l_in.input_var
            self._output = L.get_output(l_out)

            LayersPowered.__init__(self, l_out)
Exemplo n.º 19
0
    def __init__(self,
                 env_spec,
                 name='Phinet',
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.relu,
                 action_merge_layer=-2,
                 output_nonlinearity=None,
                 bn=False):
        """Build a scalar-output network over (observation, action) pairs.

        Observation and action each go through their own dense layer of
        width ``hidden_sizes[0]``; the two results are added element-wise
        and passed through the remaining hidden layers and a single-unit
        output layer.

        :param env_spec: environment spec; its action space must not be
            discrete
        :param name: variable scope under which the layers are built
        :param hidden_sizes: hidden layer widths; the first entry is used
            for both the observation and the action branch
        :param hidden_nonlinearity: nonlinearity of every hidden layer
        :param action_merge_layer: normalized to a non-negative index below,
            but not used by the active wiring
        :param output_nonlinearity: nonlinearity of the output layer
        :param bn: apply ``batch_norm`` before every hidden layer after the
            first
        """
        Serializable.quick_init(self, locals())

        assert not env_spec.action_space.is_discrete
        self._env_spec = env_spec

        def add_branches(obs_branch, act_branch):
            return obs_branch + act_branch

        def keep_first_shape(obs_shape, act_shape):
            return obs_shape

        with tf.variable_scope(name):
            obs_dim = env_spec.observation_space.flat_dim
            l_obs = L.InputLayer(shape=(None, obs_dim), name="obs")
            act_dim = env_spec.action_space.flat_dim
            l_action = L.InputLayer(shape=(None, act_dim), name="action")

            n_layers = len(hidden_sizes) + 1

            # Map a possibly negative layer index to a non-negative one.
            # The variant that concatenated l_action at this index is
            # disabled, so the value is currently unused.
            action_merge_layer = (
                (action_merge_layer % n_layers + n_layers) % n_layers
                if n_layers > 1 else 1)

            # Observation normalization (RunningMeanStd / L.NormalizeLayer)
            # is disabled; raw observations feed the first layer.
            obz = l_obs
            first_width = hidden_sizes[0]
            obs_hidden = L.DenseLayer(obz,
                                      num_units=first_width,
                                      nonlinearity=hidden_nonlinearity,
                                      name="obs_h%d" % 0)

            act_hidden = L.DenseLayer(l_action,
                                      num_units=first_width,
                                      nonlinearity=hidden_nonlinearity,
                                      name="act_h%d" % 0)

            # Element-wise sum of the two branches; shape of the first.
            l_hidden = L.OpLayer(obs_hidden,
                                 op=add_branches,
                                 shape_op=keep_first_shape,
                                 extras=[act_hidden])

            for layer_no, width in enumerate(hidden_sizes[1:], 1):
                if bn:
                    l_hidden = batch_norm(l_hidden)

                l_hidden = L.DenseLayer(l_hidden,
                                        num_units=width,
                                        nonlinearity=hidden_nonlinearity,
                                        name="h%d" % layer_no)

            l_output = L.DenseLayer(l_hidden,
                                    num_units=1,
                                    nonlinearity=output_nonlinearity,
                                    name="output")

            # Flatten the (N, 1) output to a vector before compiling.
            output_var = tf.reshape(
                L.get_output(l_output, deterministic=True), (-1, ))

            self._f_phival = tensor_utils.compile_function(
                [l_obs.input_var, l_action.input_var], output_var)
            self._output_layer = l_output
            self._obs_layer = l_obs
            self._action_layer = l_action
            self.output_nonlinearity = output_nonlinearity

            LayersPowered.__init__(self, [l_output])
    def __init__(
            self,
            env_spec,
            name='qnet',
            hidden_sizes=(32, 32),
            hidden_nonlinearity=tf.nn.relu,
            action_merge_layer=-2,
            output_nonlinearity=None,
            # hidden_W_init=L.XavierUniformInitializer(),
            # hidden_b_init=L.ZerosInitializer(),
            # output_W_init=L.XavierUniformInitializer(),
            # output_b_init=L.ZerosInitializer(),
            c=1.0,  # temperature variable for stochastic policy
            bn=False):
        """Build an MLP that maps an observation to one Q-value per action.

        The network emits a vector with ``self._n`` entries. The scalar
        Q-value of an action is that vector multiplied element-wise with the
        action input and summed over axis 1.

        :param env_spec: A spec for the env; stored as ``self._env_spec`` and
            used to size the observation input.
        :param name: variable scope under which the layers are created.
        :param hidden_sizes: number of units of each hidden dense layer.
        :param hidden_nonlinearity: nonlinearity used for each hidden layer.
        :param action_merge_layer: not used by this constructor.
        :param output_nonlinearity: nonlinearity of the output layer.
        :param c: temperature variable for the stochastic policy, stored as
            ``self._c``.
        :param bn: if True, apply ``L.batch_norm`` before every hidden layer.
        """
        Serializable.quick_init(self, locals())

        # assert env_spec.action_space.is_discrete
        # NOTE(review): the number of actions is fixed to 2 rather than read
        # from env_spec -- confirm this is intended.
        self._n = 2
        self._c = c
        self._env_spec = env_spec

        with tf.variable_scope(name):
            l_obs = L.InputLayer(shape=(None,
                                        env_spec.observation_space.flat_dim),
                                 name="obs")
            # presumably a one-hot action encoding -- TODO confirm with callers
            l_action = L.InputLayer(shape=(None, self._n),
                                    var_type=tf.uint8,
                                    name="actions")

            l_hidden = l_obs

            for idx, size in enumerate(hidden_sizes):
                if bn:
                    l_hidden = L.batch_norm(l_hidden)

                l_hidden = L.DenseLayer(
                    l_hidden,
                    num_units=size,
                    # W=hidden_W_init,
                    # b=hidden_b_init,
                    nonlinearity=hidden_nonlinearity,
                    name="h%d" % (idx + 1))

            l_output_vec = L.DenseLayer(
                l_hidden,
                num_units=self._n,
                # W=output_W_init,
                # b=output_b_init,
                nonlinearity=output_nonlinearity,
                name="output")

            output_vec_var = L.get_output(l_output_vec, deterministic=True)

            # tf.cast is the supported replacement for the deprecated
            # tf.to_float and produces the same float32 tensor.
            output_var = tf.reduce_sum(
                output_vec_var * tf.cast(l_action.input_var, tf.float32), 1)

            self._f_qval = tensor_utils.compile_function(
                [l_obs.input_var, l_action.input_var], output_var)
            self._f_qval_vec = tensor_utils.compile_function([l_obs.input_var],
                                                             output_vec_var)
            self._output_vec_layer = l_output_vec
            self._obs_layer = l_obs
            self._action_layer = l_action
            self._output_nonlinearity = output_nonlinearity

            self.init_policy()

            LayersPowered.__init__(self, [l_output_vec])
    def make_network_image(self,
                           dim_input,
                           dim_output,
                           nn_input=None,
                           target=None):
        """
        Build a convolutional classifier over image inputs in tf.

        Two convolution + pooling stages are followed by two dense hidden
        layers and a linear output layer. Nothing is returned; the input and
        target tensors, logits, loss, optimizer and the accuracy / precision /
        recall / mse metrics are stored as attributes on ``self``.

        Args:
            dim_input: 2-element sequence. ``dim_input[0]`` must be 1;
                ``dim_input[1]`` is the image shape tuple the flat input is
                reshaped to.
            dim_output: Dimensionality of the output (number of logits).
            nn_input: Optional input tensor; obtained from
                ``get_input_layer_image`` when omitted.
            target: Optional target tensor; obtained from
                ``get_input_layer_image`` when omitted.
        Raises:
            Exception: if ``dim_input[0] != 1``.
        """
        if dim_input[0] != 1:
            raise Exception(
                "Currently don't support concatenating timesteps for images")

        #TODO: don't do this grossness
        if nn_input is None:
            nn_input, _ = self.get_input_layer_image(dim_input[1], dim_output)

        if target is None:
            _, target = self.get_input_layer_image(dim_input[1], dim_output)

        conv_filters = [5, 5]
        conv_filter_sizes = [3, 3]
        conv_pads = ['SAME', 'SAME']
        max_pool_sizes = [2, 2]
        conv_strides = [1, 1]
        hidden_sizes = [100, 100]
        hidden_nonlinearity = tf.nn.relu
        output_nonlinearity = None

        l_in = L.InputLayer(shape=tuple(nn_input.get_shape().as_list()),
                            input_var=nn_input)

        # [0] keeps the batch dimension of the incoming layer.
        l_hid = L.reshape(l_in, ([0], ) + dim_input[1], name="reshape_input")

        conv_specs = zip(conv_filters, conv_filter_sizes, conv_strides,
                         conv_pads, max_pool_sizes)
        for idx, (conv_filter, filter_size, stride, pad,
                  max_pool_size) in enumerate(conv_specs):
            l_hid = L.Conv2DLayer(
                l_hid,
                num_filters=conv_filter,
                filter_size=filter_size,
                stride=(stride, stride),
                pad=pad,
                nonlinearity=hidden_nonlinearity,
                name="conv_hidden_%d" % idx,
            )
            if max_pool_size is not None:
                l_hid = L.Pool2DLayer(l_hid, max_pool_size, pad="SAME")

        l_hid = L.flatten(l_hid, name="conv_flatten")

        for idx, hidden_size in enumerate(hidden_sizes):
            l_hid = L.DenseLayer(
                l_hid,
                num_units=hidden_size,
                nonlinearity=hidden_nonlinearity,
                name="hidden_%d" % idx,
            )

        fc_output = L.get_output(
            L.DenseLayer(
                l_hid,
                num_units=dim_output,
                nonlinearity=output_nonlinearity,
                name="output",
            ))

        loss, optimizer = self.get_loss_layer(pred=fc_output,
                                              target_output=target)

        self.class_target = target
        self.nn_input = nn_input
        self.discrimination_logits = fc_output
        self.optimizer = optimizer
        self.loss = loss

        # Hard 0/1 decisions for the prediction and the label.
        predicted = tf.round(tf.nn.sigmoid(self.discrimination_logits))
        actual = tf.round(self.class_target)

        self.label_accuracy = tf.reduce_mean(
            tf.cast(tf.equal(predicted, actual), tf.float32))
        # NOTE(review): tf.nn.l2_loss already reduces to a scalar
        # (sum(x ** 2) / 2), so this is not a per-element mean -- confirm
        # before relying on it as a true MSE.
        self.mse = tf.reduce_mean(
            tf.nn.l2_loss(
                tf.nn.sigmoid(self.discrimination_logits) - self.class_target))

        true_positives = predicted * actual
        # A false negative is a positive label that was predicted negative.
        # The previous NOT(XOR(...)) expression counted every agreement
        # (true positives and true negatives), which made recall wrong.
        false_negatives = (tf.ones_like(predicted) - predicted) * actual

        n_true_positives = tf.reduce_sum(tf.cast(true_positives, tf.float32))
        n_predicted_positives = tf.reduce_sum(tf.cast(predicted, tf.float32))
        n_false_negatives = tf.reduce_sum(
            tf.cast(false_negatives, tf.float32))

        self.label_precision = n_true_positives / n_predicted_positives
        self.label_recall = n_true_positives / (n_true_positives +
                                                n_false_negatives)
Exemplo n.º 22
0
    def __init__(self,
                 name,
                 output_dim,
                 hidden_sizes,
                 hidden_nonlinearity,
                 output_nonlinearity,
                 hidden_W_init=L.XavierUniformInitializer(),
                 hidden_b_init=tf.zeros_initializer(),
                 output_W_init=L.XavierUniformInitializer(),
                 output_b_init=tf.zeros_initializer(),
                 input_var=None,
                 input_layer=None,
                 input_shape=None,
                 batch_normalization=False,
                 weight_normalization=False,
                 latent_dim=0,
                 latent_shape=None,
                 obs_shape=None):
        """
        Build an MLP whose input is split into two slices along the last axis.

        The last ``latent_dim`` features go through a single dense layer; the
        remaining leading features go through the ``hidden_sizes`` stack. The
        two branches are summed element-wise and fed to the output layer.

        :param name: variable scope under which the layers are created.
        :param output_dim: number of units of the output layer.
        :param hidden_sizes: units of each hidden layer of the leading branch.
        :param hidden_nonlinearity: nonlinearity of every hidden layer.
        :param output_nonlinearity: nonlinearity of the output layer.
        :param input_var: variable for the input layer, used only when
            ``input_layer`` is None.
        :param input_layer: existing layer to use as input.
        :param input_shape: per-sample input shape, used only when
            ``input_layer`` is None.
        :param batch_normalization: wrap the layers in ``L.batch_norm``.
        :param weight_normalization: forwarded to every dense layer.
        :param latent_dim: number of trailing features taken as the latent
            slice.
        :param latent_shape: not used by this constructor.
        :param obs_shape: not used by this constructor.
        """

        Serializable.quick_init(self, locals())

        with tf.variable_scope(name):
            l_in = input_layer
            if l_in is None:
                l_in = L.InputLayer(shape=(None, ) + input_shape,
                                    input_var=input_var,
                                    name="input")

            # Split the input: trailing latent_dim features vs. the rest.
            latent_in = L.SliceLayer(l_in, slice(-latent_dim, None), axis=-1)
            obs_in = L.SliceLayer(l_in, slice(0, -latent_dim), axis=-1)

            # Arguments shared by every hidden dense layer of both branches.
            hidden_kwargs = dict(nonlinearity=hidden_nonlinearity,
                                 W=hidden_W_init,
                                 b=hidden_b_init,
                                 weight_normalization=weight_normalization)

            self._layers = layers = [obs_in]

            net = obs_in
            if batch_normalization:
                net = L.batch_norm(net)
            for idx, width in enumerate(hidden_sizes):
                net = L.DenseLayer(net,
                                   num_units=width,
                                   name="hidden_%d" % idx,
                                   **hidden_kwargs)
                if batch_normalization:
                    net = L.batch_norm(net)
                layers.append(net)

            # ``width`` is the loop variable left over from above, i.e. the
            # last hidden size, so both branches can be summed; this requires
            # a non-empty hidden_sizes.
            latent_net = L.DenseLayer(latent_in,
                                      num_units=width,
                                      name="hidden_latent_0",
                                      **hidden_kwargs)
            if batch_normalization:
                latent_net = L.batch_norm(latent_net)
            layers.append(latent_net)

            merged = L.ElemwiseSumLayer([net, latent_net])

            l_out = L.DenseLayer(merged,
                                 num_units=output_dim,
                                 nonlinearity=output_nonlinearity,
                                 name="output",
                                 W=output_W_init,
                                 b=output_b_init,
                                 weight_normalization=weight_normalization)
            if batch_normalization:
                l_out = L.batch_norm(l_out)
            layers.append(l_out)

            self._l_in = l_in
            self._l_out = l_out
            self._output = L.get_output(l_out)

            LayersPowered.__init__(self, l_out)
Exemplo n.º 23
0
    def __init__(self,
                 name,
                 env_spec,
                 hidden_dim=32,
                 feature_network=None,
                 prob_network=None,
                 state_include_action=True,
                 hidden_nonlinearity=tf.tanh,
                 forget_bias=1.0,
                 use_peepholes=False,
                 lstm_layer_cls=L.LSTMLayer):
        """
        Set up an LSTM policy over a discrete action space.

        :param name: variable scope under which everything is created.
        :param env_spec: A spec for the env; its action space must be
            ``Discrete``.
        :param hidden_dim: dimension of hidden layer
        :param feature_network: optional network whose output layer provides
            the per-step features fed to the LSTM.
        :param prob_network: optional pre-built probability network; an
            ``LSTMNetwork`` is created when omitted.
        :param state_include_action: if True, the input width is the
            observation dim plus the action dim.
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param forget_bias: forwarded to ``LSTMNetwork``.
        :param use_peepholes: forwarded to ``LSTMNetwork``.
        :param lstm_layer_cls: forwarded to ``LSTMNetwork``.
        :return:
        """
        with tf.variable_scope(name):
            assert isinstance(env_spec.action_space, Discrete)
            Serializable.quick_init(self, locals())
            super(CategoricalLSTMPolicy, self).__init__(env_spec)

            obs_dim = env_spec.observation_space.flat_dim
            action_dim = env_spec.action_space.flat_dim
            input_dim = (obs_dim + action_dim
                         if state_include_action else obs_dim)

            l_input = L.InputLayer(shape=(None, None, input_dim), name="input")

            if feature_network is not None:
                feature_dim = feature_network.output_layer.output_shape[-1]
                l_flat_feature = feature_network.output_layer

                def _unflatten(flat_feature, seq_input):
                    # Restore the two leading dims of the sequence input.
                    leading = tf.shape(seq_input)
                    return tf.reshape(
                        flat_feature,
                        tf.stack([leading[0], leading[1], feature_dim]))

                def _unflatten_shape(_, seq_input_shape):
                    return (seq_input_shape[0], seq_input_shape[1],
                            feature_dim)

                l_feature = L.OpLayer(l_flat_feature,
                                      op=_unflatten,
                                      shape_op=_unflatten_shape,
                                      extras=[l_input],
                                      name="reshape_feature")
            else:
                # Without a feature network the raw input is the feature.
                feature_dim = input_dim
                l_flat_feature = None
                l_feature = l_input

            if prob_network is None:
                prob_network = LSTMNetwork(
                    name="prob_network",
                    input_shape=(feature_dim, ),
                    input_layer=l_feature,
                    output_dim=env_spec.action_space.n,
                    hidden_dim=hidden_dim,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=tf.nn.softmax,
                    forget_bias=forget_bias,
                    use_peepholes=use_peepholes,
                    lstm_layer_cls=lstm_layer_cls)

            self.prob_network = prob_network
            self.feature_network = feature_network
            self.l_input = l_input
            self.state_include_action = state_include_action

            # Single-step path: a flat (non-sequence) input placeholder.
            flat_input_var = tf.placeholder(dtype=tf.float32,
                                            shape=(None, input_dim),
                                            name="flat_input")
            if feature_network is not None:
                feature_var = L.get_output(
                    l_flat_feature,
                    {feature_network.input_layer: flat_input_var})
            else:
                feature_var = flat_input_var

            step_inputs = [
                flat_input_var,
                prob_network.step_prev_state_layer.input_var,
            ]
            step_outputs = L.get_output(
                [
                    prob_network.step_output_layer,
                    prob_network.step_hidden_layer,
                    prob_network.step_cell_layer,
                ],
                {prob_network.step_input_layer: feature_var})
            self.f_step_prob = tensor_utils.compile_function(
                step_inputs, step_outputs)

            self.input_dim = input_dim
            self.action_dim = action_dim
            self.hidden_dim = hidden_dim

            self.prev_actions = None
            self.prev_hiddens = None
            self.prev_cells = None
            self.dist = RecurrentCategorical(env_spec.action_space.n)

            out_layers = [prob_network.output_layer]
            if feature_network is not None:
                out_layers.append(feature_network.output_layer)

            LayersPowered.__init__(self, out_layers)
Exemplo n.º 24
0
    def make_network(self,
                     dim_input,
                     dim_output,
                     nn_input=None,
                     target=None,
                     hidden_sizes=(50, )):
        """
        Build an MLP classifier in tf.

        Nothing is returned; the input and target tensors, logits, loss,
        optimizer and the accuracy / precision / recall / mse metrics are
        stored as attributes on ``self``.

        Args:
            dim_input: 2-element sequence giving the per-sample input shape.
            dim_output: Dimensionality of the output (number of logits).
            nn_input: Optional input tensor; a float placeholder of shape
                ``[None, dim_input[0], dim_input[1]]`` is created when omitted.
            target: Optional target tensor; a float placeholder of shape
                ``[None, dim_output]`` is created when omitted.
            hidden_sizes: Units of each hidden layer of the MLP.
        """

        if nn_input is None:
            nn_input = tf.placeholder('float',
                                      [None, dim_input[0], dim_input[1]],
                                      name='nn_input')

        if target is None:
            target = tf.placeholder('float', [None, dim_output],
                                    name='targets')

        l_in = L.InputLayer(shape=(None, ) + tuple(dim_input),
                            input_var=nn_input,
                            name="input")

        prob_network = MLP(output_dim=dim_output,
                           hidden_sizes=hidden_sizes,
                           hidden_nonlinearity=tf.nn.relu,
                           output_nonlinearity=None,
                           name="pred_network",
                           input_layer=l_in)

        fc_output = L.get_output(prob_network.output_layer)

        loss, optimizer = self.get_loss_layer(pred=fc_output,
                                              target_output=target)

        self.class_target = target
        self.nn_input = nn_input
        self.discrimination_logits = fc_output
        self.optimizer = optimizer
        self.loss = loss

        # Hard 0/1 decisions for the prediction and the label.
        predicted = tf.round(tf.nn.sigmoid(self.discrimination_logits))
        actual = tf.round(self.class_target)

        self.label_accuracy = tf.reduce_mean(
            tf.cast(tf.equal(predicted, actual), tf.float32))
        # NOTE(review): tf.nn.l2_loss already reduces to a scalar
        # (sum(x ** 2) / 2), so this is not a per-element mean -- confirm
        # before relying on it as a true MSE.
        self.mse = tf.reduce_mean(
            tf.nn.l2_loss(
                tf.nn.sigmoid(self.discrimination_logits) - self.class_target))

        true_positives = predicted * actual
        # A false negative is a positive label that was predicted negative.
        # The previous NOT(XOR(...)) expression counted every agreement
        # (true positives and true negatives), which made recall wrong.
        false_negatives = (tf.ones_like(predicted) - predicted) * actual

        n_true_positives = tf.reduce_sum(tf.cast(true_positives, tf.float32))
        n_predicted_positives = tf.reduce_sum(tf.cast(predicted, tf.float32))
        n_false_negatives = tf.reduce_sum(
            tf.cast(false_negatives, tf.float32))

        self.label_precision = n_true_positives / n_predicted_positives
        self.label_recall = n_true_positives / (n_true_positives +
                                                n_false_negatives)
Exemplo n.º 25
0
    def __init__(self,
                 name,
                 input_shape,
                 extra_input_shape,
                 output_dim,
                 hidden_sizes,
                 conv_filters,
                 conv_filter_sizes,
                 conv_strides,
                 conv_pads,
                 extra_hidden_sizes=None,
                 hidden_W_init=L.XavierUniformInitializer(),
                 hidden_b_init=tf.zeros_initializer(),
                 output_W_init=L.XavierUniformInitializer(),
                 output_b_init=tf.zeros_initializer(),
                 hidden_nonlinearity=tf.nn.relu,
                 output_nonlinearity=None,
                 input_var=None,
                 input_layer=None):
        """
        Build a network that merges a convolutional and a dense branch.

        The flat input is split in two: the first ``prod(input_shape)``
        features are reshaped to ``input_shape`` and go through the conv
        stack; the rest are reshaped to ``extra_input_shape`` and go through
        the ``extra_hidden_sizes`` dense stack. Both results are concatenated
        and passed through the ``hidden_sizes`` dense stack and the output
        layer.

        :param name: variable scope under which the layers are created.
        :param input_shape: shape tuple of the conv branch input.
        :param extra_input_shape: shape tuple of the dense branch input.
        :param output_dim: number of units of the output layer.
        :param hidden_sizes: units of each dense layer after the merge.
        :param conv_filters: number of filters of each conv layer.
        :param conv_filter_sizes: filter size of each conv layer.
        :param conv_strides: stride of each conv layer (used on both axes).
        :param conv_pads: padding of each conv layer.
        :param extra_hidden_sizes: units of each dense layer of the extra
            branch; None means no layers.
        :param input_var: variable for the input layer, used only when
            ``input_layer`` is None.
        :param input_layer: existing layer to use as input.
        """
        Serializable.quick_init(self, locals())

        if extra_hidden_sizes is None:
            extra_hidden_sizes = []

        with tf.variable_scope(name):

            input_flat_dim = np.prod(input_shape)
            extra_input_flat_dim = np.prod(extra_input_shape)
            total_input_flat_dim = input_flat_dim + extra_input_flat_dim

            l_in = input_layer
            if l_in is None:
                l_in = L.InputLayer(shape=(None, total_input_flat_dim),
                                    input_var=input_var,
                                    name="input")

            # Leading features feed the conv branch, the rest the extra one.
            conv_slice = L.SliceLayer(l_in,
                                      indices=slice(input_flat_dim),
                                      name="conv_slice")
            l_conv_hid = L.reshape(conv_slice, ([0], ) + input_shape,
                                   name="conv_reshaped")
            extra_slice = L.SliceLayer(l_in,
                                       indices=slice(input_flat_dim, None),
                                       name="extra_slice")
            l_extra_hid = L.reshape(extra_slice,
                                    ([0], ) + extra_input_shape,
                                    name="extra_reshaped")

            conv_specs = zip(conv_filters, conv_filter_sizes, conv_strides,
                             conv_pads)
            for idx, (n_filters, f_size, stride,
                      pad) in enumerate(conv_specs):
                l_conv_hid = L.Conv2DLayer(
                    l_conv_hid,
                    num_filters=n_filters,
                    filter_size=f_size,
                    stride=(stride, stride),
                    pad=pad,
                    nonlinearity=hidden_nonlinearity,
                    name="conv_hidden_%d" % idx,
                )

            # Arguments shared by every hidden dense layer.
            dense_kwargs = dict(nonlinearity=hidden_nonlinearity,
                                W=hidden_W_init,
                                b=hidden_b_init)

            for idx, width in enumerate(extra_hidden_sizes):
                l_extra_hid = L.DenseLayer(l_extra_hid,
                                           num_units=width,
                                           name="extra_hidden_%d" % idx,
                                           **dense_kwargs)

            conv_flat = L.flatten(l_conv_hid, name="conv_hidden_flat")
            l_joint_hid = L.concat([conv_flat, l_extra_hid],
                                   name="joint_hidden")

            for idx, width in enumerate(hidden_sizes):
                l_joint_hid = L.DenseLayer(l_joint_hid,
                                           num_units=width,
                                           name="joint_hidden_%d" % idx,
                                           **dense_kwargs)

            l_out = L.DenseLayer(l_joint_hid,
                                 num_units=output_dim,
                                 nonlinearity=output_nonlinearity,
                                 name="output",
                                 W=output_W_init,
                                 b=output_b_init)

            self._l_in = l_in
            self._l_out = l_out

            LayersPowered.__init__(self, [l_out], input_layers=[l_in])
Exemplo n.º 26
0
    def __init__(self,
                 name,
                 input_shape,
                 output_dim,
                 hidden_dims,
                 hidden_nonlinearity=tf.nn.relu,
                 output_nonlinearity=None,
                 input_var=None,
                 input_layer=None):
        """
        Build a stack of GRU layers with a dense output, plus a step version.

        Two graphs sharing parameters are created: an unrolled network over
        whole sequences, and a single-step network that takes one step of
        input together with the previous hidden state of every GRU layer.

        :param name: variable scope under which the layers are created.
        :param input_shape: per-step input shape tuple.
        :param output_dim: number of units of the output layer.
        :param hidden_dims: number of units of each stacked GRU layer.
        :param hidden_nonlinearity: nonlinearity used inside the GRU layers.
        :param output_nonlinearity: nonlinearity of the output layer.
        :param input_var: variable for the input layer, used only when
            ``input_layer`` is None.
        :param input_layer: existing layer to use as the sequence input.
        """
        with tf.variable_scope(name):
            if input_layer is None:
                l_in = L.InputLayer(shape=(None, None) + input_shape,
                                    input_var=input_var,
                                    name="input")
            else:
                l_in = input_layer

            l_step_input = L.InputLayer(shape=(None, ) + input_shape,
                                        name="step_input")
            l_step_prev_hiddens = [
                L.InputLayer(shape=(None, hidden_dim),
                             name="step_prev_hidden%i" % i)
                for i, hidden_dim in enumerate(hidden_dims)
            ]

            # Build the unrolled GRU network, which operates laterally, then
            # vertically
            below = l_in
            l_grus = []
            for i, hidden_dim in enumerate(hidden_dims):
                l_gru = L.GRULayer(below,
                                   num_units=hidden_dim,
                                   hidden_nonlinearity=hidden_nonlinearity,
                                   hidden_init_trainable=False,
                                   name="gru%i" % i)
                l_grus.append(l_gru)
                below = l_gru

            # Convert final hidden layer to flat representation
            l_gru_flat = L.ReshapeLayer(l_grus[-1],
                                        shape=(-1, hidden_dims[-1]),
                                        name="gru_flat")
            l_output_flat = L.DenseLayer(l_gru_flat,
                                         num_units=output_dim,
                                         nonlinearity=output_nonlinearity,
                                         name="output_flat")
            # Restore the two leading dims of the sequence input. tf.stack is
            # used because tf.pack was removed in TensorFlow 1.0.
            l_output = L.OpLayer(
                l_output_flat,
                op=lambda flat_output, l_input: tf.reshape(
                    flat_output,
                    tf.stack((tf.shape(l_input)[0], tf.shape(l_input)[1],
                              -1))),
                shape_op=lambda flat_output_shape, l_input_shape:
                (l_input_shape[0], l_input_shape[1], flat_output_shape[-1]),
                extras=[l_in],
                name="output")

            # Build a single step of the GRU network, which operates vertically
            # and is replicated laterally
            below = l_step_input
            l_step_hiddens = []
            for i, (l_gru,
                    prev_hidden) in enumerate(zip(l_grus,
                                                  l_step_prev_hiddens)):
                l_step_hidden = L.GRUStepLayer([below, prev_hidden],
                                               "step_hidden%i" % i, l_gru)
                l_step_hiddens.append(l_step_hidden)
                below = l_step_hidden

            # The step output reuses the weights of the unrolled output layer.
            l_step_output = L.DenseLayer(l_step_hiddens[-1],
                                         num_units=output_dim,
                                         nonlinearity=output_nonlinearity,
                                         W=l_output_flat.W,
                                         b=l_output_flat.b,
                                         name="step_output")

            self._l_in = l_in
            self._hid_inits = [l_gru.h0 for l_gru in l_grus]
            self._l_grus = l_grus
            self._l_out = l_output

            self._l_step_input = l_step_input
            self._l_step_prev_hiddens = l_step_prev_hiddens
            self._l_step_hiddens = l_step_hiddens
            self._l_step_output = l_step_output
Exemplo n.º 27
0
    def __init__(self,
                 env_spec,
                 name='qnet',
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.relu,
                 action_merge_layer=-2,
                 output_nonlinearity=None,
                 eqf_use_full_qf=False,
                 eqf_sample_size=1,
                 mqprop=False,
                 bn=False):
        """
        Build an MLP Q-function over (observation, action) pairs.

        The observation goes through the hidden stack; the action is
        concatenated to the activations right before the layer selected by
        ``action_merge_layer``. The single output unit is flattened to a
        vector.

        :param env_spec: A spec for the env; the action space must not be
            discrete.
        :param name: variable scope under which the layers are created.
        :param hidden_sizes: number of units of each hidden dense layer.
        :param hidden_nonlinearity: nonlinearity used for each hidden layer.
        :param action_merge_layer: index of the layer that receives the
            action as extra input. It is taken modulo
            ``len(hidden_sizes) + 1`` so negative values count from the end;
            the last index denotes the output layer.
        :param output_nonlinearity: nonlinearity of the output layer.
        :param eqf_use_full_qf: stored on the instance, not used here.
        :param eqf_sample_size: stored on the instance, not used here.
        :param mqprop: stored on the instance, not used here.
        :param bn: if True, apply ``batch_norm`` before every hidden layer.
        """
        Serializable.quick_init(self, locals())

        assert not env_spec.action_space.is_discrete
        self._env_spec = env_spec

        with tf.variable_scope(name):
            l_obs = L.InputLayer(shape=(None,
                                        env_spec.observation_space.flat_dim),
                                 name="obs")
            l_action = L.InputLayer(shape=(None,
                                           env_spec.action_space.flat_dim),
                                    name="actions")

            n_hidden = len(hidden_sizes)
            n_layers = n_hidden + 1

            if n_layers > 1:
                # Python's % is already non-negative for a positive modulus.
                action_merge_layer = action_merge_layer % n_layers
            else:
                action_merge_layer = 1

            l_hidden = l_obs

            for idx, size in enumerate(hidden_sizes):
                if bn:
                    l_hidden = batch_norm(l_hidden)

                if idx == action_merge_layer:
                    l_hidden = L.ConcatLayer([l_hidden, l_action])

                l_hidden = L.DenseLayer(l_hidden,
                                        num_units=size,
                                        nonlinearity=hidden_nonlinearity,
                                        name="h%d" % (idx + 1))

            # Merge into the output layer. The previous test
            # (action_merge_layer == n_layers) could never be true when hidden
            # layers exist, so action_merge_layer=-1 silently dropped the
            # action from the network. The loop above only merges for indices
            # below n_hidden, so this cannot merge twice.
            if action_merge_layer >= n_hidden:
                l_hidden = L.ConcatLayer([l_hidden, l_action])

            l_output = L.DenseLayer(l_hidden,
                                    num_units=1,
                                    nonlinearity=output_nonlinearity,
                                    name="output")

            output_var = L.get_output(l_output, deterministic=True)
            output_var = tf.reshape(output_var, (-1, ))

            self._f_qval = tensor_utils.compile_function(
                [l_obs.input_var, l_action.input_var], output_var)
            self._output_layer = l_output
            self._obs_layer = l_obs
            self._action_layer = l_action
            self._output_nonlinearity = output_nonlinearity

            self.eqf_use_full_qf = eqf_use_full_qf
            self.eqf_sample_size = eqf_sample_size
            self.mqprop = mqprop

            LayersPowered.__init__(self, [l_output])
Exemplo n.º 28
0
    def __init__(self,
                 env_spec,
                 name='QuadraticPhinet',
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.relu,
                 output_nonlinearity=None,
                 vs_form=None,
                 bn=False,
                 A=None,
                 init_a=1.0,
                 a_parameterization='exp'):
        """
        Build the layers of a quadratic phi network.

        An MLP over the observation (``self.fs``) ends in a layer of
        ``action_dim`` units, and a per-action-dimension parameter layer
        (``self._l_log_A``) is either taken from ``A`` or created here.

        :param env_spec: A spec for the env; the action space must not be
            discrete.
        :param name: variable scope under which the layers are created.
        :param hidden_sizes: units of each hidden layer; a final layer of
            ``action_dim`` units is appended to these.
        :param hidden_nonlinearity: nonlinearity used for every layer of the
            stack, including the appended one.
        :param output_nonlinearity: not used by this constructor.
        :param vs_form: must be None; any other value raises
            NotImplementedError.
        :param bn: if True, apply ``batch_norm`` before every dense layer.
        :param A: optional network whose ``output_layer`` is used as the
            parameter layer instead of creating one.
        :param init_a: initial value used to derive the parameter init.
        :param a_parameterization: 'exp' or 'softplus'.
        :raises NotImplementedError: for an unknown ``a_parameterization`` or
            a non-None ``vs_form``.
        """
        Serializable.quick_init(self, locals())

        assert not env_spec.action_space.is_discrete
        self._env_spec = env_spec
        self.vs_form = vs_form
        with tf.variable_scope(name):
            obs_dim = env_spec.observation_space.flat_dim
            action_dim = env_spec.action_space.flat_dim

            l_act = L.InputLayer(shape=(None, action_dim), name="action")
            action_var = l_act.input_var
            l_obs = L.InputLayer(shape=(None, obs_dim), name="obs")

            self.obs_rms = RunningMeanStd(shape=(obs_dim, ))

            # NOTE(review): the normalized observation layer is built but the
            # hidden stack below is fed the raw l_obs -- confirm whether obz
            # was meant to be the input.
            obz = L.NormalizeLayer(l_obs, rms=self.obs_rms)
            l_hidden = l_obs
            # Build a new tuple instead of `hidden_sizes += ...`, which would
            # extend a caller-supplied list in place.
            layer_sizes = tuple(hidden_sizes) + (action_dim, )

            for idx, size in enumerate(layer_sizes):
                if bn:
                    l_hidden = batch_norm(l_hidden)

                l_hidden = L.DenseLayer(l_hidden,
                                        num_units=size,
                                        nonlinearity=hidden_nonlinearity,
                                        name="h%d" % (idx + 1))

            obs_var = l_obs.input_var
            fs = l_hidden  # fs_network.output_layer

            if A is not None:
                # Previously bound to an unused name, which left l_log_A
                # undefined and crashed below whenever A was supplied.
                # NOTE(review): assumes A's output uses the same
                # parameterization as a locally created layer -- confirm.
                l_log_A = A.output_layer
            else:
                if a_parameterization == 'exp':
                    init_a_param = np.log(init_a) - .5
                elif a_parameterization == 'softplus':
                    init_a_param = np.log(np.exp(init_a) - 1)
                else:
                    raise NotImplementedError

                l_log_A = L.ParamLayer(
                    l_obs,
                    num_units=action_dim,
                    param=tf.constant_initializer(init_a_param),
                    name="diagonal_a_matrix",
                    trainable=True)
            if vs_form is not None:
                raise NotImplementedError

            self._l_log_A = l_log_A
            self.a_parameterization = a_parameterization
            self.fs = fs

            # vs_form is always None here (see the raise above), so only the
            # two layers are registered.
            LayersPowered.__init__(self, [self.fs, self._l_log_A])

            output_var = self.get_phival_sym(obs_var, action_var)

            self._f_phival = tensor_utils.compile_function(
                inputs=[obs_var, action_var], outputs=output_var)
Exemplo n.º 29
0
    def __init__(
        self,
        name,
        env_spec,
        hidden_dim=32,
        feature_network=None,
        state_include_action=True,
        hidden_nonlinearity=tf.tanh,
        learn_std=True,
        init_std=1.0,
        output_nonlinearity=None,
        lstm_layer_cls=L.LSTMLayer,
    ):
        """
        Build the policy's layers inside the variable scope ``name`` and
        compile the single-step function ``f_step_mean_std``.

        :param name: name of the TensorFlow variable scope
        :param env_spec: A spec for the env.
        :param hidden_dim: dimension of hidden layer
        :param feature_network: optional network whose output layer feeds the
            LSTM; when None the raw input is used
        :param state_include_action: if true, the input width is the
            observation dimension plus the action dimension
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param learn_std: whether the log-std parameter is trainable
        :param init_std: initial standard deviation (stored as its log)
        :param output_nonlinearity: nonlinearity of the mean network output
        :param lstm_layer_cls: LSTM layer class handed to the mean network
        :return:
        """
        with tf.variable_scope(name):
            Serializable.quick_init(self, locals())
            super(GaussianLSTMPolicy, self).__init__(env_spec)

            obs_dim = env_spec.observation_space.flat_dim
            action_dim = env_spec.action_space.flat_dim

            # The action dimension widens the input only on request.
            input_dim = (obs_dim + action_dim
                         if state_include_action else obs_dim)

            seq_input_layer = L.InputLayer(shape=(None, None, input_dim),
                                           name="input")

            if feature_network is not None:
                feature_dim = feature_network.output_layer.output_shape[-1]
                flat_feature_layer = feature_network.output_layer

                def unflatten_feature(flat_feature, seq_input):
                    # Restore the two leading axes of the sequence input on
                    # the flat feature tensor.
                    target_shape = tf.stack([
                        tf.shape(seq_input)[0],
                        tf.shape(seq_input)[1],
                        feature_dim,
                    ])
                    return tf.reshape(flat_feature, target_shape)

                def unflattened_shape(_, seq_input_shape):
                    return (seq_input_shape[0], seq_input_shape[1],
                            feature_dim)

                feature_layer = L.OpLayer(
                    flat_feature_layer,
                    op=unflatten_feature,
                    shape_op=unflattened_shape,
                    extras=[seq_input_layer],
                    name="reshape_feature")
            else:
                # No feature extractor: the LSTM consumes the input directly.
                feature_dim = input_dim
                flat_feature_layer = None
                feature_layer = seq_input_layer

            mean_net = LSTMNetwork(
                name="mean_network",
                input_shape=(feature_dim, ),
                input_layer=feature_layer,
                output_dim=action_dim,
                hidden_dim=hidden_dim,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=output_nonlinearity,
                lstm_layer_cls=lstm_layer_cls)

            log_std_layer = L.ParamLayer(
                mean_net.input_layer,
                num_units=action_dim,
                param=tf.constant_initializer(np.log(init_std)),
                name="output_log_std",
                trainable=learn_std)

            # The step-wise layer reuses the parameter of the layer above.
            step_log_std_layer = L.ParamLayer(
                mean_net.step_input_layer,
                num_units=action_dim,
                param=log_std_layer.param,
                name="step_output_log_std",
                trainable=learn_std)

            self.mean_network = mean_net
            self.feature_network = feature_network
            self.l_input = seq_input_layer
            self.state_include_action = state_include_action

            flat_input = tf.placeholder(dtype=tf.float32,
                                        shape=(None, input_dim),
                                        name="flat_input")
            if feature_network is not None:
                step_feature = L.get_output(
                    flat_feature_layer,
                    {feature_network.input_layer: flat_input})
            else:
                step_feature = flat_input

            step_inputs = [
                flat_input,
                mean_net.step_prev_state_layer.input_var,
            ]
            step_outputs = L.get_output(
                [
                    mean_net.step_output_layer,
                    step_log_std_layer,
                    mean_net.step_hidden_layer,
                    mean_net.step_cell_layer,
                ],
                {mean_net.step_input_layer: step_feature})
            self.f_step_mean_std = tensor_utils.compile_function(
                step_inputs, step_outputs)

            self.l_log_std = log_std_layer

            self.input_dim = input_dim
            self.action_dim = action_dim
            self.hidden_dim = hidden_dim

            # Per-rollout recurrent state starts out unset.
            self.prev_actions = None
            self.prev_hiddens = None
            self.prev_cells = None
            self.dist = RecurrentDiagonalGaussian(action_dim)

            powered_layers = [mean_net.output_layer, log_std_layer]
            if feature_network is not None:
                powered_layers.append(feature_network.output_layer)

            LayersPowered.__init__(self, powered_layers)
Exemplo n.º 30
0
    def __init__(self,
                 env_spec,
                 name='MLPPhinet',
                 hidden_sizes=(100, 100),
                 hidden_nonlinearity=tf.nn.relu,
                 action_merge_layer=-2,
                 output_nonlinearity=None,
                 vs_form=None,
                 bn=False):
        """Build the MLP phi network and compile its evaluation function.

        The observation (after ``NormalizeLayer``) and the action each pass
        through a dense layer of width ``hidden_sizes[0]``; the two results
        are summed, run through the remaining hidden layers and reduced to a
        single output unit. When ``vs_form`` is given, the output of an extra
        observation-only head is added to that output.

        :param env_spec: environment spec; the action space must not be
            discrete (asserted).
        :param name: name of the TensorFlow variable scope to build in.
        :param hidden_sizes: layer widths; the first entry sizes both input
            branches, the remaining entries size the layers after the merge.
        :param hidden_nonlinearity: nonlinearity of every hidden dense layer.
        :param action_merge_layer: accepted but not used by this constructor.
        :param output_nonlinearity: nonlinearity of the single output unit.
        :param vs_form: ``None``, ``'linear'`` or ``'mlp'``; selects the
            optional observation-only head.
        :param bn: if true, ``batch_norm`` is applied before each post-merge
            hidden layer.
        :raises NotImplementedError: for any other ``vs_form`` value.
        """
        Serializable.quick_init(self, locals())

        assert not env_spec.action_space.is_discrete
        self._env_spec = env_spec
        self.vs_form = vs_form
        with tf.variable_scope(name):
            obs_dim = env_spec.observation_space.flat_dim
            action_dim = env_spec.action_space.flat_dim

            l_obs = L.InputLayer(shape=(None, obs_dim), name="obs")
            l_action = L.InputLayer(shape=(None, action_dim), name="action")

            self.obs_rms = RunningMeanStd(shape=(obs_dim, ))

            normalized_obs = L.NormalizeLayer(l_obs,
                                              rms=self.obs_rms,
                                              clip_min=-5.,
                                              clip_max=5.)

            # Both input branches share the width of the first hidden layer
            # so that they can be added element-wise.
            first_width = hidden_sizes[0]
            obs_branch = L.DenseLayer(normalized_obs,
                                      num_units=first_width,
                                      nonlinearity=hidden_nonlinearity,
                                      name="obs_h%d" % 0)
            print("hidden sizes...", first_width, hidden_sizes[1:])
            act_branch = L.DenseLayer(l_action,
                                      num_units=first_width,
                                      nonlinearity=hidden_nonlinearity,
                                      name="act_h%d" % 0)

            def _sum_branches(obs_feat, act_feat):
                return obs_feat + act_feat

            def _shape_of_extra(_, act_feat_shape):
                return act_feat_shape

            trunk = L.OpLayer(obs_branch,
                              op=_sum_branches,
                              shape_op=_shape_of_extra,
                              extras=[act_branch])

            for layer_no, width in enumerate(hidden_sizes[1:], 1):
                if bn:
                    trunk = batch_norm(trunk)

                trunk = L.DenseLayer(trunk,
                                     num_units=width,
                                     nonlinearity=hidden_nonlinearity,
                                     name="h%d" % layer_no)

            l_output = L.DenseLayer(trunk,
                                    num_units=1,
                                    nonlinearity=output_nonlinearity,
                                    name="output")

            # Optional head that depends on the raw observation only.
            vs = None
            if vs_form == 'linear':
                vs = L.DenseLayer(l_obs,
                                  num_units=1,
                                  nonlinearity=None,
                                  name='vs')
            elif vs_form == 'mlp':
                vs_hidden = L.DenseLayer(l_obs,
                                         num_units=64,
                                         nonlinearity=tf.nn.relu,
                                         name='hidden_vs')
                vs = L.DenseLayer(vs_hidden,
                                  num_units=1,
                                  nonlinearity=None,
                                  name='vs')
            elif vs_form is not None:
                raise NotImplementedError

            output_var = L.get_output(l_output, deterministic=True)
            if vs_form is not None:
                output_var = output_var + L.get_output(vs,
                                                       deterministic=True)
            # Flatten to one value per row.
            output_var = tf.reshape(output_var, (-1, ))

            self._f_phival = tensor_utils.compile_function(
                inputs=[l_obs.input_var, l_action.input_var],
                outputs=output_var)
            self._output_layer = l_output
            self._obs_layer = l_obs
            self._action_layer = l_action
            self.output_nonlinearity = output_nonlinearity

            powered_layers = [l_output]
            if vs_form is not None:
                self._output_vs = vs
                powered_layers.append(self._output_vs)
            LayersPowered.__init__(self, powered_layers)