Пример #1
0
    def __init__(
            self,
            name,
            input_shape,
            output_dim,
            hidden_dim,
            hidden_nonlinearity=tf.nn.relu,
            lstm_layer_cls=L.LSTMLayer,
            output_nonlinearity=None,
            input_var=None,
            input_layer=None,
            forget_bias=1.0,
            use_peepholes=False,
            layer_args=None):
        """Wire up an LSTM layer followed by a dense read-out.

        Two views are built: one fed by the full input layer and one fed by
        the per-step input layers. The step read-out reuses the ``W`` and
        ``b`` of the sequence read-out.

        :param name: variable scope under which every layer is created
        :param input_shape: tuple appended to the leading ``None`` dimensions
            of the input layers
        :param output_dim: number of units of the dense output layers
        :param hidden_dim: number of LSTM units
        :param hidden_nonlinearity: forwarded to ``lstm_layer_cls``
        :param lstm_layer_cls: class used to build the recurrent layer
        :param output_nonlinearity: nonlinearity of the dense output layers
        :param input_var: forwarded to the input layer that is created when
            ``input_layer`` is None
        :param input_layer: existing layer to use instead of a new input layer
        :param forget_bias: forwarded to ``lstm_layer_cls``
        :param use_peepholes: forwarded to ``lstm_layer_cls``
        :param layer_args: optional dict of extra keyword arguments for
            ``lstm_layer_cls``
        """

        def restore_leading_dims(flat, reference):
            # The two leading sizes are read from the reference tensor at run
            # time; reshape infers the last axis.
            dim0 = tf.shape(reference)[0]
            dim1 = tf.shape(reference)[1]
            return tf.reshape(flat, tf.stack((dim0, dim1, -1)))

        def restored_shape(flat_shape, reference_shape):
            return (reference_shape[0], reference_shape[1], flat_shape[-1])

        with tf.variable_scope(name):
            sequence_in = input_layer
            if sequence_in is None:
                sequence_in = L.InputLayer(
                    shape=(None, None) + input_shape,
                    input_var=input_var,
                    name="input")
            step_in = L.InputLayer(
                shape=(None,) + input_shape, name="step_input")
            # holds the previous hidden and cell state side by side
            step_prev = L.InputLayer(
                shape=(None, hidden_dim * 2), name="step_prev_state")

            extra_kwargs = dict() if layer_args is None else layer_args
            recurrent = lstm_layer_cls(
                sequence_in,
                num_units=hidden_dim,
                hidden_nonlinearity=hidden_nonlinearity,
                hidden_init_trainable=False,
                name="lstm",
                forget_bias=forget_bias,
                cell_init_trainable=False,
                use_peepholes=use_peepholes,
                **extra_kwargs)
            recurrent_flat = L.ReshapeLayer(
                recurrent, shape=(-1, hidden_dim), name="lstm_flat")
            readout_flat = L.DenseLayer(
                recurrent_flat,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                name="output_flat")
            readout = L.OpLayer(
                readout_flat,
                op=restore_leading_dims,
                shape_op=restored_shape,
                extras=[sequence_in],
                name="output")

            # Single-step view: the state is split into its hidden half
            # (first hidden_dim columns) and cell half (the rest).
            step_state = recurrent.get_step_layer(
                step_in, step_prev, name="step_state")
            step_hidden = L.SliceLayer(
                step_state, indices=slice(hidden_dim), name="step_hidden")
            step_cell = L.SliceLayer(
                step_state, indices=slice(hidden_dim, None), name="step_cell")
            step_readout = L.DenseLayer(
                step_hidden,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                W=readout_flat.W,
                b=readout_flat.b,
                name="step_output")

            self._hidden_dim = hidden_dim
            self._l_in = sequence_in
            self._l_lstm = recurrent
            self._hid_init_param = recurrent.h0
            self._cell_init_param = recurrent.c0
            self._l_out = readout
            self._l_step_input = step_in
            self._l_step_prev_state = step_prev
            self._l_step_state = step_state
            self._l_step_hidden = step_hidden
            self._l_step_cell = step_cell
            self._l_step_output = step_readout
Пример #2
0
    def __init__(self,
                 name,
                 input_shape,
                 output_dim,
                 input_var=None,
                 input_layer=None,
                 qmdp_param=None):
        """Assemble a FilterLayer followed by a PlannerLayer in two views.

        One view is fed by the full input layer, the other by per-step input
        layers obtained through the layers' ``get_step_layer`` methods.

        :param name: variable scope under which every layer is created
        :param input_shape: tuple appended to the leading ``None`` dimensions
            of the input layers
        :param output_dim: not read by this constructor
        :param input_var: forwarded to the input layer that is created when
            ``input_layer`` is None
        :param input_layer: existing layer to use instead of a new input layer
        :param qmdp_param: mapping providing ``'grid_n'`` and ``'grid_m'``
            (their product is the hidden size); also forwarded to
            ``FilterLayer`` and ``PlannerLayer``. Required despite the
            ``None`` default.
        :raises TypeError: if ``qmdp_param`` is None
        """
        if qmdp_param is None:
            # Fail with a readable message instead of the
            # "'NoneType' object is not subscriptable" raised further down.
            raise TypeError(
                "qmdp_param is required to build the network, got None")

        with tf.variable_scope(name):
            # Computed once; used for the previous-state input and the
            # flattening reshape below.
            hidden_dim = qmdp_param['grid_n'] * qmdp_param['grid_m']
            if input_layer is None:
                l_in = L.InputLayer(shape=(None, None) + input_shape,
                                    input_var=input_var,
                                    name="input")
            else:
                l_in = input_layer

            l_step_input = L.InputLayer(shape=(None, ) + input_shape,
                                        name="step_input")
            l_step_prev_state = L.InputLayer(shape=(None, hidden_dim),
                                             name="step_prev_state")

            l_gru = FilterLayer(l_in, qmdp_param, name="qmdp_filter")

            l_gru_flat = L.ReshapeLayer(l_gru,
                                        shape=(-1, hidden_dim),
                                        name="gru_flat")
            l_output_flat = PlannerLayer(l_gru_flat,
                                         qmdp_param,
                                         name="output_flat")

            # Reshape the flat planner output so its two leading dimensions
            # match those of the input tensor (read at run time).
            l_output = L.OpLayer(
                l_output_flat,
                op=lambda flat_output, l_input: tf.reshape(
                    flat_output,
                    tf.stack(
                        (tf.shape(l_input)[0], tf.shape(l_input)[1], -1))),
                shape_op=lambda flat_output_shape, l_input_shape:
                (l_input_shape[0], l_input_shape[1], flat_output_shape[-1]),
                extras=[l_in],
                name="output")
            l_step_state = l_gru.get_step_layer(l_step_input,
                                                l_step_prev_state,
                                                name="step_state")
            # the whole step state doubles as the step hidden layer
            l_step_hidden = l_step_state
            l_step_output = l_output_flat.get_step_layer(l_step_hidden,
                                                         name="step_output")

            self._l_in = l_in
            self._hid_init_param = l_gru.h0
            self._l_gru = l_gru
            self._l_output_flat = l_output_flat
            self._l_out = l_output
            self._l_step_input = l_step_input
            self._l_step_prev_state = l_step_prev_state
            self._l_step_hidden = l_step_hidden
            self._l_step_state = l_step_state
            self._l_step_output = l_step_output
            self._hidden_dim = hidden_dim
Пример #3
0
    def _split_extra(self, extra_data):
        """Split the extra-input layer into one gather layer per schema.

        For every unbound action of the problem's domain, the ground actions
        of that schema are ordered by their schema sub-tensor index and an
        OpLayer is created that gathers the matching entries of
        ``extra_data`` along axis 1.

        :param extra_data: layer carrying the extra per-action input
        :return: dict mapping each unbound action to its ``L.OpLayer``
        """

        def gather_on_axis_one(indices):
            """Build the OpLayer op: a single tf.gather over `indices`."""

            def run_gather(v):
                return tf.gather(v, indices, axis=1)

            return run_gather

        def gathered_shape_for(indices):
            """Build the shape function matching gather_on_axis_one."""

            def compute_shape(s):
                return s[:1] + (len(indices), s[-1])

            return compute_shape

        meta = self._prob_meta
        layer_by_schema = {}
        for schema in meta.domain.unbound_acts:
            ordered_acts = sorted(
                meta.schema_to_acts(schema),
                key=meta.act_to_schema_subtensor_ind)
            if not ordered_acts:
                # XXX: make this something scarier
                print("no actions for schema %s?" % schema.schema_name)

            # Positions to read and concatenate from the input.
            # TODO: make this linear. If ground actions were guaranteed to be
            # stored contiguously and in sub-tensor order, this could become a
            # plain range (and be asserted as such).
            positions = [
                meta.bound_acts_ordered.index(act) for act in ordered_acts
            ]

            layer_by_schema[schema] = L.OpLayer(
                extra_data,
                gather_on_axis_one(positions),
                gathered_shape_for(positions),
                name='split_extra/%s' % schema.schema_name)

        return layer_by_schema
Пример #4
0
    def __init__(
            self,
            name,
            input_shape,
            output_dim,
            hidden_dim,
            hidden_nonlinearity=tf.nn.relu,
            gru_layer_cls=L.GRULayer,
            output_nonlinearity=None,
            input_var=None,
            input_layer=None,
            layer_args=None):
        """Wire up a GRU layer followed by a dense read-out.

        Two views are built: one fed by the full input layer and one fed by
        the per-step input layers. The step read-out reuses the ``W`` and
        ``b`` of the sequence read-out.

        :param name: variable scope under which every layer is created
        :param input_shape: tuple appended to the leading ``None`` dimensions
            of the input layers
        :param output_dim: number of units of the dense output layers
        :param hidden_dim: number of GRU units
        :param hidden_nonlinearity: forwarded to ``gru_layer_cls``
        :param gru_layer_cls: class used to build the recurrent layer
        :param output_nonlinearity: nonlinearity of the dense output layers
        :param input_var: forwarded to the input layer that is created when
            ``input_layer`` is None
        :param input_layer: existing layer to use instead of a new input layer
        :param layer_args: optional dict of extra keyword arguments for
            ``gru_layer_cls``
        """

        def restore_leading_dims(flat, reference):
            # The two leading sizes are read from the reference tensor at run
            # time; reshape infers the last axis.
            dim0 = tf.shape(reference)[0]
            dim1 = tf.shape(reference)[1]
            return tf.reshape(flat, tf.stack((dim0, dim1, -1)))

        def restored_shape(flat_shape, reference_shape):
            return (reference_shape[0], reference_shape[1], flat_shape[-1])

        with tf.variable_scope(name):
            sequence_in = input_layer
            if sequence_in is None:
                sequence_in = L.InputLayer(
                    shape=(None, None) + input_shape,
                    input_var=input_var,
                    name="input")
            step_in = L.InputLayer(
                shape=(None,) + input_shape, name="step_input")
            step_prev = L.InputLayer(
                shape=(None, hidden_dim), name="step_prev_state")

            extra_kwargs = dict() if layer_args is None else layer_args
            recurrent = gru_layer_cls(
                sequence_in,
                num_units=hidden_dim,
                hidden_nonlinearity=hidden_nonlinearity,
                hidden_init_trainable=False,
                name="gru",
                **extra_kwargs)
            recurrent_flat = L.ReshapeLayer(
                recurrent, shape=(-1, hidden_dim), name="gru_flat")
            readout_flat = L.DenseLayer(
                recurrent_flat,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                name="output_flat")
            readout = L.OpLayer(
                readout_flat,
                op=restore_leading_dims,
                shape_op=restored_shape,
                extras=[sequence_in],
                name="output")

            # Single-step view: the step state is used directly as the step
            # hidden layer.
            step_state = recurrent.get_step_layer(
                step_in, step_prev, name="step_state")
            step_hidden = step_state
            step_readout = L.DenseLayer(
                step_hidden,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                W=readout_flat.W,
                b=readout_flat.b,
                name="step_output")

            self._hidden_dim = hidden_dim
            self._l_in = sequence_in
            self._l_gru = recurrent
            self._hid_init_param = recurrent.h0
            self._l_out = readout
            self._l_step_input = step_in
            self._l_step_prev_state = step_prev
            self._l_step_state = step_state
            self._l_step_hidden = step_hidden
            self._l_step_output = step_readout
Пример #5
0
    def _merge_finals(self, final_acts: Dict[UnboundAction, Any]) -> Any:
        """Concatenate per-schema output layers and reorder them.

        The layers in ``final_acts`` are concatenated along axis 1 in sorted
        schema order. The result is then gathered so that entry ``i`` of
        axis 1 corresponds to ``prob_meta.bound_acts_ordered[i]``.

        :param final_acts: maps each unbound action to its final layer; the
            keys are sorted, so they must be orderable
        :return: an ``L.OpLayer`` whose op drops the last axis of the
            concatenated tensor and gathers along axis 1
        """
        prob_meta = self._prob_meta
        # we make a huge tensor of actions that we'll have to reorder
        sorted_final_acts = sorted(final_acts.items(), key=lambda t: t[0])
        # position of each schema inside the concatenated tensor
        unbound_to_super_ind = {
            unbound: idx
            for idx, (unbound, _) in enumerate(sorted_final_acts)
        }
        # indiv_sizes[i] is the number of bound acts associated with the i-th
        # schema
        indiv_sizes = [
            len(prob_meta.schema_to_acts(ub)) for ub, _ in sorted_final_acts
        ]
        # cumul_sizes[i] is the sum of the number of ground actions associated
        # with each action schema *before* the i-th schema
        cumul_sizes = np.cumsum([0] + indiv_sizes)
        # indices into the concatenated tensor, in bound_acts_ordered order
        gather_list = []
        for ground_act in prob_meta.bound_acts_ordered:
            subact_ind = prob_meta.act_to_schema_subtensor_ind(ground_act)
            superact_ind = unbound_to_super_ind[ground_act.prototype]
            actual_ind = cumul_sizes[superact_ind] + subact_ind
            assert 0 <= actual_ind < prob_meta.num_acts, \
                "action index %d for %r out of range [0, %d)" \
                % (actual_ind, ground_act, prob_meta.num_acts)
            gather_list.append(actual_ind)

        # Build the index array once rather than on every op invocation.
        gather_inds = np.array(gather_list)
        num_gathered = len(gather_list)

        # now let's actually build and reorder our huge tensor of action
        # selection probs
        cat_super_acts = L.ConcatLayer(
            [t[1] for t in sorted_final_acts], axis=1, name='merge_finals/cat')
        rv = L.OpLayer(
            incoming=cat_super_acts,
            # the [:, :, 0] drops the single dimension on the last axis
            op=lambda t: tf.gather(t[:, :, 0], gather_inds, axis=1),
            # The op output is rank 2: the leading dimension of the input
            # followed by one entry per gathered index. Returning the input
            # shape unchanged would wrongly keep the dropped last axis.
            shape_op=lambda s: tuple(s[:1]) + (num_gathered, ),
            name='merge_finals/reorder')

        return rv
Пример #6
0
    def __init__(
        self,
        name,
        env_spec,
        hidden_dim=32,
        feature_network=None,
        state_include_action=True,
        hidden_nonlinearity=tf.tanh,
        learn_std=True,
        init_std=1.0,
        output_nonlinearity=None,
        lstm_layer_cls=L.LSTMLayer,
    ):
        """
        :param name: variable scope under which the policy is built
        :param env_spec: A spec for the env.
        :param hidden_dim: dimension of hidden layer
        :param feature_network: optional network whose output layer replaces
            the raw input as the feature fed to the LSTM
        :param state_include_action: if true, the input dimension is the
            observation dimension plus the action dimension
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param learn_std: passed as ``trainable`` to the log-std layers
        :param init_std: its log is the constant initializer of the log-std
            parameter
        :param output_nonlinearity: forwarded to the mean network
        :param lstm_layer_cls: forwarded to the mean network
        :return:
        """
        with tf.variable_scope(name):
            Serializable.quick_init(self, locals())
            super(GaussianLSTMPolicy, self).__init__(env_spec)

            obs_dim = env_spec.observation_space.flat_dim
            action_dim = env_spec.action_space.flat_dim
            input_dim = (obs_dim + action_dim
                         if state_include_action else obs_dim)

            seq_input = L.InputLayer(shape=(None, None, input_dim),
                                     name="input")

            if feature_network is not None:
                feature_dim = feature_network.output_layer.output_shape[-1]
                flat_feature_layer = feature_network.output_layer

                def unflatten_feature(flat_feature, reference):
                    # leading two sizes come from the reference tensor at
                    # run time, the last one is the static feature size
                    return tf.reshape(
                        flat_feature,
                        tf.stack([
                            tf.shape(reference)[0],
                            tf.shape(reference)[1], feature_dim
                        ]))

                def unflattened_shape(_, reference_shape):
                    return (reference_shape[0], reference_shape[1],
                            feature_dim)

                feature_layer = L.OpLayer(flat_feature_layer,
                                          extras=[seq_input],
                                          name="reshape_feature",
                                          op=unflatten_feature,
                                          shape_op=unflattened_shape)
            else:
                feature_dim = input_dim
                flat_feature_layer = None
                feature_layer = seq_input

            mean_network = LSTMNetwork(input_shape=(feature_dim, ),
                                       input_layer=feature_layer,
                                       output_dim=action_dim,
                                       hidden_dim=hidden_dim,
                                       hidden_nonlinearity=hidden_nonlinearity,
                                       output_nonlinearity=output_nonlinearity,
                                       lstm_layer_cls=lstm_layer_cls,
                                       name="mean_network")

            log_std_layer = L.ParamLayer(
                mean_network.input_layer,
                num_units=action_dim,
                param=tf.constant_initializer(np.log(init_std)),
                name="output_log_std",
                trainable=learn_std,
            )

            # the step version reuses the parameter of the sequence version
            step_log_std_layer = L.ParamLayer(
                mean_network.step_input_layer,
                num_units=action_dim,
                param=log_std_layer.param,
                name="step_output_log_std",
                trainable=learn_std,
            )

            self.mean_network = mean_network
            self.feature_network = feature_network
            self.l_input = seq_input
            self.state_include_action = state_include_action

            flat_input_var = tf.placeholder(dtype=tf.float32,
                                            shape=(None, input_dim),
                                            name="flat_input")
            if feature_network is not None:
                feature_var = L.get_output(
                    flat_feature_layer,
                    {feature_network.input_layer: flat_input_var})
            else:
                feature_var = flat_input_var

            step_inputs = [
                flat_input_var,
                mean_network.step_prev_state_layer.input_var,
            ]
            step_outputs = L.get_output([
                mean_network.step_output_layer, step_log_std_layer,
                mean_network.step_hidden_layer,
                mean_network.step_cell_layer
            ], {mean_network.step_input_layer: feature_var})
            self.f_step_mean_std = tensor_utils.compile_function(
                step_inputs, step_outputs)

            self.l_log_std = log_std_layer

            self.input_dim = input_dim
            self.action_dim = action_dim
            self.hidden_dim = hidden_dim

            self.prev_actions = None
            self.prev_hiddens = None
            self.prev_cells = None
            self.dist = RecurrentDiagonalGaussian(action_dim)

            out_layers = [mean_network.output_layer, log_std_layer]
            if feature_network is not None:
                out_layers.append(feature_network.output_layer)

            LayersPowered.__init__(self, out_layers)
    def __init__(self,
                 name,
                 env_spec,
                 hidden_dim=32,
                 feature_network=None,
                 prob_network=None,
                 state_include_action=True,
                 hidden_nonlinearity=tf.tanh,
                 forget_bias=1.0,
                 use_peepholes=False,
                 lstm_layer_cls=L.LSTMLayer):
        """
        :param name: variable scope under which the policy is built
        :param env_spec: A spec for the env; its action space must be an
            instance of ``Discrete``.
        :param hidden_dim: dimension of hidden layer
        :param feature_network: optional network whose output layer replaces
            the raw input as the feature fed to the LSTM
        :param prob_network: network to use as-is; an ``LSTMNetwork`` is
            created when this is None
        :param state_include_action: if true, the input dimension is the
            observation dimension plus the action dimension
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param forget_bias: forwarded to the created ``LSTMNetwork``
        :param use_peepholes: forwarded to the created ``LSTMNetwork``
        :param lstm_layer_cls: forwarded to the created ``LSTMNetwork``
        :return:
        """
        with tf.variable_scope(name):
            assert isinstance(env_spec.action_space, Discrete)
            Serializable.quick_init(self, locals())
            super(CategoricalLSTMPolicy, self).__init__(env_spec)

            obs_dim = env_spec.observation_space.flat_dim
            action_dim = env_spec.action_space.flat_dim
            input_dim = (obs_dim + action_dim
                         if state_include_action else obs_dim)

            seq_input = L.InputLayer(shape=(None, None, input_dim),
                                     name="input")

            if feature_network is not None:
                feature_dim = feature_network.output_layer.output_shape[-1]
                flat_feature_layer = feature_network.output_layer

                def unflatten_feature(flat_feature, reference):
                    # leading two sizes come from the reference tensor at
                    # run time, the last one is the static feature size
                    return tf.reshape(
                        flat_feature,
                        tf.stack([
                            tf.shape(reference)[0],
                            tf.shape(reference)[1], feature_dim
                        ]))

                def unflattened_shape(_, reference_shape):
                    return (reference_shape[0], reference_shape[1],
                            feature_dim)

                feature_layer = L.OpLayer(flat_feature_layer,
                                          extras=[seq_input],
                                          name="reshape_feature",
                                          op=unflatten_feature,
                                          shape_op=unflattened_shape)
            else:
                feature_dim = input_dim
                flat_feature_layer = None
                feature_layer = seq_input

            if prob_network is None:
                prob_network = LSTMNetwork(
                    input_shape=(feature_dim, ),
                    input_layer=feature_layer,
                    output_dim=env_spec.action_space.n,
                    hidden_dim=hidden_dim,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=tf.nn.softmax,
                    forget_bias=forget_bias,
                    use_peepholes=use_peepholes,
                    lstm_layer_cls=lstm_layer_cls,
                    name="prob_network")

            self.prob_network = prob_network
            self.feature_network = feature_network
            self.l_input = seq_input
            self.state_include_action = state_include_action

            flat_input_var = tf.placeholder(dtype=tf.float32,
                                            shape=(None, input_dim),
                                            name="flat_input")
            if feature_network is not None:
                feature_var = L.get_output(
                    flat_feature_layer,
                    {feature_network.input_layer: flat_input_var})
            else:
                feature_var = flat_input_var

            # inputs: flat observation and the previous step state
            step_inputs = [
                flat_input_var,
                prob_network.step_prev_state_layer.input_var,
            ]
            step_outputs = L.get_output([
                prob_network.step_output_layer,
                prob_network.step_hidden_layer,
                prob_network.step_cell_layer
            ], {prob_network.step_input_layer: feature_var})
            self.f_step_prob = tensor_utils.compile_function(
                step_inputs, step_outputs)

            self.input_dim = input_dim
            self.action_dim = action_dim
            self.hidden_dim = hidden_dim

            self.prev_actions = None
            self.prev_hiddens = None
            self.prev_cells = None
            self.dist = RecurrentCategorical(env_spec.action_space.n)

            out_layers = [prob_network.output_layer]
            if feature_network is not None:
                out_layers.append(feature_network.output_layer)

            LayersPowered.__init__(self, out_layers)
Пример #8
0
    def __init__(
        self,
        name,
        env_spec,
        qmdp_param,
        feature_network=None,
        state_include_action=True,
    ):
        """
        :param name: variable scope under which the policy is built
        :param env_spec: A spec for the env; its action space must be an
            instance of ``Discrete``.
        :param qmdp_param: mapping forwarded to ``QMDPNetwork``; its
            ``'num_state'`` entry is stored as ``hidden_dim``
        :param feature_network: optional network whose output layer replaces
            the raw input as the feature fed to the QMDP network
        :param state_include_action: if true, the input dimension is the
            observation dimension plus the action dimension
        :return:
        """

        with tf.variable_scope(name):
            assert isinstance(env_spec.action_space, Discrete)
            Serializable.quick_init(self, locals())
            super(QMDPPolicy, self).__init__(env_spec)

            self.qmdp_param = qmdp_param

            obs_dim = env_spec.observation_space.flat_dim
            action_dim = env_spec.action_space.flat_dim
            input_dim = (obs_dim + action_dim
                         if state_include_action else obs_dim)

            seq_input = L.InputLayer(shape=(None, None, input_dim),
                                     name="input")

            if feature_network is not None:
                feature_dim = feature_network.output_layer.output_shape[-1]
                flat_feature_layer = feature_network.output_layer

                def unflatten_feature(flat_feature, reference):
                    # leading two sizes come from the reference tensor at
                    # run time, the last one is the static feature size
                    return tf.reshape(
                        flat_feature,
                        tf.stack([
                            tf.shape(reference)[0],
                            tf.shape(reference)[1], feature_dim
                        ]))

                def unflattened_shape(_, reference_shape):
                    return (reference_shape[0], reference_shape[1],
                            feature_dim)

                feature_layer = L.OpLayer(flat_feature_layer,
                                          extras=[seq_input],
                                          name="reshape_feature",
                                          op=unflatten_feature,
                                          shape_op=unflattened_shape)
            else:
                feature_dim = input_dim
                flat_feature_layer = None
                feature_layer = seq_input

            prob_network = QMDPNetwork(input_shape=(feature_dim, ),
                                       input_layer=feature_layer,
                                       output_dim=env_spec.action_space.n,
                                       qmdp_param=qmdp_param,
                                       name="prob_network")

            self.prob_network = prob_network
            self.feature_network = feature_network
            self.l_input = seq_input
            self.state_include_action = state_include_action

            flat_input_var = tf.placeholder(dtype=tf.float32,
                                            shape=(None, input_dim),
                                            name="flat_input")
            if feature_network is not None:
                feature_var = L.get_output(
                    flat_feature_layer,
                    {feature_network.input_layer: flat_input_var})
            else:
                feature_var = flat_input_var

            # step function: flat observation + previous state in,
            # step output + step hidden out
            step_outputs = L.get_output([
                prob_network.step_output_layer,
                prob_network.step_hidden_layer
            ], {prob_network.step_input_layer: feature_var})
            self.f_step_prob = tensor_utils.compile_function(
                [
                    flat_input_var,
                    prob_network.step_prev_state_layer.input_var
                ],
                step_outputs)

            # debugging function exposing internals of the planner and
            # filter layers for the same inputs
            debug_outputs = [
                self.prob_network._l_output_flat.R0,
                self.prob_network._l_gru.z_os
            ]
            self.debug = tensor_utils.compile_function(
                [
                    flat_input_var,
                    prob_network.step_prev_state_layer.input_var
                ],
                debug_outputs)

            self.input_dim = input_dim
            self.action_dim = action_dim
            self.hidden_dim = qmdp_param['num_state']

            self.prev_actions = None
            self.prev_hiddens = None
            self.dist = RecurrentCategorical(env_spec.action_space.n)

            out_layers = [prob_network.output_layer]
            if feature_network is not None:
                out_layers.append(feature_network.output_layer)

            LayersPowered.__init__(self, out_layers)
Пример #9
0
    def __init__(self,
                 env_spec,
                 name='MLPPhinet',
                 hidden_sizes=(100, 100),
                 hidden_nonlinearity=tf.nn.relu,
                 action_merge_layer=-2,
                 output_nonlinearity=None,
                 vs_form=None,
                 bn=False):
        """Build an MLP over (observation, action) with a scalar output.

        Observations are normalized and passed through a dense layer, actions
        through another dense layer of the same width; the two are summed and
        fed through the remaining hidden layers and a one-unit output layer.
        When ``vs_form`` is given, an observation-only term is added to the
        compiled output.

        :param env_spec: A spec for the env; its action space must not be
            discrete.
        :param name: variable scope under which the layers are created
        :param hidden_sizes: widths of the hidden layers; the first entry is
            used for both the observation and the action branch
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param action_merge_layer: not used by the layer construction below
        :param output_nonlinearity: nonlinearity of the output layer
        :param vs_form: None, ``'linear'`` or ``'mlp'``; any other value
            raises ``NotImplementedError``
        :param bn: if true, ``batch_norm`` is applied before each hidden layer
            after the merge
        """
        Serializable.quick_init(self, locals())

        assert not env_spec.action_space.is_discrete
        self._env_spec = env_spec
        self.vs_form = vs_form

        def add_branches(x, y):
            return x + y

        def second_shape(x, y):
            return y

        with tf.variable_scope(name):
            obs_dim = env_spec.observation_space.flat_dim
            action_dim = env_spec.action_space.flat_dim

            obs_layer = L.InputLayer(shape=(None, obs_dim), name="obs")
            action_layer = L.InputLayer(shape=(None, action_dim),
                                        name="action")

            self.obs_rms = RunningMeanStd(shape=(obs_dim, ))

            normalized_obs = L.NormalizeLayer(obs_layer,
                                              rms=self.obs_rms,
                                              clip_min=-5.,
                                              clip_max=5.)

            first_width = hidden_sizes[0]
            obs_branch = L.DenseLayer(normalized_obs,
                                      num_units=first_width,
                                      nonlinearity=hidden_nonlinearity,
                                      name="obs_h%d" % (0))
            print("hidden sizes...", hidden_sizes[0], hidden_sizes[1:])
            action_branch = L.DenseLayer(action_layer,
                                         num_units=first_width,
                                         nonlinearity=hidden_nonlinearity,
                                         name="act_h%d" % (0))
            hidden = L.OpLayer(obs_branch,
                               op=add_branches,
                               shape_op=second_shape,
                               extras=[action_branch])

            # remaining hidden layers are numbered from 1
            for depth, width in enumerate(hidden_sizes[1:], start=1):
                if bn:
                    hidden = batch_norm(hidden)

                hidden = L.DenseLayer(hidden,
                                      num_units=width,
                                      nonlinearity=hidden_nonlinearity,
                                      name="h%d" % depth)

            output_layer = L.DenseLayer(hidden,
                                        num_units=1,
                                        nonlinearity=output_nonlinearity,
                                        name="output")

            if vs_form is None:
                output_var = L.get_output(output_layer, deterministic=True)
            else:
                # the observation-only term reads the raw observation layer
                if vs_form == 'linear':
                    vs_layer = L.DenseLayer(obs_layer,
                                            num_units=1,
                                            nonlinearity=None,
                                            name='vs')
                elif vs_form == 'mlp':
                    vs_hidden = L.DenseLayer(obs_layer,
                                             num_units=64,
                                             nonlinearity=tf.nn.relu,
                                             name='hidden_vs')
                    vs_layer = L.DenseLayer(vs_hidden,
                                            num_units=1,
                                            nonlinearity=None,
                                            name='vs')
                else:
                    raise NotImplementedError

                output_var = L.get_output(output_layer, deterministic=True) + \
                    L.get_output(vs_layer, deterministic=True)
            output_var = tf.reshape(output_var, (-1, ))

            self._f_phival = tensor_utils.compile_function(
                inputs=[obs_layer.input_var, action_layer.input_var],
                outputs=output_var)
            self._output_layer = output_layer
            self._obs_layer = obs_layer
            self._action_layer = action_layer
            self.output_nonlinearity = output_nonlinearity

            if vs_form is None:
                LayersPowered.__init__(self, [output_layer])
            else:
                self._output_vs = vs_layer
                LayersPowered.__init__(self, [output_layer, self._output_vs])
Пример #10
0
 def make_gather_layer(inds_to_fetch, pred_name):
     """Return an OpLayer that gathers `inds_to_fetch` from `obs_layer`.

     `obs_layer` comes from the enclosing scope. The layer is named
     'split_input/<pred_name>' and selects the given indices along axis 1.
     """

     def gathered_shape(s):
         # A gather along axis 1 only replaces that axis with the number of
         # indices; every trailing axis keeps its size. Reusing s[1] (the
         # size of the gathered axis itself) as the trailing dimension would
         # declare a shape the op never produces.
         return s[:1] + (len(inds_to_fetch), ) + s[2:]

     return L.OpLayer(
         obs_layer,
         lambda v: tf.gather(v, inds_to_fetch, axis=1),
         gathered_shape,
         name='split_input/' + pred_name)
Пример #11
0
    def _make_mlp(self):
        """Assemble the network graph and store its input and output layers.

        A single flat input layer is split into an action mask, optional
        per-action extra data, and a proposition observation in which each
        proposition's truth value is paired with a goal flag.  One action
        module per unbound action and one proposition module per predicate
        are then created for every entry of the weight manager's
        ``hidden_sizes``, followed by a final set of action modules of size 1
        whose merged output is passed through ``masked_softmax`` together
        with the action mask.

        Sets ``self._l_in`` and ``self._l_out``; returns nothing.
        """
        hidden_sizes = self._weight_manager.hidden_sizes
        dom_meta = self._weight_manager.dom_meta
        prob_meta = self._prob_meta

        # input vector spec:
        #
        # |<--num_acts-->|<--k*num_acts-->|<--num_props-->|
        # | action mask  |  action data   | propositions  |
        #
        # 1) `action_mask` tells us whether actions are enabled
        # 2) `action_data` is passed straight to action modules
        # 3) `propositions` tells us what is and isn't true
        #
        # Reminder: this convoluted input shape is required solely because of
        # rllab inflexible input conventions (it can only take a single vector
        # per state).

        mask_size = prob_meta.num_acts
        extra_data_dim = self._weight_manager.extra_dim
        extra_size = extra_data_dim * prob_meta.num_acts
        prop_size = prob_meta.num_props
        in_dim = mask_size + extra_size + prop_size
        l_in = L.InputLayer(shape=(None, in_dim))
        l_mask = L.OpLayer(
            l_in,
            lambda inv: inv[:, :mask_size],
            lambda s: s[:1] + (mask_size, ) + s[2:],
            name='split/mask')

        def act_extra_inner(in_vec):
            # Slice out the flat action-data segment and unflatten it to
            # (batch, num_acts, extra_data_dim).
            act_vecs = in_vec[:, mask_size:mask_size + extra_size]
            out_shape = (-1, prob_meta.num_acts, extra_data_dim)
            return tf.reshape(act_vecs, out_shape)

        def obs_inner(in_vec):
            # Trailing segment of the input holds proposition truth values;
            # add a channel axis so a goal channel can be concatenated.
            prop_truth = in_vec[:, mask_size + extra_size:, None]
            goal_vec = [
                float(prop in prob_meta.goal_props)
                for prop in prob_meta.bound_props_ordered
            ]

            assert sum(goal_vec) == len(prob_meta.goal_props)
            assert any(goal_vec), 'there are no goals?!'
            assert not all(goal_vec), 'all propositions are goals?!'

            # Replicate the constant goal vector once per batch element so it
            # can be concatenated with the truth values on the channel axis.
            tf_goals = tf.constant(goal_vec)[None, :, None]
            batch_size = tf.shape(prop_truth)[0]
            tf_goals_broad = tf.tile(tf_goals, (batch_size, 1, 1))
            return tf.concat([prop_truth, tf_goals_broad], axis=2)

        # The shape lambda below closes over `prop_size`; the hidden-layer
        # loop further down deliberately uses different names so that this
        # closure keeps seeing the number of propositions.
        l_obs = L.OpLayer(
            l_in,
            obs_inner,
            lambda s: s[:1] + (prop_size, 2),
            name='split/obs')
        pred_dict = self._split_input(l_obs)
        if extra_data_dim > 0:
            l_act_extra = L.OpLayer(
                l_in,
                act_extra_inner,
                lambda s: s[:1] + (prob_meta.num_acts, extra_data_dim),
                name='split/extra')
            extra_dict = self._split_extra(l_act_extra)
        else:
            extra_dict = None

        # hidden layers
        for hid_idx, (hid_act_size, hid_prop_size) in enumerate(hidden_sizes):
            act_dict = {}
            for unbound_act in dom_meta.unbound_acts:
                act_dict[unbound_act] = self._make_action_module(
                    pred_dict,
                    unbound_act,
                    hid_act_size,
                    hid_idx,
                    l_in,
                    dropout=self.dropout,
                    norm_response=self.norm_response,
                    extra_dict=extra_dict)

            pred_dict = {}
            for pred_name in dom_meta.pred_names:
                pred_dict[pred_name] = self._make_prop_module(
                    act_dict,
                    pred_name,
                    hid_prop_size,
                    hid_idx,
                    l_in,
                    dropout=self.dropout,
                    norm_response=self.norm_response)

        # final (action) layer
        finals = {}
        for unbound_act in dom_meta.unbound_acts:
            finals[unbound_act] = self._make_action_module(
                pred_dict,
                unbound_act,
                1,
                len(hidden_sizes),
                l_in,
                nonlinearity=tf.identity,
                # can't have ANY dropout in final layer!
                dropout=0.0,
                # or normalisation
                norm_response=False,
                extra_dict=extra_dict)
        l_pre_softmax = self._merge_finals(finals)
        self._l_out = L.OpLayer(
            l_pre_softmax, masked_softmax, extras=[l_mask], name='l_out')
        self._l_in = l_in
Пример #12
0
    def __init__(
        self,
        name,
        output_dim,
        hidden_sizes,
        hidden_nonlinearity,
        hidden_W_init=L.XavierUniformInitializer(),
        hidden_b_init=tf.zeros_initializer(),
        output_W_init=L.XavierUniformInitializer(),
        output_b_init=tf.zeros_initializer(),
        input_var=None,
        input_layer=None,
        input_shape=None,
        batch_normalization=False,
        weight_normalization=False,
    ):
        """Build a dense network whose output goes through ``masked_softmax``.

        The mask handed to ``masked_softmax`` is the first ``output_dim``
        entries along the last axis of the network input.

        :param name: variable scope under which the layers are created.
        :param output_dim: number of units in the output layer, and width of
            the mask sliced from the input.
        :param hidden_sizes: iterable of hidden layer widths.
        :param hidden_nonlinearity: nonlinearity passed to each hidden layer.
        :param hidden_W_init: ``W`` argument for the hidden layers.
        :param hidden_b_init: ``b`` argument for the hidden layers.
        :param output_W_init: ``W`` argument for the output layer.
        :param output_b_init: ``b`` argument for the output layer.
        :param input_var: forwarded to the input layer when one is created.
        :param input_layer: existing layer to use as input; when ``None`` an
            input layer is built from ``input_shape``.
        :param input_shape: per-sample input shape (tuple); required when
            ``input_layer`` is ``None``.
        :param batch_normalization: if true, apply ``L.batch_norm`` to the
            input, to every hidden layer and to the raw output layer.
        :param weight_normalization: forwarded to every dense layer.
        """
        Serializable.quick_init(self, locals())

        def maybe_batch_norm(layer):
            # Wrap ``layer`` in batch normalisation only when it was requested.
            if batch_normalization:
                return L.batch_norm(layer)
            return layer

        with tf.variable_scope(name):
            if input_layer is not None:
                l_in = input_layer
            else:
                assert input_shape is not None, \
                    "input_layer or input_shape must be supplied"
                l_in = L.InputLayer(
                    shape=(None, ) + input_shape,
                    input_var=input_var,
                    name="input")

            self._layers = [l_in]

            # Hidden stack; only the (possibly normalised) result of each
            # stage is recorded in self._layers.
            current = maybe_batch_norm(l_in)
            for layer_idx, n_units in enumerate(hidden_sizes):
                dense = L.DenseLayer(
                    current,
                    num_units=n_units,
                    nonlinearity=hidden_nonlinearity,
                    name="hidden_%d" % layer_idx,
                    W=hidden_W_init,
                    b=hidden_b_init,
                    weight_normalization=weight_normalization)
                current = maybe_batch_norm(dense)
                self._layers.append(current)

            l_out_raw = maybe_batch_norm(
                L.DenseLayer(
                    current,
                    num_units=output_dim,
                    name="output",
                    W=output_W_init,
                    b=output_b_init,
                    weight_normalization=weight_normalization))
            self._layers.append(l_out_raw)

            # mask assumed to occupy first output_dim elements
            mask = L.OpLayer(
                l_in,
                lambda X: X[..., :output_dim],
                shape_op=lambda old_shape: old_shape[:-1] + (output_dim, ))
            self._layers.append(mask)

            l_out = L.OpLayer(l_out_raw, masked_softmax, extras=[mask])
            self._layers.append(l_out)

            self._l_in = l_in
            self._l_out = l_out
            self._output = L.get_output(l_out)

            LayersPowered.__init__(self, l_out)
Пример #13
0
    def __init__(self,
                 env_spec,
                 name='Phinet',
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.relu,
                 action_merge_layer=-2,
                 output_nonlinearity=None,
                 bn=False):
        """Build a network mapping (observation, action) to one scalar.

        Observation and action are each projected by a dense layer of
        ``hidden_sizes[0]`` units; the two projections are summed elementwise
        and fed through dense layers for the remaining ``hidden_sizes``,
        ending in a single-unit output layer.

        :param env_spec: environment spec; its action space must not be
            discrete.  The flat dimensions of its observation and action
            spaces size the two input layers.
        :param name: variable scope under which the layers are created.
        :param hidden_sizes: hidden layer widths; must contain at least one
            entry because the first one sizes the two input projections.
        :param hidden_nonlinearity: nonlinearity for every hidden layer.
        :param action_merge_layer: accepted for interface compatibility only;
            the action is always merged right after the first projection, so
            this value has no effect on the network that is built.
        :param output_nonlinearity: nonlinearity of the output layer.
        :param bn: if true, apply ``batch_norm`` before each hidden layer
            after the first.
        """
        Serializable.quick_init(self, locals())

        assert not env_spec.action_space.is_discrete
        self._env_spec = env_spec

        with tf.variable_scope(name):
            l_obs = L.InputLayer(shape=(None,
                                        env_spec.observation_space.flat_dim),
                                 name="obs")
            l_action = L.InputLayer(shape=(None,
                                           env_spec.action_space.flat_dim),
                                    name="action")

            # Separate first-layer projections for observation and action.
            obs_hidden = L.DenseLayer(l_obs,
                                      num_units=hidden_sizes[0],
                                      nonlinearity=hidden_nonlinearity,
                                      name="obs_h0")
            act_hidden = L.DenseLayer(l_action,
                                      num_units=hidden_sizes[0],
                                      nonlinearity=hidden_nonlinearity,
                                      name="act_h0")

            # Merge by elementwise sum; both projections have the same
            # width, so the output shape equals the observation branch's.
            l_hidden = L.OpLayer(obs_hidden,
                                 op=lambda x, y: x + y,
                                 shape_op=lambda x, y: x,
                                 extras=[act_hidden])

            for idx, size in enumerate(hidden_sizes[1:], start=1):
                if bn:
                    l_hidden = batch_norm(l_hidden)

                l_hidden = L.DenseLayer(l_hidden,
                                        num_units=size,
                                        nonlinearity=hidden_nonlinearity,
                                        name="h%d" % idx)

            l_output = L.DenseLayer(l_hidden,
                                    num_units=1,
                                    nonlinearity=output_nonlinearity,
                                    name="output")

            # Flatten the (batch, 1) output to a vector of length batch.
            output_var = L.get_output(l_output, deterministic=True)
            output_var = tf.reshape(output_var, (-1, ))

            self._f_phival = tensor_utils.compile_function(
                [l_obs.input_var, l_action.input_var], output_var)
            self._output_layer = l_output
            self._obs_layer = l_obs
            self._action_layer = l_action
            self.output_nonlinearity = output_nonlinearity

            LayersPowered.__init__(self, [l_output])
Пример #14
0
    def __init__(self,
                 name,
                 input_shape,
                 output_dim,
                 hidden_dims,
                 hidden_nonlinearity=tf.nn.relu,
                 output_nonlinearity=None,
                 input_var=None,
                 input_layer=None):
        """Build a stacked GRU network in unrolled and single-step form.

        The unrolled network consumes an input with two leading (unspecified)
        dimensions followed by ``input_shape`` and applies one ``L.GRULayer``
        per entry of ``hidden_dims`` followed by a dense output layer.  The
        single-step network reuses each GRU layer through ``L.GRUStepLayer``
        and shares ``W`` and ``b`` with the unrolled output layer.

        :param name: variable scope under which the layers are created.
        :param input_shape: per-step input shape (tuple).
        :param output_dim: number of output units.
        :param hidden_dims: sequence of GRU hidden sizes, one per stacked
            layer; must be non-empty because the last layer is indexed.
        :param hidden_nonlinearity: nonlinearity passed to each GRU layer.
        :param output_nonlinearity: nonlinearity of the output layers.
        :param input_var: forwarded to the input layer when one is created.
        :param input_layer: existing layer to use as the unrolled input; when
            ``None`` an input layer is built from ``input_shape``.
        """
        with tf.variable_scope(name):
            if input_layer is None:
                l_in = L.InputLayer(shape=(None, None) + input_shape,
                                    input_var=input_var,
                                    name="input")
            else:
                l_in = input_layer

            l_step_input = L.InputLayer(shape=(None, ) + input_shape,
                                        name="step_input")
            l_step_prev_hiddens = [
                L.InputLayer(shape=(None, hidden_dim),
                             name="step_prev_hidden%i" % i)
                for i, hidden_dim in enumerate(hidden_dims)
            ]

            # Build the unrolled GRU network, which operates laterally, then
            # vertically
            below = l_in
            l_grus = []
            for i, hidden_dim in enumerate(hidden_dims):
                l_gru = L.GRULayer(below,
                                   num_units=hidden_dim,
                                   hidden_nonlinearity=hidden_nonlinearity,
                                   hidden_init_trainable=False,
                                   name="gru%i" % i)
                l_grus.append(l_gru)
                below = l_gru

            # Convert final hidden layer to flat representation
            l_gru_flat = L.ReshapeLayer(l_grus[-1],
                                        shape=(-1, hidden_dims[-1]),
                                        name="gru_flat")
            l_output_flat = L.DenseLayer(l_gru_flat,
                                         num_units=output_dim,
                                         nonlinearity=output_nonlinearity,
                                         name="output_flat")
            # Restore the two leading dimensions of the original input.
            # tf.stack replaces tf.pack, which was removed in TensorFlow 1.0.
            l_output = L.OpLayer(
                l_output_flat,
                op=lambda flat_output, l_input: tf.reshape(
                    flat_output,
                    tf.stack((tf.shape(l_input)[0], tf.shape(l_input)[1], -1))),
                shape_op=lambda flat_output_shape, l_input_shape:
                (l_input_shape[0], l_input_shape[1], flat_output_shape[-1]),
                extras=[l_in],
                name="output")

            # Build a single step of the GRU network, which operates vertically
            # and is replicated laterally
            below = l_step_input
            l_step_hiddens = []
            for i, (l_gru,
                    prev_hidden) in enumerate(zip(l_grus,
                                                  l_step_prev_hiddens)):
                l_step_hidden = L.GRUStepLayer([below, prev_hidden],
                                               "step_hidden%i" % i, l_gru)
                l_step_hiddens.append(l_step_hidden)
                below = l_step_hidden

            # The step output shares its parameters with the unrolled output.
            l_step_output = L.DenseLayer(l_step_hiddens[-1],
                                         num_units=output_dim,
                                         nonlinearity=output_nonlinearity,
                                         W=l_output_flat.W,
                                         b=l_output_flat.b,
                                         name="step_output")

            self._l_in = l_in
            self._hid_inits = [l_gru.h0 for l_gru in l_grus]
            self._l_grus = l_grus
            self._l_out = l_output

            self._l_step_input = l_step_input
            self._l_step_prev_hiddens = l_step_prev_hiddens
            self._l_step_hiddens = l_step_hiddens
            self._l_step_output = l_step_output
Пример #15
0
    def __init__(
        self,
        name,
        env_spec,
        hidden_dim=32,
        feature_network=None,
        state_include_action=True,
        hidden_nonlinearity=tf.tanh,
        weight_normalization=False,
        layer_normalization=False,
        optimizer=None,
        # these are only used when computing predictions in batch
        batch_size=None,
        n_steps=None,
        log_loss_before=True,
        log_loss_after=True,
        moments_update_rate=0.9,
    ):
        """Build a GRU-based return predictor and its training functions.

        :param name: variable scope under which the networks are created.
        :param env_spec: A spec for the env.
        :param hidden_dim: dimension of hidden layer
        :param feature_network: optional network applied to the flattened
            input before the GRU; when ``None`` the raw input is used.
        :param state_include_action: if true, the input dimension is the
            observation dimension plus the action dimension.
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param weight_normalization: not referenced in this constructor.
        :param layer_normalization: not referenced in this constructor.
        :param optimizer: optimizer whose ``update_opt`` is called at the
            end; defaults to ``TBPTTOptimizer()``.
        :param batch_size: stored on the instance.
        :param n_steps: stored on the instance.
        :param log_loss_before: stored on the instance.
        :param log_loss_after: stored on the instance.
        :param moments_update_rate: weight given to the newest batch
            statistics in the running return mean / std update.
        :return:
        """
        Serializable.quick_init(self, locals())

        self.observation_space = env_spec.observation_space
        self.action_space = env_spec.action_space

        with tf.variable_scope(name):
            super(L2RNNBaseline, self).__init__(env_spec)

            obs_dim = env_spec.observation_space.flat_dim
            action_dim = env_spec.action_space.flat_dim

            if state_include_action:
                input_dim = obs_dim + action_dim
            else:
                input_dim = obs_dim

            l_input = L.InputLayer(shape=(None, None, input_dim), name="input")

            if feature_network is None:
                feature_dim = input_dim
                l_flat_feature = None
                l_feature = l_input
            else:
                feature_dim = feature_network.output_layer.output_shape[-1]
                l_flat_feature = feature_network.output_layer
                # Reshape the flat features back to the two leading
                # dimensions of the input.  tf.stack replaces tf.pack, which
                # was removed in TensorFlow 1.0.
                l_feature = L.OpLayer(
                    l_flat_feature,
                    extras=[l_input],
                    name="reshape_feature",
                    op=lambda flat_feature, seq_input: tf.reshape(
                        flat_feature,
                        tf.stack([
                            tf.shape(seq_input)[0],
                            tf.shape(seq_input)[1], feature_dim
                        ])),
                    shape_op=lambda _, seq_input_shape:
                    (seq_input_shape[0], seq_input_shape[1], feature_dim))
            prediction_network = GRUNetwork(
                input_shape=(feature_dim, ),
                input_layer=l_feature,
                output_dim=1,
                hidden_dim=hidden_dim,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=None,
                name="prediction_network")
            self.prediction_network = prediction_network
            self.feature_network = feature_network
            self.l_input = l_input
            self.state_include_action = state_include_action

            flat_input_var = tf.placeholder(dtype=tf.float32,
                                            shape=(None, input_dim),
                                            name="flat_input")
            # NOTE(review): feature_var is not used further in this
            # constructor; kept so graph construction is unchanged.
            if feature_network is None:
                feature_var = flat_input_var
            else:
                feature_var = L.get_output(
                    l_flat_feature,
                    {feature_network.input_layer: flat_input_var})

            self.input_dim = input_dim
            self.action_dim = action_dim
            self.hidden_dim = hidden_dim
            self.state_dim = prediction_network.state_dim
            self.batch_size = batch_size
            self.n_steps = n_steps

            self.prev_actions = None
            self.prev_states = None

            out_layers = [prediction_network.output_layer]
            if feature_network is not None:
                out_layers.append(feature_network.output_layer)

        if optimizer is None:
            optimizer = TBPTTOptimizer()

        self.optimizer = optimizer
        self.log_loss_before = log_loss_before
        self.log_loss_after = log_loss_after
        self.moments_update_rate = moments_update_rate

        state_input_var = tf.placeholder(tf.float32,
                                         (None, prediction_network.state_dim),
                                         "state")
        recurrent_state_output = dict()

        if feature_network is not None:
            # Collapse the two leading dimensions so the feature network
            # sees a 2-D input.
            predict_flat_input_var = tf.reshape(
                l_input.input_var,
                tf.stack((tf.shape(l_input.input_var)[0] *
                          tf.shape(l_input.input_var)[1],
                          tf.shape(l_input.input_var)[2])))
            layer_data = {feature_network.input_layer: predict_flat_input_var}
        else:
            layer_data = dict()

        # Prediction starting from an explicitly supplied recurrent state;
        # the per-step states are collected into recurrent_state_output.
        prediction_var = L.get_output(
            prediction_network.output_layer,
            layer_data,
            recurrent_state={
                prediction_network.recurrent_layer: state_input_var
            },
            recurrent_state_output=recurrent_state_output,
        )
        # Prediction without an explicitly supplied recurrent state.
        direct_prediction_var = L.get_output(prediction_network.output_layer,
                                             layer_data)

        state_output = recurrent_state_output[
            prediction_network.recurrent_layer]
        # Reverse along axis 1 and take index 0, i.e. the last entry on
        # that axis.
        final_state = tf.reverse(state_output, [1])[:, 0, :]

        return_var = tf.placeholder(dtype=tf.float32,
                                    shape=(None, None),
                                    name="return")
        valid_var = tf.placeholder(dtype=tf.float32,
                                   shape=(None, None),
                                   name="valid")

        # Running statistics used to normalise the regression targets.
        # np.float32 replaces np.cast['float32'], removed in NumPy 2.0.
        return_mean_var = tf.Variable(
            np.float32(0.),
            name="return_mean",
        )
        return_std_var = tf.Variable(
            np.float32(1.),
            name="return_std",
        )

        normalized_return_var = (return_var - return_mean_var) / return_std_var

        residue = tf.reshape(prediction_var,
                             (-1, )) - tf.reshape(normalized_return_var,
                                                  (-1, ))

        # Squared error weighted by valid_var, divided by the sum of
        # valid_var.
        loss_var = tf.reduce_sum(
            tf.square(residue) * tf.reshape(valid_var,
                                            (-1, ))) / tf.reduce_sum(valid_var)

        # Both prediction functions undo the target normalisation.
        self.f_predict = tensor_utils.compile_function(
            inputs=[l_input.input_var],
            outputs=direct_prediction_var * return_std_var + return_mean_var,
        )
        self.f_predict_stateful = tensor_utils.compile_function(
            inputs=[l_input.input_var, state_input_var],
            outputs=[
                prediction_var * return_std_var + return_mean_var, final_state
            ],
        )

        return_mean_stats = tf.reduce_sum(
            return_var * valid_var) / tf.reduce_sum(valid_var)
        # NOTE(review): the deviation is measured from the running mean
        # variable rather than from return_mean_stats -- confirm intended.
        return_std_stats = tf.sqrt(
            tf.reduce_sum(tf.square(return_var - return_mean_var) * valid_var)
            / tf.reduce_sum(valid_var))

        # Exponential moving average update of the return statistics.
        self.f_update_stats = tensor_utils.compile_function(
            inputs=[return_var, valid_var],
            outputs=[
                tf.assign(
                    return_mean_var,
                    (1 - self.moments_update_rate) * return_mean_var +
                    self.moments_update_rate * return_mean_stats,
                ),
                tf.assign(
                    return_std_var,
                    (1 - self.moments_update_rate) * return_std_var +
                    self.moments_update_rate * return_std_stats,
                )
            ]
        )

        self.return_mean_var = return_mean_var
        self.return_std_var = return_std_var
        LayersPowered.__init__(self, out_layers)

        self.optimizer.update_opt(
            loss=loss_var,
            target=self,
            inputs=[l_input.input_var, return_var, valid_var],
            rnn_state_input=state_input_var,
            rnn_final_state=final_state,
            rnn_init_state=prediction_network.state_init_param,
        )