def __init__(self,
             name,
             env_spec,
             hidden_dim=32,
             feature_network=None,
             prob_network=None,
             state_include_action=True,
             hidden_nonlinearity=tf.tanh,
             forget_bias=1.0,
             use_peepholes=False,
             lstm_layer_cls=L.LSTMLayer):
    """
    Build a recurrent categorical policy on top of an LSTM network.

    :param name: name of the TensorFlow variable scope in which everything
        below is constructed
    :param env_spec: A spec for the env. Its action space must be Discrete.
    :param hidden_dim: dimension of hidden layer; forwarded to the default
        LSTMNetwork and stored on the policy as ``hidden_dim``
    :param feature_network: optional network applied to the flat input; when
        given, its output layer is reshaped back to the first two dimensions
        of the input layer before being fed to the prob network
    :param prob_network: optional pre-built network; when None an LSTMNetwork
        is created from the remaining arguments
    :param state_include_action: if true the input width is
        obs_dim + action_dim, otherwise obs_dim
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
        (only used when the default LSTMNetwork is created)
    :param forget_bias: forwarded to the default LSTMNetwork
    :param use_peepholes: forwarded to the default LSTMNetwork
    :param lstm_layer_cls: forwarded to the default LSTMNetwork
    :return:
    """
    with tf.variable_scope(name):
        assert isinstance(env_spec.action_space, Discrete)
        # Receives the current locals(); kept ahead of every new local and of
        # the `prob_network` rebinding below so it sees the caller's arguments.
        Serializable.quick_init(self, locals())
        super(CategoricalLSTMPolicy, self).__init__(env_spec)

        obs_dim = env_spec.observation_space.flat_dim
        action_dim = env_spec.action_space.flat_dim
        input_dim = obs_dim + action_dim if state_include_action else obs_dim

        input_layer = L.InputLayer(shape=(None, None, input_dim), name="input")

        if feature_network is not None:
            feature_dim = feature_network.output_layer.output_shape[-1]
            flat_feature_layer = feature_network.output_layer

            # Parameter names (including `input`, which shadows the builtin)
            # are kept exactly as the layer callbacks were originally declared.
            def _unflatten_feature(flat_feature, input):
                # Reshape the flat features to the input's first two
                # dimensions plus the feature dimension.
                return tf.reshape(
                    flat_feature,
                    tf.stack([
                        tf.shape(input)[0],
                        tf.shape(input)[1],
                        feature_dim,
                    ]))

            def _unflattened_shape(_, input_shape):
                # Static counterpart of `_unflatten_feature`.
                return (input_shape[0], input_shape[1], feature_dim)

            feature_layer = L.OpLayer(
                flat_feature_layer,
                extras=[input_layer],
                name="reshape_feature",
                op=_unflatten_feature,
                shape_op=_unflattened_shape)
        else:
            # No feature network: the raw input is used as the feature.
            feature_dim = input_dim
            flat_feature_layer = None
            feature_layer = input_layer

        if prob_network is None:
            prob_network = LSTMNetwork(
                input_shape=(feature_dim, ),
                input_layer=feature_layer,
                output_dim=env_spec.action_space.n,
                hidden_dim=hidden_dim,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=tf.nn.softmax,
                forget_bias=forget_bias,
                use_peepholes=use_peepholes,
                lstm_layer_cls=lstm_layer_cls,
                name="prob_network")

        self.prob_network = prob_network
        self.feature_network = feature_network
        self.l_input = input_layer
        self.state_include_action = state_include_action

        flat_input = tf.placeholder(
            dtype=tf.float32, shape=(None, input_dim), name="flat_input")
        if feature_network is not None:
            step_feature = L.get_output(
                flat_feature_layer,
                {feature_network.input_layer: flat_input})
        else:
            step_feature = flat_input

        # NOTE(review): previously commented-out code fed separate
        # step_prev_hidden_layer / step_prev_cell_layer inputs; a single
        # step_prev_state_layer input is used instead.
        step_inputs = [
            flat_input,
            prob_network.step_prev_state_layer.input_var,
        ]
        step_outputs = L.get_output(
            [
                prob_network.step_output_layer,
                prob_network.step_hidden_layer,
                prob_network.step_cell_layer,
            ],
            {prob_network.step_input_layer: step_feature})
        self.f_step_prob = tensor_utils.compile_function(
            step_inputs, step_outputs)

        self.input_dim = input_dim
        self.action_dim = action_dim
        self.hidden_dim = hidden_dim

        # Rollout state; nothing is recorded at construction time.
        self.prev_actions = None
        self.prev_hiddens = None
        self.prev_cells = None
        self.dist = RecurrentCategorical(env_spec.action_space.n)

        powered_layers = [prob_network.output_layer]
        if feature_network is not None:
            powered_layers.append(feature_network.output_layer)

        LayersPowered.__init__(self, powered_layers)
def __init__(self,
             name,
             env_spec,
             qmdp_param,
             feature_network=None,
             state_include_action=True):
    """
    Build a recurrent categorical policy whose prob network is a QMDPNetwork.

    :param name: name of the TensorFlow variable scope in which everything
        below is constructed
    :param env_spec: A spec for the env. Its action space must be Discrete.
    :param qmdp_param: settings forwarded to QMDPNetwork and stored on the
        policy; its 'num_state' entry is stored as ``hidden_dim``
    :param feature_network: optional network applied to the flat input; when
        given, its output layer is reshaped back to the first two dimensions
        of the input layer before being fed to the prob network
    :param state_include_action: if true the input width is
        obs_dim + action_dim, otherwise obs_dim
    :return:
    """
    with tf.variable_scope(name):
        assert isinstance(env_spec.action_space, Discrete)
        # Receives the current locals(); kept ahead of every new local so it
        # sees only the caller's arguments.
        Serializable.quick_init(self, locals())
        super(QMDPPolicy, self).__init__(env_spec)

        self.qmdp_param = qmdp_param

        obs_dim = env_spec.observation_space.flat_dim
        action_dim = env_spec.action_space.flat_dim
        input_dim = obs_dim + action_dim if state_include_action else obs_dim

        input_layer = L.InputLayer(shape=(None, None, input_dim), name="input")

        if feature_network is not None:
            feature_dim = feature_network.output_layer.output_shape[-1]
            flat_feature_layer = feature_network.output_layer

            # Parameter names (including `input`, which shadows the builtin)
            # are kept exactly as the layer callbacks were originally declared.
            def _unflatten_feature(flat_feature, input):
                # Reshape the flat features to the input's first two
                # dimensions plus the feature dimension.
                return tf.reshape(
                    flat_feature,
                    tf.stack([
                        tf.shape(input)[0],
                        tf.shape(input)[1],
                        feature_dim,
                    ]))

            def _unflattened_shape(_, input_shape):
                # Static counterpart of `_unflatten_feature`.
                return (input_shape[0], input_shape[1], feature_dim)

            feature_layer = L.OpLayer(
                flat_feature_layer,
                extras=[input_layer],
                name="reshape_feature",
                op=_unflatten_feature,
                shape_op=_unflattened_shape)
        else:
            # No feature network: the raw input is used as the feature.
            feature_dim = input_dim
            flat_feature_layer = None
            feature_layer = input_layer

        prob_network = QMDPNetwork(
            input_shape=(feature_dim, ),
            input_layer=feature_layer,
            output_dim=env_spec.action_space.n,
            qmdp_param=qmdp_param,
            name="prob_network")

        self.prob_network = prob_network
        self.feature_network = feature_network
        self.l_input = input_layer
        self.state_include_action = state_include_action

        flat_input = tf.placeholder(
            dtype=tf.float32, shape=(None, input_dim), name="flat_input")
        if feature_network is not None:
            step_feature = L.get_output(
                flat_feature_layer,
                {feature_network.input_layer: flat_input})
        else:
            step_feature = flat_input

        # NOTE(review): previously commented-out code fed
        # step_prev_hidden_layer.input_var; step_prev_state_layer is used
        # instead.
        step_inputs = [
            flat_input,
            prob_network.step_prev_state_layer.input_var,
        ]
        step_outputs = L.get_output(
            [
                prob_network.step_output_layer,
                prob_network.step_hidden_layer,
            ],
            {prob_network.step_input_layer: step_feature})
        self.f_step_prob = tensor_utils.compile_function(
            step_inputs, step_outputs)

        # Second compiled function exposing two internal attributes of the
        # prob network for inspection.
        # NOTE(review): unlike f_step_prob, these outputs are passed as-is
        # rather than through L.get_output -- confirm they actually depend on
        # the listed inputs.
        debug_inputs = [
            flat_input,
            prob_network.step_prev_state_layer.input_var,
        ]
        debug_outputs = [
            self.prob_network._l_output_flat.R0,
            self.prob_network._l_gru.z_os,
        ]
        self.debug = tensor_utils.compile_function(debug_inputs, debug_outputs)

        self.input_dim = input_dim
        self.action_dim = action_dim
        self.hidden_dim = qmdp_param['num_state']

        # Rollout state; nothing is recorded at construction time.
        self.prev_actions = None
        self.prev_hiddens = None
        self.dist = RecurrentCategorical(env_spec.action_space.n)

        powered_layers = [prob_network.output_layer]
        if feature_network is not None:
            powered_layers.append(feature_network.output_layer)

        LayersPowered.__init__(self, powered_layers)