예제 #1
0
    def create_model(self, model_info):
        """Create Deep-Q CNN network.

        The network takes uint8 observations, rescales them to float32 in
        [0, 1] inside the graph, and applies three valid-padded ReLU
        convolutions, a 256-unit ReLU dense layer and a linear head with
        ``self.action_dim`` outputs.

        Besides building the Keras model, this method sets
        ``self.infer_state``, ``self.infer_v`` and ``self.actor_var`` and
        runs the variable initializer in ``self.sess``.

        Args:
            model_info: config object; only ``model_info.get("summary")`` is
                read, to decide whether the Keras summary is printed.

        Returns:
            The compiled Keras model (MSE loss, Adam with ``clipnorm=10``).
        """
        state = Input(shape=self.state_dim, dtype="uint8")
        # Normalize in-graph so the model can be fed raw uint8 observations.
        state1 = Lambda(lambda x: K.cast(x, dtype='float32') / 255.)(state)
        convlayer = Conv2D(32, (8, 8),
                           strides=(4, 4),
                           activation='relu',
                           padding='valid')(state1)
        convlayer = Conv2D(64, (4, 4),
                           strides=(2, 2),
                           activation='relu',
                           padding='valid')(convlayer)
        convlayer = Conv2D(64, (3, 3),
                           strides=(1, 1),
                           activation='relu',
                           padding='valid')(convlayer)
        flattenlayer = Flatten()(convlayer)
        denselayer = Dense(256, activation='relu')(flattenlayer)
        value = Dense(self.action_dim, activation='linear')(denselayer)
        model = Model(inputs=state, outputs=value)
        adam = Adam(lr=self.learning_rate, clipnorm=10.)
        model.compile(loss='mse', optimizer=adam)
        if model_info.get("summary"):
            model.summary()

        # Placeholder-fed inference path; calling the model on a new tensor
        # reuses the layers (and weights) built above.
        self.infer_state = tf.placeholder(tf.uint8,
                                          name="infer_input",
                                          shape=(None, ) +
                                          tuple(self.state_dim))
        self.infer_v = model(self.infer_state)
        # NOTE(review): TFVariables is defined elsewhere; presumably it tracks
        # the variables behind the given outputs -- confirm.
        self.actor_var = TFVariables([self.infer_v], self.sess)

        # tf.initialize_all_variables() is deprecated; this is its documented
        # drop-in replacement.
        self.sess.run(tf.global_variables_initializer())
        return model
예제 #2
0
def get_mlp_backbone(state_dim,
                     act_dim,
                     hidden_sizes,
                     activation,
                     vf_share_layers=False,
                     summary=False):
    """Assemble an MLP backbone with a policy head and a value head.

    Args:
        state_dim: shape passed to the ``obs`` Keras ``Input``.
        act_dim: number of units of the linear ``pi_latent`` head.
        hidden_sizes: hidden layer widths, forwarded to ``bulid_mlp_layers``.
        activation: hidden activation, forwarded to ``bulid_mlp_layers``.
        vf_share_layers: when truthy both heads sit on one shared hidden
            stack; otherwise each head gets its own stack.
        summary: when truthy, print the Keras model summary.

    Returns:
        A Keras ``Model`` mapping ``[obs]`` to ``[pi_latent, output_value]``.
    """
    obs = Input(shape=state_dim, name='obs')

    if vf_share_layers:
        # One hidden stack feeds both heads.
        trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'shared')
        policy_head = Dense(act_dim, activation=None, name='pi_latent')(trunk)
        value_head = Dense(1, activation=None, name='output_value')(trunk)
    else:
        # Independent stacks: the policy side is built first, then the
        # value side.
        pi_trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'pi')
        policy_head = Dense(act_dim, activation=None,
                            name='pi_latent')(pi_trunk)
        v_trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'v')
        value_head = Dense(1, activation=None,
                           name='output_value')(v_trunk)

    backbone = Model(inputs=[obs], outputs=[policy_head, value_head])
    if summary:
        backbone.summary()

    return backbone
예제 #3
0
    def create_model(self, model_info):
        """Create the CNN policy/value model and its TF inference ops.

        The model has two inputs (uint8 ``state_input`` and a one-column
        ``adv`` input) and two outputs: a softmax ``output_actions`` head
        with ``self.action_dim`` units and a one-unit ``output_value`` head.
        It is compiled with ``impala_loss(advantage)`` for the action head
        and MSE (weight 0.5) for the value head.

        The method also sets ``self.infer_state``, ``self.adv``,
        ``self.infer_p`` and ``self.infer_v`` and runs the variable
        initializer in ``self.sess``.

        Args:
            model_info: not read by this method.

        Returns:
            The compiled Keras model.
        """
        state_input = Input(shape=self.state_dim, name='state_input', dtype='uint8')
        # NOTE(review): layer_function is defined elsewhere; presumably it
        # casts/rescales the uint8 input -- confirm.
        state_input_1 = Lambda(layer_function)(state_input)
        # Not connected to any layer; it is only handed to the policy loss.
        advantage = Input(shape=(1, ), name='adv')

        convlayer = Conv2D(32, (8, 8), strides=(4, 4), activation='relu', padding='valid')(state_input_1)
        convlayer = Conv2D(64, (4, 4), strides=(2, 2), activation='relu', padding='valid')(convlayer)
        convlayer = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='valid')(convlayer)
        flattenlayer = Flatten()(convlayer)
        denselayer = Dense(256, activation='relu')(flattenlayer)

        out_actions = Dense(self.action_dim, activation='softmax', name='output_actions')(denselayer)
        out_value = Dense(1, name='output_value')(denselayer)
        model = Model(inputs=[state_input, advantage], outputs=[out_actions, out_value])
        # Keys must match the output layer names given above.
        losses = {"output_actions": impala_loss(advantage), "output_value": 'mse'}
        lossweights = {"output_actions": 1.0, "output_value": .5}

        decay_value = 0.00000000512
        model.compile(optimizer=Adam(lr=LR, clipnorm=40., decay=decay_value), loss=losses, loss_weights=lossweights)

        # Placeholder-fed inference path. The model declares `adv` as an
        # input, so a tensor has to be supplied for it here as well.
        self.infer_state = tf.placeholder(tf.uint8, name="infer_state",
                                          shape=(None,) + tuple(self.state_dim))
        self.adv = tf.placeholder(tf.float32, name="adv", shape=(None, 1))
        self.infer_p, self.infer_v = model([self.infer_state, self.adv])
        # tf.initialize_all_variables() is deprecated; this is its documented
        # drop-in replacement.
        self.sess.run(tf.global_variables_initializer())

        return model
예제 #4
0
    def create_model(self, model_info):
        """Create keras model.

        Builds an MLP of ``NUM_LAYERS`` ReLU dense layers (at least one) of
        width ``HIDDEN_SIZE`` with a softmax ``output_actions`` head and a
        one-unit ``output_value`` head, compiled with
        ``impala_loss(advantage)`` for the action head and MSE (weight 0.5)
        for the value head.

        The method also sets ``self.infer_state``, ``self.adv``,
        ``self.infer_p``, ``self.infer_v`` and ``self.actor_var`` and runs
        the variable initializer in ``self.sess``.

        Args:
            model_info: not read by this method.

        Returns:
            The compiled Keras model with inputs ``[state_input, adv]``.
        """
        state_input = Input(shape=self.state_dim, name='state_input')
        # Not connected to any layer; it is only handed to the policy loss.
        advantage = Input(shape=(1, ), name='adv')

        denselayer = Dense(HIDDEN_SIZE, activation='relu')(state_input)
        for _ in range(NUM_LAYERS - 1):
            denselayer = Dense(HIDDEN_SIZE, activation='relu')(denselayer)

        out_actions = Dense(self.action_dim,
                            activation='softmax',
                            name='output_actions')(denselayer)  # y_pred
        out_value = Dense(1, name='output_value')(denselayer)
        model = Model(inputs=[state_input, advantage],
                      outputs=[out_actions, out_value])
        # Keys must match the output layer names given above.
        losses = {
            "output_actions": impala_loss(advantage),
            "output_value": 'mse'
        }
        lossweights = {"output_actions": 1.0, "output_value": .5}

        model.compile(optimizer=Adam(lr=LR),
                      loss=losses,
                      loss_weights=lossweights)

        # Placeholder-fed inference path. The model declares `adv` as an
        # input, so a tensor has to be supplied for it here as well.
        self.infer_state = tf.placeholder(tf.float32,
                                          name="infer_state",
                                          shape=(None, ) +
                                          tuple(self.state_dim))
        self.adv = tf.placeholder(tf.float32, name="adv", shape=(None, 1))
        self.infer_p, self.infer_v = model([self.infer_state, self.adv])
        # NOTE(review): TFVariables is defined elsewhere; presumably it tracks
        # the variables behind the given outputs -- confirm.
        self.actor_var = TFVariables([self.infer_p, self.infer_v], self.sess)
        # tf.initialize_all_variables() is deprecated; this is its documented
        # drop-in replacement.
        self.sess.run(tf.global_variables_initializer())

        return model
예제 #5
0
 def create_dyn_network(self):
     """Build the "dyn" network as an uncompiled Keras model.

     The input is a flat vector of width ``HIDDEN_OUT + self.action_dim``
     (presumably a hidden state joined with an action encoding -- confirm
     against the caller). Two ReLU dense layers (256, 128) feed two heads:
     a ``HIDDEN_OUT``-unit ReLU head and a softmax head with
     ``self.reward_support_size`` units.

     Returns:
         A Keras ``Model`` with outputs ``[out_h, out_r]``.
     """
     # Keras documents `shape` as a tuple; wrap the width explicitly instead
     # of relying on a bare int being coerced.
     conditioned_hidden = Input(shape=(HIDDEN_OUT + self.action_dim, ))
     hidden = Dense(256, activation='relu')(conditioned_hidden)
     hidden = Dense(128, activation='relu')(hidden)
     out_h = Dense(HIDDEN_OUT, activation='relu')(hidden)
     out_r = Dense(self.reward_support_size, activation='softmax')(hidden)
     return Model(inputs=conditioned_hidden, outputs=[out_h, out_r])
예제 #6
0
 def create_dyn_network(self):
     """Build the "dyn" network as an uncompiled Keras model.

     The input is a flat vector of width ``HIDDEN2_UNITS + self.action_dim``
     (presumably a hidden state joined with an action encoding -- confirm
     against the caller). One ``HIDDEN1_UNITS`` ReLU dense layer feeds two
     heads: a ``HIDDEN2_UNITS``-unit ReLU head and a softmax head with
     ``self.reward_support_size`` units.

     Returns:
         A Keras ``Model`` with outputs ``[out_h, out_r]``.
     """
     # Keras documents `shape` as a tuple; wrap the width explicitly instead
     # of relying on a bare int being coerced.
     conditioned_hidden = Input(shape=(HIDDEN2_UNITS + self.action_dim, ))
     hidden = Dense(HIDDEN1_UNITS, activation='relu')(conditioned_hidden)
     out_h = Dense(HIDDEN2_UNITS, activation='relu')(hidden)
     # NOTE: applying Lambda(hidden_normlize) to out_h is currently disabled.
     out_r = Dense(self.reward_support_size, activation='softmax')(hidden)
     return Model(inputs=conditioned_hidden, outputs=[out_h, out_r])
예제 #7
0
    def create_model(self, model_info):
        """Create Deep-Q network.

        Builds an MLP of ``NUM_LAYERS`` ReLU dense layers (at least one) of
        width ``HIDDEN_SIZE`` followed by a linear head with
        ``self.action_dim`` outputs. When ``self.dueling`` is truthy, that
        head is combined with an extra one-unit linear head through the
        ``layer_normalize`` and ``layer_add`` helpers.

        The method also sets ``self.infer_state``, ``self.infer_v`` and
        ``self.actor_var`` and runs the variable initializer in
        ``self.sess``.

        Args:
            model_info: not read by this method.

        Returns:
            The compiled Keras model (MSE loss, Adam).
        """
        state = Input(shape=self.state_dim)
        denselayer = Dense(HIDDEN_SIZE, activation='relu')(state)
        for _ in range(NUM_LAYERS - 1):
            denselayer = Dense(HIDDEN_SIZE, activation='relu')(denselayer)

        value = Dense(self.action_dim, activation='linear')(denselayer)
        if self.dueling:
            # NOTE(review): layer_normalize / layer_add are defined elsewhere;
            # presumably they centre the per-action head and add the scalar
            # head to it -- confirm.
            adv = Dense(1, activation='linear')(denselayer)
            mean = Lambda(layer_normalize)(value)
            value = Lambda(layer_add)([adv, mean])

        model = Model(inputs=state, outputs=value)
        adam = Adam(lr=self.learning_rate)
        model.compile(loss='mse', optimizer=adam)

        # Placeholder-fed inference path; calling the model on a new tensor
        # reuses the layers (and weights) built above.
        self.infer_state = tf.placeholder(tf.float32,
                                          name="infer_input",
                                          shape=(None, ) +
                                          tuple(self.state_dim))
        self.infer_v = model(self.infer_state)
        # NOTE(review): TFVariables is defined elsewhere; presumably it tracks
        # the variables behind the given outputs -- confirm.
        self.actor_var = TFVariables([self.infer_v], self.sess)

        # tf.initialize_all_variables() is deprecated; this is its documented
        # drop-in replacement.
        self.sess.run(tf.global_variables_initializer())
        return model
예제 #8
0
 def create_model(self, model_info):
     """Build and compile the DQN Q-network.

     The network is an MLP of ``NUM_LAYERS`` ReLU dense layers (at least
     one) of width ``HIDDEN_SIZE`` with a linear head of
     ``self.action_dim`` units. ``model_info`` is not read here.

     Returns:
         The Keras model compiled with MSE loss and Adam.
     """
     obs = Input(shape=self.state_dim)

     # First hidden layer is always present; the rest depend on NUM_LAYERS.
     features = Dense(HIDDEN_SIZE, activation='relu')(obs)
     for _ in range(NUM_LAYERS - 1):
         features = Dense(HIDDEN_SIZE, activation='relu')(features)

     q_values = Dense(self.action_dim, activation='linear')(features)
     q_net = Model(inputs=obs, outputs=q_values)
     q_net.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
     return q_net
예제 #9
0
def get_cnn_backbone(state_dim,
                     act_dim,
                     hidden_sizes,
                     activation,
                     filter_arches,
                     vf_share_layers=True,
                     summary=False,
                     dtype='uint8'):
    """Assemble a CNN backbone with a policy head and a value head.

    Args:
        state_dim: shape passed to the ``obs`` Keras ``Input``.
        act_dim: number of units of the linear ``pi_latent`` head.
        hidden_sizes: dense layer widths, forwarded to ``bulid_mlp_layers``.
        activation: activation forwarded to both layer-building helpers.
        filter_arches: conv configuration, forwarded to
            ``build_conv_layers`` (format defined by that helper).
        vf_share_layers: when truthy both heads share one conv + dense
            stack; otherwise each head gets its own.
        summary: when truthy, print the Keras model summary.
        dtype: ``'uint8'`` routes the input through
            ``Lambda(layer_function)``; ``'float32'`` uses it unchanged.

    Returns:
        A Keras ``Model`` mapping ``[obs]`` to ``[pi_latent, output_value]``.

    Raises:
        ValueError: if ``dtype`` is neither ``'uint8'`` nor ``'float32'``.
    """
    raw_obs = Input(shape=state_dim, name='obs')

    if dtype == 'float32':
        net_in = raw_obs
    elif dtype == 'uint8':
        net_in = Lambda(layer_function)(raw_obs)
    else:
        raise ValueError(
            'dtype: {} not supported automatically, please implement it yourself'
            .format(dtype))

    if not vf_share_layers:
        # Separate towers. Layers are created pairwise (pi, then v) at each
        # stage.
        pi_conv = build_conv_layers(net_in, filter_arches, activation, 'pi')
        v_conv = build_conv_layers(net_in, filter_arches, activation, 'v')
        pi_flat = Flatten()(pi_conv)
        v_flat = Flatten()(v_conv)
        pi_dense = bulid_mlp_layers(pi_flat, hidden_sizes, activation, 'pi')
        v_dense = bulid_mlp_layers(v_flat, hidden_sizes, activation, 'v')
        policy_head = Dense(act_dim, activation=None,
                            name='pi_latent')(pi_dense)
        value_head = Dense(1, activation=None,
                           name='output_value')(v_dense)
    else:
        # A single tower feeds both heads.
        shared_conv = build_conv_layers(net_in, filter_arches, activation,
                                        'shared')
        shared_flat = Flatten()(shared_conv)
        shared_dense = bulid_mlp_layers(shared_flat, hidden_sizes,
                                        activation, 'shared')
        policy_head = Dense(act_dim, activation=None,
                            name='pi_latent')(shared_dense)
        value_head = Dense(1, activation=None,
                           name='output_value')(shared_dense)

    backbone = Model(inputs=[raw_obs], outputs=[policy_head, value_head])
    if summary:
        backbone.summary()

    return backbone
예제 #10
0
    def create_model(self, model_info):
        """Build the CNN policy/value model and hand it to ``build_graph``.

        The model maps a uint8 ``state_input`` to a softmax
        ``output_actions_raw`` head (``self.action_dim`` units) and a
        one-unit ``output_value`` head. ``model_info`` is not read here.

        Returns:
            The (uncompiled) Keras model.
        """
        frames = Input(shape=self.state_dim, name='state_input', dtype='uint8')
        # NOTE(review): layer_function is defined elsewhere; presumably it
        # casts/rescales the uint8 input -- confirm.
        features = Lambda(layer_function)(frames)

        # (filters, kernel size, strides) of each valid-padded ReLU conv.
        conv_specs = (
            (32, (8, 8), (4, 4)),
            (32, (4, 4), (2, 2)),
            (64, (3, 3), (1, 1)),
        )
        for filters, kernel, stride in conv_specs:
            features = Conv2D(filters, kernel, strides=stride,
                              activation='relu', padding='valid')(features)

        features = Flatten()(features)
        features = Dense(256, activation='relu', name='dense_1')(features)

        policy_out = Dense(self.action_dim, activation='softmax',
                           name='output_actions_raw')(features)
        value_out = Dense(1, name='output_value')(features)
        net = Model(inputs=[frames], outputs=[policy_out, value_out])

        self.build_graph(np.uint8, net)

        return net
예제 #11
0
    def create_model(self, model_info):
        """Build the MLP policy/value model and hand it to ``build_graph``.

        The model maps ``state_input`` through ``NUM_LAYERS`` ReLU dense
        layers (at least one) of width ``HIDDEN_SIZE`` to a softmax
        ``output_actions`` head and a one-unit ``output_value`` head.

        Args:
            model_info: config object; only ``model_info.get("summary")`` is
                read, to decide whether the Keras summary is printed.

        Returns:
            The (uncompiled) Keras model.
        """
        obs = Input(shape=self.state_dim, name='state_input')
        # These three inputs are not wired into the model below; they are
        # still constructed, in this order, exactly as before.
        # NOTE(review): confirm whether anything relies on them existing.
        _adv_in = Input(shape=(1, ), name='adv')
        _old_p_in = Input(shape=(self.action_dim, ), name='old_p')
        _old_v_in = Input(shape=(1, ), name='old_v')

        features = Dense(HIDDEN_SIZE, activation='relu')(obs)
        for _ in range(NUM_LAYERS - 1):
            features = Dense(HIDDEN_SIZE, activation='relu')(features)

        policy_out = Dense(self.action_dim,
                           activation='softmax',
                           name='output_actions')(features)
        value_out = Dense(1, name='output_value')(features)

        net = Model(inputs=[obs], outputs=[policy_out, value_out])
        if model_info.get("summary"):
            net.summary()

        self.build_graph(tf.float32, net)
        return net
예제 #12
0
 def create_rep_network(self):
     """Build the "rep" CNN as an uncompiled Keras model.

     The ``rep_input`` tensor is cast to float32 and divided by 255, passed
     through three valid-padded ReLU convolutions, flattened, and projected
     by a ``HIDDEN_OUT``-unit ReLU dense layer.

     Returns:
         A Keras ``Model`` from the observation to that dense output.
     """
     rep_in = Input(shape=self.state_dim, name='rep_input')
     features = Lambda(lambda x: tf.cast(x, dtype='float32') / 255.)(rep_in)

     # (filters, kernel size, strides) of each conv stage.
     conv_specs = (
         (32, (8, 8), (4, 4)),
         (32, (4, 4), (2, 2)),
         (64, (3, 3), (1, 1)),
     )
     for filters, kernel, stride in conv_specs:
         features = Conv2D(filters, kernel, strides=stride,
                           activation='relu', padding='valid')(features)

     features = Flatten()(features)
     # NOTE: normalizing this output via Lambda(hidden_normlize) is
     # currently disabled; the dense output is returned as-is.
     rep_out = Dense(HIDDEN_OUT, activation='relu')(features)
     return Model(inputs=rep_in, outputs=rep_out)
예제 #13
0
def get_cnn_backbone(state_dim,
                     act_dim,
                     hidden_sizes,
                     activation,
                     filter_arches,
                     vf_share_layers=True,
                     summary=False):
    """Assemble a CNN backbone with a policy head and a value head.

    Args:
        state_dim: shape passed to the ``obs`` Keras ``Input``.
        act_dim: number of units of the linear ``pi_latent`` head.
        hidden_sizes: dense layer widths, forwarded to ``bulid_mlp_layers``.
        activation: activation forwarded to both layer-building helpers.
        filter_arches: conv configuration, forwarded to
            ``build_conv_layers`` (format defined by that helper).
        vf_share_layers: when truthy both heads share one conv + dense
            stack; otherwise each head gets its own.
        summary: when truthy, print the Keras model summary.

    Returns:
        A Keras ``Model`` mapping ``[obs]`` to ``[pi_latent, output_value]``.
    """
    raw_obs = Input(shape=state_dim, name='obs')
    # NOTE(review): layer_function is defined elsewhere; presumably it
    # casts/rescales the raw observation -- confirm.
    net_in = Lambda(layer_function)(raw_obs)

    if not vf_share_layers:
        # Separate towers. Layers are created pairwise (pi, then v) at each
        # stage.
        pi_conv = build_conv_layers(net_in, filter_arches, activation, 'pi')
        v_conv = build_conv_layers(net_in, filter_arches, activation, 'v')
        pi_flat = Flatten()(pi_conv)
        v_flat = Flatten()(v_conv)
        pi_dense = bulid_mlp_layers(pi_flat, hidden_sizes, activation, 'pi')
        v_dense = bulid_mlp_layers(v_flat, hidden_sizes, activation, 'v')
        policy_head = Dense(act_dim, activation=None,
                            name='pi_latent')(pi_dense)
        value_head = Dense(1, activation=None,
                           name='output_value')(v_dense)
    else:
        # A single tower feeds both heads.
        shared_conv = build_conv_layers(net_in, filter_arches, activation,
                                        'shared')
        shared_flat = Flatten()(shared_conv)
        shared_dense = bulid_mlp_layers(shared_flat, hidden_sizes,
                                        activation, 'shared')
        policy_head = Dense(act_dim, activation=None,
                            name='pi_latent')(shared_dense)
        value_head = Dense(1, activation=None,
                           name='output_value')(shared_dense)

    backbone = Model(inputs=[raw_obs], outputs=[policy_head, value_head])
    if summary:
        backbone.summary()

    return backbone
예제 #14
0
def get_mlp_backbone(state_dim,
                     act_dim,
                     hidden_sizes,
                     activation,
                     vf_share_layers=False,
                     summary=False,
                     dtype='float32'):
    """Assemble an MLP backbone with a policy head and a value head.

    Args:
        state_dim: shape passed to the ``obs`` Keras ``Input``.
        act_dim: number of units of the linear ``pi_latent`` head.
        hidden_sizes: hidden layer widths, forwarded to ``bulid_mlp_layers``.
        activation: hidden activation, forwarded to ``bulid_mlp_layers``.
        vf_share_layers: when truthy both heads sit on one shared hidden
            stack; otherwise each head gets its own stack.
        summary: when truthy, print the Keras model summary.
        dtype: only ``'float32'`` is accepted.

    Returns:
        A Keras ``Model`` mapping ``[obs]`` to ``[pi_latent, output_value]``.

    Raises:
        ValueError: if ``dtype`` is not ``'float32'``.
    """

    obs = Input(shape=state_dim, name='obs')
    # The input layer is created before the dtype check, as before.
    if dtype != 'float32':
        raise ValueError(
            'dtype: {} not supported automatically, please implement it yourself'
            .format(dtype))

    if vf_share_layers:
        # One hidden stack feeds both heads.
        trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'shared')
        policy_head = Dense(act_dim, activation=None, name='pi_latent')(trunk)
        value_head = Dense(1, activation=None, name='output_value')(trunk)
    else:
        # Independent stacks: the policy side is built first, then the
        # value side.
        pi_trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'pi')
        policy_head = Dense(act_dim, activation=None,
                            name='pi_latent')(pi_trunk)
        v_trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'v')
        value_head = Dense(1, activation=None,
                           name='output_value')(v_trunk)

    backbone = Model(inputs=[obs], outputs=[policy_head, value_head])
    if summary:
        backbone.summary()

    return backbone
예제 #15
0
def bulid_mlp_layers(input_layer, hidden_sizes, activation, prefix=''):
    """Stack one ``Dense`` layer per entry of ``hidden_sizes``.

    Layers are named ``'<prefix>_hidden_mlp_<index>'`` and chained in order,
    starting from ``input_layer``.

    Returns:
        The output tensor of the last layer, or ``input_layer`` itself when
        ``hidden_sizes`` is empty.
    """
    current = input_layer
    for idx, width in enumerate(hidden_sizes):
        layer_name = '{}_hidden_mlp_{}'.format(prefix, idx)
        dense = Dense(width, activation=activation, name=layer_name)
        current = dense(current)
    return current
예제 #16
0
    def create_model(self, model_info):
        """Create Deep-Q network.

        Four integer-id inputs (user, clicked history, non-clicked history,
        candidate item) go through one shared, non-trainable embedding. User
        and item embeddings are flattened; each history is reshaped into a
        sequence of ``n_history_*`` steps of width
        ``item_dim * emb_dim`` and summarized by its own GRU. The four
        features are concatenated and passed through two 128-unit ReLU dense
        layers to a one-unit ``q_value`` head with activation
        ``self.last_act``.

        The method also creates the matching placeholders
        (``self.user_input``, ``self.history_click_input``,
        ``self.history_no_click_input``, ``self.item_input``), sets
        ``self.ctr_predict`` and ``self.actor_var``, and runs the variable
        initializer in ``self.sess``.

        Args:
            model_info: not read by this method (the summary switch is
                ``self._summary``).

        Returns:
            The compiled Keras model (MSE loss, Adam).
        """

        user_input = Input(shape=(self.user_dim,), name="user_input", dtype=self.input_type)
        # `shape` must be a tuple: "(a * b)" without a trailing comma is just
        # a parenthesised int, so the comma is added explicitly.
        history_click_input = Input(
            shape=(self.n_history_click * self.item_dim,), name="history_click",
            dtype=self.input_type
        )
        history_no_click_input = Input(
            shape=(self.n_history_no_click * self.item_dim,), name="history_no_click",
            dtype=self.input_type
        )
        item_input = Input(shape=(self.item_dim,), name="item_input", dtype=self.input_type)
        shared_embedding = Embedding(
            self.vocab_size,
            self.emb_dim,
            name="Emb",
            mask_zero=True,
            embeddings_initializer=self.embedding_initializer,
            trainable=False,
        )  # un-trainable
        gru_click = GRU(self.item_dim * self.emb_dim)
        gru_no_click = GRU(self.item_dim * self.emb_dim)

        user_feature = Flatten()(shared_embedding(user_input))
        item_feature = Flatten()(shared_embedding(item_input))

        # Regroup the flat per-id embeddings into one vector per history step.
        history_click_feature = Reshape(
            (self.n_history_click, self.item_dim * self.emb_dim)
        )(shared_embedding(history_click_input))
        history_click_feature = gru_click(history_click_feature)

        history_no_click_feature = Reshape(
            (self.n_history_no_click, self.item_dim * self.emb_dim)
        )(shared_embedding(history_no_click_input))
        history_no_click_feature = gru_no_click(history_no_click_feature)

        x = concatenate(
            [
                user_feature,
                history_click_feature,
                history_no_click_feature,
                item_feature,
            ]
        )
        x_dense1 = Dense(128, activation="relu")(x)
        x_dense2 = Dense(128, activation="relu")(x_dense1)
        # The output activation is configurable through self.last_act.
        ctr_pred = Dense(1, activation=self.last_act, name="q_value")(x_dense2)
        model = Model(
            inputs=[
                user_input,
                history_click_input,
                history_no_click_input,
                item_input,
            ],
            outputs=ctr_pred,
        )
        model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
        if self._summary:
            model.summary()

        # Placeholder-fed inference path mirroring the four model inputs.
        self.user_input = tf.placeholder(
            dtype=self.input_type, name="user_input", shape=(None, self.user_dim)
        )
        self.history_click_input = tf.placeholder(
            dtype=self.input_type,
            name="history_click_input",
            shape=(None, self.n_history_click * self.item_dim),
        )
        self.history_no_click_input = tf.placeholder(
            dtype=self.input_type,
            name="history_no_click_input",
            shape=(None, self.n_history_no_click * self.item_dim),
        )
        self.item_input = tf.placeholder(
            dtype=self.input_type, name="item_input", shape=(None, self.item_dim)
        )

        self.ctr_predict = model(
            [
                self.user_input,
                self.history_click_input,
                self.history_no_click_input,
                self.item_input,
            ]
        )
        # NOTE(review): TFVariables is defined elsewhere; presumably it tracks
        # the variables behind the given outputs -- confirm.
        self.actor_var = TFVariables([self.ctr_predict], self.sess)

        # tf.initialize_all_variables() is deprecated; this is its documented
        # drop-in replacement.
        self.sess.run(tf.global_variables_initializer())
        return model
예제 #17
0
 def create_rep_network(self):
     """Build the "rep" MLP as an uncompiled Keras model.

     The ``rep_input`` tensor passes through a ``HIDDEN1_UNITS`` ReLU dense
     layer and then a ``HIDDEN2_UNITS`` ReLU dense layer.

     Returns:
         A Keras ``Model`` from the observation to the second dense output.
     """
     rep_in = Input(shape=self.state_dim, name='rep_input')
     first = Dense(HIDDEN1_UNITS, activation='relu')
     second = Dense(HIDDEN2_UNITS, activation='relu')
     # NOTE: normalizing the output via Lambda(hidden_normlize) is currently
     # disabled; the second dense output is returned as-is.
     rep_out = second(first(rep_in))
     return Model(inputs=rep_in, outputs=rep_out)
예제 #18
0
 def create_policy_network(self):
     """Build the policy/value network as an uncompiled Keras model.

     A ``hidden_input`` vector of width ``HIDDEN2_UNITS`` passes through one
     ``HIDDEN1_UNITS`` ReLU dense layer that feeds two softmax heads: one
     with ``self.value_support_size`` units and one with
     ``self.action_dim`` units.

     Returns:
         A Keras ``Model`` with outputs ``[out_p, out_v]`` (policy first).
     """
     # Keras documents `shape` as a tuple; wrap the width explicitly instead
     # of relying on a bare int being coerced.
     hidden_input = Input(shape=(HIDDEN2_UNITS, ), name='hidden_input')
     hidden = Dense(HIDDEN1_UNITS, activation='relu')(hidden_input)
     out_v = Dense(self.value_support_size, activation='softmax')(hidden)
     out_p = Dense(self.action_dim, activation='softmax')(hidden)
     return Model(inputs=hidden_input, outputs=[out_p, out_v])