Example #1
def SoftmaxAutoEncoder(
    input_dim,
    latent_dim=50,
    encoder=None,
    decoder=None,
    activation=None,
    loss=None,
    sparse=True, use_tied_layer=True, use_binary_activation=True, alpha=50,
    lr=0.001,
):
    """Softmax AutoEncoder

    Autoencoder using kullback_leibler_divergence as objective function, and softmax as output activation.
    Requiring input matrix row sum to 1.

    Parameters
    ----------

    input_dim : dimension of an input sample.
    latent_dim : dimension of the latent vector.
    encoder :
        if not None, it is used as latent_vector = encoder(input_layer).
    decoder :
        if not None, it is used as generated_input = decoder(latent_vector).
    activation :
        default is "tanh" when use_binary_activation is False, otherwise the variant sigmoid.
    loss : default is kullback_leibler_divergence.
    sparse : whether the input layer accepts scipy sparse matrices.
    use_tied_layer :
        whether to use a tied decoder layer;
        only used when encoder and decoder are None.
    use_binary_activation :
        if True, use the variant sigmoid 1/(1+exp(-alpha*x)).
    alpha : alpha in the variant sigmoid.
    lr : learning rate.

    Examples
    --------

    import keras
    from keras_aquarium import sae
    from scipy.sparse import csr_matrix
    import numpy as np

    # suppose you have a sparse matrix that represents bag-of-words documents
    bow_docs = csr_matrix((n_docs, n_words))

    model = sae.SoftmaxAutoEncoder(
        n_words, # dim of an input sample (here, the vocabulary size)
        latent_dim=50, # dim of the latent vector
        encoder=None, # if not None, used as latent_vector = encoder(input_layer)
        decoder=None, # if not None, used as generated_input = decoder(latent_vector)
        activation=None, # default is "tanh" when use_binary_activation is False, otherwise the variant sigmoid
        loss=None, # default is kullback_leibler_divergence
        use_tied_layer=True, # whether to use a tied decoder layer; only used when encoder and decoder are None
        use_binary_activation=True, # if True, use the variant sigmoid 1/(1+exp(-alpha*x))
        alpha=50, # alpha in the variant sigmoid
    )

    def generate_dataset(batch_size):
        # memory friendly: yields (sparse input, dense target) batches
        indices = np.arange(bow_docs.shape[0])
        while True:
            np.random.shuffle(indices)
            for i in range(0, len(indices), batch_size):
                inds = indices[i:i+batch_size]
                yield bow_docs[inds], bow_docs[inds].toarray()

    batch_size = 32
    model.fit_generator(generate_dataset(batch_size), steps_per_epoch=bow_docs.shape[0] // batch_size)
    """

    input_layer = Input(shape=[input_dim,], sparse=sparse)

    if encoder is not None:
        hidden = encoder(input_layer)
    else:
        hidden = input_layer

    if activation is None:

        if use_binary_activation:
            def binary_activation(x):
                """
                Variant sigmoid sgm(x) = 1/(1 + exp(-alpha*x)); it reduces to the
                standard sigmoid when alpha = 1 and gives faster convergence.
                """
                x = -1 * alpha * x
                x = K.clip(x, -1e16, 80)  # avoid overflow in exp
                alive = 1 / (1 + K.exp(x))
                return alive
            activation = binary_activation
        else:
            activation = activations.tanh

    encoder_ = Dense(latent_dim, activation=activation, kernel_initializer="glorot_normal",)
    code = encoder_(hidden)
    # code = Lambda(activation, )(code) # activation

    if decoder is not None:
        hidden_g = decoder(code)
    else:
        hidden_g = code

    if use_tied_layer:
        decoder_ = Dense_tied(input_dim,
            activation="softmax",
            tied_to=encoder_,
            kernel_regularizer=regularizers.l2(0.00001),
            bias_regularizer=regularizers.l2(0.00001),)
    else:
        decoder_ = Dense(input_dim, activation="softmax", )

    res_input = decoder_(hidden_g)

    model = Model(inputs=input_layer, outputs=res_input, )
    if loss is None:
        loss = losses.kullback_leibler_divergence
    model.compile(
        loss=loss,
        optimizer=optimizers.Nadam(lr=lr))

    encoder = Model(inputs=input_layer, outputs=code, )
    model._keras_aquarium_params = \
    dict(encoder=encoder, )

    return model
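
Example #1 relies on a Dense_tied layer that is defined elsewhere in keras_aquarium and is not shown here. Below is only a minimal sketch of what a tied-weight decoder layer can look like; it assumes the decoder simply reuses the transpose of the encoder's kernel and omits the kernel_regularizer/bias_regularizer arguments that the call above passes, so the library's actual implementation may differ.

from keras import activations
from keras import backend as K
from keras.layers import Layer

class DenseTiedSketch(Layer):
    """Dense layer whose kernel is the transpose of another Dense layer's kernel."""

    def __init__(self, units, tied_to, activation=None, **kwargs):
        super(DenseTiedSketch, self).__init__(**kwargs)
        self.units = units
        self.tied_to = tied_to  # the Dense layer to share weights with
        self.activation = activations.get(activation)

    def build(self, input_shape):
        # only the bias is a new trainable weight; the kernel is borrowed
        # (transposed) from the tied layer
        self.bias = self.add_weight(name="bias", shape=(self.units,),
                                    initializer="zeros")
        super(DenseTiedSketch, self).build(input_shape)

    def call(self, inputs):
        output = K.dot(inputs, K.transpose(self.tied_to.kernel))
        output = K.bias_add(output, self.bias)
        return self.activation(output)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.units)
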
Example #2
def DualMatrixFactorization(
    n_row,
    n_col,
    n_row_feature=None,
    n_col_feature=None,
    row_dim=50,
    col_dim=50,
    row_feature_dim=50,
    col_feature_dim=50,
    row_layers=[(50, "relu")],
    col_layers=[(50, "relu")],
    # output_mode="single",
    model_name=None,
):
    """Dual Deep Matrix Factorization.

    By introducing deeper encoding of both user vectors and item vectors, it outperform than simple Matrix Factorization.
    The model is inspried by paper [COLLABORATIVE DEEP EMBEDDING VIA DUAL NETWORKS](https://openreview.net/pdf?id=r1w7Jdqxl)

    Parameters
    ----------
    n_row : number of rows of the matrix.
    n_col : number of columns of the matrix.
    n_row_feature :
        number of row features; default is None, meaning no row features are used.
    n_col_feature :
        number of col features; default is None, meaning no col features are used.
    row_dim : embedding dim of a row element.
    col_dim : embedding dim of a col element.
    row_feature_dim :
        dim of the latent representation of the row features.
    col_feature_dim :
        dim of the latent representation of the col features.
    row_layers : list of tuple (dim, activation) or callable
        used to construct the row hidden layers.
    col_layers : list of tuple (dim, activation) or callable
        used to construct the col hidden layers.
    model_name : str
        name of the model.

    Examples
    --------

    import keras
    from keras.layers import Dense
    from keras.regularizers import l2
    from keras_aquarium import dmf
    from scipy.sparse import csr_matrix, coo_matrix

    # suppose you have a sparse matrix as a user item rating matrix
    rating_matrix = coo_matrix((n_user, n_item))
    users = rating_matrix.row
    items = rating_matrix.col
    ratings = rating_matrix.data

    # you also have a sparse feature matrix for items, such as location of item, price of item, etc.
    item_features = csr_matrix((n_item, n_item_feature))
    # and sadly, you don't have any features for users
    user_features = None

    # then you want to apply a Matrix Factorization model to predict ratings
    model = dmf.DualMatrixFactorization(
        # we choose user as row, item as col
        n_row=n_user, n_col=n_item, # specify the matrix shape
        # or we can choose item as row and user as col (n_row=n_item, n_col=n_user), i.e. just transpose the matrix

        n_row_feature=None, # we don't have user features
        n_col_feature=n_item_feature, # we do have item features
        row_dim=30, col_dim=20, # specify the embedding dims; each user and item is first embedded as a dense vector
        row_feature_dim=None, # no user features
        col_feature_dim=30, # item feature embedding dim; the item features are encoded as a dense vector of this size

        # row_layers and col_layers are the encoding layers for users and items;
        # they work separately, but corresponding hidden layers are paired by a dot product,
        # so make sure paired layers have the same dim
        row_layers=[(50, "relu"), (30, "tanh")], # two hidden layers: a dense layer with 50 units and relu activation, then a dense layer with 30 units and tanh activation
        col_layers=[(50, "relu"), (lambda x: Dense(30, kernel_regularizer=l2(0.001))(x))], # a callable can also be used to create a hidden layer
    )

    # use it as a keras model
    model.compile(loss="mse", optimizer="adam", )

    inputs = [users] + [items, item_features[items], ] # note that there are no user features
    model.fit(inputs, ratings, )
    """
    def make_row_layers(n_row, n_row_feature, row_feature_dim):
        row_input = Input(shape=(1, ), dtype="int32")
        row_embd = Embedding(
            input_dim=n_row,
            input_length=1,
            output_dim=row_dim,
        )
        row_embd = Flatten()(row_embd(row_input))

        if n_row_feature is not None:
            row_feature_input = Input(shape=(n_row_feature, ), sparse=True)
            row_feature_embd = Dense(
                row_feature_dim,
                activation=None,
            )(row_feature_input)
            row_hidden = concatenate([row_embd, row_feature_embd], )
        else:
            row_feature_input = None
            row_hidden = row_embd

        if row_feature_input is None:
            inputs = [row_input]
        else:
            inputs = [row_input, row_feature_input]

        # print "row_hidden.shape:", K.int_shape(row_hidden)

        return inputs, row_hidden

    [row_inputs, row_hidden] = make_row_layers(n_row, n_row_feature,
                                               row_feature_dim)
    [col_inputs, col_hidden] = make_row_layers(n_col, n_col_feature,
                                               col_feature_dim)

    row_hiddens = []
    col_hiddens = []

    def map_layers(layers, hidden):
        hiddens = []

        for l in layers:
            if callable(l):
                hidden = l(hidden)
                print "callable"
            else:
                (dim, act) = l
                print(K.int_shape(hidden))
                hidden = Dense(dim, activation=act)(hidden)

            hiddens.append(hidden)

        return hiddens

    row_hiddens = map_layers(row_layers, row_hidden)
    col_hiddens = map_layers(col_layers, col_hidden)

    def zip_row_col(row_hiddens, col_hiddens):
        outputs = []

        for row, col in zip(row_hiddens, col_hiddens):
            output = dot_layer([row, col], axes=-1)
            outputs.append(output)

        return outputs

    outputs = zip_row_col(row_hiddens, col_hiddens)
    # TODO: add multilevel mode
    # if output_mode == "single":
    #     pred = outputs[-1]
    # else:
    #     pred = add_layer(outputs)
    pred = outputs[-1]

    model = Model(
        inputs=row_inputs + col_inputs,
        outputs=pred,
        name=model_name,
    )

    model.compile(optimizer=optimizers.Nadam(),
                  loss=losses.mean_squared_error,
                  metrics=['accuracy'])

    model._keras_aquarium_params = \
    dict(
        model_type="dmf",
        outputs=outputs,
        row_hiddens=row_hiddens,
        col_hiddens=col_hiddens,
        row_encoder=Model(inputs=row_inputs, outputs=row_hiddens[-1]),
        col_encoder=Model(inputs=col_inputs, outputs=col_hiddens[-1]),
    )

    return model
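
The fitted model keeps its row and column encoders in model._keras_aquarium_params (built just above). The following is only a minimal usage sketch, assuming the model, users, items and item_features from the docstring example, for extracting user and item representations:

params = model._keras_aquarium_params

row_encoder = params["row_encoder"]  # maps row (user) inputs to the last row hidden layer
col_encoder = params["col_encoder"]  # maps col (item) inputs to the last col hidden layer

user_vectors = row_encoder.predict([users])                        # no user features in this setup
item_vectors = col_encoder.predict([items, item_features[items]])  # item features are the second input
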
Example #3
def HierarchicalAttentionRNN(
    max_sents,
    max_sent_length,
    n_classes,
    embeddings=None,
    n_words=None,
    word_dim=50,
    word_hidden_dim=100,
    sent_hidden_dim=100,
):
    """Hierarchical Attention RNN(GRU)

    Two level of lstm network for text Classification, encode sentence by words first, then encode document by sentences.
    Also add attention for both words and sentences.

    Check paper [HIERARCHICAL ATTENTION NETWORKS FOR DOCUMENT CLASSIFICATION](https://www.cs.cmu.edu/~hovy/papers/16HLT-hierarchical-attention-networks.pdf) for more details.

    Parameters
    ----------

    max_sents : number of sentences in a document.
    max_sent_length : number of words in a sentence.
    n_classes : number of classes.
    embeddings :
        if given, used to initialize the word embeddings.
    n_words : number of words in the vocabulary.
    word_dim : dim of the word embeddings.
    word_hidden_dim : number of units in the word-level rnn.
    sent_hidden_dim : number of units in the sentence-level rnn.

    Examples
    --------

    import keras
    from keras_aquarium import hatt_rnn
    from scipy.sparse import csr_matrix
    import numpy as np

    # suppose you have a 3D array (n_docs * n_sentences_in_doc * n_words_in_sentence) that represents documents
    sequence_docs = np.zeros([n_docs, n_sentences_in_doc, n_words_in_sentence]) # padded with zeros
    word_embeddings = load_glove_word_embeddings()
    vocabulary = load_vocabulary()

    model = hatt_rnn.HierarchicalAttentionRNN(
        max_sents,
        max_sent_length,
        n_classes,

        # either initialize the word embedding layer with pretrained embeddings
        embeddings=word_embeddings,
        # or let the layer learn embeddings from scratch for a vocabulary of n_words words
        n_words=len(vocabulary),
        word_dim=50,

        # units in words and sentences gru layer
        word_hidden_dim=100,
        sent_hidden_dim=100,
    )

    model.fit(sequence_docs, labels)
    """

    if embeddings is None:
        # embeddings = np.random.uniform([n_words, word_dim])
        embedding_layer = Embedding(n_words+1,
                                word_dim,
                                input_length=max_sent_length,
                                # mask_zero=True,
                                trainable=True)
    else:
        embedding_layer = Embedding(len(embeddings),
                                len(embeddings[0]),
                                weights=[embeddings],
                                input_length=max_sent_length,
                                mask_zero=True,
                                trainable=True)

    sent_input = Input(shape=(max_sent_length,), dtype='int32')
    embedded_sequences = embedding_layer(sent_input)

    class AttLayer(Layer):
        def __init__(self, hit=None, **kwargs):
            #self.input_spec = [InputSpec(ndim=3)]
            self.init = initializers.glorot_uniform()
            super(AttLayer, self).__init__(**kwargs)
            self.hit = hit

        def build(self, input_shape_li):
            input_shape = input_shape_li[-1]
            assert len(input_shape)==3
            self.W = self.init((input_shape[-1],))
            self.W = K.variable(self.W)
            self._x_input_shape = input_shape
            self.trainable_weights = [self.W]
            super(AttLayer, self).build(input_shape)  # be sure you call this somewhere!

        def call(self, xli, mask=None):
#             eij = K.tanh(K.dot(x, self.W))
            hit, x = xli
            # print "hit.shape:", K.int_shape(hit)

            def get_weights_(x):
                eij = K.dot(x, K.reshape(self.W, [self._x_input_shape[-1], 1]) )
                eij = K.squeeze(eij, axis=-1)
                # print "eij.shape:", K.int_shape(eij)

                ai = K.exp(eij)
                ai_sum = K.sum(ai, axis=1)
                ai_sum = K.reshape(ai_sum, [-1, 1])
                # print "ai_sum.shape:", K.int_shape(ai_sum)
                weights = ai/ai_sum
                # print "weights.shape:", K.int_shape(weights)

                return weights

            weights = get_weights_(x)

            self.output_weights = Lambda(get_weights_, )(x)

            # weighted_input = hit * weights
            weights = K.expand_dims(weights, axis=1)
            weighted_input = K.batch_dot(weights, hit, axes=[2, 1, ])
            weighted_input = K.squeeze(weighted_input, axis=1)

            # the batch_dot above is equivalent to applying
            # K.sum(K.tf.einsum("ijk,ij->ijk", hit, weights), axis=1)
            # to the un-expanded weights

            # print "weighted_input.shape:", K.int_shape(weighted_input)

            return weighted_input

        def get_output_shape_for(self, input_shape_li):
            input_shape = input_shape_li[-1]
            return (input_shape[0], input_shape[-1])

        def compute_output_shape(self, input_shape_li):
            return self.get_output_shape_for(input_shape_li)
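
    # In short, AttLayer implements the attention described in the paper cited in
    # the docstring: given the raw hidden states h_t ("hit") and their transformed
    # versions u_t ("x", produced by the sigmoid TimeDistributed Dense below),
    # it scores each time step as e_t = u_t . W, normalizes the scores across time
    # steps with a softmax into weights a_t, and returns sum_t a_t * h_t.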

    def get_weights(args):
        a, b = args
        eij = K.dot(a, K.transpose(b))
        ai = K.exp(eij)
        weights = ai / K.sum(ai, axis=1)
        return weights

    layer_mode = True

    # ======== sent level =========

    sent_hidden = Bidirectional(
        GRU(word_hidden_dim, activation="tanh", return_sequences=True)
    )(embedded_sequences)

    bi_word_hidden_dim = 2 * word_hidden_dim
    sent_hidden_att = TimeDistributed(
        Dense(bi_word_hidden_dim, activation="sigmoid")
    )(sent_hidden)

    if layer_mode:
        word_att_layer = AttLayer()
        sent_encoded = word_att_layer([sent_hidden, sent_hidden_att])
    else:
        words_attention = K.random_uniform_variable(
            [1, bi_word_hidden_dim], low=0, high=1, )
        word_weights = get_weights([sent_hidden_att, words_attention])
        def attend_words(args):
            sent_hidden, sent_hidden_att = args
            weighted_input = sent_hidden * word_weights
            weighted_input = K.sum(weighted_input, axis=1)
            return weighted_input
        sent_encoded = Lambda(attend_words, )([sent_hidden, sent_hidden_att])

    sent_encoder = Model(sent_input, sent_encoded)

    # ======== doc level =========

    sents_input = Input(
        shape=(max_sents, max_sent_length), dtype='int32', )

    sents_encoded = TimeDistributed(sent_encoder)(sents_input)
    doc_hidden = Bidirectional(
        GRU(sent_hidden_dim, activation="tanh", return_sequences=True)
    )(sents_encoded)

    bi_sent_hidden_dim = 2 * sent_hidden_dim
    doc_hidden_att = TimeDistributed(
        Dense(bi_sent_hidden_dim, activation="sigmoid")
    )(doc_hidden)

    if layer_mode:
        sent_att_layer = AttLayer()
        doc_encoded = sent_att_layer([doc_hidden, doc_hidden_att])
    else:
        sents_attention = K.random_uniform_variable(
            [1, bi_sent_hidden_dim], low=0, high=1, )
        sent_weights = get_weights([doc_hidden_att, sents_attention])
        def attend_doc(args):
            doc_hidden, doc_hidden_att = args
            weighted_input = doc_hidden * sent_weights
            weighted_input = K.sum(weighted_input, axis=1)
            return weighted_input
        doc_encoded = Lambda(attend_doc, )([doc_hidden, doc_hidden_att])

    # ======== fully connected =========

    pred = Dense(n_classes, activation='softmax')(doc_encoded)
    model = Model(sents_input, pred)

    model.compile(
        loss='categorical_crossentropy',
        optimizer='nadam',
        metrics=['accuracy'])

    # ======== weights =========

    if layer_mode:
        # pass
        sent_weights_model = Model(sents_input, sent_att_layer.output_weights)
    else:
        word_weights_layer = Lambda(get_weights, )([sent_hidden_att, words_attention])
        print(K.int_shape(word_weights_layer))
        word_weights_model = Model(sent_input, word_weights_layer)

        sents_word_weights = TimeDistributed(word_weights_model)(sents_input)
        word_weights_model = Model(sents_input, sents_word_weights)

        sent_weights_layer = Lambda(get_weights, )([doc_hidden_att, sents_attention])
        sent_weights_model = Model(sents_input, sent_weights_layer)

    model._keras_aquarium_params = \
    dict(
        model_type="hatt_rnn",
        # word_weights_model=word_weights_model,
        sent_weights_model=sent_weights_model,
        max_sents=max_sents,
        max_sent_length=max_sent_length,
    )

    return model
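
The returned model also exposes a sent_weights_model through model._keras_aquarium_params (built just above), which can be used to inspect how much attention each sentence received. This is only a minimal sketch, assuming the model and the sequence_docs array from the docstring example:

import numpy as np

params = model._keras_aquarium_params
sent_weights_model = params["sent_weights_model"]

# one attention weight per sentence for every document,
# roughly of shape (n_docs, max_sents)
sent_weights = sent_weights_model.predict(sequence_docs)

# index of the most attended sentence in each document
most_attended = np.argmax(sent_weights, axis=-1)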