示例#1
0
def build_rnn_attention_model(self):
    """Build an RNN encoder with Bahdanau attention; returns the uncompiled shared Model."""
    # RNN-based encoder.
    self.network_type = 'rnn_attention_model'
    model_input, encoder_input, decoder_output = general_input(self)

    # Adding dense layers before or between the RNNs has performed poorly so far.
    encoder_concatenate = layers.concatenate(encoder_input)
    bn_concatenate_layers = BatchNormalization()(encoder_concatenate)
    # The LSTM also returns the cell state ("conveyor belt"); it is not needed here.
    values, h_env, _ = CuDNNLSTM(1024,
                                 return_sequences=True,
                                 return_state=True)(bn_concatenate_layers)
    # Mimic a decoder step: combine the previous output / hidden state with the
    # encoder input as the query. NOTE(review): the query here is much smaller
    # than h, which may be a problem.
    # After embedding there is an extra dimension; it must be flattened before
    # it can be concatenated with h.
    decoder_lstm, h_act, _ = CuDNNLSTM(256,
                                       return_sequences=True,
                                       return_state=True)(decoder_output)
    decoder_lstm = CuDNNLSTM(256)(decoder_lstm)
    query = layers.concatenate([h_env, h_act])
    c_vector, _ = BahdanauAttention(512)(query, values)
    # Many-to-one: each sequence is decoded only once, so the encoder's output
    # hidden state is used directly.
    # With a single output, decoding uses a dense layer instead of another RNN.
    t_status = layers.concatenate([c_vector, decoder_lstm])
    t_status = layers.Dense(256, kernel_initializer='he_uniform')(t_status)
    output = layers.LeakyReLU(0.05)(t_status)
    shared_model = Model(model_input, output)
    return shared_model
    def build_model(self):
        """Build and compile the stacked-LSTM model onto ``self.model``.

        Layer widths come from ``self.lstm_layers``, regularization from
        ``self.dropout_rate`` and the Adam learning rate from
        ``self.learning_rate``.
        """
        self.model = Sequential()

        # Stacked LSTM layers; every layer except the last returns sequences.
        for i, layer_size in enumerate(self.lstm_layers[:-1]):
            kwargs = {'return_sequences': True}
            if i == 0:
                # BUGFIX: `input_size=1` is not a valid CuDNNLSTM argument and
                # raised a TypeError. The intent appears to be one feature per
                # timestep. NOTE(review): assumes (timesteps, 1) inputs —
                # confirm against the training data.
                kwargs['input_shape'] = (None, 1)
            self.model.add(CuDNNLSTM(layer_size, **kwargs))
            self.model.add(Dropout(self.dropout_rate))
            self.model.add(BatchNormalization())

        # Last LSTM layer collapses the sequence to a single vector.
        self.model.add(CuDNNLSTM(self.lstm_layers[-1]))
        self.model.add(Dropout(self.dropout_rate))
        self.model.add(BatchNormalization())

        # Dense output head.
        self.model.add(Dense(32, activation='relu'))
        self.model.add(Dropout(self.dropout_rate))
        self.model.add(Dense(1, activation='relu'))

        opt = tf.keras.optimizers.Adam(lr=self.learning_rate)

        # NOTE(review): sparse_categorical_crossentropy combined with a
        # Dense(1, 'relu') output is inconsistent — revisit the loss and
        # output activation pairing (see original TODO).
        self.model.compile(
            loss='sparse_categorical_crossentropy',
            optimizer=opt,
            metrics=['accuracy'],
        )
def CuDNNLSTM_Autoencoder_GPU():
    """Build an LSTM autoencoder (encode -> repeat -> decode -> reconstruct).

    Relies on module-level ``timesteps`` and ``n_features`` describing the
    (timesteps, n_features) shape of each input sample.

    :return: the compiled Sequential autoencoder model
    """
    model = Sequential()
    # Each input sample is a 2D array fed to the LSTM encoder layer; the
    # layer's output is an encoded feature vector of the input data.

    # Input shape is timesteps x n_features. The 128-unit encoder emits only
    # the final encoded vector (return_sequences=False).
    model.add(
        CuDNNLSTM(128,
                  input_shape=(timesteps, n_features),
                  return_sequences=False))

    # Dropout regularization: drop 20% of the units.
    model.add(Dropout(0.2))

    # The encoded feature vector must be replicated once per timestep so the
    # LSTM decoder receives a full sequence again.
    model.add(RepeatVector(timesteps))

    # Decoder layer: return_sequences=True yields one output per timestep.
    model.add(CuDNNLSTM(128, return_sequences=True))

    model.add(Dropout(0.2))

    # TimeDistributed output layer reconstructs n_features per timestep
    # (requires return_sequences=True on the previous LSTM layer).
    model.add(TimeDistributed(Dense(n_features)))

    # BUGFIX: `optimizer=adam` referenced an undefined name. The string
    # identifier selects the Adam optimizer with its default learning rate,
    # matching the original comment's intent. Loss is mean absolute error.
    model.compile(loss='mae', optimizer='adam')

    return model
示例#4
0
def build_stacked_rnn_model(self):
    """Build a stacked-RNN encoder/decoder model; returns the uncompiled shared Model."""
    self.network_type = 'stacked_rnn_model'
    model_input, encoder_input, decoder_output = general_input(self)

    # Running a separate RNN per input performed poorly,
    # so the inputs are concatenated first.
    concatenate_layers = layers.concatenate(encoder_input)
    bn_concatenate_layers = BatchNormalization()(concatenate_layers)
    # 512 -> 1024 units works well so far, but requires a fairly large dataset.
    t_status = CuDNNLSTM(512, return_sequences=True)(bn_concatenate_layers)
    t_status = CuDNNLSTM(1024)(t_status)
    # Bidirectional LSTM did not work well:
    # t_status = Bidirectional(CuDNNLSTM(1024))(concatenate_layers)
    decoder_lstm = CuDNNLSTM(256, return_sequences=True)(decoder_output)
    decoder_lstm = CuDNNLSTM(256)(decoder_lstm)
    t_status = layers.concatenate([decoder_lstm, t_status])
    # Avoid `add` on tensors that do not share the same origin:
    # t_status = layers.add([t_status, q])
    t_status = layers.Dense(512, kernel_initializer='he_uniform')(t_status)

    t_status = layers.LeakyReLU(0.05)(t_status)
    t_status = layers.Dense(256, kernel_initializer='he_uniform')(t_status)
    output = layers.LeakyReLU(0.05)(t_status)
    shared_model = Model(model_input, output)
    # Do not compile here, otherwise the model cannot be extended later.
    return shared_model
示例#5
0
def create_model_AZ_split_lstm(rnn_unit, concated_id_emb_dict):
    """Build and compile the multi-embedding, four-route BiLSTM classifier.

    Each id feature gets its pretrained embedding, optionally filtered by a
    kernel-size-1 Conv1D; the concatenated embeddings feed four parallel
    BiLSTMs, the final LSTM block (``LSTM_net_AZ_split_lstm``) and a dense
    softmax head.

    :param rnn_unit: unit count for the final LSTM block
    :param concated_id_emb_dict: maps feature name -> pretrained embedding matrix
    :return: the compiled keras.Model

    BUGFIX: the closing lines were tab-indented in a space-indented file
    (TabError), and the compiled model was never returned despite the
    "return compiled model" comment.
    """
    # Input layers, no mask added.
    inputs_dict = get_seq_input_layers(cols=EMB_keys2do)
    inputs_all = list(inputs_dict.values())
    # Per-feature Conv1D filter counts; -1 disables the conv filter.
    conv1d_info_dict = {
        'creative_id': 256,
        'ad_id': 128,
        'advertiser_id': 128,
        'industry': 64,
        'product_category': 64,
        'product_id': 128,
        'time': 32,
        'click_times': -1
    }
    layers2concat = []
    for id_nm, emb_matrix in concated_id_emb_dict.items():
        if id_nm != 'click_times':
            print(id_nm, 'get embedding!')
            emb_layer = get_emb_layer(emb_matrix,
                                      trainable=TRAINABLE_DICT[id_nm])
            x = emb_layer(inputs_dict[id_nm])
            if conv1d_info_dict[id_nm] > -1:
                # Kernel-size-1 conv acts as a per-feature channel filter.
                cov_layer = keras.layers.Conv1D(
                    filters=conv1d_info_dict[id_nm],
                    kernel_size=1,
                    activation='relu')
                x = cov_layer(x)
            layers2concat.append(x)
    # Concatenate all feature embeddings.
    concat_emb_w2v = keras.layers.concatenate(layers2concat)
    # Four parallel BiLSTM routes of differing widths.
    lstm_1 = keras.layers.Bidirectional(CuDNNLSTM(
        128, return_sequences=True))(concat_emb_w2v)
    lstm_3 = keras.layers.Bidirectional(CuDNNLSTM(
        64, return_sequences=True))(concat_emb_w2v)
    lstm_2 = keras.layers.Bidirectional(CuDNNLSTM(
        32, return_sequences=True))(concat_emb_w2v)
    lstm_4 = keras.layers.Bidirectional(CuDNNLSTM(
        128, return_sequences=True))(concat_emb_w2v)
    concat_emb_w2v = keras.layers.concatenate([lstm_1, lstm_2, lstm_3, lstm_4])
    # Final LSTM block.
    concat_all = LSTM_net_AZ_split_lstm(concat_emb_w2v, n_unit=rnn_unit)
    concat_all = keras.layers.Dropout(0.3)(concat_all)
    # Dense head.
    x = keras.layers.Dense(256)(concat_all)
    x = keras.layers.PReLU()(x)
    x = keras.layers.Dense(256)(x)
    x = keras.layers.PReLU()(x)

    outputs_all = keras.layers.Dense(NUM_CLASSES,
                                     activation='softmax',
                                     name='age_gender')(x)
    model = keras.Model(inputs_all, outputs_all)
    print(model.summary())
    # Compile and return the model (TF 2.0 style).
    model.compile(optimizer=keras.optimizers.Adam(lr=1e-3),
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])
    return model
示例#6
0
def lstm_model():
    """Two-layer LSTM language model over a word vocabulary with a softmax head."""
    net = tf.keras.models.Sequential([
        Embedding(word_len, 64, input_length=maxlen),
        CuDNNLSTM(512, return_sequences=True),
        CuDNNLSTM(512),
        Dense(word_len, activation='softmax'),
    ])
    net.compile(loss='sparse_categorical_crossentropy',
                optimizer=tf.keras.optimizers.RMSprop(lr=0.01))
    return net
示例#7
0
def _hegemax_conv_branch(image_input):
    """Time-distributed conv tower: crop the sky rows, normalize to [-0.5, 0.5],
    apply six ReLU convolutions, and flatten each frame."""
    x = TimeDistributed(Cropping2D(cropping=((50, 0),
                                             (0, 0))))(image_input)
    x = TimeDistributed(Lambda(lambda frame: ((frame / 255.0) - 0.5)))(x)
    x = TimeDistributed(Conv2D(24, (5, 5), strides=(2, 2),
                               activation="relu"))(x)
    x = TimeDistributed(Conv2D(36, (5, 5), strides=(2, 2),
                               activation="relu"))(x)
    x = TimeDistributed(Conv2D(48, (5, 5), strides=(2, 2),
                               activation="relu"))(x)
    x = TimeDistributed(Conv2D(64, (3, 3), strides=(2, 2),
                               activation="relu"))(x)
    x = TimeDistributed(Conv2D(64, (3, 3), activation="relu"))(x)
    x = TimeDistributed(Conv2D(64, (3, 3), activation="relu"))(x)
    return TimeDistributed(Flatten())(x)


def get_hegemax_model(seq_length, print_summary=True):
    """Two-branch driving model: one conv+LSTM head predicts steering, the
    other predicts throttle and brake.

    The original duplicated the conv tower inline; it is now built by
    ``_hegemax_conv_branch``. Two calls still create two independent towers
    with fresh layers, exactly as before.

    :param seq_length: number of frames per input sequence
    :param print_summary: when True, print the Keras model summary
    :return: Model mapping [image, info, hlc] inputs to
        [steer_pred, throttle_pred, brake_pred]
    """
    forward_image_input = Input(shape=(seq_length, 160, 350, 3),
                                name="forward_image_input")
    info_input = Input(shape=(seq_length, 3), name="info_input")
    hlc_input = Input(shape=(seq_length, 6), name="hlc_input")

    # Steering branch.
    conv_output = _hegemax_conv_branch(forward_image_input)
    x = concatenate([conv_output, info_input, hlc_input])
    x = TimeDistributed(Dense(100, activation="relu"))(x)
    x = CuDNNLSTM(10, return_sequences=False)(x)
    # NOTE(review): 10 tanh units for a single steering signal looks unusual —
    # confirm downstream expectations before changing.
    steer_pred = Dense(10, activation="tanh", name="steer_pred")(x)

    # Throttle/brake branch (an independent tower, as in the original).
    conv_output = _hegemax_conv_branch(forward_image_input)
    x = concatenate([conv_output, info_input, hlc_input])
    x = TimeDistributed(Dense(100, activation="relu"))(x)
    x = CuDNNLSTM(10, return_sequences=False)(x)
    throtte_pred = Dense(1, name="throttle_pred")(x)
    brake_pred = Dense(1, name="brake_pred")(x)

    model = Model(inputs=[forward_image_input, info_input, hlc_input],
                  outputs=[steer_pred, throtte_pred, brake_pred])

    if print_summary:
        model.summary()

    return model
示例#8
0
def lstm_model(maxlen, wl_chars):
    """Stacked character-level LSTM (512→256→128→64) with a softmax over the charset."""
    net = Sequential()
    net.add(CuDNNLSTM(512, return_sequences=True,
                      input_shape=(maxlen, len(wl_chars))))
    net.add(Dropout(0.2))
    # Middle layers keep returning sequences so the stack stays recurrent.
    for units in (256, 128):
        net.add(CuDNNLSTM(units, return_sequences=True))
        net.add(Dropout(0.2))
    # Final recurrent layer collapses the sequence to a vector.
    net.add(CuDNNLSTM(64))
    net.add(Dropout(0.2))
    net.add(Dense(len(wl_chars), activation='softmax'))
    return net
示例#9
0
    def buildModel(self, model_path=None):
        """Load and retrain an existing model if possible, otherwise train a new one.

        Falls back to building a fresh stacked-LSTM network when loading
        ``model_path`` fails. Training history is stored on ``self.history``,
        the model on ``self.model``, and the validation loss is written to CSV
        via ``self._write_val_loss_to_csv``.

        :param model_path: path to a saved .h5 model; defaults to
            './model_tensorboard_2.h5'
        """
        try:
            if model_path is None:
                model_path = './model_tensorboard_2.h5'
            mymodel = load_model(model_path)
            print('retrain model...........')
            history = mymodel.fit(self.x_train, self.y_train, batch_size=50, epochs=500, verbose=0, validation_split=0.2, callbacks=[TensorBoard('./logs2')])
            self.history = history.history
            mymodel.save('./model_tensorboard_2.h5')
            self.model = mymodel
            self._write_val_loss_to_csv()
        except Exception:
            # BUGFIX: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt; catching Exception keeps the intended
            # "train from scratch" fallback while letting real interrupts
            # propagate.
            print('train new model.........')
            start = datetime.datetime.now()
            mymodel = Sequential()
            mymodel.add(CuDNNLSTM(50, input_shape=(20, 1), return_sequences=True))
            mymodel.add(Activation('sigmoid'))
            mymodel.add(BatchNormalization())
            mymodel.add(Dropout(0.2))

            mymodel.add(CuDNNLSTM(100, return_sequences=True))
            mymodel.add(Activation('sigmoid'))
            mymodel.add(BatchNormalization())
            mymodel.add(Dropout(0.2))

            mymodel.add(CuDNNLSTM(100))
            mymodel.add(Activation('tanh'))
            mymodel.add(BatchNormalization())
            mymodel.add(Dropout(0.2))

            mymodel.add(Dense(50, activation='sigmoid'))
            mymodel.add(BatchNormalization())
            mymodel.add(Dropout(0.2))

            mymodel.add(Dense(20, activation='sigmoid'))
            mymodel.add(BatchNormalization())
            mymodel.add(Dropout(0.2))

            mymodel.add(Dense(22, activation='relu'))

            mymodel.compile('adam', 'mae', metrics=['mae'])
            print(mymodel.summary())
            self.model = mymodel
            history = mymodel.fit(self.x_train, self.y_train, batch_size=50, epochs=3000, verbose=2, validation_split=0.2, callbacks=[TensorBoard()])
            self.history = history.history
            mymodel.save('./model_tensorboard_2.h5')
            end = datetime.datetime.now()
            print('耗时',end-start)
            self._write_val_loss_to_csv()
示例#10
0
def build_train_rnn(x_train,
                    x_test,
                    y_train,
                    y_test,
                    epochs=250,
                    batch_size=64):
    """Build a three-layer LSTM classifier, train it, and return the fit history.

    :param x_train: 3-D training sequences; shape[1:] sets the model input shape
    :param x_test: validation sequences
    :param y_train: one-hot training labels; shape[1] sets the softmax width
    :param y_test: validation labels
    :param epochs: training epochs
    :param batch_size: minibatch size
    :return: the Keras History object from fit()
    """
    # Drop any stale Keras graph/session state before building.
    clear_session()
    classifier = tf.keras.Sequential()
    classifier.add(
        CuDNNLSTM(units=64,
                  return_sequences=True,
                  input_shape=(x_train.shape[1:]),
                  kernel_initializer='random_uniform',
                  kernel_regularizer=tf.keras.regularizers.l2(l=1e-4)))
    classifier.add(
        tf.keras.layers.Dropout(0.2)
    )  # ignore 20% of the neurons in both forward and backward propagation
    classifier.add(
        CuDNNLSTM(units=64,
                  return_sequences=True,
                  kernel_initializer='random_uniform',
                  kernel_regularizer=tf.keras.regularizers.l2(l=1e-4)))
    classifier.add(
        tf.keras.layers.Dropout(0.2)
    )  # ignore 20% of the neurons in both forward and backward propagation
    classifier.add(
        CuDNNLSTM(units=64,
                  return_sequences=False,
                  kernel_initializer='random_uniform',
                  kernel_regularizer=tf.keras.regularizers.l2(l=1e-4)))
    classifier.add(tf.keras.layers.Dropout(0.2))
    classifier.add(
        tf.keras.layers.Dense(units=128, kernel_initializer='random_uniform'))
    classifier.add(tf.keras.layers.Dropout(0.2))
    classifier.add(
        tf.keras.layers.Dense(units=y_train.shape[1],
                              activation='softmax',
                              kernel_initializer='random_uniform'))
    adam = tf.keras.optimizers.Adam(lr=1e-4, decay=1e-7)
    classifier.compile(optimizer=adam,
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])
    history = classifier.fit(x=x_train,
                             y=y_train,
                             validation_data=(x_test, y_test),
                             epochs=epochs,
                             batch_size=batch_size)
    return history
示例#11
0
def trans_net(inputs, masks, hidden_unit=128):
    """Transformer-style block (pre-norm multi-head attention + feed-forward)
    combined with a BiLSTM branch, merged by a three-way residual add and a
    final LayerNormalization.

    :param inputs: sequence tensor
    :param masks: attention mask passed to MultiHeadAttention
    :param hidden_unit: feed-forward hidden size and BiLSTM unit count
    :return: the normalized merged tensor
    """
    inputs = tf.keras.layers.Dropout(0.3)(inputs)
    # Kernel-size-1 conv mixes channels while keeping the feature width.
    encodings = tf.keras.layers.Conv1D(filters=inputs.shape[-1],
                                       kernel_size=1,
                                       padding='same',
                                       activation='relu')(inputs)
    # Transformer tunnel (currently a single iteration).
    for i in range(1):
        # Pre-norm.
        encodings = LayerNormalization()(encodings)
        # Masked multi-head attention (8 heads).
        masked_attention_out = MultiHeadAttention(8, encodings.shape[-1] // 8)(
            [encodings, encodings, encodings, masks])
        # Add (residual).
        masked_attention_out = masked_attention_out + encodings
        # Position-wise feed-forward.
        ff = PositionWiseFeedForward(encodings.shape[-1], hidden_unit)
        ff_out = ff(masked_attention_out)
    # BiLSTM branch. NOTE(review): it consumes `encodings` (pre-attention),
    # not `ff_out` — confirm this is intentional.
    x = tf.keras.layers.Bidirectional(
        CuDNNLSTM(hidden_unit, return_sequences=True))(encodings)
    # Linear projection back to the encoding width.
    x = tf.keras.layers.Conv1D(filters=encodings.shape[-1],
                               kernel_size=1,
                               padding='same',
                               activation='relu')(x)
    # Three-way Add & Norm.
    x = x + masked_attention_out + ff_out
    x = LayerNormalization()(x)
    return x
    def get_bidirectional_cudnn_model(self, pre_embeddings, dp_rate=-1.0, use_lstm=False):
        """
        cudnn provided versions, should be much faster
        :param pre_embeddings: pretrained embedding matrix (vocab x dim)
        :param dp_rate: dropout rate applied after attention when > 0
        :param use_lstm: utilize LSTM or GRU unit
        :return: the model, plus a backend function exposing attention weights
        """
        # Embedding part can try multichannel as same as origin paper
        embedding_layer = Embedding(self.max_features,  # vocabulary size
                                    self.embedding_dims,  # word-vector dimension
                                    weights=[pre_embeddings],  # pretrained word vectors
                                    input_length=self.maxlen,  # max sentence length
                                    trainable=False  # whether to update embeddings during training
                                    )
        input = Input((self.maxlen,))
        embedding = embedding_layer(input)
        if use_lstm:
            x = Bidirectional(CuDNNLSTM(RNN_DIM, return_sequences=True))(embedding)  # LSTM
        else:
            x = Bidirectional(CuDNNGRU(RNN_DIM, return_sequences=True))(embedding)  # GRU

        # add none or one of the following attention layers
        x, atten_layer = self.do_attention(x)
        # backend function to fetch the attention weights for a given input
        fn = kb.function([input], [atten_layer.att_weights])

        if dp_rate > 0:
            # optional dropout before the classifier
            x = Dropout(dp_rate)(x)

        output = Dense(self.class_num, activation=self.last_activation)(x)
        model = Model(inputs=input, outputs=output)

        return model, fn
示例#13
0
def LSTM_net_AZ_split_lstm(emb_layer, n_unit=128):
    """BiLSTM over the embeddings, followed by parallel Conv1D feature filters
    whose pooled outputs are concatenated with a global max-pool of the BiLSTM."""
    seq = keras.layers.Bidirectional(
        CuDNNLSTM(n_unit, return_sequences=True))(emb_layer)

    # Parallel conv filters: widening kernels with narrowing filter counts.
    conv_outs = []
    for n_filters, k_size in ((128, 2), (64, 4), (32, 8)):
        conv_outs.append(
            keras.layers.Conv1D(filters=n_filters,
                                kernel_size=k_size,
                                padding='same',
                                activation='relu')(seq))

    pooled = [
        keras.layers.GlobalMaxPooling1D()(seq),
        keras.layers.GlobalAveragePooling1D()(conv_outs[0]),
        keras.layers.GlobalAveragePooling1D()(conv_outs[1]),
        keras.layers.GlobalMaxPooling1D()(conv_outs[2]),
    ]
    # Residual-style concatenation of all pooled branches.
    return keras.layers.concatenate(pooled)
示例#14
0
 def model(embedding_size, n_a):
     """Bidirectional-LSTM binary classifier over embedded sequences."""
     # Functional API requires explicit inputs, just like any function.
     seq_in = Input(batch_shape=(batch_size, None, embedding_size))
     hidden = Bidirectional(CuDNNLSTM(units=n_a, return_sequences=True))(seq_in)
     features = Dense(16, activation="tanh")(hidden)
     yhat = Dense(1, activation="sigmoid")(features)
     return Model(inputs=seq_in, outputs=yhat)
示例#15
0
def build_model():
    """Single-LSTM next-character model compiled with RMSprop."""
    net = tf.keras.models.Sequential([
        CuDNNLSTM(128, input_shape=(maxlen, len(chars))),
        Dense(len(chars), activation='softmax'),
    ])
    net.compile(loss='categorical_crossentropy',
                optimizer=tf.keras.optimizers.RMSprop(lr=0.01))
    return net
示例#16
0
    def get_cnn_rnn_model(self, pre_embeddings, dp_rate=0.0, use_lstm=False, filter_sizes=[2, 3, 4]):
        """
        first CNN to generate a vector, then apply RNN on the vector
        :param pre_embeddings: pretrained embedding matrix
        :param dp_rate: drop out rate
        :param use_lstm: utilize LSTM or GRU unit
        :return: the model
        """
        # Frozen pretrained embeddings (multichannel as in the original paper is an option).
        word_emb = Embedding(self.max_features,         # vocabulary size
                             self.embedding_dims,       # word-vector dimension
                             weights=[pre_embeddings],  # pretrained word vectors
                             input_length=self.maxlen,  # max sentence length
                             trainable=False)           # keep embeddings fixed

        seq_input = Input((self.maxlen,))
        embedded = word_emb(seq_input)

        # Convolution + pooling front-end.
        conv_out = Conv1D(NUM_FILTERS, 3, padding='valid', activation='relu')(embedded)
        pooled = MaxPooling1D()(conv_out)

        if dp_rate > 0:
            # Optional dropout between the CNN and the RNN.
            pooled = Dropout(dp_rate)(pooled)

        # The recurrent unit collapses the pooled sequence to a vector.
        rnn_cls = CuDNNLSTM if use_lstm else CuDNNGRU
        features = rnn_cls(RNN_DIM)(pooled)

        prediction = Dense(self.class_num, activation=self.last_activation)(features)
        return Model(inputs=seq_input, outputs=prediction)
示例#17
0
    def get_cudnn_version_model(self, pre_embeddings, dp_rate=-1.0, use_lstm=False):
        """
        cudnn provided versions, should be much faster
        :param pre_embeddings: pretrained embedding matrix
        :param dp_rate: dropout rate applied before the classifier when > 0
        :param use_lstm: utilize LSTM or GRU unit
        :return: the model
        """
        # Frozen pretrained embeddings (multichannel as in the original paper is an option).
        word_emb = Embedding(self.max_features,         # vocabulary size
                             self.embedding_dims,       # word-vector dimension
                             weights=[pre_embeddings],  # pretrained word vectors
                             input_length=self.maxlen,  # max sentence length
                             trainable=False)           # keep embeddings fixed

        seq_input = Input((self.maxlen,))
        embedded = word_emb(seq_input)

        # A single recurrent unit reduces the sequence to its final state.
        rnn_cls = CuDNNLSTM if use_lstm else CuDNNGRU
        features = rnn_cls(RNN_DIM)(embedded)

        if dp_rate > 0:
            features = Dropout(dp_rate)(features)

        prediction = Dense(self.class_num, activation=self.last_activation)(features)
        return Model(inputs=seq_input, outputs=prediction)
示例#18
0
    def init_model(self, input_shape, num_classes, **kwargs):
        """Build and compile a BiLSTM + self-attention classifier onto self._model."""
        net_in = Input(shape=input_shape)
        x = Bidirectional(CuDNNLSTM(64,
                                    name='blstm_1',
                                    return_sequences=True),
                          merge_mode='concat')(net_in)
        x = Activation('tanh')(x)
        x = SpatialDropout1D(0.5)(x)
        # Self-attention (8 heads, size 16) over the dropped-out sequence.
        x = Attention(8, 16)([x, x, x])
        x = GlobalMaxPool1D()(x)
        x = Dropout(rate=0.5)(x)
        x = Dense(units=256, activation='relu')(x)
        net_out = Dense(units=num_classes, activation='softmax')(x)

        model = TFModel(inputs=net_in, outputs=net_out)
        optimizer = optimizers.Adam(
            lr=1e-3,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08,
            decay=0.0002,
            amsgrad=True)
        model.compile(optimizer=optimizer,
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        model.summary()
        self._model = model
        self.is_init = True
示例#19
0
def Transformer_net_AZ_trans_lstm(inputs, hidden_unit=512):
    """Transformer-style block (un-masked multi-head attention + feed-forward)
    plus a BiLSTM branch, merged by residual adds and a final LayerNormalization.

    NOTE(review): `.value` on shape dimensions is TF1-style Dimension access.
    """
    # SpatialDropout1D drops whole feature channels.
    inputs = keras.layers.SpatialDropout1D(0.3)(inputs)
    # Kernel-size-1 conv as a channel-mixing feature filter.
    encodings = keras.layers.Conv1D(filters=inputs.shape[-1].value,
                                    kernel_size=1,
                                    padding='same',
                                    activation='relu')(inputs)
    # Transformer tunnel (currently a single iteration).
    for i in range(1):
        # Pre-norm.
        encodings = LayerNormalization()(encodings)
        # Multi-head attention (8 heads, masking disabled).
        masked_attention_out = MultiHeadAttention(8, encodings.shape[-1].value // 8, masking=False,
                                                  _masking_num=-2 ** 32 - 1, masking_type='NOMASK') \
            ([encodings, encodings, encodings])  # no mask attention
        # Add (residual).
        masked_attention_out = masked_attention_out + encodings
        # Position-wise feed-forward.
        ff = PositionWiseFeedForward(encodings.shape[-1].value, hidden_unit)
        ff_out = ff(masked_attention_out)
    # BiLSTM branch over the (pre-attention) encodings.
    x = keras.layers.Bidirectional(CuDNNLSTM(256,
                                             return_sequences=True))(encodings)
    # Linear projection back to the encoding width.
    x = keras.layers.Conv1D(filters=encodings.shape[-1].value,
                            kernel_size=1,
                            padding='same',
                            activation='relu')(x)
    # Add & Norm with both residual branches.
    x = x + masked_attention_out
    x = x + ff_out
    x = LayerNormalization()(x)
    return x
示例#20
0
    def init_model(self, input_shape, num_classes, **kwargs):
        """Build and compile a BiLSTM + attention classifier with dual
        (max + average) pooling onto self._model."""
        net_in = Input(shape=input_shape)
        seq = Bidirectional(CuDNNLSTM(96, name='blstm1', return_sequences=True),
                            merge_mode='concat')(net_in)
        seq = SpatialDropout1D(0.1)(seq)
        # Self-attention (8 heads, size 16).
        seq = Attention(8, 16)([seq, seq, seq])
        max_pool = GlobalMaxPool1D()(seq)
        avg_pool = GlobalAvgPool1D()(seq)
        merged = Concatenate(axis=-1)([max_pool, avg_pool])
        h = Dense(units=128, activation='elu')(merged)
        h = Dense(units=64, activation='elu')(h)
        h = Dropout(rate=0.4)(h)
        net_out = Dense(units=num_classes, activation='softmax')(h)

        model = TFModel(inputs=net_in, outputs=net_out)
        optimizer = optimizers.Adam(
            lr=1e-3,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08,
            decay=0.0002,
            amsgrad=True)
        model.compile(optimizer=optimizer,
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        model.summary()
        self._model = model
        self.is_init = True
示例#21
0
    def build_discriminator(self):
        """Build the LSTM-based GAN discriminator: a sequence in, a validity score out."""
        net = Sequential()
        net.add(CuDNNLSTM(512, input_shape=self.seq_shape, return_sequences=True))
        net.add(Bidirectional(CuDNNLSTM(512)))
        net.add(Dense(512))
        net.add(LeakyReLU(alpha=0.2))
        net.add(Dense(256))
        net.add(LeakyReLU(alpha=0.2))
        net.add(Dense(1, activation='sigmoid'))
        net.summary()

        # Wrap the Sequential stack into a functional Model over an explicit input.
        seq = Input(shape=self.seq_shape)
        validity = net(seq)

        return Model(seq, validity)
示例#22
0
def build_LSTM_model(max_features=10000):
    """Binary classifier: embedding -> LSTM(32) -> sigmoid, compiled with rmsprop."""
    net = Sequential([
        Embedding(max_features, 32),
        CuDNNLSTM(32),
        Dense(1, activation='sigmoid'),
    ])
    net.compile(optimizer='rmsprop',
                loss='binary_crossentropy',
                metrics=['acc'])
    return net
示例#23
0
def build_model_chars(num_chars, lstm_units=256, optimiser='adam'):
    """
    Build a character-level seq2seq model with attention, plus separate
    encoder/decoder models for inference.

    encoder_input will be the large matrix with all the information
    Outputs from encoder LSTM are: encoder_outputs: hidden_state at every timestep; state_h: final hidden state;
    state_c: final cell state
    decoder_input will be matrix with each letter of output
    Initial state for decoder lstm is encoder states

    :param num_chars: size of the character vocabulary (one-hot width)
    :param lstm_units: hidden size of both the encoder and decoder LSTMs
    :param optimiser: optimizer identifier passed to compile()
    :return: (training model, encoder inference model, decoder inference model)
    """
    encoder_input = Input(shape=(None, num_chars), name='encoder_input')
    encoder_outputs, state_h, state_c = CuDNNLSTM(lstm_units, return_sequences=True, return_state=True,
                                                  name="encoder_lstm")(encoder_input)
    encoder_states = [state_h, state_c]

    decoder_input = Input(shape=(None, num_chars), name='decoder_input')
    decoder_lstm = CuDNNLSTM(lstm_units, return_sequences=True, return_state=True, name="decoder_lstm")
    decoder_outputs, _, _ = decoder_lstm(decoder_input, initial_state=encoder_states)
    # Attention over encoder outputs, queried by the decoder outputs.
    attention = Attention(name='attention')
    atten_output = attention([decoder_outputs, encoder_outputs])
    # set_shape restores static shape information lost by the attention layer.
    atten_output.set_shape([None, None, lstm_units])
    concat = Concatenate()
    concat_output = concat([decoder_outputs, atten_output])
    decoder_dense = Dense(num_chars, input_shape=[None, None, lstm_units], activation='softmax',
                          name='softmax_output')
    decoder_output = decoder_dense(concat_output)

    # Training model: teacher-forced decoder input alongside the encoder input.
    model = Model([encoder_input, decoder_input], decoder_output)
    model.compile(optimizer=optimiser, loss='categorical_crossentropy')

    # Inference encoder: exposes final states and per-timestep outputs.
    encoder_info = [state_h, state_c, encoder_outputs]
    encoder_model = Model(encoder_input, encoder_info)

    # Inference decoder: steps from externally supplied states, reusing the
    # SAME lstm/attention/concat/dense layers as the training model.
    decoder_state_input_h = Input(shape=(lstm_units,))
    decoder_state_input_c = Input(shape=(lstm_units,))
    decoder_state_input_enc = Input(shape=[None, lstm_units])
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    decoder_outputs, state_h, state_c = decoder_lstm(decoder_input, initial_state=decoder_states_inputs)
    dec_attention = attention([decoder_outputs, decoder_state_input_enc])
    concat_output = concat([decoder_outputs, dec_attention])
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(concat_output)
    dec_state_inputs = [decoder_state_input_h, decoder_state_input_c, decoder_state_input_enc]
    decoder_model = Model([decoder_input] + dec_state_inputs, [decoder_outputs] + decoder_states)

    return model, encoder_model, decoder_model
示例#24
0
def create_vae(input_timesteps, latent_dim, reparameterize_layer):
    """Build the sequence-VAE graph: BiLSTM encoder -> per-timestep latent
    parameters -> sampled latent -> observation, reward and next-state heads.

    :param input_timesteps: number of timesteps in each input window
    :param latent_dim: latent size; the latent_state layer emits 2*latent_dim
        values per timestep (presumably mean/log-variance pairs — confirm
        against reparameterize_layer)
    :param reparameterize_layer: callable applied inside a Lambda to sample
        the latent from its parameters
    :return: Model mapping timestep_data to [observations, rewards,
        next_state, latent_state, latent_sample]
    """
    # Create NN Graph
    timestep_data = Input(shape=(input_timesteps, 5), name='timestep_data')
    # The action is sliced out as the last channel of the timestep data.
    # action_data = Input(shape=(input_timesteps, 1), name='action_data')
    action_data = Lambda(lambda x: x[:, :, -1:],
                         output_shape=(1, ),
                         name='action_data')(timestep_data)

    #combined_data = Concatenate()([timestep_data, action_data])

    rnn_state = Bidirectional(CuDNNLSTM(units=10,
                                        input_shape=(input_timesteps, 5),
                                        return_sequences=True),
                              name='rnn_state')(timestep_data)
    # Zero-initialized, norm-constrained projection to latent parameters.
    latent_state = TimeDistributed(Dense(
        units=latent_dim * 2,
        bias_initializer=K.initializers.zeros(),
        kernel_initializer=K.initializers.zeros(),
        kernel_constraint=K.constraints.max_norm(0.5)),
                                   name='latent_state')(rnn_state)
    latent_sample = TimeDistributed(Lambda(reparameterize_layer,
                                           output_shape=(latent_dim, )),
                                    name='latent_sample')(latent_state)

    # Observation decoder head (outputs 3 * 2 values per timestep).
    observations = TimeDistributed(Dense(units=100, activation='relu'),
                                   name='obs1')(latent_sample)
    observations = TimeDistributed(Dense(units=100, activation='relu'),
                                   name='obs2')(observations)
    observations = TimeDistributed(Dense(units=100, activation='relu'),
                                   name='obs3')(observations)
    observations = TimeDistributed(Dense(units=3 * 2),
                                   name='obs_out')(observations)

    # Reward decoder head (outputs 1 * 2 values per timestep).
    rewards = TimeDistributed(Dense(units=100, activation='relu'),
                              name='rew1')(latent_sample)
    rewards = TimeDistributed(Dense(units=100, activation='relu'),
                              name='rew2')(rewards)
    rewards = TimeDistributed(Dense(units=100, activation='relu'),
                              name='rew3')(rewards)
    rewards = TimeDistributed(Dense(units=1 * 2), name='rew_out')(rewards)

    # Transition head: predicts next latent parameters from (latent, action).
    state_action = Concatenate()([latent_sample, action_data])
    next_state = TimeDistributed(Dense(units=100, activation='relu'),
                                 name='ns1')(state_action)
    #next_state = TimeDistributed(Dense(units=100, activation='relu'), name='ns2')(next_state)
    #next_state = TimeDistributed(Dense(units=100, activation='relu'), name='ns3')(next_state)
    next_state = TimeDistributed(Dense(units=latent_dim * 2),
                                 name='next_state')(next_state)

    # Create VAE Model
    return Model(inputs=timestep_data,
                 outputs=[
                     observations, rewards, next_state, latent_state,
                     latent_sample
                 ])
def get_model(hyperparameters, predictors, targets):
    """Build and compile a stacked CuDNNLSTM sequence-to-sequence regressor.

    Args:
        hyperparameters: dict with 'learning_rate',
            'input_sequence_length' and 'output_sequence_length'.
        predictors: iterable of input feature names (only its length is used).
        targets: iterable of target names (only its length is used).

    Returns:
        A compiled Keras ``Sequential`` model mapping
        (input_sequence_length, n_predictors) inputs to
        (output_sequence_length, n_targets) outputs, trained with MAE.
    """
    n_features = len(predictors)
    n_targets = len(targets)
    in_steps = hyperparameters['input_sequence_length']
    out_steps = hyperparameters['output_sequence_length']

    weight_penalty = l2(0.01)
    net = Sequential()

    # Recurrent encoder stack: 30 -> 20 -> 10 units.  Only the first layer
    # declares an input shape; only the last collapses the time dimension.
    stack = (30, 20, 10)
    for idx, units in enumerate(stack):
        lstm_kwargs = {
            'units': units,
            'kernel_regularizer': weight_penalty,
            'return_sequences': idx < len(stack) - 1,
        }
        if idx == 0:
            lstm_kwargs['input_shape'] = (in_steps, n_features)
        net.add(CuDNNLSTM(**lstm_kwargs))
        # Tiny additive noise acts as a mild regularizer between layers.
        net.add(GaussianNoise(1e-4))
        net.add(BatchNormalization())

    # Project to the flattened forecast, then restore (steps, targets) shape.
    net.add(Dense(out_steps * n_targets, activation='relu'))
    net.add(Reshape((out_steps, n_targets)))

    net.compile(optimizer=Adam(lr=hyperparameters['learning_rate']),
                loss='mean_absolute_error')

    return net
def create_model(num_frame, num_joint, num_output):
    """Build an (uncompiled) LSTM classifier over joint-coordinate sequences.

    Args:
        num_frame: number of time steps per sample.
        num_joint: feature dimension (joint values) per frame.
        num_output: number of target classes.

    Returns:
        A Keras ``Sequential`` model ending in a softmax layer.
    """
    # Dropout gives the model more chances to learn several independent
    # representations.  (Translated from the original Chinese note.)
    return Sequential([
        CuDNNLSTM(50,
                  input_shape=(num_frame, num_joint),
                  return_sequences=False),
        Dropout(0.4),
        Dense(60),
        Dropout(0.4),
        Dense(num_output, activation='softmax'),
    ])
示例#27
0
 def init_model(self, input_shape, num_classes, **kwargs):
     """Build and compile an LSTM + self-attention sequence classifier.

     Stores the compiled model on ``self._model`` and sets
     ``self.is_init = True``; returns nothing.

     Args:
         input_shape: sequence input shape; ``input_shape[0]`` is treated
             as the sequence length.
         num_classes: number of output classes; also selects the pooling
             branch (k-max pooling for >= 20 classes, global max otherwise).
         **kwargs: unused.
     """
     inputs = Input(shape=input_shape)
     sequence_len = input_shape[0]
     # Choose the LSTM width closest to the sequence length from a fixed
     # menu of power-of-two sizes.
     lstm_units_array = np.array([32, 64, 128, 256, 512])
     lstm_units = lstm_units_array[np.argmin(
         np.abs(lstm_units_array - sequence_len))]
     lstm_1 = CuDNNLSTM(lstm_units, return_sequences=True)(inputs)
     # CuDNNLSTM has no activation argument, so tanh is applied separately.
     activation_1 = Activation('tanh')(lstm_1)
     if num_classes >= 20:
         if num_classes < 30:
             dropout1 = SpatialDropout1D(0.5)(activation_1)
             attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
         else:
             # No dropout for very many classes — keep more information.
             attention_1 = Attention(
                 8, 16)([activation_1, activation_1, activation_1])
         # k-max pooling: keep the top-k activations per channel over time.
         # NOTE(review): the reshape hard-codes 128 channels — presumably
         # Attention(8, 16) yields 8*16 = 128 output dims regardless of
         # lstm_units; confirm against the Attention implementation.
         k_num = 10
         kmaxpool_l = Lambda(lambda x: tf.reshape(tf.nn.top_k(
             tf.transpose(x, [0, 2, 1]), k=k_num, sorted=True)[0],
                                                  shape=[-1, k_num, 128]))(
                                                      attention_1)
         flatten = Flatten()(kmaxpool_l)
         dropout2 = Dropout(rate=0.5)(flatten)
     else:
         # Few classes: self-attention then global max pooling over time.
         dropout1 = SpatialDropout1D(0.5)(activation_1)
         attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
         pool_l = GlobalMaxPool1D()(attention_1)
         dropout2 = Dropout(rate=0.5)(pool_l)
     dense_1 = Dense(units=256, activation='relu')(dropout2)
     #         dense_1 = Dense(units=256, activation='softplus',kernel_regularizer=regularizers.l2(0.01),
     #                        activity_regularizer=regularizers.l1(0.01))(dropout2)
     #dense_1 = DropConnect(Dense(units=256, activation='softplus'), prob=0.5)(dropout2)
     outputs = Dense(units=num_classes, activation='softmax')(dense_1)
     # Label smoothing (0.2) softens one-hot targets to reduce overconfidence.
     loss_fun = CategoricalCrossentropy(label_smoothing=0.2)
     model = TFModel(inputs=inputs, outputs=outputs)
     optimizer = optimizers.Nadam(lr=0.002,
                                  beta_1=0.9,
                                  beta_2=0.999,
                                  epsilon=None,
                                  schedule_decay=0.004)
     model.compile(
         optimizer=optimizer,
         loss=loss_fun,
         #loss="sparse_categorical_crossentropy",
         metrics=['accuracy'])
     model.summary()
     self._model = model
     self.is_init = True
示例#28
0
def Create_pretrained_model(dim, n_sequence, n_channels, n_output):
    """Build and compile a video classifier on top of pretrained MobileNetV2.

    Each frame is embedded by an ImageNet-pretrained MobileNetV2 (headless),
    globally average-pooled, and the per-frame features are summarized by a
    CuDNNLSTM before a small dense classification head.

    Args:
        dim: spatial frame size, e.g. ``(height, width)``.
        n_sequence: number of frames per clip.
        n_channels: channels per frame.
        n_output: number of target classes.

    Returns:
        A compiled Keras ``Sequential`` model (SGD, sparse categorical
        cross-entropy, accuracy metric).
    """
    frame_encoder = TimeDistributed(
        MobileNetV2(weights='imagenet', include_top=False),
        input_shape=(n_sequence, *dim, n_channels))

    net = Sequential([
        frame_encoder,
        TimeDistributed(GlobalAveragePooling2D()),
        CuDNNLSTM(64, return_sequences=False),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(24, activation='relu'),
        Dropout(0.5),
        Dense(n_output, activation='softmax'),
    ])

    net.compile(optimizer='sgd',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])

    return net
示例#29
0
    def init_model(self,
                   input_shape,
                   num_classes,
                   **kwargs):
        inputs = Input(shape=input_shape)
        # bnorm_1 = BatchNormalization(axis=2)(inputs)
        sequence_len = input_shape[0]
        lstm_units_array = np.array([32, 64, 128, 256, 512])
        lstm_units = lstm_units_array[np.argmin(np.abs(lstm_units_array-sequence_len))]
        lstm_1 = Bidirectional(CuDNNLSTM(lstm_units, name='blstm_1',
                                         return_sequences=True),
                               merge_mode='concat')(inputs)
        activation_1 = Activation('tanh')(lstm_1)
        dropout1 = SpatialDropout1D(0.5)(activation_1)
        if lstm_units <=128:
            attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
        else:
            attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
        pool_1 = GlobalMaxPool1D()(attention_1)
        dropout2 = Dropout(rate=0.5)(pool_1)
        dense_1 = Dense(units=256, activation='relu')(dropout2)
#         dense_1 = Dense(units=256, activation='relu',kernel_regularizer=regularizers.l2(0.01),
#                        activity_regularizer=regularizers.l1(0.01))(dropout2)
        #dense_1 = DropConnect(Dense(units=256, activation='relu'), prob=0.5)(dropout2)
        outputs = Dense(units=num_classes, activation='softmax')(dense_1)

        model = TFModel(inputs=inputs, outputs=outputs)
        loss_fun = CategoricalCrossentropy(label_smoothing=0.2)
        optimizer = optimizers.Adam(
            # learning_rate=1e-3,
            lr=1e-3,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08,
            decay=0.0002,
            amsgrad=True)
        model.compile(
            optimizer=optimizer,
            loss=loss_fun,
            #loss="sparse_categorical_crossentropy",
            metrics=['accuracy'])
        model.summary()
        self._model = model
        self.is_init = True
示例#30
0
    def init_model(self,
                   input_shape,
                   num_classes,
                   **kwargs):
        """Build and compile a fixed-width LSTM + self-attention classifier.

        Stores the compiled model on ``self._model`` and sets
        ``self.is_init = True``; returns nothing.

        Args:
            input_shape: sequence input shape.
            num_classes: number of output classes; also selects the pooling
                branch (k-max pooling for >= 20 classes, global max otherwise).
            **kwargs: unused.
        """
        inputs = Input(shape=input_shape)
        # CuDNNLSTM has no activation argument, so tanh is applied separately.
        lstm_1 = CuDNNLSTM(128, return_sequences=True)(inputs)
        activation_1 = Activation('tanh')(lstm_1)
        if num_classes >= 20:
            if num_classes < 30:
                dropout1 = SpatialDropout1D(0.5)(activation_1)
                attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
            # no dropout to get more infomation for classifying a large number
            # classes
            else:
                attention_1 = Attention(8, 16)(
                    [activation_1, activation_1, activation_1])
            # k-max pooling: keep the top-k activations per channel over time.
            # NOTE(review): the reshape hard-codes 128 channels — presumably
            # Attention(8, 16) yields 8*16 = 128 output dims; confirm against
            # the Attention implementation.
            k_num = 10
            kmaxpool_l = Lambda(
                lambda x: tf.reshape(tf.nn.top_k(tf.transpose(x, [0, 2, 1]), k=k_num, sorted=True)[0],
                                     shape=[-1, k_num, 128]))(attention_1)
            flatten = Flatten()(kmaxpool_l)
            dropout2 = Dropout(rate=0.5)(flatten)
        else:
            # Few classes: self-attention then global max pooling over time.
            dropout1 = SpatialDropout1D(0.5)(activation_1)
            attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
            pool_l = GlobalMaxPool1D()(attention_1)
            dropout2 = Dropout(rate=0.5)(pool_l)
        dense_1 = Dense(units=256, activation='softplus')(dropout2)
        outputs = Dense(units=num_classes, activation='softmax')(dense_1)

        model = TFModel(inputs=inputs, outputs=outputs)
        optimizer = optimizers.Nadam(
            lr=0.002,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=None,
            schedule_decay=0.004)
        # Sparse labels here (integer class ids), unlike the smoothed
        # categorical loss used by the sibling init_model variants.
        model.compile(
            optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])
        model.summary()
        self._model = model
        self.is_init = True