def rank_attention_lstm_model(batch_size, nb_epoch, hidden_dim):

    sequence = Input(shape=(maxlen,), dtype='int32')

    embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen, mask_zero=True,
                         weights=[W], trainable=False)(sequence)
    # embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen, weights=[W], trainable=False) (sequence)
    embedded = Dropout(0.25)(embedded)

    enc = Bidirectional(GRU(hidden_dim // 2, recurrent_dropout=0.25, return_sequences=True))(embedded)

    att = AttentionM()(enc)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)

    output = Dense(6, activation='softmax')(fc2_dropout)
    model = Model(inputs=sequence, outputs=output)

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc', f1])

    model.fit(X_train, y_train, validation_data=[X_dev, y_dev], batch_size=batch_size, epochs=nb_epoch, verbose=2)


    # model.save("weights_rank_attention" + num  + ".hdf5")
    y_pred = model.predict(X_test, batch_size=batch_size)

    return y_pred
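# All of the models in this file apply a custom AttentionM layer on top of the
# recurrent encoder, but its definition is not included here. The class below is
# a minimal stand-in sketch, assuming the common formulation in which a learned
# vector scores every timestep and the encoder outputs are combined as a
# softmax-weighted sum; the project's actual AttentionM may differ in detail.
from keras import backend as K
from keras.layers import Layer


class AttentionMSketch(Layer):
    """Attention over timesteps: (batch, time, features) -> (batch, features)."""

    def __init__(self, **kwargs):
        self.supports_masking = True
        super(AttentionMSketch, self).__init__(**kwargs)

    def build(self, input_shape):
        # One scoring weight per feature and one bias per timestep.
        self.W = self.add_weight(name='W', shape=(input_shape[-1], 1),
                                 initializer='glorot_uniform', trainable=True)
        self.b = self.add_weight(name='b', shape=(input_shape[1], 1),
                                 initializer='zeros', trainable=True)
        super(AttentionMSketch, self).build(input_shape)

    def call(self, x, mask=None):
        # et: (batch, time) unnormalized attention scores.
        et = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
        at = K.softmax(et)
        if mask is not None:
            # Zero out padded timesteps and renormalize the weights.
            at = at * K.cast(mask, K.floatx())
            at = at / (K.sum(at, axis=1, keepdims=True) + K.epsilon())
        # Weighted sum of the encoder outputs over time.
        return K.sum(x * K.expand_dims(at, axis=-1), axis=1)

    def compute_mask(self, inputs, mask=None):
        # The time dimension is collapsed, so no mask is propagated.
        return None

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])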
def attention_model(DROPOUT=0.25):
    sequence = Input(shape=(maxlen, ), dtype='int32')

    embedded = Embedding(input_dim=max_features,
                         output_dim=num_features,
                         input_length=maxlen,
                         mask_zero=True,
                         weights=[W],
                         trainable=False)(sequence)
    # embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen, weights=[W], trainable=False) (sequence)
    embedded = Dropout(0.25)(embedded)

    enc = Bidirectional(GRU(hidden_dim, dropout=DROPOUT,
                            return_sequences=True))(embedded)
    enc = Bidirectional(GRU(hidden_dim, dropout=DROPOUT,
                            return_sequences=True))(enc)

    att = AttentionM()(enc)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)

    output = Dense(3, activation='softmax')(fc2_dropout)

    model = Model(inputs=sequence, outputs=output)

    rmsprop = optimizers.rmsprop(lr=0.001)

    model.compile(loss='categorical_crossentropy',
                  optimizer=rmsprop,
                  metrics=['acc', f1])

    model.summary()
    return model
def buildModel(embeddingMatrix):
    """Constructs the architecture of the modelEMOTICONS_TOKEN[list_str[index]]
    Input:
        embeddingMatrix : The embedding matrix to be loaded in the embedding layer.
    Output:
        model : A bidirectional GRU model with attention
    """
    sequence = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embeddingLayer = Embedding(embeddingMatrix.shape[0],
                                EMBEDDING_DIM,
                                weights=[embeddingMatrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=False)(sequence)
    enc = Bidirectional(GRU(LSTM_DIM, dropout=DROPOUT, return_sequences=True))(embeddingLayer)
    enc = Bidirectional(GRU(LSTM_DIM, dropout=DROPOUT, return_sequences=True))(enc)
    att = AttentionM()(enc)
    fc1 = Dense(128, activation="relu")(att)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(4, activation='sigmoid')(fc2_dropout)
    model = Model(inputs=sequence, outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer=optimizers.adam(), metrics=['acc'])


    return model
def attention_lstm_model(r_dropout):
    sequence = Input(shape=(maxlen, ), dtype='int32')

    embedded = Embedding(input_dim=max_features,
                         output_dim=num_features,
                         input_length=maxlen,
                         mask_zero=True,
                         weights=[W],
                         trainable=False)(sequence)

    embedded = Dropout(0.25)(embedded)

    enc = Bidirectional(
        GRU(hidden_dim // 2,
            recurrent_dropout=r_dropout,
            return_sequences=True))(embedded)

    att = AttentionM()(enc)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)

    output = Dense(6, activation='softmax')(fc2_dropout)
    model = Model(inputs=sequence, outputs=output)

    # checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc', verbose=1, save_best_only=True)
    # early_stopping = EarlyStopping(monitor="val_loss", patience=14, verbose=1)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc', f1])
    model.summary()
    return model
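# The compile() calls in this file pass an f1 metric that is defined elsewhere
# in the project. The function below is a sketch of one common batchwise
# definition (Keras evaluates it per batch, so it only approximates the true F1
# over the whole dataset); the project's own f1 may be implemented differently.
from keras import backend as K


def f1_sketch(y_true, y_pred):
    """Batchwise F1 for one-hot targets and softmax/sigmoid predictions."""
    y_pred_pos = K.round(K.clip(y_pred, 0, 1))
    true_pos = K.sum(K.round(K.clip(y_true * y_pred_pos, 0, 1)))
    predicted_pos = K.sum(y_pred_pos)
    actual_pos = K.sum(K.round(K.clip(y_true, 0, 1)))
    precision = true_pos / (predicted_pos + K.epsilon())
    recall = true_pos / (actual_pos + K.epsilon())
    return 2 * precision * recall / (precision + recall + K.epsilon())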
def attention_elmo(batch_size=128, nb_epoch=35, hidden_dim=80):
    sequence = Input(shape=(maxlen2,), dtype='int32')

    embedded = Embedding(input_dim=W2.shape[0], output_dim=W2.shape[1], input_length=maxlen2, mask_zero=True,
                         weights=[W2], trainable=False)(sequence)

    embedded = Dropout(0.25)(embedded)

    enc = Bidirectional(GRU(hidden_dim, dropout=0.35, return_sequences=True))(embedded)
    enc = Bidirectional(GRU(hidden_dim, dropout=0.35, return_sequences=True))(enc)

    att = AttentionM()(enc)


    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)

    output = Dense(3, activation='softmax')(fc2_dropout)

    model = Model(inputs=sequence, outputs=output)

    rmsprop = optimizers.rmsprop(lr=0.001)

    model.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=['acc', f1])

    class_weight = {0: 1, 1: 2, 2: 6}

    train_num, test_num = X_train2.shape[0], X_test2.shape[0]
    # train_num, test_num = X_train2.shape[0], X_dev2.shape[0]
    num1 = y_train2.shape[1]

    second_level_train_set = np.zeros((train_num, num1))

    second_level_test_set = np.zeros((test_num, num1))

    test_nfolds_sets = []

    # kf = KFold(n_splits = 2)
    kf = KFold(n_splits=5)

    for i, (train_index, test_index) in enumerate(kf.split(X_train2)):
        x_tra, y_tra = X_train2[train_index], y_train2[train_index]

        x_tst, y_tst = X_train2[test_index], y_train2[test_index]

        model.fit(x_tra, y_tra, validation_data=[x_tst, y_tst], batch_size=batch_size, epochs=nb_epoch, verbose=2,
                  class_weight=class_weight)

        second_level_train_set[test_index] = model.predict(x_tst, batch_size=batch_size)

        test_nfolds_sets.append(model.predict(X_test2))
        # test_nfolds_sets.append(model.predict(X_dev2))
    for item in test_nfolds_sets:
        second_level_test_set += item

    second_level_test_set = second_level_test_set / 5

    return second_level_train_set, second_level_test_set
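# attention_elmo() returns out-of-fold predictions (second_level_train_set) and
# averaged test predictions (second_level_test_set) that are meant to serve as
# features for a second-level stacking model. The function below is a usage
# sketch only: the meta-learner choice and hyperparameters are assumptions, not
# part of the original code, and y_train2 is the one-hot label matrix used above.
def stacking_usage_sketch():
    from sklearn.linear_model import LogisticRegression
    train_meta, test_meta = attention_elmo(batch_size=128, nb_epoch=35, hidden_dim=80)
    meta_clf = LogisticRegression(multi_class='multinomial', solver='lbfgs')
    meta_clf.fit(train_meta, np.argmax(y_train2, axis=1))
    return meta_clf.predict(test_meta)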
def bi_gru_attention(left_pickle, right_pickle, dropout=0.36, hidden_dim=160):
    print("this is bi_gru_attention model")
    left_maxlen, left_max_features, left_num_features, left_W, left_X_train, left_y_train, left_X_dev, left_y_dev, left_test, y_test = get_feature(
        left_pickle)
    right_maxlen, right_max_features, right_num_features, right_W, right_X_train, right_y_train, right_X_dev, right_y_dev, right_test, y_test = get_feature(
        right_pickle)

    left_sequence = Input(shape=(left_maxlen, ), dtype='int32')
    left_embedded = Embedding(input_dim=left_max_features,
                              output_dim=left_num_features,
                              input_length=left_maxlen,
                              weights=[left_W],
                              trainable=False)(left_sequence)
    left_enc = Bidirectional(
        GRU(hidden_dim, recurrent_dropout=dropout,
            return_sequences=True))(left_embedded)
    left_att = AttentionM()(left_enc)
    print(np.shape(left_enc))

    right_sequence = Input(shape=(right_maxlen, ), dtype='int32')
    right_embedded = Embedding(input_dim=right_max_features,
                               output_dim=right_num_features,
                               input_length=right_maxlen,
                               weights=[right_W],
                               trainable=False)(right_sequence)
    right_enc = Bidirectional(
        GRU(hidden_dim, recurrent_dropout=dropout,
            return_sequences=True))(right_embedded)
    right_att = AttentionM()(right_enc)
    print(np.shape(right_enc))

    comb = Concatenate()([left_att, right_att])

    output = Dense(6, activation='softmax')(comb)
    model = Model(inputs=[left_sequence, right_sequence], outputs=output)

    return model, left_X_train, left_y_train, left_X_dev, left_y_dev, left_test, right_X_train, right_y_train, right_X_dev, right_y_dev, right_test, y_test
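# bi_gru_attention() returns an uncompiled two-input model together with the
# matching left/right splits. The function below is an illustrative usage
# sketch: the pickle paths and the training hyperparameters are placeholders
# (assumptions), not values taken from the original code.
def bi_gru_attention_usage_sketch():
    (model, left_X_train, left_y_train, left_X_dev, left_y_dev, left_test,
     right_X_train, right_y_train, right_X_dev, right_y_dev, right_test,
     y_test) = bi_gru_attention("left.pickle3", "right.pickle3")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
    model.fit([left_X_train, right_X_train], left_y_train,
              validation_data=([left_X_dev, right_X_dev], left_y_dev),
              batch_size=64, epochs=10, verbose=2)
    return model.predict([left_test, right_test], batch_size=64)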
def gru_and_attention(maxlen, max_features, num_features, W, dropout=0.0):
    sequence = Input(shape=(maxlen, ), dtype='int32')
    embedded = Embedding(input_dim=max_features,
                         output_dim=num_features,
                         input_length=maxlen,
                         mask_zero=True,
                         weights=[W],
                         trainable=False)(sequence)
    embedded = Dropout(dropout)(embedded)
    gru = Bidirectional(
        GRU(hidden_dim, recurrent_dropout=dropout,
            return_sequences=True))(embedded)
    att = AttentionM()(gru)
    output = Dense(6, activation='softmax')(att)
    model = Model(inputs=sequence, outputs=output)
    return model
def attentionModel(embeddingMatrix, embedding_dim, hidden_dim, name):
    sequence = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embeddingLayer = Embedding(embeddingMatrix.shape[0],
                               embedding_dim,
                               weights=[embeddingMatrix],
                               input_length=MAX_SEQUENCE_LENGTH,
                               trainable=False)(sequence)
    enc = Bidirectional(GRU(hidden_dim, dropout=DROPOUT, return_sequences=True))(embeddingLayer)
    enc = Bidirectional(GRU(hidden_dim, dropout=DROPOUT, return_sequences=True))(enc)
    att = AttentionM()(enc)
    fc1 = Dense(128, activation="relu")(att)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(4, activation='sigmoid')(fc2_dropout)
    model = Model(inputs=sequence, outputs=output)
    rmsprop = optimizers.rmsprop(lr=LEARNING_RATE)
    model.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=['acc'])

    return model, name
    def gru(self):
        model = Sequential()
        model.add(Embedding(
            input_dim=self.max_features,
            output_dim=self.num_features,
            input_length=self.maxlen,
            #mask_zero=True,
            weights=[self.weights],
            trainable=False
        ))
        model.add(Dropout(0.5))
        model.add(GRU(self.hidden_dims // 2, recurrent_dropout=0.25, return_sequences=True))
        model.add(AttentionM())
        model.add(Dropout(0.25))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='mse', optimizer='adam')

        return model
def attention_bi_lstm_model():
    batch_size = 256
    nb_epoch = 40
    hidden_dim = 120

    sequence = Input(shape=(maxlen, ), dtype='int32')

    embedded = Embedding(input_dim=max_features,
                         output_dim=num_features,
                         input_length=maxlen,
                         mask_zero=True,
                         weights=[W],
                         trainable=False)(sequence)
    # embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen, weights=[W], trainable=False) (sequence)
    embedded = Dropout(0.25)(embedded)

    # bi-lstm
    enc = Bidirectional(
        LSTM(hidden_dim // 2, recurrent_dropout=0.25,
             return_sequences=True))(embedded)

    # gru
    # enc = Bidirectional(GRU(hidden_dim//2, recurrent_dropout=0.2, return_sequences=True)) (embedded)

    att = AttentionM()(enc)

    # print(enc.shape)
    # print(att.shape)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)

    output = Dense(2, activation='softmax')(fc2_dropout)
    model = Model(inputs=sequence, outputs=output)
    class_weight = {0: 1, 1: 7}

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc', f1])

    model.summary()
    return model
def type_attention_lstm_model(batch_size, nb_epoch, hidden_dim):
    sequence = Input(shape=(maxlen, ), dtype='int32')

    embedded = Embedding(input_dim=max_features,
                         output_dim=num_features,
                         input_length=maxlen,
                         mask_zero=True,
                         weights=[W],
                         trainable=False)(sequence)

    embedded = Dropout(0.25)(embedded)

    # gru
    enc = Bidirectional(
        GRU(hidden_dim // 2, recurrent_dropout=0.2,
            return_sequences=True))(embedded)

    att = AttentionM()(enc)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)

    output = Dense(4, activation='softmax')(fc2_dropout)
    model = Model(inputs=sequence, outputs=output)

    # checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc', verbose=1, save_best_only=True)
    # early_stopping = EarlyStopping(monitor="val_loss", patience=8, verbose=1)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc', f1])

    model.fit(X_train,
              y_train,
              validation_data=[X_dev, y_dev],
              batch_size=batch_size,
              epochs=nb_epoch,
              verbose=2)
    y_pred = model.predict(X_dev, batch_size=batch_size)

    return y_pred
def attentionModel(embeddingMatrix):
    sequence = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embeddingLayer = Embedding(embeddingMatrix.shape[0],
                               embeddingMatrix.shape[1],
                               weights=[embeddingMatrix],
                               input_length=MAX_SEQUENCE_LENGTH,
                               mask_zero=emb_mask_zero,
                               trainable=emb_trainable)(sequence)
    enc = Bidirectional(GRU(LSTM_DIM, dropout=DROPOUT,
                            return_sequences=True))(embeddingLayer)
    enc = Bidirectional(GRU(LSTM_DIM, dropout=DROPOUT,
                            return_sequences=True))(enc)
    att = AttentionM()(enc)
    fc1 = Dense(128, activation="relu")(att)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(NUM_CLASSES, activation='softmax')(fc2_dropout)
    model = Model(inputs=sequence, outputs=output)
    model.compile(loss='categorical_crossentropy',
                  optimizer="adam",
                  metrics=['acc'])

    return model
def attention_gru_model_sketch(batch_size, nb_epoch, hidden_dim):
    # NOTE: the "def" line of this snippet was lost in extraction; the name and
    # signature above are assumptions inferred from the variables used in the body.
    # this is the placeholder tensor for the input sequence
    sequence = keras.layers.Input(shape=(maxlen,), dtype='int32')

    embedded = keras.layers.Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
                                      mask_zero=True, weights=[W], trainable=False)(sequence)
    # embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen, weights=[W], trainable=False) (sequence)
    embedded = keras.layers.Dropout(0.25)(embedded)

    # bi-lstm
    # enc = Bidirectional(LSTM(hidden_dim//2, recurrent_dropout=0.25, return_sequences=True)) (embedded)

    # gru
    enc = keras.layers.Bidirectional(keras.layers.GRU(hidden_dim // 2, recurrent_dropout=0.25, return_sequences=True))(
        embedded)

    att = AttentionM()(enc)

    # print(enc.shape)
    # print(att.shape)

    fc1_dropout = keras.layers.Dropout(0.25)(att)
    fc1 = keras.layers.Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = keras.layers.Dropout(0.25)(fc1)

    output = keras.layers.Dense(2, activation='softmax')(fc2_dropout)
    model = keras.Model(inputs=sequence, outputs=output)

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

    history = model.fit(X_train, y_train, validation_data=[X_dev, y_dev], batch_size=batch_size, epochs=nb_epoch)
    y_pred = model.predict(X_test, batch_size=batch_size)
    return y_pred
def attention_lstm_model(batch_size, nb_epoch, hidden_dim, num):
    sequence = Input(shape=(maxlen, ), dtype='int32')

    embedded = Embedding(input_dim=max_features,
                         output_dim=num_features,
                         input_length=maxlen,
                         mask_zero=True,
                         weights=[W],
                         trainable=False)(sequence)

    embedded = Dropout(0.25)(embedded)

    # bi-lstm
    enc = Bidirectional(
        LSTM(hidden_dim // 2, recurrent_dropout=0.25,
             return_sequences=True))(embedded)

    # gru
    # enc = Bidirectional(GRU(hidden_dim//2, recurrent_dropout=0.2, return_sequences=True)) (embedded)

    att = AttentionM()(enc)

    # print(enc.shape)
    # print(att.shape)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)

    output = Dense(2, activation='softmax')(fc2_dropout)
    model = Model(inputs=sequence, outputs=output)
    class_weight = {0: 1, 1: 7}

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc', f1])

    train_num, test_num = X_train.shape[0], X_dev.shape[0]
    num1 = y_train.shape[1]

    second_level_train_set = np.zeros((train_num, num1))  # (10556,)

    second_level_test_set = np.zeros((test_num, num1))  # (2684,)

    test_nfolds_sets = []

    kf = KFold(n_splits=5)

    for i, (train_index, test_index) in enumerate(kf.split(X_train)):
        x_tra, y_tra = X_train[train_index], y_train[train_index]

        x_tst, y_tst = X_train[test_index], y_train[test_index]

        # checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc', verbose=1, save_best_only=True)
        early_stopping = EarlyStopping(monitor='val_acc',
                                       patience=10,
                                       verbose=1)

        model.fit(x_tra,
                  y_tra,
                  validation_data=[x_tst, y_tst],
                  batch_size=batch_size,
                  epochs=nb_epoch,
                  verbose=2,
                  class_weight=class_weight,
                  callbacks=[early_stopping])

        second_level_train_set[test_index] = model.predict(
            x_tst, batch_size=batch_size)

        test_nfolds_sets.append(model.predict(X_dev))
    for item in test_nfolds_sets:
        second_level_test_set += item

    second_level_test_set = second_level_test_set / 5

    model.save("weights_BB_attention_lstm" + num + ".hdf5")

    y_pred = second_level_test_set

    return y_pred
def RNN(X_train, y_train, args):
    """
    Purpose -> Define and train the proposed LSTM network
    Input   -> Data, Labels and model hyperparameters
    Output  -> Trained LSTM network
    """
    # Sets the model hyperparameters
    # Embedding hyperparameters
    max_features = args[0]
    maxlen = args[1]
    embedding_size = args[2]
    # Convolution hyperparameters
    filter_length = args[3]
    nb_filter = args[4]
    pool_length = args[5]
    # LSTM hyperparameters
    lstm_output_size = args[6]
    # Training hyperparameters
    batch_size = args[7]
    nb_epoch = args[8]
    numclasses = args[9]
    test_size = args[10]

    # Format conversion for y_train for compatibility with Keras
    y_train = np_utils.to_categorical(y_train, numclasses)
    print(y_train)
    # Train & Validation data splitting
    X_train, X_valid, y_train, y_valid = train_test_split(X_train,
                                                          y_train,
                                                          test_size=test_size,
                                                          random_state=42)

    # Build the sequential model
    # Model Architecture is:
    # Input -> Embedding -> Conv1D+Maxpool1D -> LSTM -> LSTM -> FC-1 -> Softmaxloss
    print('Build model...')
    start = time()
    log_dir = datetime.now().strftime('model_%Y%m%d_%H%M')
    os.mkdir(log_dir)

    es = EarlyStopping(monitor='val_loss', patience=20)
    mc = ModelCheckpoint(log_dir +
                         '\\CIFAR10-EP{epoch:02d}-ACC{val_acc:.4f}.h5',
                         monitor='val_loss',
                         save_best_only=True)
    tb = TensorBoard(log_dir=log_dir, histogram_freq=0)

    sequence = Input(shape=(maxlen, ), dtype='int32')

    # embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen, mask_zero=True, weights=[W], trainable=False) (sequence)
    embedded = Embedding(input_dim=max_features,
                         output_dim=embedding_size,
                         input_length=maxlen,
                         trainable=False)(sequence)
    embedded = Dropout(0.25)(embedded)
    convolution = Convolution1D(filters=nb_filter,
                                kernel_size=filter_length,
                                padding='valid',
                                activation='relu',
                                strides=1)(embedded)
    maxpooling = MaxPooling1D(pool_size=pool_length)(convolution)
    lstm = LSTM(lstm_output_size,
                dropout=0.2,
                recurrent_dropout=0.2,
                return_sequences=True)(maxpooling)
    lstm1 = LSTM(lstm_output_size,
                 dropout=0.2,
                 recurrent_dropout=0.2,
                 return_sequences=False)(lstm)
    enc = Bidirectional(
        GRU(lstm_output_size // 2,
            recurrent_dropout=0.25,
            return_sequences=True))(maxpooling)
    att = AttentionM()(enc)
    x = keras.layers.Concatenate(axis=1)([lstm1, att])
    fc1 = Dense(128, activation="relu")(x)
    fc2 = Dense(64, activation="relu")(fc1)
    fc3 = Dense(32, activation="relu")(fc2)
    fc4 = Dense(16, activation="relu")(fc3)
    fc4_dropout = Dropout(0.25)(fc4)
    output = Dense(3, activation='softmax')(fc4_dropout)
    model = Model(inputs=sequence, outputs=output)
    '''model = Sequential()
    model.add(Embedding(max_features, embedding_size, input_length=maxlen))
    model.add(Convolution1D(nb_filter=nb_filter,
                            filter_length=filter_length,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=pool_length))
    model.add(SpatialDropout1D(0.1))
    model.add(Bidirectional(CuDNNGRU(64, return_sequences=True)))
    model.add(Bidirectional(CuDNNGRU(64, return_sequences=True)))
    Routings = 5
    Num_capsule = 10
    Dim_capsule = 32
    model.add(Capsule(num_capsule=Num_capsule, dim_capsule=Dim_capsule, routings=Routings, share_weights=True))
    model.add(Flatten())


    model.add(LSTM(lstm_output_size, dropout_W=0.2, dropout_U=0.2, return_sequences=True))
    model.add(LSTM(lstm_output_size, dropout_W=0.2, dropout_U=0.2, return_sequences=True))
    model.add(Bidirectional(LSTM(lstm_output_size//2, recurrent_dropout=0.25, return_sequences=False)))

    #model.add(AttentionM())
    model.add(Dropout(0.25))
    model.add(Dense(numclasses,activation='softmax'))'''

    # Optimizer is Adamax along with categorical crossentropy loss
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adamax',
        metrics=['accuracy'],
    )
    print(model.summary())
    history = LossHistory()

    print('Train...')
    # Trains model for 50 epochs with shuffling after every epoch for training data and validates on validation data
    model.fit(X_train,
              y_train,
              batch_size=batch_size,
              shuffle=True,
              epochs=nb_epoch,
              validation_data=(X_valid, y_valid),
              callbacks=[history, es, mc, tb])

    history.loss_plot('epoch')
    return model
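# RNN() above records training curves through a LossHistory callback with a
# loss_plot() method, neither of which is defined in this file. The class below
# is a compatible stand-in sketch, assuming matplotlib is available; the real
# LossHistory used by the project may track additional quantities.
import matplotlib.pyplot as plt
from keras.callbacks import Callback


class LossHistorySketch(Callback):
    def on_train_begin(self, logs=None):
        self.losses = {'batch': [], 'epoch': []}
        self.val_loss = {'batch': [], 'epoch': []}

    def on_batch_end(self, batch, logs=None):
        logs = logs or {}
        self.losses['batch'].append(logs.get('loss'))

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.losses['epoch'].append(logs.get('loss'))
        self.val_loss['epoch'].append(logs.get('val_loss'))

    def loss_plot(self, loss_type):
        # loss_type is 'batch' or 'epoch'.
        iters = range(len(self.losses[loss_type]))
        plt.figure()
        plt.plot(iters, self.losses[loss_type], label='train loss')
        if loss_type == 'epoch':
            plt.plot(iters, self.val_loss['epoch'], label='val loss')
        plt.xlabel(loss_type)
        plt.ylabel('loss')
        plt.legend(loc='upper right')
        plt.show()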
def attention_lstm_model():
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)

    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info(r"running %s" % ''.join(sys.argv))

    logging.info('loading data...')
    pickle_file = os.path.join('pickle', 'type_train_val_test2.pickle3')

    revs, W, word_idx_map, vocab, maxlen = pickle.load(open(pickle_file, 'rb'))
    logging.info('data loaded!')

    X_train, X_test, X_dev, y_train, y_dev = make_idx_data(revs,
                                                           word_idx_map,
                                                           maxlen=maxlen)

    n_train_sample = X_train.shape[0]
    logging.info("n_train_sample [n_train_sample]: %d" % n_train_sample)

    n_test_sample = X_test.shape[0]
    logging.info("n_test_sample [n_train_sample]: %d" % n_test_sample)

    len_sentence = X_train.shape[1]  # 200
    logging.info("len_sentence [len_sentence]: %d" % len_sentence)

    max_features = W.shape[0]
    logging.info("num of word vector [max_features]: %d" % max_features)

    num_features = W.shape[1]  # 400
    logging.info("dimension of word vector [num_features]: %d" % num_features)

    sequence = Input(shape=(maxlen, ), dtype='int32')

    embedded = Embedding(input_dim=max_features,
                         output_dim=num_features,
                         input_length=maxlen,
                         mask_zero=True,
                         weights=[W],
                         trainable=False)(sequence)
    # embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen, weights=[W], trainable=False) (sequence)
    embedded = Dropout(0.25)(embedded)

    enc = Bidirectional(
        GRU(hidden_dim // 2, recurrent_dropout=0.25,
            return_sequences=True))(embedded)

    att = AttentionM()(enc)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)

    output = Dense(4, activation='softmax')(fc2_dropout)
    model = Model(inputs=sequence, outputs=output)

    # checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc', verbose=1, save_best_only=True)
    # early_stopping = EarlyStopping(monitor="val_loss", patience=10, verbose=1)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc', f1])

    # model.fit(X_train, y_train, validation_data=[X_dev, y_dev], batch_size=batch_size, epochs=nb_epoch, verbose=2,
    #           callbacks=[checkpointer, early_stopping])
    # y_pred = model.predict(X_dev, batch_size=batch_size)
    return model
def Attention_Model(config):
    image_input = Input(shape=(224, 224, 3))
    x = keras.layers.BatchNormalization()(image_input)
    x = Conv2D(64, (4, 4),
               activation="relu",
               kernel_regularizer=keras.regularizers.l2(1e-3))(x)
    x = MaxPooling2D()(x)
    x = keras.layers.BatchNormalization()(x)
    x = Conv2D(32, (3, 3), activation="relu")(x)
    x = Dropout(0.2)(x)
    x = MaxPooling2D()(x)
    x = Conv2D(32, (2, 2),
               activation="relu",
               kernel_regularizer=keras.regularizers.l2(1e-3))(x)
    x = MaxPooling2D()(x)
    x = Conv2D(32, (2, 2), activation="relu")(x)
    x = MaxPooling2D()(x)
    x = Conv2D(32, (2, 2), activation='relu')(x)
    x = MaxPooling2D()(x)
    x = keras.layers.Conv2D(10, (2, 2),
                            activation='relu',
                            kernel_regularizer=keras.regularizers.l2(
                                config.ker_reg_1))(x)
    fla = Flatten()(x)
    image_concat = BatchNormalization()(fla)
    # image_input = Input(shape=(2048,))
    # x = keras.layers.BatchNormalization()(image_input)
    # x = Dropout(0.5)(x)
    # image_concat = Dense(300, activation="relu", kernel_regularizer=keras.regularizers.l2(args.ker_reg_1))(x)
    # image_concat = BatchNormalization()(image_concat)
    text_input = keras.Input(shape=(
        config.token_number,
        config.token_feature_vector,
    ))
    # text_input = keras.layers.Masking(mask_value=0.0,input_shape=(config.token_number,config.token_feature_vector,))
    # text_bit = BatchNormalization(axis=-1)(text_input)

    enc = Bidirectional(
        LSTM(300, dropout=config.rnn_dro_1, return_sequences=True))(text_input)
    enc = Bidirectional(
        LSTM(300, dropout=config.rnn_dro_1, return_sequences=True))(enc)
    enc = LSTM(160,
               dropout=config.rnn_dro_2,
               return_sequences=True,
               kernel_regularizer=keras.regularizers.l2(config.ker_reg_2))(enc)
    att = AttentionM()(enc)
    att = BatchNormalization()(att)
    concat = keras.layers.concatenate([image_concat, att], axis=1)
    concat_dropout_1 = BatchNormalization()(concat)
    concat_dropout_1 = Dropout(config.concat_dropout_1)(concat_dropout_1)

    concat_Dense_1 = Dense(config.concat_1,
                           activation="relu",
                           kernel_regularizer=keras.regularizers.l1(
                               config.ker_reg_3))(concat_dropout_1)

    concat_dropout_2 = Dropout(config.concat_dropout_2)(concat_Dense_1)

    dense2 = Dense(64, activation="relu", name="Dense_2")(concat_dropout_2)
    concat_dropout_3 = Dropout(config.concat_dropout_3)(dense2)
    dense3 = Dense(
        3,
        activation='softmax',
    )(concat_dropout_3)

    model = keras.Model([image_input, text_input], dense3)
    model.summary()
    model.compile(optimizer=keras.optimizers.adam(),
                  loss=keras.losses.categorical_crossentropy,
                  metrics=['acc'])
    return model
def interActive_bilstm_attention(left_pickle, right_pickle, hidden_dim,
                                 dropout_rate, capsule_dim):
    Routings = 3  # changed
    Num_capsule = 6
    Dim_capsule = capsule_dim

    left_maxlen, left_max_features, left_num_features, left_W, left_X_train, left_y_train, left_X_dev, left_y_dev, left_test, y_test = get_feature(
        left_pickle)
    right_maxlen, right_max_features, right_num_features, right_W, right_X_train, right_y_train, right_X_dev, right_y_dev, right_test, y_test = get_feature(
        right_pickle)

    left_sequence = Input(shape=(left_maxlen, ), dtype='int32')
    left_embedded = Embedding(input_dim=left_max_features,
                              output_dim=left_num_features,
                              input_length=left_maxlen,
                              weights=[left_W],
                              trainable=False)(left_sequence)
    left_embedded = Dropout(dropout_rate)(left_embedded)
    # bi-lstm
    left_embedded = Bidirectional(
        GRU(hidden_dim, recurrent_dropout=dropout_rate,
            return_sequences=True))(left_embedded)
    left_enc = Bidirectional(
        GRU(hidden_dim, recurrent_dropout=dropout_rate,
            return_sequences=True))(left_embedded)

    # left_capsule = Flatten()(left_capsule)

    right_sequence = Input(shape=(right_maxlen, ), dtype='int32')
    right_embedded = Embedding(input_dim=right_max_features,
                               output_dim=right_num_features,
                               input_length=right_maxlen,
                               weights=[right_W],
                               trainable=False)(right_sequence)
    right_embedded = Dropout(dropout_rate)(right_embedded)
    right_embedded = Bidirectional(
        GRU(hidden_dim, recurrent_dropout=dropout_rate,
            return_sequences=True))(right_embedded)
    right_enc = Bidirectional(
        GRU(hidden_dim, recurrent_dropout=dropout_rate,
            return_sequences=True))(right_embedded)
    # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule)
    # right_capsule = Flatten()(right_capsule)

    #comboVec = Concatenate(axis=1)([left_enc, right_enc])

    interActivateVec = interActivate(hidden_dims=hidden_dim)(
        [left_enc, right_enc])
    print("input_size", interActivateVec)

    tanh_inter_left = Tanh()(interActivateVec)
    inter_trans = TransMatrix()(interActivateVec)
    tanh_inter_right = Tanh()(inter_trans)

    scaledPool_inter_left = MaxPooling1D(pool_size=165)(tanh_inter_left)
    scaledPool_inter_left = Reshape((165, ))(scaledPool_inter_left)
    print("scaledPool_inter_left ", scaledPool_inter_left)
    scaledPool_inter_right = MaxPooling1D(pool_size=165)(tanh_inter_right)
    scaledPool_inter_right = Reshape((165, ))(scaledPool_inter_right)
    print("scaledPool_inter_right ", scaledPool_inter_right)

    softmax_inter_left = Softmax()(scaledPool_inter_left)
    softmax_inter_right = Softmax()(scaledPool_inter_right)

    softmax_inter_left = Dot(axes=1)([left_enc, softmax_inter_left])
    print("softmax_inter_left", softmax_inter_left, left_enc)
    softmax_inter_right = Dot(axes=1)([right_enc, softmax_inter_right])
    print("softmax_inter_right", softmax_inter_right, right_enc)

    comboVec = Concatenate(axis=1)([softmax_inter_left, softmax_inter_right])
    comboVec = Reshape((-1, 2 * hidden_dim))(comboVec)
    comboVec_dropout = Dropout(dropout_rate)(comboVec)
    #print("comboVect: ", comboVec)
    combo_gru = Bidirectional(
        LSTM(hidden_dim, dropout=dropout_rate,
             return_sequences=True))(comboVec_dropout)
    combo_gru_att = AttentionM()(combo_gru)
    #combo_gru = Flatten(combo_gru)
    '''
    output1 = Dense(128, activation="relu")(comboVec)
    output1 = Dropout(0.34)(output1)
    output2 = Dense(64, activation="relu")(output1)
    output2 = Dropout(0.25)(output2)
    output3 = Dense(32, activation="relu")(output2)
    output3 = Dropout(0.12)(output3)
    '''

    #my2dCapsule = Capsule(routings=Routings,num_capsule=Num_capsule,dim_capsule=Dim_capsule,
    #kernel_size=input_kernel_size)(comboVec_dropout)
    #my2dCapsule_dropout = Dropout(dropout_rate)(comboVec_dropout)
    print("capsule output: ", combo_gru_att)
    #comboVec_dropout = Flatten()(comboVec_dropout)
    #bilstm_capsule = Bidirectional(LSTM(hidden_dim,recurrent_dropout=0.34,return_sequences=True))(my2dCapsule)
    #bilstm_capsule = Bidirectional(LSTM(hidden_dim,recurrent_dropout=0.34, return_sequences=True))(bilstm_capsule)
    #attentioned_capsule = AttentionM()(bilstm_capsule)
    #output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(my2dCapsule_dropout)
    #my2dCapsule = Flatten()(my2dCapsule)
    output = Dense(6, activation="softmax")(combo_gru_att)
    print("output: ", output)
    model = Model(inputs=[left_sequence, right_sequence], outputs=output)

    return model, left_X_train, left_y_train, left_X_dev, left_y_dev, left_test, right_X_train, right_y_train, right_X_dev, \
           right_y_dev, right_test, y_test
def Attention_model(config):
    K.set_image_data_format('channels_last')
    image_input = Input(shape=(224, 224, 3), dtype='float32')
    #
    x = Conv2D(64, (5, 5), activation='relu')(image_input)
    x = BatchNormalization(axis=1)(x)
    x = MaxPooling2D()(x)
    x = Conv2D(32, (5, 5),
               activation='relu',
               kernel_regularizer=keras.regularizers.l2(1e-3))(x)
    x = Dropout(0.2)(x)
    x = MaxPooling2D()(x)
    x = Conv2D(32, (4, 4), activation='relu')(x)
    x = MaxPooling2D()(x)
    x = BatchNormalization(axis=1)(x)
    x = Conv2D(16, (3, 3), activation='relu')(x)
    x = MaxPooling2D()(x)
    x = Conv2D(6, (3, 3),
               activation='relu',
               kernel_regularizer=keras.regularizers.l2(args.ker_reg_1))(x)
    fla = Flatten()(x)
    image_concat = BatchNormalization()(fla)
    # image_input = Input(shape=(2048,))
    # x = keras.layers.BatchNormalization()(image_input)
    # x = Dropout(0.5)(x)
    # image_concat = Dense(300, activation="relu", kernel_regularizer=keras.regularizers.l2(args.ker_reg_1))(x)
    # image_concat = BatchNormalization()(image_concat)
    text_input = keras.Input(shape=(config.token_number,
                                    config.token_feature_vector))
    text_bit = BatchNormalization(axis=-1)(text_input)

    enc = Bidirectional(
        LSTM(300, dropout=args.rnn_dro_1, return_sequences=True))(text_bit)
    enc = Bidirectional(
        LSTM(300, dropout=args.rnn_dro_1, return_sequences=True))(enc)
    enc = LSTM(300,
               dropout=args.rnn_dro_2,
               return_sequences=True,
               kernel_regularizer=keras.regularizers.l2(args.ker_reg_2))(enc)
    att = AttentionM()(enc)
    att = BatchNormalization()(att)
    concat = keras.layers.concatenate([image_concat, att], axis=1)
    concat_dropout_1 = BatchNormalization()(concat)
    concat_dropout_1 = Dropout(args.concat_dropout_1)(concat_dropout_1)

    concat_Dense_1 = Dense(args.concat_1,
                           activation="relu",
                           kernel_regularizer=keras.regularizers.l1(
                               args.ker_reg_3))(concat_dropout_1)

    concat_dropout_2 = Dropout(args.concat_dropout_2)(concat_Dense_1)
    batch_2 = keras.layers.BatchNormalization()(concat_dropout_2)
    dense2 = Dense(32, activation="relu")(batch_2)

    dense3 = Dense(32, activation="relu")(batch_2)

    dense4 = Dense(32, activation="relu")(batch_2)

    dense5 = Dense(64, activation="relu")(batch_2)

    Humour_Dense = Dense(1, activation='sigmoid', name='Humour')(dense2)
    Sarcasm_Dense = Dense(
        1,
        activation='sigmoid',
        name='Sarcasm',
    )(dense3)
    Offensive_Dense = Dense(
        1,
        activation='sigmoid',
        name='Offensive',
    )(dense4)
    motivational_Dense = Dense(
        1,
        activation='sigmoid',
        name='motivational',
    )(dense5)
    model = keras.Model(
        [image_input, text_input],
        [Humour_Dense, Sarcasm_Dense, Offensive_Dense, motivational_Dense])
    model.summary()
    model.compile(optimizer=keras.optimizers.adam(),
                  loss=keras.losses.binary_crossentropy,
                  metrics=['acc'])
    return model