def _context_capsule_branch(maxlen, max_features, num_features, W,
                            dropout_rate, routings, num_capsule, dim_capsule):
    # One encoder branch: frozen embedding -> BiGRU -> Capsule -> capsule
    # lengths (L2 norm over the last axis). Returns (input_tensor, features).
    sequence = Input(shape=(maxlen, ), dtype='int32')
    embedded = Embedding(input_dim=max_features,
                         output_dim=num_features,
                         input_length=maxlen,
                         weights=[W],
                         trainable=False)(sequence)
    # hidden_dim is a module-level global, exactly as in the original code.
    enc = Bidirectional(
        GRU(hidden_dim, recurrent_dropout=dropout_rate,
            return_sequences=True))(embedded)
    caps = Capsule(num_capsule=num_capsule,
                   dim_capsule=dim_capsule,
                   routings=routings,
                   share_weights=True)(enc)
    caps_len = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(caps)
    return sequence, caps_len


def context_gru_and_capsule_net(left_pickle, right_pickle, dropout_rate=0.46):
    """Two-branch (left/right context) BiGRU + capsule classifier.

    Builds one identical encoder branch per pickle (see
    _context_capsule_branch), concatenates the two capsule-length vectors,
    and classifies via Dense(128) -> 6-way softmax.

    Args:
        left_pickle / right_pickle: pickle paths passed to get_feature().
        dropout_rate: recurrent dropout rate for the branch GRUs.

    Returns:
        (model, left_X_train, left_y_train, left_X_dev, left_y_dev,
        left_test, right_X_train, right_y_train, right_X_dev, right_y_dev,
        right_test, y_test).
        NOTE(review): y_test from the left pickle is overwritten by the
        right pickle's y_test — presumably both share labels; confirm.
    """
    Routings = 5
    Num_capsule = 10
    Dim_capsule = 32

    (left_maxlen, left_max_features, left_num_features, left_W,
     left_X_train, left_y_train, left_X_dev, left_y_dev, left_test,
     y_test) = get_feature(left_pickle)
    (right_maxlen, right_max_features, right_num_features, right_W,
     right_X_train, right_y_train, right_X_dev, right_y_dev, right_test,
     y_test) = get_feature(right_pickle)

    # FIX: the left and right branches were verbatim copy-paste; both are
    # now built through one helper so they cannot drift apart.
    left_sequence, left_output_capsule = _context_capsule_branch(
        left_maxlen, left_max_features, left_num_features, left_W,
        dropout_rate, Routings, Num_capsule, Dim_capsule)
    right_sequence, right_output_capsule = _context_capsule_branch(
        right_maxlen, right_max_features, right_num_features, right_W,
        dropout_rate, Routings, Num_capsule, Dim_capsule)

    x = Concatenate()([left_output_capsule, right_output_capsule])
    capsule = Dense(128)(x)  # linear projection (no activation), as before
    output = Dense(6, activation='softmax')(capsule)
    model = Model(inputs=[left_sequence, right_sequence], outputs=output)
    return model, left_X_train, left_y_train, left_X_dev, left_y_dev, \
        left_test, right_X_train, right_y_train, right_X_dev, \
        right_y_dev, right_test, y_test
def capsule_model(Num_capsule):
    """Stacked CuDNN BiGRU + capsule 4-class classifier.

    Args:
        Num_capsule: number of capsules in the Capsule layer.

    Returns:
        Compiled Keras model (adam, categorical cross-entropy,
        accuracy + f1 metrics); model.summary() is printed as a side effect.

    Relies on module-level globals: maxlen, max_features, num_features, W, f1.
    """
    Routings = 20
    # FIX: removed the redundant no-op self-assignment
    # `Num_capsule = Num_capsule` from the original.
    Dim_capsule = 120
    sequence_input = Input(shape=(maxlen, ), dtype='int32')
    embedded_sequences = Embedding(input_dim=max_features,
                                   output_dim=num_features,
                                   input_length=maxlen,
                                   weights=[W],
                                   trainable=False)(sequence_input)
    embedded_sequences = SpatialDropout1D(0.1)(embedded_sequences)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(embedded_sequences)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(x)
    capsule = Capsule(num_capsule=Num_capsule,
                      dim_capsule=Dim_capsule,
                      routings=Routings,
                      share_weights=True)(x)
    capsule = Flatten()(capsule)
    capsule = Dropout(0.1)(capsule)
    output = Dense(4, activation='softmax')(capsule)
    model = Model(inputs=[sequence_input], outputs=output)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', f1])
    model.summary()
    return model
def gru_and_capsule_net(maxlen, max_features, num_features, W, dropout=0.0):
    """Single-branch BiGRU + capsule 6-class classifier.

    Args:
        maxlen: input sequence length.
        max_features: vocabulary size of the frozen embedding.
        num_features: embedding dimensionality.
        W: pretrained embedding matrix.
        dropout: rate used both as recurrent dropout and after flattening.

    Returns:
        Uncompiled Keras Model. Uses the module-level global hidden_dim
        for the GRU width.
    """
    n_routings, n_caps, caps_dim = 5, 10, 32

    tokens = Input(shape=(maxlen, ), dtype='int32')
    emb = Embedding(input_dim=max_features,
                    output_dim=num_features,
                    input_length=maxlen,
                    weights=[W],
                    trainable=False)(tokens)
    rnn = Bidirectional(
        GRU(hidden_dim, recurrent_dropout=dropout,
            return_sequences=True))(emb)
    caps = Capsule(num_capsule=n_caps,
                   dim_capsule=caps_dim,
                   routings=n_routings,
                   share_weights=True,
                   kernel_size=(3, 1))(rnn)
    flat = Dropout(dropout)(Flatten()(caps))
    probs = Dense(6, activation='softmax')(flat)
    return Model(inputs=tokens, outputs=probs)
def capsulnetModel(embeddingMatrix,embedding_dim,hidden_dim, name):
    """Build a stacked CuDNN BiGRU + capsule classifier over a frozen embedding.

    Input:
        embeddingMatrix : embedding matrix loaded into the (frozen) Embedding layer.
        embedding_dim   : dimensionality of the word embeddings.
        hidden_dim      : units per direction of each CuDNNGRU layer.
        name            : identifier returned alongside the model.
    Output:
        (model, name) : compiled Keras model (NUM_CLASSES-way softmax,
        RMSprop at LEARNING_RATE, 'acc' metric) plus the given name.

    Relies on module-level globals: MAX_SEQUENCE_LENGTH, NUM_CLASSES,
    LEARNING_RATE, optimizers, and the custom Capsule layer.
    """
    Routings = 5
    Num_capsule = 10
    Dim_capsule = 32
    # Embedding weights are pretrained and kept frozen during training.
    embedding_layer = Embedding(embeddingMatrix.shape[0],
                                embedding_dim,
                                weights=[embeddingMatrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=False)
    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedded_sequences = embedding_layer(sequence_input)
    # SpatialDropout1D drops whole embedding channels rather than single units.
    embedded_sequences = SpatialDropout1D(0.1)(embedded_sequences)
    x = Bidirectional(CuDNNGRU(hidden_dim, return_sequences=True))(embedded_sequences)
    x = Bidirectional(CuDNNGRU(hidden_dim, return_sequences=True))(x)
    capsule = Capsule(num_capsule=Num_capsule,
                      dim_capsule=Dim_capsule,
                      routings=Routings,
                      share_weights=True,
                      kernel_size=(3, 1))(x)
    # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule)
    capsule = Flatten()(capsule)
    capsule = Dropout(0.4)(capsule)
    output = Dense(NUM_CLASSES, activation='softmax')(capsule)
    model = Model(inputs=sequence_input, outputs=output)
    rmsprop = optimizers.rmsprop(lr=LEARNING_RATE)
    model.compile(loss='categorical_crossentropy',
                  optimizer=rmsprop,
                  metrics=['acc'])
    return model, name
def capsule(maxlen, max_features, num_features, W, hidden_dim=160, dropout_rate=0.46):
    """GRU encoder followed by a capsule layer and a 6-way softmax head.

    Args:
        maxlen: input sequence length.
        max_features: vocabulary size of the frozen embedding.
        num_features: embedding dimensionality.
        W: pretrained embedding matrix.
        hidden_dim: GRU units.
        dropout_rate: dropout applied right after the embedding.

    Returns:
        Uncompiled Keras Model.
    """
    token_ids = Input(shape=(maxlen, ), dtype='int32')
    emb = Embedding(input_dim=max_features,
                    output_dim=num_features,
                    input_length=maxlen,
                    mask_zero=True,
                    weights=[W],
                    trainable=False)(token_ids)
    emb = Dropout(dropout_rate)(emb)
    # Only the final GRU state is kept (return_sequences defaults to False).
    encoded = GRU(hidden_dim, recurrent_dropout=0.28)(emb)
    caps_vec = Capsule(num_capsule=6, routings=3, kernel_size=(3, 1))(encoded)
    hidden = Dense(128, activation="relu")(caps_vec)
    probs = Dense(6, activation='softmax')(hidden)
    return Model(inputs=token_ids, outputs=probs)
def capsulnet_model(hidden_dim=100):
    """Stacked CuDNN BiGRU + capsule 3-class classifier, compiled with RMSprop.

    Args:
        hidden_dim: units per direction of each CuDNNGRU layer.

    Returns:
        Compiled Keras model; model.summary() is printed as a side effect.

    Relies on module-level globals: maxlen, max_features, num_features, W,
    optimizers, f1.
    """
    n_routings, n_caps, caps_dim = 15, 30, 60

    token_input = Input(shape=(maxlen, ), dtype='int32')
    feats = Embedding(input_dim=max_features,
                      output_dim=num_features,
                      input_length=maxlen,
                      weights=[W],
                      trainable=False)(token_input)
    feats = SpatialDropout1D(0.1)(feats)
    # Two stacked bidirectional recurrent layers.
    for _ in range(2):
        feats = Bidirectional(CuDNNGRU(hidden_dim, return_sequences=True))(feats)
    caps = Capsule(num_capsule=n_caps,
                   dim_capsule=caps_dim,
                   routings=n_routings,
                   share_weights=True,
                   kernel_size=(3, 1))(feats)
    caps = Dropout(0.4)(Flatten()(caps))
    probs = Dense(3, activation='softmax')(caps)

    net = Model(inputs=[token_input], outputs=probs)
    net.compile(loss='categorical_crossentropy',
                optimizer=optimizers.rmsprop(lr=0.001),
                metrics=['accuracy', f1])
    net.summary()
    return net
def capsulnet_elmo(batch_size, nb_epoch, hidden_dim, num):
    """5-fold stacking for a BiGRU + capsule 3-class model on ELMo-indexed data.

    A single model is (re)fit across KFold splits; out-of-fold predictions
    fill the second-level train set, and test-set predictions are averaged
    over folds.

    Args:
        batch_size: minibatch size for fit/predict.
        nb_epoch: epochs per fold.
        hidden_dim: units per direction of each CuDNNGRU layer.
        num: string suffix for the saved weights file.

    Returns:
        Fold-averaged predictions on X_test2, shape (test_num, n_classes).

    Relies on module-level globals: maxlen2, W2, X_train2, y_train2,
    X_test2, optimizers, f1, np, KFold.
    """
    Routings = 15
    Num_capsule = 30
    Dim_capsule = 60
    sequence_input = Input(shape=(maxlen2,), dtype='int32')
    embedded_sequences = Embedding(input_dim=W2.shape[0],
                                   output_dim=W2.shape[1],
                                   input_length=maxlen2,
                                   weights=[W2],
                                   trainable=False)(sequence_input)
    embedded_sequences = SpatialDropout1D(0.1)(embedded_sequences)
    x = Bidirectional(CuDNNGRU(hidden_dim, return_sequences=True))(embedded_sequences)
    x = Bidirectional(CuDNNGRU(hidden_dim, return_sequences=True))(x)
    capsule = Capsule(num_capsule=Num_capsule,
                      dim_capsule=Dim_capsule,
                      routings=Routings,
                      share_weights=True,
                      kernel_size=(3, 1))(x)
    # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule)
    capsule = Flatten()(capsule)
    capsule = Dropout(0.4)(capsule)
    output = Dense(3, activation='softmax')(capsule)
    model = Model(inputs=[sequence_input], outputs=output)
    rmsprop = optimizers.rmsprop(lr=0.001)
    model.compile(loss='categorical_crossentropy',
                  optimizer=rmsprop,
                  metrics=['accuracy', f1])

    class_weight = {0: 1, 1: 2, 2: 6}  # up-weight the rarer classes
    train_num, test_num = X_train2.shape[0], X_test2.shape[0]
    n_classes = y_train2.shape[1]
    second_level_train_set = np.zeros((train_num, n_classes))
    second_level_test_set = np.zeros((test_num, n_classes))
    test_nfolds_sets = []

    # FIX: single source of truth for the fold count — the test-set average
    # below previously divided by a hard-coded 5 and would silently break
    # if KFold(n_splits=...) were changed.
    n_splits = 5
    kf = KFold(n_splits=n_splits)
    for i, (train_index, test_index) in enumerate(kf.split(X_train2)):
        x_tra, y_tra = X_train2[train_index], y_train2[train_index]
        x_tst, y_tst = X_train2[test_index], y_train2[test_index]
        model.fit(x_tra, y_tra,
                  validation_data=(x_tst, y_tst),  # tuple, per Keras docs
                  batch_size=batch_size,
                  epochs=nb_epoch,
                  verbose=2,
                  class_weight=class_weight)
        # Out-of-fold predictions become second-level training features.
        second_level_train_set[test_index] = model.predict(x_tst, batch_size=batch_size)
        test_nfolds_sets.append(model.predict(X_test2))

    for item in test_nfolds_sets:
        second_level_test_set += item
    second_level_test_set = second_level_test_set / n_splits

    model.save("weights_elmo_capsulnet" + num + ".hdf5")
    y_pred = second_level_test_set
    return y_pred
def capsulenet_gru(num):
    """Train a dual-input (tokens + lexicon features) BiGRU-capsule net and save it.

    Loads preprocessed data from pickle/test_trial_train.pickle, fits for 14
    epochs, and writes the trained model to 'capsulenet_gru_<num>.h5'.

    Args:
        num: string suffix for the saved model filename.
    """
    data_path = os.path.join('pickle', 'test_trial_train.pickle')
    revs, W, word_idx_map, vocab, maxlen = pickle.load(open(data_path, 'rb'))
    (X_train, X_trial, X_test, y_train, y_trial,
     lex_train, lex_trial, lex_test) = make_idx_data(revs, word_idx_map,
                                                     maxlen=maxlen)

    vocab_size, emb_dim = W.shape[0], W.shape[1]  # emb_dim == 400

    # Keras model
    n_routings, n_caps, caps_dim = 5, 10, 32

    token_input = Input(shape=(maxlen,), dtype='int32')
    lex_input = Input(shape=(43,), dtype='float32')

    feats = Embedding(input_dim=vocab_size,
                      output_dim=emb_dim,
                      input_length=maxlen,
                      weights=[W],
                      trainable=False)(token_input)
    feats = SpatialDropout1D(0.1)(feats)
    feats = Bidirectional(CuDNNGRU(128, return_sequences=True))(feats)
    feats = Bidirectional(CuDNNGRU(128, return_sequences=True))(feats)
    caps = Capsule(num_capsule=n_caps,
                   dim_capsule=caps_dim,
                   routings=n_routings,
                   share_weights=True)(feats)
    caps = Dropout(0.4)(Flatten()(caps))
    # Concatenate learned sequence features with hand-crafted lexicon features.
    merged = Concatenate(axis=-1)([caps, lex_input])
    probs = Dense(6, activation='softmax')(merged)

    net = Model(inputs=[token_input, lex_input], outputs=probs)
    net.compile(loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])
    net.fit(x=[X_train, lex_train], y=y_train,
            batch_size=830, epochs=14,
            validation_data=([X_trial, lex_trial], y_trial))
    net.save('capsulenet_gru_' + num + '.h5')
def capsulnet_model(batch_size, nb_epoch, hidden_dim, num):
    """Train a stacked BiGRU + capsule binary classifier; return test predictions.

    Args:
        batch_size: minibatch size for fit/predict.
        nb_epoch: number of training epochs.
        hidden_dim: accepted for interface compatibility but unused — the
            CuDNNGRU layers are fixed at 64 units, as in the original.
        num: string suffix for the saved weights file.

    Returns:
        model.predict(X_test) probabilities, shape (n_test, 2).

    Relies on module-level globals: maxlen, max_features, num_features, W,
    f1, X_train, y_train, X_dev, y_dev, X_test.
    """
    n_routings, n_caps, caps_dim = 15, 30, 60

    token_input = Input(shape=(maxlen, ), dtype='int32')
    feats = Embedding(input_dim=max_features,
                      output_dim=num_features,
                      input_length=maxlen,
                      weights=[W],
                      trainable=False)(token_input)
    feats = SpatialDropout1D(0.1)(feats)
    feats = Bidirectional(CuDNNGRU(64, return_sequences=True))(feats)
    feats = Bidirectional(CuDNNGRU(64, return_sequences=True))(feats)
    caps = Capsule(num_capsule=n_caps,
                   dim_capsule=caps_dim,
                   routings=n_routings,
                   share_weights=True)(feats)
    caps = Dropout(0.4)(Flatten()(caps))
    probs = Dense(2, activation='softmax')(caps)

    net = Model(inputs=[token_input], outputs=probs)
    net.compile(loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['accuracy', f1])
    net.fit(X_train, y_train,
            validation_data=[X_dev, y_dev],
            batch_size=batch_size,
            epochs=nb_epoch,
            verbose=2,
            shuffle=False)
    net.save("weights_capsulnet" + num + ".hdf5")
    return net.predict(X_test, batch_size=batch_size)
def capsulnet_model(batch_size, nb_epoch, hidden_dim, num):
    """5-fold stacking for a BiGRU + capsule binary classifier.

    A single model is (re)fit across KFold splits with early stopping;
    out-of-fold predictions fill the second-level train set, and dev-set
    predictions are averaged across folds and returned.

    Args:
        batch_size: minibatch size for fit/predict.
        nb_epoch: max epochs per fold (early stopping monitors val_acc).
        hidden_dim: accepted for interface compatibility but unused — the
            CuDNNGRU layers are fixed at 64 units, as in the original.
        num: string suffix for the saved weights file.

    Returns:
        Fold-averaged predictions on X_dev, shape (dev_num, 2).

    Relies on module-level globals: maxlen, max_features, num_features, W,
    f1, X_train, y_train, X_dev, np, KFold, EarlyStopping.
    """
    Routings = 15
    Num_capsule = 30
    Dim_capsule = 60
    sequence_input = Input(shape=(maxlen, ), dtype='int32')
    embedded_sequences = Embedding(input_dim=max_features,
                                   output_dim=num_features,
                                   input_length=maxlen,
                                   weights=[W],
                                   trainable=False)(sequence_input)
    embedded_sequences = SpatialDropout1D(0.1)(embedded_sequences)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(embedded_sequences)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(x)
    capsule = Capsule(num_capsule=Num_capsule,
                      dim_capsule=Dim_capsule,
                      routings=Routings,
                      share_weights=True)(x)
    capsule = Flatten()(capsule)
    capsule = Dropout(0.4)(capsule)
    output = Dense(2, activation='softmax')(capsule)
    model = Model(inputs=[sequence_input], outputs=output)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', f1])

    class_weight = {0: 1, 1: 7}  # up-weight the minority class
    train_num, test_num = X_train.shape[0], X_dev.shape[0]
    n_classes = y_train.shape[1]
    # Out-of-fold train predictions and accumulated dev predictions must be
    # 2-D (one column per class) so the fold assignment below broadcasts.
    second_level_train_set = np.zeros((train_num, n_classes))
    second_level_test_set = np.zeros((test_num, n_classes))
    test_nfolds_sets = []

    # FIX: single source of truth for the fold count — the average below
    # previously divided by a hard-coded 5 and would silently break if
    # KFold(n_splits=...) were changed.
    n_splits = 5
    kf = KFold(n_splits=n_splits)
    for i, (train_index, test_index) in enumerate(kf.split(X_train)):
        x_tra, y_tra = X_train[train_index], y_train[train_index]
        x_tst, y_tst = X_train[test_index], y_train[test_index]
        # Fresh callback per fold so patience counts do not carry over.
        early_stopping = EarlyStopping(monitor='val_acc', patience=8, verbose=1)
        model.fit(x_tra, y_tra,
                  validation_data=(x_tst, y_tst),  # tuple, per Keras docs
                  batch_size=batch_size,
                  epochs=nb_epoch,
                  verbose=2,
                  class_weight=class_weight,
                  callbacks=[early_stopping])
        second_level_train_set[test_index] = model.predict(x_tst,
                                                           batch_size=batch_size)
        test_nfolds_sets.append(model.predict(X_dev))

    for item in test_nfolds_sets:
        second_level_test_set += item
    second_level_test_set = second_level_test_set / n_splits

    model.save("weights_BB_capsulnet_lstm" + num + ".hdf5")
    y_pred = second_level_test_set
    return y_pred
# Dim_capsule = 120 Routings = 15 Num_capsule = 30 Dim_capsule = 60 sequence_input = Input(shape=(maxlen, ), dtype='int32') embedded_sequences = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen, weights=[W], trainable=False)(sequence_input) embedded_sequences = SpatialDropout1D(0.1)(embedded_sequences) x = Bidirectional(CuDNNGRU(64, return_sequences=True))(embedded_sequences) x = Bidirectional(CuDNNGRU(64, return_sequences=True))(x) capsule = Capsule(num_capsule=Num_capsule, dim_capsule=Dim_capsule, routings=Routings, share_weights=True)(x) # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule) capsule = Flatten()(capsule) capsule = Dropout(0.4)(capsule) output = Dense(2, activation='softmax')(capsule) model = Model(inputs=[sequence_input], outputs=output) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', f1]) # checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc', verbose=1, save_best_only=True) # early_stopping = EarlyStopping(monitor='val_acc', patience = 5, verbose=1) model.fit( X_train, y_train,
# NOTE(review): truncated fragment — the enclosing `def` header (and the
# `Routings` assignment it references) plus the tail of the train(...) call
# are missing from this chunk, so only comments are added here; the code
# tokens are unchanged.
Num_capsule = 10
Dim_capsule = 32
embedding_layer = Embedding(input_dim=max_features,
                            output_dim=num_features,
                            input_length=maxlen,
                            weights=[W],
                            trainable=False)
sequence_input = Input(shape=(maxlen, ), dtype='int32')
lex_input = Input(shape=(43, ), dtype='float32')
embedded_sequences = embedding_layer(sequence_input)
embedded_sequences = SpatialDropout1D(0.1)(embedded_sequences)
x = Bidirectional(CuDNNGRU(128, return_sequences=True))(embedded_sequences)
x = Bidirectional(CuDNNGRU(128, return_sequences=True))(x)
capsule = Capsule(num_capsule=Num_capsule,
                  dim_capsule=Dim_capsule,
                  routings=Routings,
                  share_weights=True,
                  kernel_size=(3, 1))(x)
# output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule)
capsule = Flatten()(capsule)
capsule = Dropout(0.4)(capsule)
# Concatenate learned sequence features with the 43-dim lexicon features.
dense = Concatenate(axis=-1)([capsule, lex_input])
output = Dense(6, activation='softmax')(dense)
model = Model(inputs=[sequence_input, lex_input], outputs=output)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy', f1])
h = train(model, batch_size, nb_epoch, X_train, y_train, X_trial, y_trial,
def interActiveCapsule(left_pickle,
                       right_pickle,
                       hidden_dim=160,
                       dropout_rate=0.46,
                       capsule_dim=32,
                       input_kernel_size=12):
    """Interactive-attention BiGRU + capsule classifier over two text branches.

    Each branch (left/right) runs: frozen embedding -> dropout -> two stacked
    BiGRU layers. The two encodings interact through the custom
    `interActivate` layer; per-side attention weights are derived via
    tanh -> max-pool -> softmax -> Dense(softmax) and applied back to the
    corresponding encoder output with Dot. The attended vectors are
    concatenated, passed through a Capsule layer, reduced to capsule lengths,
    and classified with a 6-way softmax.

    Args:
        left_pickle / right_pickle: pickle paths passed to get_feature(),
            which returns (maxlen, max_features, num_features, W, X_train,
            y_train, X_dev, y_dev, test, y_test) per side.
        hidden_dim: units per direction of each GRU.
        dropout_rate: dropout rate used throughout the network.
        capsule_dim: dimensionality of each output capsule.
        input_kernel_size: kernel_size forwarded to the Capsule layer.

    Returns:
        (model, left_X_train, left_y_train, left_X_dev, left_y_dev,
        left_test, right_X_train, right_y_train, right_X_dev, right_y_dev,
        right_test, y_test).
        NOTE(review): y_test from the left pickle is overwritten by the
        right pickle's — presumably both share labels; confirm.

    NOTE(review): the MaxPooling1D/Reshape/Dense sizes are hard-coded to
    165, which looks like it assumes both branches' sequence length is
    165 — confirm against the pickles before reuse.
    """
    Routings = 3  # changed (tuning note from the original author)
    Num_capsule = 6
    Dim_capsule = capsule_dim
    left_maxlen, left_max_features, left_num_features, left_W, left_X_train, left_y_train, left_X_dev, left_y_dev, left_test, y_test = get_feature(
        left_pickle)
    right_maxlen, right_max_features, right_num_features, right_W, right_X_train, right_y_train, right_X_dev, right_y_dev, right_test, y_test = get_feature(
        right_pickle)

    # --- left branch: frozen embedding -> dropout -> two stacked BiGRUs ---
    left_sequence = Input(shape=(left_maxlen, ), dtype='int32')
    left_embedded = Embedding(input_dim=left_max_features,
                              output_dim=left_num_features,
                              input_length=left_maxlen,
                              weights=[left_W],
                              trainable=False)(left_sequence)
    left_embedded = Dropout(dropout_rate)(left_embedded)
    # bi-lstm
    left_embedded = Bidirectional(
        GRU(hidden_dim, recurrent_dropout=dropout_rate,
            return_sequences=True))(left_embedded)
    left_enc = Bidirectional(
        GRU(hidden_dim, recurrent_dropout=dropout_rate,
            return_sequences=True))(left_embedded)
    # left_capsule = Flatten()(left_capsule)

    # --- right branch: identical structure to the left branch ---
    right_sequence = Input(shape=(right_maxlen, ), dtype='int32')
    right_embedded = Embedding(input_dim=right_max_features,
                               output_dim=right_num_features,
                               input_length=right_maxlen,
                               weights=[right_W],
                               trainable=False)(right_sequence)
    right_embedded = Dropout(dropout_rate)(right_embedded)
    right_embedded = Bidirectional(
        GRU(hidden_dim, recurrent_dropout=dropout_rate,
            return_sequences=True))(right_embedded)
    right_enc = Bidirectional(
        GRU(hidden_dim, recurrent_dropout=dropout_rate,
            return_sequences=True))(right_embedded)
    # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule)
    # right_capsule = Flatten()(right_capsule)
    #comboVec = Concatenate(axis=1)([left_enc, right_enc])

    # Cross-branch interaction matrix from the custom interActivate layer;
    # its transpose (TransMatrix) drives the right-side attention.
    interActivateVec = interActivate(hidden_dims=hidden_dim)(
        [left_enc, right_enc])
    print("input_size", interActivateVec)
    tanh_inter_left = Tanh()(interActivateVec)
    inter_trans = TransMatrix()(interActivateVec)
    tanh_inter_right = Tanh()(inter_trans)

    # Pool each interaction map down to a 165-dim vector per side
    # (see hard-coded-165 note in the docstring).
    scaledPool_inter_left = MaxPooling1D(pool_size=165)(tanh_inter_left)
    scaledPool_inter_left = Reshape((165, ))(scaledPool_inter_left)
    print("scaledPool_inter_left ", scaledPool_inter_left)
    scaledPool_inter_right = MaxPooling1D(pool_size=165)(tanh_inter_right)
    scaledPool_inter_right = Reshape((165, ))(scaledPool_inter_right)

    # Attention weights: Softmax, dropout, then a softmax-activated Dense.
    softmax_inter_left = Softmax()(scaledPool_inter_left)
    softmax_inter_left = Dropout(dropout_rate)(softmax_inter_left)
    softmax_inter_left_1 = Dense(165, activation="softmax")(softmax_inter_left)
    softmax_inter_left_1 = Dropout(dropout_rate)(softmax_inter_left_1)
    softmax_inter_right = Softmax()(scaledPool_inter_right)
    softmax_inter_right = Dropout(dropout_rate)(softmax_inter_right)
    softmax_inter_right_1 = Dense(165,
                                  activation="softmax")(softmax_inter_right)
    softmax_inter_right_1 = Dropout(dropout_rate)(softmax_inter_right_1)

    # Apply the attention weights back onto each encoder output (weighted
    # sum over the time axis via Dot on axis 1).
    softmax_inter_left = Dot(axes=1)([left_enc, softmax_inter_left_1])
    print("softmax_inter_left", softmax_inter_left, left_enc)
    softmax_inter_right = Dot(axes=1)([right_enc, softmax_inter_right_1])
    print("softmax_inter_right", softmax_inter_right, right_enc)

    # Stack the two attended vectors into a sequence of 2*hidden_dim rows.
    comboVec = Concatenate(axis=1)([softmax_inter_left, softmax_inter_right])
    comboVec = Reshape((-1, 2 * hidden_dim))(comboVec)
    comboVec_dropout = Dropout(dropout_rate)(comboVec)
    #print("comboVect: ", comboVec)
    #combo_gru = Bidirectional(GRU(hidden_dim,dropout=0.08,return_sequences=True))(comboVec)
    #combo_gru = Bidirectional(GRU(24, dropout=0.08))(combo_gru)
    #combo_gru = Flatten(combo_gru)
    '''
    output1 = Dense(128, activation="relu")(comboVec)
    output1 = Dropout(0.34)(output1)
    output2 = Dense(64, activation="relu")(output1)
    output2 = Dropout(0.25)(output2)
    output3 = Dense(32, activation="relu")(output2)
    output3 = Dropout(0.12)(output3)
    '''
    my2dCapsule = Capsule(routings=Routings,
                          num_capsule=Num_capsule,
                          dim_capsule=Dim_capsule,
                          kernel_size=input_kernel_size)(comboVec_dropout)
    my2dCapsule_dropout = Dropout(dropout_rate)(my2dCapsule)
    print("capsule output: ", my2dCapsule)
    #bilstm_capsule = Bidirectional(LSTM(hidden_dim,recurrent_dropout=0.34,return_sequences=True))(my2dCapsule)
    #bilstm_capsule = Bidirectional(LSTM(hidden_dim,recurrent_dropout=0.34, return_sequences=True))(bilstm_capsule)
    #attentioned_capsule = AttentionM()(bilstm_capsule)

    # Capsule lengths (L2 norm over the capsule dimension) feed the softmax.
    output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(
        my2dCapsule_dropout)
    #my2dCapsule = Flatten()(my2dCapsule)
    output = Dense(6, activation="softmax")(output_capsule)
    print("output: ", output)
    model = Model(inputs=[left_sequence, right_sequence], outputs=output)
    return model, left_X_train, left_y_train, left_X_dev, left_y_dev, left_test, right_X_train, right_y_train, right_X_dev, \
        right_y_dev, right_test, y_test