# --- Vocabulary encoding ------------------------------------------------------
# NOTE(review): `corpus` and `y` are defined earlier in the file (not visible
# in this chunk); `one_hot`, `pad_sequences`, `Sequential`, `Embedding`,
# `LSTM`, `Dense` come from the Keras imports above.
vocab_size = 5000

# Integer-encode every document; `one_hot` hashes each word into [1, vocab_size).
one_hot_repr = [one_hot(words, vocab_size) for words in corpus]

# Left-pad / truncate every encoded document to a fixed length so the
# Embedding layer receives a rectangular (num_docs, sent_length) matrix.
sent_length = 40
embedded_docs = pad_sequences(one_hot_repr, padding='pre', maxlen=sent_length)

# --- Model definition ---------------------------------------------------------
embedding_vector_features = 80
model = Sequential()
model.add(Embedding(vocab_size, embedding_vector_features, input_length=sent_length))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit -> binary label
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

import numpy as np

X_final = np.array(embedded_docs)
y_final = np.array(y)

from sklearn.model_selection import train_test_split
X_train1, X_test1, y_train1, y_test1 = train_test_split(
    X_final, y_final, test_size=0.2, random_state=42)

# --- Training -----------------------------------------------------------------
model.fit(X_train1, y_train1,
          validation_data=(X_test1, y_test1),
          epochs=10, batch_size=64)

# FIX: Sequential.predict_classes() was deprecated and removed (TensorFlow 2.6).
# Thresholding the sigmoid probabilities at 0.5 recovers the same int labels.
y_pred = (model.predict(X_test1) > 0.5).astype("int32")
# Stacked-LSTM classifier head (the Embedding layer is added earlier in the
# file), regularised with Dropout + BatchNormalization between layers.
for layer in (
    Dropout(0.4),
    LSTM(100, return_sequences=True),   # emit the full sequence for the next LSTM
    BatchNormalization(),
    Dropout(0.4),
    LSTM(100),                          # final recurrent layer -> single vector
    BatchNormalization(),
    Dropout(0.4),
    Dense(1, activation='sigmoid'),     # binary output
):
    model.add(layer)

model.summary()

# Compile with an explicit (fairly aggressive) learning rate.
model.compile(loss='binary_crossentropy',
              optimizer=Adam(learning_rate=0.01),
              metrics=['accuracy'])

# Convert the padded documents and labels to numpy arrays for Keras.
X_final = np.array(embedded_docs)
y_final = np.array(y)

# Hold out 20% of the data for validation.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_final, y_final, test_size=0.2, random_state=77)

# Train, tracking validation metrics every epoch.
model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          epochs=20, batch_size=100)
# --- Word Mover's Distance k-NN -----------------------------------------------
# NOTE(review): WordMoversKNN, embedding_weights, train/test matrices and
# labels are defined elsewhere in the file.
print ("Method = KNN with word mover's distance as described in 'From Word Embeddings To Document Distances'")
model = WordMoversKNN(W_embed=embedding_weights, n_neighbors=3)
model.fit(train_matrix, train_labels)
results = model.predict(test_matrix)
print ("Accuracy = " + repr(sklearn.metrics.accuracy_score(test_labels, results)))
print (sklearn.metrics.classification_report(test_labels, results))

# --- MLP on bag-of-words features ---------------------------------------------
print ("Method = MLP with bag-of-words features")
np.random.seed(0)
model = Sequential()
# FIX: Keras 1 `init=` was renamed `kernel_initializer=` in Keras 2.
model.add(Dense(embeddings_dim, input_dim=train_matrix.shape[1],
                kernel_initializer='uniform', activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(embeddings_dim, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit -> binary label
if num_classes == 2:
    # FIX: `class_mode` was removed in Keras 2; binary_crossentropy alone
    # already defines the binary objective.
    model.compile(loss='binary_crossentropy', optimizer='adam')
else:
    model.compile(loss='categorical_crossentropy', optimizer='adam')
# FIX: `nb_epoch` was renamed `epochs` in Keras 2.
model.fit(train_matrix, train_labels, epochs=30, batch_size=32)
# FIX: `predict_classes` was removed (TensorFlow 2.6); the output layer is a
# single sigmoid unit, so thresholding at 0.5 yields the same class labels.
results = (model.predict(test_matrix) > 0.5).astype("int32")
print ("Accuracy = " + repr(sklearn.metrics.accuracy_score(test_labels, results)))
print (sklearn.metrics.classification_report(test_labels, results))

# --- Stack of two LSTMs -------------------------------------------------------
print ("Method = Stack of two LSTMs")
np.random.seed(0)
model = Sequential()
model.add(Embedding(max_features, embeddings_dim, input_length=max_sent_len,
                    mask_zero=True, weights=[embedding_weights]))
model.add(Dropout(0.1))
# FIX: Keras 1 `output_dim=` -> positional `units`; `init='zero'` ->
# `kernel_initializer='zeros'`.
model.add(LSTM(embeddings_dim, activation='relu', return_sequences=True,
               kernel_initializer='zeros'))
model.add(Dropout(0.1))
model.add(LSTM(embeddings_dim, activation='relu', kernel_initializer='zeros'))
model.add(Dense(1, kernel_initializer='zeros', activation='linear'))
# Duplicate of the MLP / stacked-LSTM section above, still written against the
# Keras 1 API (`init=`, `class_mode=`, `nb_epoch=`, `predict_classes`) — these
# names were renamed or removed in Keras 2. NOTE(review): confirm which Keras
# version this file targets.
print("Method = MLP with bag-of-words features")
np.random.seed(0)
model = Sequential()
# Hidden layer sized to the embedding dimensionality, fed bag-of-words vectors.
model.add( Dense(embeddings_dim, input_dim=train_matrix.shape[1], init='uniform', activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(embeddings_dim, activation='relu'))
model.add(Dropout(0.25))
# Single sigmoid unit -> binary prediction.
model.add(Dense(1, activation='sigmoid'))
if num_classes == 2: model.compile(loss='binary_crossentropy', optimizer='adam', class_mode='binary')
else: model.compile(loss='categorical_crossentropy', optimizer='adam')
model.fit(train_matrix, train_labels, nb_epoch=30, batch_size=32)
results = model.predict_classes(test_matrix)
print("Accuracy = " + repr(sklearn.metrics.accuracy_score(test_labels, results)))
print(sklearn.metrics.classification_report(test_labels, results))

print("Method = Stack of two LSTMs")
np.random.seed(0)
model = Sequential()
# NOTE(review): this Embedding(...) call is truncated at the end of the
# visible chunk; its remaining arguments continue beyond this file view.
model.add( Embedding(max_features, embeddings_dim,