Example #1
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB


class Classification(Supervised):
    def __init__(self, X, y, split=True, split_ratio=0.2):
        Supervised.__init__(self, X, y, split, split_ratio)
        self.LR = None
        self.DTC = None
        self.RFC = None
        self.GNB = None

    def fit(self):
        """

        Acronyms
        ----------
        LR : Logistic Regression
        DTC : Decision Tree Classifier
        RFC : Random Forest Classifier
        GNB : Gaussian Naive Bayes

        Returns
        -------
        None

        """
        # the train/test split attributes are assumed to come from the
        # Supervised base class
        self.LR = LogisticRegression(random_state=0).fit(self.X_train, self.y_train)
        self.DTC = DecisionTreeClassifier().fit(self.X_train, self.y_train)
        self.RFC = RandomForestClassifier(max_depth=None, random_state=0).fit(
            self.X_train, self.y_train)
        self.GNB = GaussianNB().fit(self.X_train, self.y_train)

    def evaluate(self):
        # sklearn estimators have no evaluate(); score() returns mean accuracy
        if self.X_test is not None:
            lr_eval = self.LR.score(self.X_test, self.y_test)
            dtc_eval = self.DTC.score(self.X_test, self.y_test)
            rfc_eval = self.RFC.score(self.X_test, self.y_test)
            gnb_eval = self.GNB.score(self.X_test, self.y_test)
            return {'LR': lr_eval, 'DTC': dtc_eval,
                    'RFC': rfc_eval, 'GNB': gnb_eval}
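
For context, here is a minimal sketch of the Supervised base class this
example assumes; the real class is not shown in the snippet, so the split
logic and the attribute names are assumptions inferred from how they are used:

from sklearn.model_selection import train_test_split

class Supervised:
    # hypothetical base class, assumed by the example above
    def __init__(self, X, y, split=True, split_ratio=0.2):
        if split:
            # hold out split_ratio of the data for evaluation
            (self.X_train, self.X_test,
             self.y_train, self.y_test) = train_test_split(
                X, y, test_size=split_ratio)
        else:
            self.X_train, self.y_train = X, y
            self.X_test = self.y_test = None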
Example #2
# `init` is the Keras 1.x argument name; newer Keras uses kernel_initializer
model.add(Dense(1, kernel_initializer="uniform", activation="sigmoid"))
model.compile(loss="binary_crossentropy", metrics=['accuracy'], optimizer='adam')
model.summary()
history = model.fit(X_train, Y_train, epochs=100, batch_size=100)


plt.plot(history.history['loss'])
#plt.plot(history.history['val_loss'])  # needs validation data; see sketch below
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper left')  # only the training loss is plotted
plt.show()
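
If the commented-out validation curve is wanted, fit() needs validation data.
A minimal sketch, assuming it is acceptable to carve the validation set out
of the training data with validation_split:

history = model.fit(X_train, Y_train, epochs=100, batch_size=100,
                    validation_split=0.2)  # adds 'val_loss' to history
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.legend(['train', 'validation'], loc='upper left')
plt.show()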


model.evaluate(X_test, Y_test)
Example #3
# Add an input layer
model.add(Dense(16, activation='relu', input_dim=10))

# Add a hidden layer
model.add(Dense(12, activation='relu'))

# Add another hidden layer
model.add(Dense(12, activation='relu'))

# Add another hidden layer
model.add(Dense(8, activation='relu'))

# Add an output layer
model.add(Dense(
    9,
    activation='softmax'))  # the 9 outputs correspond to the number of predicted classes

# compile and train the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(xx,
          yy,
          epochs=100,
          validation_data=(val_x, pd.get_dummies(val_y).values))
# For evaluate() too, val_y has to be converted to one-hot encoded dummies
model.evaluate(val_x, pd.get_dummies(val_y).values)

# Display the model summary and show parameters
model.summary()

# Start Training Our Classifier 
batch_size = 10
epochs = 50

history = model.fit(X_train,
                    y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    # validation data is needed for 'val_loss' below; a
                    # validation_split is assumed since none was given
                    validation_split=0.2)

predictions1 = model.predict(X_test)
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
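
predictions1 above is otherwise unused; a hedged cross-check, assuming the
model ends in a softmax and y_test is one-hot encoded, that the accuracy
computed from predict() matches the one reported by evaluate():

import numpy as np

predicted_classes = predictions1.argmax(axis=1)   # probabilities -> labels
true_classes = np.asarray(y_test).argmax(axis=1)  # assumes one-hot targets
manual_accuracy = (predicted_classes == true_classes).mean()
print('Accuracy from predictions:', manual_accuracy)  # should match score[1]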

# Plotting our loss charts
import matplotlib.pyplot as plt

history_dict = history.history

loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
epoch_range = range(1, len(loss_values) + 1)  # renamed to avoid shadowing epochs

line1 = plt.plot(epoch_range, val_loss_values, label='Validation/Test Loss')
line2 = plt.plot(epoch_range, loss_values, label='Training Loss')
plt.setp(line1, linewidth=2.0, marker='+', markersize=10.0)
plt.legend()
plt.show()

def createModel():
    # NOTE: the original snippet resumes mid-function here; the convolutional
    # layers that belong before this classifier head are not shown
    model = Sequential()
    # ... convolutional layers omitted in the original ...
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))  
    model.add(Dense(32, activation='relu'))
    model.add(Dense(5, activation='softmax'))
    
    #stochastic gradient descent
    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

model = createModel()
history = model.fit(train_features, train_labels, epochs=20, batch_size=10,
                    validation_data=(test_features, test_labels),
                    callbacks=[plot_losses])

scores = model.evaluate(test_features, test_labels)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

#Artificial Neural Network

from sklearn.model_selection import train_test_split
train_features, test_features, train_labels, test_labels = train_test_split(
    mega_data, labels_one_hot, test_size=0.3, random_state=9,
    stratify=labels_one_hot)

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(train_features)
train_features = scaler.transform(train_features)
test_features = scaler.transform(test_features)

print(train_features.shape)
print(test_features.shape)
epochs = 10
checkpoint = ModelCheckpoint('models/WWII_names.h5f',
                             monitor='val_loss',
                             save_best_only=True,
                             verbose=1)
callbacks = [checkpoint]

H = model.fit(trainX,
              trainY,
              epochs=epochs,
              validation_data=(testX, testY),
              callbacks=callbacks,
              verbose=1)

model = load_model('models/WWII_names.h5f')
score, acc = model.evaluate(testX, testY)
print(score, acc)

prediction = model.predict(testX)
prediction = prediction.argmax(axis=1)
print(classification_report(testY.argmax(axis=1), prediction))

plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, epochs), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, epochs), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, epochs), H.history["acc"], label="train_acc")
plt.plot(np.arange(0, epochs), H.history["val_acc"], label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
Example #7
# the snippet is truncated here; a RandomForestClassifier is assumed,
# inferred from the class_weight and n_estimators parameters
model = RandomForestClassifier(class_weight='balanced',
                               n_estimators=50)
model.fit(x_train, y_train)

# add predictions to dataset
df['PREDICTIONS'] = model.predict(df['FEATURES'].values.tolist())

# train LSTM model
max_features = len(word_to_index)
maxlen = len(features[0])
batch_size = 32
model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

# try using different optimizers and different optimizer configs
x_train, x_test, y_train, y_test = np.array(x_train), np.array(
    x_test), np.array(y_train), np.array(y_test)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=1,
          validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
y_proba = model.predict_proba(x_test)

f1_scor = f1_score_(y_proba, y_test)
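
f1_score_ is a helper the snippet never defines; a minimal sketch of what it
might look like, assuming a 0.5 threshold on the sigmoid output and sklearn's
f1_score:

from sklearn.metrics import f1_score

def f1_score_(y_proba, y_true, threshold=0.5):
    # hypothetical helper: threshold the sigmoid probabilities into hard labels
    y_pred = (y_proba >= threshold).astype(int).ravel()
    return f1_score(y_true, y_pred)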


# LSTM model

embed_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(max_features, embed_dim, input_length=X_train.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(2,activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

batch_size = 32
model.fit(X_train, Y_train, epochs=7, batch_size=batch_size, verbose=2)

score, acc = model.evaluate(X_test, Y_test, verbose=2, batch_size=batch_size)
print("score: %.2f" % score)
print("acc: %.2f" % acc)




from keras import backend as K  # needed for clear_session below
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD

K.clear_session()  # clear any previous model from memory

model = Sequential()
model.add(Dense(1, input_shape=(4, ), activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

# train model
history = model.fit(X_train, y_train,
                    epochs=10)  # record history of training progress
result = model.evaluate(X_test, y_test)

# visualize the training process
historydf = pd.DataFrame(history.history, index=history.epoch)
historydf.plot(ylim=(0, 1))
plt.title("Test accuracy: {:3.1f} %".format(result[1] * 100), fontsize=15)

# ===================================
#   manually tune learning rate
# ===================================
dflist = []

learning_rates = [0.01, 0.05, 0.1, 0.5]

for lr in learning_rates:
    # the loop body is cut off in the original; a plausible completion,
    # mirroring the training code above (SGD with the current lr is assumed)
    K.clear_session()
    model = Sequential()
    model.add(Dense(1, input_shape=(4,), activation='sigmoid'))
    model.compile(optimizer=SGD(lr=lr),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    h = model.fit(X_train, y_train, epochs=10, verbose=0)
    dflist.append(pd.DataFrame(h.history, index=h.epoch))
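
A hedged sketch of comparing the runs collected in dflist, assuming each
entry holds one learning rate's training history:

historydf = pd.concat(dflist, axis=1)
metrics_reported = dflist[0].columns
historydf.columns = pd.MultiIndex.from_product(
    [learning_rates, metrics_reported], names=['learning_rate', 'metric'])
historydf.plot(ylim=(0, 1))
plt.title("Training history by learning rate")
plt.show()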
Example #10
model = Sequential()
model.add(Dense(1, input_shape=(4, ), activation='sigmoid'))
model.compile(optimizer='Adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit(X_train,
                    y_train,
                    epochs=30,
                    verbose=2,
                    validation_split=0.1)

# model.evaluate returns the loss and accuracy (indices 0 and 1 respectively),
# while model.predict returns the model's output for the given input
result = model.evaluate(X_test, y_test)

history = pd.DataFrame(history.history, index=history.epoch)
history.plot(ylim=(0, 1))
plt.title('Test set accuracy: {:.1f} %'.format(result[1] * 100),
          fontsize=15)

dflist = []
learning_rates = [0.01, 0.05, 0.1, 0.5]

for lr in learning_rates:
    K.clear_session()

    model = Sequential()
    model.add(Dense(1, input_shape=(4, ), activation='sigmoid'))
    model.compile(Adam(lr=lr),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    # the original snippet ends mid-call; the loss/metrics above and the
    # fit/append below are assumptions mirroring the earlier training code
    # (an `from keras.optimizers import Adam` is also assumed)
    h = model.fit(X_train, y_train, epochs=30, verbose=0)
    dflist.append(pd.DataFrame(h.history, index=h.epoch))
Example #11
    def train(alg_str, tempx_train, tempy_train, tempx_val, tempy_val, x_test,
              y_test):
        #print("Algorithm:", alg_str)
        #print("tempx_train shape:", tempx_train.shape)
        #print("tempy_train shape:", tempy_train.shape)

        if (alg_str != "cnn" and alg_str != "dnn"):
            if (alg_str == "knn"):
                kVals = range(1, 5)
                accuracies_knn = []

                # loop over values of `k` for the k-Nearest Neighbor classifier
                for k in kVals:
                    # train the classifier with the current value of `k`
                    knnmodel = KNeighborsClassifier(n_neighbors=k)
                    knnmodel.fit(tempx_train, tempy_train)

                    score = knnmodel.score(tempx_val, tempy_val)
                    print("k=%d, accuracy=%.2f%%" % (k, score * 100))
                    accuracies_knn.append(score)

                # find the value of k with the largest accuracy (moved out of
                # the loop so it runs once, after all k have been scored)
                j = int(np.argmax(accuracies_knn))
                print(
                    "k=%d achieved highest accuracy of %.2f%% on validation data"
                    % (kVals[j], accuracies_knn[j] * 100))

                knnmodel = KNeighborsClassifier(n_neighbors=kVals[j])
                model = knnmodel

            elif (alg_str == "rf"):
                kVals = [100]
                accuracies_rf = []

                # loop over various values of `k` for the Random Forest classifier
                for k in [100]:
                    # train the Random Forest classifier with the current value of `k`
                    model = RandomForestClassifier(n_estimators=k)
                    model.fit(tempx_train, tempy_train)
                    score = model.score(
                        tempx_val, tempy_val
                    )  #is this right?  Using val to find best # of trees?
                    print("k=%d, accuracy=%.2f%%" % (k, score * 100))
                    accuracies_rf.append(score)

                    # find the value of k that has the largest accuracy
                    j = int(np.argmax(accuracies_rf))
                    print(
                        "k=%d achieved highest accuracy of %.2f%% on validation data"
                        % (kVals[j], accuracies_rf[j] * 100))
                rfmodel = RandomForestClassifier(n_estimators=kVals[j])
                model = rfmodel
            else:
                # svm or linear model case
                model = skmodel_dict[alg_str]

            model.fit(tempx_train, tempy_train)
            predictions = model.predict(x_test)
            acc = accuracy_score(y_test, predictions)
            print(alg_str + " accuracy: ", acc)

            # record the accuracy for the variance estimate;
            # format: [[5, 0.123], [5, 0.135], ...]
            variance.append([i, acc])

            return round(acc, 4)

        else:  # cnn or dnn case
            # both branches use identical callbacks; only the checkpoint
            # filename and the model constructor differ
            filepath = alg_str + 'bestweights.hdf5'
            callbacks = [
                EarlyStopping(monitor='val_loss', patience=20, verbose=1),
                ModelCheckpoint(filepath,
                                monitor='val_loss',
                                save_best_only=True,
                                verbose=1),
            ]
            model = cnnmodel() if alg_str == "cnn" else dnnmodel()
            #fitting the model
            model.fit(tempx_train,
                      tempy_train,
                      batch_size=batch_size,
                      epochs=epochs,
                      verbose=1,
                      callbacks=callbacks,
                      validation_data=(tempx_val, tempy_val))

            #testing the model on the testing set
            testscore = model.evaluate(x_test, y_test, verbose=0)
            print("testscore: ", testscore[1])

            # record the accuracy for the variance estimate;
            # format: [[5, 0.123], [5, 0.135], ...]
            variance.append([i, testscore[1]])

            return round(testscore[1], 4)
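
A hedged usage sketch; the data splits, skmodel_dict, the cnnmodel/dnnmodel
builders, and the variance list are all assumed to exist in the enclosing
scope, as the snippet implies:

for alg in ["knn", "rf", "cnn"]:  # hypothetical algorithm strings
    acc = train(alg, tempx_train, tempy_train, tempx_val, tempy_val,
                x_test, y_test)
    print("%s -> test accuracy: %.4f" % (alg, acc))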