'''model = LogisticRegression()
model.fit(x_train, y_train)
prediction = model.predict(x_test)
print(accuracy_score(y_test, prediction))
print(confusion_matrix(y_test, prediction))
print(classification_report(y_test, prediction))'''
# Random forest
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
model.fit(x_train, y_train)
prediction = model.predict(x_test)
print(accuracy_score(y_test, prediction))
print(confusion_matrix(y_test, prediction))
print(classification_report(y_test, prediction))
#DL
'''import keras
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(9, activation='relu', input_dim=18))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=10, epochs=16, validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])'''
# Random forest gave the benchmark for the prediction accuracy
# Checking which features have high importance in predicting the outcome
feature_imp = pd.DataFrame(model.feature_importances_,
                           index=pd.DataFrame(x_train).columns,
                           columns=['importance']).sort_values('importance',
                                                               ascending=False)
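# A small usage sketch (not in the original snippet): inspect the top-ranked
# features from the importance table built above
print(feature_imp.head(10))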
Example #2
# Try FC-NN model
from keras.models import Sequential
from keras.layers import Dense

# xx is the training data as a DataFrame of shape (390144, 10): rows are samples, columns are features
# The number of features also corresponds to input_dim for the first NN layer
xx = train_x

# The output is multi-class, so train_y has to be converted to one-hot encoding
yy = pd.get_dummies(train_y).values

model = Sequential()

# Add the input layer (via input_dim) and the first hidden layer
model.add(Dense(16, activation='relu', input_dim=10))

# Add another hidden layer
model.add(Dense(12, activation='relu'))

# Add another hidden layer
model.add(Dense(12, activation='relu'))

# Add another hidden layer
model.add(Dense(8, activation='relu'))

# Add an output layer
model.add(Dense(
    9,
    activation='softmax'))  # 9 outputs correspond to the number of predicted classes
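# A plausible compile/fit step for the multi-class setup above (assumed, not in
# the original snippet): categorical_crossentropy pairs with the one-hot yy and
# the softmax output layer
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(xx, yy, epochs=10, batch_size=32)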
# Neural nets example

# %tensorflow_version 2.x 

# If you wish to use Tensorflow 1.X run the following line and then restart runtime
# %tensorflow_version 1.x 
# You'll need to change your import statements from tensorflow.keras to keras
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()

model.add(Dense(18, kernel_initializer="uniform", activation="relu", input_dim=16))
model.add(Dense(1, kernel_initializer="uniform", activation="sigmoid"))

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Display Model Summary and Show Parameters
model.summary()

# Start Training Our Classifier 
batch_size = 10
epochs = 50

history = model.fit(X_train,
                    y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(X_test, y_test))  # validation args assumed; the original call was truncated here
print(padded_test)
#%%
#RandomForest model fitting
model = RandomForestClassifier(n_estimators=100)
model.fit(padded_train, y_train)
#%%
y_pred = model.predict(padded_test)
acc = accuracy_score(y_test, y_pred)
print(acc * 100, "%")

# %%
vocab_size = 50_000
# keras' one_hot hashes each word of a text to an integer in [1, vocab_size)
from keras.preprocessing.text import one_hot
one_hots = [one_hot(word, vocab_size) for word in X_train]
print(one_hots)
# %%
from keras.preprocessing.sequence import pad_sequences
padded = pad_sequences(one_hots, padding='post', maxlen=5)
print(padded)
# %%
from keras.models import Sequential
from keras.layers import Embedding

model = Sequential()
model.add(Embedding(vocab_size, 50))
model.compile("adam", "mse")
# %%
predict = model.predict(padded)
# %%
predict.shape
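# The Embedding-only model is just a lookup: each of the 5 padded ids maps to a
# 50-dim vector, so the expected shape is (len(padded), 5, 50)
assert predict.shape == (len(padded), 5, 50)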

Example #5
def algorithm(method_A, OneVsRest, OneVsOne, randomized):

    print("Selecting algorithm...")
    print("      ")

    if method_A == "svm":

        print("Starting with " + method_A)
        print("      ")

        parameters_svm = {
            'kernel': ('linear', 'rbf'),
            'C': [1, 3, 10, 100],
            'gamma': [0.01, 0.001]
        }
        model = svm.SVC()
        model = search_par(randomized, model, parameters_svm)

    if method_A == "random_forest":

        print("Starting with " + method_A)
        print("      ")

        parameters_random = {
            "max_depth": [2, 3, None],
            "max_features": [2, 4, 6],
            "min_samples_split": [2, 4, 6],
            "min_samples_leaf": [2, 4, 6],
            "bootstrap": [True, False],
            "criterion": ["gini", "entropy"]
        }
        model = RandomForestClassifier(n_estimators=100)
        model = search_par(randomized, model, parameters_random)

    if method_A == "logistic":

        print("Starting with " + method_A)
        print("      ")

        parameters_logistic = {'C': [100, 1000], 'tol': [0.001, 0.0001]}
        model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
        model = search_par(randomized, model, parameters_logistic)

    if method_A == "neural_networks":

        print("Starting with " + method_A)
        print("      ")

        #model = MLPClassifier()

        model = Sequential()
        model.add(
            Dense(991, input_dim=179, kernel_initializer='normal')
        )  # number of features of the data + 1 node for the bias term
        model.add(Activation('relu'))
        model.add(Dropout(0.2))
        model.add(
            Dense(495, kernel_initializer='normal')
        )  # Heuristic: one hidden layer whose width is the mean of the input and output layer sizes often gives decent performance
        model.add(Activation('relu'))
        model.add(Dropout(0.5))
        model.add(
            Dense(99, kernel_initializer='normal')
        )  # With softmax, the output layer has one node per class label
        model.add(Activation('softmax'))

        # Note: this SGD optimizer is defined but unused; the model compiles with rmsprop
        sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy'])

        OneVsRest = False
        OneVsOne = False

    if OneVsRest:

        print("Using OneVsRest ")
        print("      ")

        return OneVsRestClassifier(model)

    if OneVsOne:

        print("Using OneVsOne")
        print("      ")

        return OneVsOneClassifier(model)

    print("Algorithm selected: " + method_A)
    print("      ")

    return model
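# A minimal sketch of the layer-sizing heuristic quoted in the neural_networks
# branch above (hidden width = mean of the input and output layer sizes);
# n_inputs/n_outputs are illustrative values, not from the original code
n_inputs, n_outputs = 179, 99
hidden_units = (n_inputs + n_outputs) // 2  # 139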
Example #6
# Applying k-fold cross-validation
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10)  # cv is the number of folds to split the data into
m = accuracies.mean()
print(m)
s = accuracies.std()
print(s)
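# Reporting the fold scores as mean +/- std makes the spread across folds explicit
print("CV accuracy: %.3f +/- %.3f" % (m, s))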

#calculating train score and test score
train_scores = [classifier.score(X_train, y_train)]
test_scores = [classifier.score(X_test, y_test)]

#ANN

'''#Importing the Keras Libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD
from keras.regularizers import l2
from keras.constraints import maxnorm

classifier = Sequential()

#Adding the input layer and the first hidden layer
classifier.add(Dense(units=11, kernel_initializer='uniform', activation='relu',
                     bias_regularizer='l2', input_dim=12))'''
Example #7
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cmSVM = confusion_matrix(y_test, y_pred)

#-------------------------------------ANN-----------------------------

import keras
from keras.models import Sequential
from keras.layers import Dense

# Initialising the ANN
classifier = Sequential()

# Adding the input layer and the first hidden layer
classifier.add(
    Dense(units=12, kernel_initializer='uniform', activation='relu', input_dim=6))

# Adding the second hidden layer
#classifier.add(Dense(units=150, kernel_initializer='uniform', activation='sigmoid'))

# Adding the third hidden layer
classifier.add(Dense(units=80, kernel_initializer='uniform', activation='relu'))

# Adding the fourth hidden layer
classifier.add(Dense(units=12, kernel_initializer='uniform', activation='sigmoid'))

# Adding the output layer
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

# Compiling the ANN (loss/metrics assumed; the original call was truncated here)
classifier.compile(optimizer='adam',
                   loss='binary_crossentropy',
                   metrics=['accuracy'])
Example #8
y_train = onehot_direct.fit_transform(np.array(y_train).reshape(-1, 1)).toarray()  # leading line reconstructed from the parallel y_test call below
y_test = onehot_direct.fit_transform(np.array(y_test).reshape(600,
                                                              1)).toarray()

## Deep Learning
from keras.models import Sequential, Model
from keras.layers import Dense, Input
'''deep_inp = Input(shape=(x_train.shape[1],), name='input')  # shape is per-sample, not the full array shape
deep = Dense(100, activation='tanh')(deep_inp)
deep = Dense(100, activation='tanh')(deep)
deep_out = Dense(4, activation='softmax')(deep)

model = Model(inputs=deep_inp, outputs=deep_out)'''

model = Sequential()
model.add(Dense(units=100, activation='tanh', kernel_initializer='he_uniform'))
model.add(Dense(units=100, activation='tanh', kernel_initializer='he_uniform'))
model.add(Dense(units=4, activation='softmax'))  # softmax (not sigmoid) pairs with categorical_crossentropy

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

from sklearn.preprocessing import OneHotEncoder
onehot_direct = OneHotEncoder()
'''y_train_oh = np.array(y_train)
y_train_oh = y_train_oh.reshape(len(y_train_oh), 1)
y_train_oh.shape
y_train_oh = onehot_direct.fit_transform(y_train_oh).toarray()
y_train_oh'''
Example #9
model = RandomForestClassifier(n_estimators=100,
                               verbose=1,
                               class_weight={
                                   0: 1.,
                                   1: weight_1
                               })
# model = GaussianNB()
# model = linear_model.LogisticRegression(verbose=1)
# model = svm.SVC(kernel='sigmoid', gamma=5,C=1,verbose=1)

DNN = False
if DNN:
    model = Sequential()
    model.add(
        Dense(2000,
              input_dim=tr.shape[1] - 1,
              kernel_initializer='normal',
              activation='relu'))
    # model.add(Dropout(0.5))
    # model.add(Dense(1000, input_dim=1000, kernel_initializer='normal', activation='relu'))
    # model.add(Dropout(0.5))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

X = tr.loc[:, tr.columns != 'order']
X = lsa.fit_transform(X)
y = tr['order']
t1 = time.time()
# Step 3) Baseline Model 2: Logistic Regression
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=42)

import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD

K.clear_session()  # clear model from memory

model = Sequential()
model.add(Dense(1, input_shape=(4, ), activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

# train model
history = model.fit(X_train, y_train,
                    epochs=10)  # record history of training progress
result = model.evaluate(X_test, y_test)

# visualize the training process
import pandas as pd
import matplotlib.pyplot as plt
historydf = pd.DataFrame(history.history, index=history.epoch)
historydf.plot(ylim=(0, 1))
plt.title("Test accuracy: {:3.1f} %".format(result[1] * 100), fontsize=15)

# ===================================
Example #11
        _val_f1 = f1_score(val_targ, val_predict)
        _val_recall = recall_score(val_targ, val_predict)
        _val_precision = precision_score(val_targ, val_predict)
        self.val_f1s.append(_val_f1)
        self.val_recalls.append(_val_recall)
        self.val_precisions.append(_val_precision)
        print (" — val_f1: % f — val_precision: % f — val_recall % f " ,_val_f1, _val_precision, _val_recall)
        return

metrics = Metrics()
from sklearn.neural_network import MLPClassifier
# solver was left empty in the original; 'adam' is sklearn's default
model = MLPClassifier(hidden_layer_sizes=(200,), max_iter=500, alpha=0.0001,
                      solver='adam', verbose=10, random_state=0, tol=1e-8, batch_size=100)


"""

model.add(Dense(8, input_dim=8, activation='relu'))
model.add(Dense(12, activation='relu'))
model.add(Dense(6, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

"""

#compile the model

"""

model.compile(loss='binary_crossentropy', optimizer='adagrad',metrics=['accuracy'])
model.summary()
Example #12
# In[17]:

# Download the dataset
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten the 28x28 images to 784-dim vectors to match input_dim below
# (this step was missing from the original snippet)
x_train = x_train.reshape(-1, 784).astype('float32') / 255.0
x_test = x_test.reshape(-1, 784).astype('float32') / 255.0

# In[18]:

from keras.models import Sequential
from keras.layers import Dense, Activation

# In[19]:

# Build the architecture
model = Sequential()
model.add(Dense(20, input_dim=784))
model.add(Activation('relu'))
model.add(Dense(20))
model.add(Activation('relu'))
model.add(Dense(10))
model.add(Activation('softmax'))
model.summary()

# In[20]:

# Set the optimizer and the loss
from keras.optimizers import SGD

opt = SGD(learning_rate=0.001)
model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['accuracy'])  # metrics assumed; the original call was truncated here
Example #13
model = model.fit(x_train, y_train)
y_prediction = model.predict(x_test)  # renamed to match the accuracy line below

print("\naccuracy", np.sum(y_prediction == y_test) / float(len(y_test)))


# 1-layer Neural Network
###############################
from keras.models import Sequential
from keras.layers import Dense, Activation


from time import time
start = time()

model = Sequential()
model.add(Dense(2, input_dim=4))
model.add(Activation("softmax"))


model.compile(loss='categorical_crossentropy',
              optimizer='sgd', metrics=['accuracy'])
model.fit(x_train, y_train_onehot)

print('\ntime taken %s seconds' % str(time() - start))

y_prediction = model.predict_classes(x_test)
print('\n\naccuracy', np.sum(y_prediction == y_test) / float(len(y_test)))

##########################################

Example #14
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from keras.layers import Dense
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, train_test_split

model = RandomForestClassifier()
cross_val_score(model, X, y_true)

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y_true,
                                                    test_size=0.3,
                                                    random_state=42)

import keras.backend as K

model = Sequential()
model.add(Dense(1, input_shape=(4, ), activation='sigmoid'))
model.compile(optimizer='Adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit(X_train,
                    y_train,
                    epochs=30,
                    verbose=2,
                    validation_split=0.1)

# model.evaluate returns the loss and accuracy (indices 0 and 1 respectively),
# while model.predict returns the model's raw output for the given input
result = model.evaluate(X_test, y_test)
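# Illustrating the distinction above (names from this snippet):
loss, accuracy = result          # evaluate() -> [loss, accuracy]
probs = model.predict(X_test)    # predict() -> raw sigmoid outputs, shape (n_samples, 1)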

history = pd.DataFrame(history.history, index=history.epoch)
Example #15
            final_X = final_data[:, 0:-1]
            final_Y = final_data[:, -1]

            # Based on user choice, choose the classifier to be trained
            if args.classifier == "random_forest":
                final_model = RandomForestClassifier(n_estimators=100,
                                                     max_depth=32,
                                                     random_state=0,
                                                     n_jobs=-1,
                                                     verbose=True)
            elif args.classifier == "logistic_regression_keras":
                classes = 26
                final_model = Sequential()
                final_model.add(
                    Dense(classes,
                          activation='softmax',
                          kernel_regularizer=regularizers.l1(0.0000001),
                          input_shape=(293, )))
                final_model.compile(optimizer=optimizers.Adam(learning_rate=0.01),
                                    loss='categorical_crossentropy',
                                    metrics=['accuracy'])
                final_model.fit(final_X,
                                to_categorical(final_Y),
                                epochs=100,
                                batch_size=32)
            elif args.classifier == "logistic_regression_scikit":
                final_model = LogisticRegression(penalty='l1',
                                                 C=1000,
                                                 multi_class="multinomial",
                                                 solver="saga",
                                                 max_iter=100)  # call closed minimally; the original was truncated here
Example #16
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Conv2D, MaxPooling2D, Flatten
from time import time

start = time()

img_rows, img_cols = 28, 28
nb_filters = 32
pool_size = (2,2)
kernel_size = (3,3)

X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)

model = Sequential()
model.add(Conv2D(nb_filters, kernel_size,
                 padding='valid',
                 input_shape=input_shape))

model.add(Activation('relu'))
model.add(Conv2D(nb_filters, kernel_size))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(10))
model.add(Activation('softmax'))
print('Accuracy Score', acc)

accuracy.append(acc)

y_proba = model.predict_proba(x_test)

f1_scor = f1_score_(y_proba, y_test)


# LSTM model

embed_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(max_fatures, embed_dim, input_length=X_train.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
 

batch_size = 32
model.fit(X_train, Y_train, epochs = 7, batch_size=batch_size, verbose = 2)

score,acc = model.evaluate(X_test, Y_test, verbose = 2, batch_size = batch_size)
print("score: %.2f" % (score))
print("acc: %.2f" % (acc))

Example #18
cm_values = list()
for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    from keras.models import Sequential
    from keras.layers import Dense
    # Initialising the ANN
    classifier = Sequential()

    # Adding the input layer and the first hidden layer
    classifier.add(
        Dense(units=20, kernel_initializer='uniform', activation='relu', input_dim=72))

    # Adding the second hidden layer
    classifier.add(Dense(units=5, kernel_initializer='uniform', activation='relu'))

    # Adding the output layer
    classifier.add(
        Dense(units=1, kernel_initializer='uniform', activation='hard_sigmoid'))

    # Compiling the ANN
    classifier.compile(optimizer='adam',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])

    # Fitting the ANN to the Training set
    classifier.fit(X_train, y_train, batch_size=10, epochs=10)
Example #19
    print('Test AUC %.2f' % roc_auc_score(y_test, model.predict(x_test)))
    print('Test accuracy %.2f' % model.score(x_test, y_test))
model = RandomForestClassifier(max_depth=6,
                               class_weight='balanced',
                               n_estimators=50)
model.fit(x_train, y_train)

# add predictions to dataset
df['PREDICTIONS'] = model.predict(df['FEATURES'].values.tolist())

# train LSTM model
max_features = len(word_to_index)
maxlen = len(features[0])
batch_size = 32
model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

# try using different optimizers and different optimizer configs
x_train, x_test, y_train, y_test = np.array(x_train), np.array(
    x_test), np.array(y_train), np.array(y_test)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
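# For example (illustrative, not from the original): swap in RMSprop with a
# custom learning rate and re-compile
# from keras.optimizers import RMSprop
# model.compile(loss='binary_crossentropy', optimizer=RMSprop(learning_rate=1e-3), metrics=['accuracy'])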
model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=1,
          validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
Example #20
acc = model.score(x_test, y_test)

y_pred = model.predict(x_test)
acc2 = accuracy_score(y_test, y_pred)



from keras.models import Sequential, Model
from keras.layers import Dense, LSTM, Input
from keras.utils import np_utils

y_train = np_utils.to_categorical(y_train, 11)
y_test = np_utils.to_categorical(y_test, 11)

model = Sequential()
model.add(Dense(100, input_dim=11, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(11, activation='softmax'))
model.summary()

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

from keras.callbacks import EarlyStopping
callback1 = EarlyStopping(monitor='loss', patience=20, mode='auto')
model.fit(x_train, y_train, epochs=1000, batch_size=10, callbacks=[callback1])  # pass the callback so early stopping takes effect


print(acc)

Example #21
def main ():
    print("......... Welcome to SBA Loan Data Analysis ....... ")
    print()
    ip1 = int(input("What do you want to do : \n 1) Prediction \n or \n 2) Analyze the data..?? \n\n "))
    
    if ip1 == 1:
        print("Menu :\n \
            1)Random forest \n \
            2)Decision Tree \n \
            3)Naives Bayes \n \
            4)SVM \n \
            5)XG Boost \n \
            6)KNN \n \
            7)Keras Neural Network ")  
        ip2 = int(input("Enter a value from Above Menu : "))
        
        # Importing the libraries
        import numpy as np
        import pandas as pd
        
        # Importing the dataset
        D7aFY1991_FY1999 = pd.read_csv('D:/CDAC_DATA/CDAC_PROJECT/15-1-MachineL/7aFY1991_FY1999_1.csv')
        D7aFY2000_FY2009 = pd.read_csv('D:/CDAC_DATA/CDAC_PROJECT/15-1-MachineL/7aFY2000_FY2009_1.csv')
        D7aFY2010_Present = pd.read_csv('D:/CDAC_DATA/CDAC_PROJECT/15-1-MachineL/7aFY2010_Present_1.csv')
                
        #merge the dataframes (DataFrame.append was removed from pandas; use pd.concat)
        
        Data_7a = pd.concat([D7aFY1991_FY1999, D7aFY2000_FY2009, D7aFY2010_Present])
        
        #create sample data
        #Data_sample_7a = Data_7a.sample(frac = 0.1,random_state = 0)
        Data_sample_7a = Data_7a
        
        Data_sample_7a = Data_sample_7a.iloc[:,[4,6,9,11,12,14,16,17,18,19,20,24,25,26,28,29]].values
        
        #convert into pandas dataframe
        
        Data_sample_7a = pd.DataFrame(data=Data_sample_7a)
        
        # Taking care of missing data (sklearn's Imputer was removed; use SimpleImputer)
        
        from sklearn.impute import SimpleImputer
        imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
        imputer = imputer.fit(Data_sample_7a.iloc[:,10:11]) 
        Data_sample_7a.iloc[:,10:11] = imputer.transform(Data_sample_7a.iloc[:,10:11])
        
        imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
        imputer = imputer.fit(Data_sample_7a.iloc[:,8:9])
        Data_sample_7a.iloc[:,8:9] = imputer.transform(Data_sample_7a.iloc[:,8:9])
        
        Data_sample_7a = Data_sample_7a.dropna()
              
        #split data columns into dependent and independent variables
        
        X7a = Data_sample_7a.iloc[:,0:14].values #independent
        y7a = Data_sample_7a.iloc[:,[15]].values #dependent
        
        # =============================================================================
        #convert numpy objects to pandas dataframes
        pd_X7a = pd.DataFrame(data=X7a[0:,0:])
        pd_y7a = pd.DataFrame(data=y7a[0:,0:])
        # =============================================================================
        
        #encoding categorical data in independent variable
        pd_X7a=np.asarray(pd_X7a)#convert pandas dataframe into numpy array
        pd_X7a = encoder(pd_X7a)
        
        #get user data and encode
        Userdata = getdata(pd_X7a)
        Userdata=np.asarray(Userdata)#convert pandas dataframe into numpy array
        Userdata = encoder(Userdata)
                
        #encoding dependent variable
        pd_y7a[0] = pd_y7a[0].replace(['PIF'],'0')
        pd_y7a[0] = pd_y7a[0].replace(['CANCLD','EXEMPT','CHGOFF','COMMIT'],'1')
        
        pd_y7a=np.asarray(pd_y7a)#convert pandas dataframe into numpy array
        
        # Splitting the dataset into the Training set and Test set
        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(pd_X7a, pd_y7a, test_size = 0.25, random_state = 0)
                
        # Feature Scaling of accuracy data
        (X_train1, X_test) = scalingFunction(X_train,X_test)
        
        # Feature Scaling of user data
        from sklearn.preprocessing import StandardScaler
        sc = StandardScaler()
        Userdata = sc.fit_transform(Userdata)
        
                
        if ip2 == 1:
                
                # Fitting Random Forest Classification to the Training set
                from sklearn.ensemble import RandomForestClassifier
                classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
                classifier.fit(X_train1, y_train)
                
                # Predicting the Test set results
                y_pred = classifier.predict(X_test)
                y_pred = pd.DataFrame(data=y_pred[0:]) #converting to data frame 
                
                # Making the Confusion Matrix
                from sklearn.metrics import confusion_matrix
                cm = confusion_matrix(y_test, y_pred)
                print("Confusion Matrix : \n")
                print(cm)
                print("Accuracy rate is : \n ")
                print((cm[0,0]+cm[1,1])/(len(y_pred)))
                
                
                # Predicting the user set results
                user_pred = classifier.predict(Userdata)
                converter(user_pred)
                print("The Loan will be : ")
                print(user_pred)
                
                
        elif ip2 == 2:
            
                # Fitting Decision Tree Classification to the Training set
                from sklearn.tree import DecisionTreeClassifier
                classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
                classifier.fit(X_train, y_train)
                
                # Predicting the Test set results
                y_pred = classifier.predict(X_test)
                
                # Making the Confusion Matrix
                from sklearn.metrics import confusion_matrix
                cm = confusion_matrix(y_test, y_pred)
                print("Confusion Matrix : \n")
                print(cm)
                print("Accuracy rate is : \n ")
                print((cm[0,0]+cm[1,1])/(len(y_pred)))
                
                
                # Predicting the user set results
                user_pred = classifier.predict(Userdata)
                converter(user_pred)
                print("The Loan will be : ")
                print(user_pred)
            
        elif ip2 == 3:
            
                # Fitting Naive Bayes to the Training set
                from sklearn.naive_bayes import GaussianNB
                classifier = GaussianNB()
                classifier.fit(X_train, y_train)
                
                # Predicting the Test set results
                y_pred = classifier.predict(X_test)
                
                # Making the Confusion Matrix
                from sklearn.metrics import confusion_matrix
                cm = confusion_matrix(y_test, y_pred)
                print("Confusion Matrix : \n")
                print(cm)
                print("Accuracy rate is : \n ")
                print((cm[0,0]+cm[1,1])/(len(y_pred)))
                
                
                # Predicting the user set results
                user_pred = classifier.predict(Userdata)
                converter(user_pred)
                print("The Loan will be : ")
                print(user_pred)
            
        elif ip2 == 4:
            # Fitting SVM to the Training set
                from sklearn.svm import SVC
                classifier = SVC(kernel = 'linear', random_state = 0)
                classifier.fit(X_train, y_train)
                
                # Predicting the Test set results
                y_pred = classifier.predict(X_test)
                
                # Making the Confusion Matrix
                from sklearn.metrics import confusion_matrix
                cm = confusion_matrix(y_test, y_pred)
                print("Confusion Matrix : \n")
                print(cm)
                print("Accuracy rate is : \n ")
                print((cm[0,0]+cm[1,1])/(len(y_pred)))
                
                
                # Predicting the user set results
                user_pred = classifier.predict(Userdata)
                converter(user_pred)
                print("The Loan will be : ")
                print(user_pred)
            
        elif ip2 == 5:
            # Fitting XGBoost to the Training set
                from xgboost import XGBClassifier
                classifier = XGBClassifier()
                classifier.fit(X_train, y_train)
                
                # Predicting the Test set results
                y_pred = classifier.predict(X_test)
                
                # Making the Confusion Matrix
                from sklearn.metrics import confusion_matrix
                cm = confusion_matrix(y_test, y_pred)
                print("Confusion Matrix : \n")
                print(cm)
                print("Accuracy rate is : \n ")
                print((cm[0,0]+cm[1,1])/(len(y_pred)))
                
                
                # Predicting the user set results
                user_pred = classifier.predict(Userdata)
                converter(user_pred)
                print("The Loan will be : ")
                print(user_pred)
            
        elif ip2 == 6:
            # Fitting K-NN to the Training set
                from sklearn.neighbors import KNeighborsClassifier
                classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
                classifier.fit(X_train, y_train)
                
                # Predicting the Test set results
                y_pred = classifier.predict(X_test)
                
                # Making the Confusion Matrix
                from sklearn.metrics import confusion_matrix
                cm = confusion_matrix(y_test, y_pred)
                print("Confusion Matrix : \n")
                print(cm)
                print("Accuracy rate is : \n ")
                print((cm[0,0]+cm[1,1])/(len(y_pred)))
                
                
                # Predicting the user set results
                user_pred = classifier.predict(Userdata)
                converter(user_pred)
                print("The Loan will be : ")
                print(user_pred)
            
        elif ip2 == 7:
            # Importing the Keras libraries and packages
                import keras
                from keras.models import Sequential
                from keras.layers import Dense
                
                # Initialising the ANN
                classifier = Sequential()
                
                # Adding the input layer and the first hidden layer
                classifier.add(Dense(units = 7, kernel_initializer = 'uniform', activation = 'relu', input_dim = 14))
                
                # Adding the second hidden layer
                classifier.add(Dense(units = 7, kernel_initializer = 'uniform', activation = 'relu'))
                
                # Adding the output layer
                classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
                
                # Compiling the ANN
                classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
                
                # Fitting the ANN to the Training set
                classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)
                
                # Part 3 - Making the predictions and evaluating the model
                
                # Predicting the Test set results
                y_pred = classifier.predict(X_test)
                y_pred = (y_pred > 0.5)
                
                # Making the Confusion Matrix
                from sklearn.metrics import confusion_matrix
                cm = confusion_matrix(y_test, y_pred)
                print("Confusion Matrix : \n")
                print(cm)
                print("Accuracy rate is : \n ")
                print((cm[0,0]+cm[1,1])/(len(y_pred)))
                
                
                # Predicting the Test set results
                user_pred = classifier.predict(Userdata)
                user_pred = (user_pred > 0.5)
                converter(user_pred)
                print("The Loan will be : ")
                print(user_pred)
                
                      
            
    elif ip1 == 2:      
        print("Menu : \n \
        1)Business wise JobsSupported \n \
        2)Compare:Gross Aproval Vs SBA Aproval \n \
        3)DistOffice wise SBAapproval \n \
        4)GrossApproval Per LoanStatus \n \
        5)GrossApproval Per DeliveryMethod \n \
        6)JobsSupported per LoanStatus \n \
        7)SBAapproval Loan Status \n \
             ")
        ip2 = int(input("Enter a value from Above Menu : "))
        from PIL import Image 
        if ip2 == 1:
            img = Image.open('D:\CDAC_DATA\CDAC_PROJECT\ggwp\BusinnJobsSupp.png')
            img.format = "PNG"
            img.show()
                
        elif ip2 == 2:
            img = Image.open('D:\CDAC_DATA\CDAC_PROJECT\ggwp\Comp_GrossSBA.png')
            img.format = "PNG"
            img.show()
            
        elif ip2 == 3:
            img = Image.open('D:\CDAC_DATA\CDAC_PROJECT\ggwp\DistOff_wise_SBAappr.png')
            img.format = "PNG"
            img.show()
            
        elif ip2 == 4:
            img = Image.open('D:\CDAC_DATA\CDAC_PROJECT\ggwp\GrossAppLoanSt.png')
            img.format = "PNG"
            img.show()
            
        elif ip2 == 5:
            img = Image.open('D:\CDAC_DATA\CDAC_PROJECT\ggwp\GrossAppr_DeliveryMethod.png')
            img.format = "PNG"
            img.show()
            
        elif ip2 == 6:
            img = Image.open('D:\CDAC_DATA\CDAC_PROJECT\ggwp\JobsSuppLoanSt.png')
            img.format = "PNG"
            img.show()
            
        elif ip2 == 7:
            img = Image.open('D:\CDAC_DATA\CDAC_PROJECT\ggwp\SBAapprLoanSt.png')
            img.format = "PNG"
            img.show()
Example #22
def tenfoldcrossvalidation(feature_map, id_truth_map, index, id_tweet_map):
	feature_map = dict(sorted(feature_map.items(), key=operator.itemgetter(1)))

	tweets = []
	truth = []
	keys = []

	for key, feature in feature_map.items():
		tweets.append(feature)
		truth.append(index[id_truth_map[key]])
		keys.append(key)

	accuracy = 0.0
	tp = 0.0
	tn = 0.0
	fp = 0.0
	fn = 0.0
	for i in range(10):
		tenth = len(tweets) // 10
		start = i * tenth
		end = (i + 1) * tenth
		test_index = range(start, end)
		train_index = [i for i in range(len(tweets)) if i not in test_index]
		train_tweets = []
		train_keys = []
		test_tweets = []
		test_keys = []
		train_truth = []
		test_truth = []
		
		for i in range(len(tweets)):
			if i in train_index:
				train_tweets.append(tweets[i])
				train_truth.append(truth[i])
				train_keys.append(keys[i])
			else:
				test_tweets.append(tweets[i])
				test_truth.append(truth[i])
				test_keys.append(keys[i])

		new_train_tweets = featureselection(train_tweets, train_tweets, train_truth)
		new_test_tweets = featureselection(test_tweets, train_tweets, train_truth)

		if sys.argv[1] == "rbfsvm":
			print "RBF kernel SVM"
			clf = svm.SVC(kernel='rbf', C=1000, gamma=0.0001)
			clf.fit(np.array(new_train_tweets), np.array(train_truth))
			test_predicted = clf.predict(np.array(new_test_tweets))
		elif sys.argv[1] == "randomforest":
		# # Using Random forest for classification.
			print('Random forest')
			clf = RandomForestClassifier(n_estimators=10, max_depth=None)
			clf.fit(np.array(new_train_tweets), np.array(train_truth))
			test_predicted = clf.predict(np.array(new_test_tweets))
			# getaccuracy(test_predicted, test_truth)
		elif sys.argv[1] == "linearsvm":
		# # Using Linear svm for classification.
			print('Linear SVM')
			clf = svm.LinearSVC(random_state=20)
			clf.fit(np.array(new_train_tweets), np.array(train_truth))
			test_predicted = clf.predict(np.array(new_test_tweets))
			# print "F.score:"
			# print(f1_score(test_predicted, test_truth, average="micro"))
			# print "Accuracy:"
			# print(accuracy_score(test_predicted, test_truth, normalize="False"))
			# getaccuracy(test_predicted, test_truth)
		# elif sys.argv[1] == "polysvm":
		
		# 	print('Poly SVM')
		# 	clf = svm.SVC(kernel='poly')
		# 	clf.fit(np.array(new_train_tweets), np.array(train_truth))
		# 	test_predicted = clf.predict(np.array(new_test_tweets))

		elif sys.argv[1] == "nn":
		
			print('Neural Network')
			clf = Sequential()
			clf.add(Dense(7460, activation='relu'))
			clf.add(Dense(5000, activation='relu'))
			clf.add(Dense(2000, activation='relu'))
			clf.add(Dense(500, activation='relu'))
			clf.add(Dense(1, activation='sigmoid'))  # sigmoid, not softmax: a single-unit softmax always outputs 1
			clf.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
			clf.fit(np.array(new_train_tweets), np.array(train_truth), batch_size=64, epochs=10, validation_split=0.1)
			test_predicted = clf.predict(np.array(new_test_tweets))
			print(f1_score(test_predicted, test_truth, average="micro"))
		elif sys.argv[1]=="xgb":
			xgb_model = xgb.XGBClassifier(objective="binary:logistic")
			xgb_model.fit(np.array(new_train_tweets), np.array(train_truth))
			test_predicted = xgb_model.predict(np.array(new_test_tweets))

		accuracy += getaccuracy(test_predicted, test_truth)
		tp += gettp(test_predicted, test_truth)
		tn += gettn(test_predicted, test_truth)
		fp += getfp(test_predicted, test_truth)
		fn += getfn(test_predicted, test_truth)
		if(sys.argv[1]=="nn"):
			print accuracy
			# print tp, tn, fp, fn
			precision = tp/(tp+fp)
			recall = tp/(tp+fn)
			print "F-score:"
			print (2*precision*recall)/(precision + recall)
			break
	print accuracy/10.0
	# print(tp, tn, fp, fn)
	precision = tp / (tp + fp)
	recall = tp / (tp + fn)
	print("F-score:")
	print((2 * precision * recall) / (precision + recall))
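# The manual tenth-slicing above reimplements k-fold splitting; an equivalent
# sketch with sklearn's KFold (illustrative, not from the original code):
# from sklearn.model_selection import KFold
# for train_index, test_index in KFold(n_splits=10).split(tweets):
#     ...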
Example #23
def main():
    
    width = 128
    height = 128
    depth = 3
    classes = 2
    NUM_EPOCHS = 50
    
    
    #initialize the optimizer and model
    opt = tf.keras.optimizers.SGD(learning_rate=0.01)
    
    project_dir = "deepfake-detection-challenge"
    #train_metadata, train_videos, labels, originals = load_json(project_dir)
    train_sub_dir = "/train_sample_videos/"
    dest_train_1 = '/train_1/'
    #break_to_frames_train(project_dir, train_videos, labels, width, height)
    
    #test_video_names, test_videos = load_test_videos(project_dir)
    test_sub_dir = "/test_videos/"
    dest_test_1 = '/test_1/'
    #break_to_frames_test(project_dir, test_videos, width, height)
  
    
    train_new_csv = make_dataframe_train(project_dir)
    test_new_csv = make_dataframe_test(project_dir)
    train_new_csv = '/train_new.csv'
    
    X_train, y_train, X_test, y_test, train, y_train_original, y_test_original  = get_Xy(project_dir,train_new_csv, width, height, depth)
    
    #Normalization
    X_train = X_train.astype("float") / 255.0
    X_test = X_test.astype("float") / 255.0
    
    #One hot encode y
    
     
    choice = 4
    
    if choice == 1:  #not working 
        base_model = vgg16Model(X_train, X_test, width, height, depth, classes)
        
        # checkpointing to save the weights of best model
        mcp_save = tf.keras.callbacks.ModelCheckpoint('weight.hdf5', save_best_only=True, monitor='val_loss', mode='min')
        # compiling the model
        base_model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])
        # training the model
        H = base_model.fit(X_train, y_train, epochs=50, validation_data=(X_test, y_test), callbacks=[mcp_save], batch_size=128)
        print ("Base Model - Test Data Loss and Accuracy: ", model.evaluate(X_test, y_test))
        
        print("Final Plot ")
        plotAccLoss(H, NUM_EPOCHS)
        
    if choice == 2: 
        # Feature Extraction and Usage of Secondary Model
        vggModel = tf.keras.applications.VGG16(weights='imagenet', include_top=False, input_shape=(width, height, depth))
        print(vggModel.summary())
       
        X_train_new = vggModel.predict(X_train)
        X_train_new = X_train_new.reshape(X_train_new.shape[0], -1)
        X_val_new = vggModel.predict(X_test)
        X_val_new = X_val_new.reshape(X_val_new.shape[0], -1)
         
        secondary_model = 'random_forest'
        
        if (secondary_model == 'random_forest'):
            print("Secondary Model - Random Forest ")
            model = RandomForestClassifier(200)
            model.fit(X_train_new, y_train)
            # evaluate the model
            results = model.predict(X_val_new)
            print ("Random Forest Accuracy ", metrics.accuracy_score(results, y_test))
    
        if(secondary_model == 'naive_bayes'):
            print("Secondary Model - Using Naive Bayes")
            nBayes = GaussianNB()
            nBayes = nBayes.fit( X_train_new , y_train)
            accuracy = nBayes.score(X_val_new, y_test)
            print ("Naive Bayes Accuracy ", accuracy)
      
    if choice == 3: 
        # not working
        # FineTuning 
        inceptionV3Model= tf.keras.applications.InceptionV3(weights = 'imagenet',include_top =False, input_shape =(width, height,depth))
        inceptionV3Model.trainable = False 
      
        model = tf.keras.models.Sequential()
        model.add(inceptionV3Model)
        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dropout(0.5))

        model.add(tf.keras.layers.Dense(256, activation='relu'))
        model.add(tf.keras.layers.Dense(classes, activation='sigmoid'))
        print(model.summary())
        NUM_EPOCHS =50
        opt = tf.keras.optimizers.SGD(learning_rate=0.001)
        model.compile(loss="sparse_categorical_crossentropy", optimizer=opt,metrics=["accuracy"])
      
        H = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))
      
        plotAccLoss(H, NUM_EPOCHS)
      
        print ("\n Phase B  - Fine Tune Fully Connected Layer and Selected Convolutional Layers \n")
        inceptionV3Model.trainable = True
        trainableFlag = False
        for layer in inceptionV3Model.layers:
          if layer.name == 'block4_conv1':
            trainableFlag = True
          layer.trainable = trainableFlag
        opt = tf.keras.optimizers.SGD(learning_rate=0.00001)
        model.compile(loss="sparse_categorical_crossentropy", optimizer=opt,metrics=["accuracy"])
        print(model.summary())
      
        H = model.fit(trainX, trainY, epochs=NUM_EPOCHS, batch_size=32, validation_data=(testX, testY))
        print("Final Plot ")
        plotAccLoss(H, NUM_EPOCHS)


    if choice == 4:
        # works
        # Feature Extraction and Usage of Secondary Model
        inceptionV3Model= tf.keras.applications.InceptionV3(weights = 'imagenet',include_top =False, input_shape =(width, height,depth))
        inceptionV3Model.trainable = False 
        
        print(inceptionV3Model.summary())
       
        X_train_new = inceptionV3Model.predict(X_train)
        X_train_new = X_train_new.reshape(X_train_new.shape[0], -1)
        X_val_new = inceptionV3Model.predict(X_test)
        X_val_new = X_val_new.reshape(X_val_new.shape[0], -1)
         
        secondary_model = 'random_forest'
        
        if(secondary_model == 'random_forest'):
            print("Secondary Model - Random Forest ")
            model = RandomForestClassifier(200)
            model.fit(X_train_new, y_train)
            # evaluate the model
            accuracy = evaluate(model, X_val_new, y_test)
            # results = model.predict(X_val_new)
            # print ("Random Forest Accuracy ", metrics.accuracy_score(results, y_test))
            print("Random Forest Accuracy ", accuracy)
    
        if(secondary_model == 'naive_bayes'):
            print("Secondary Model - Using Naive Bayes")
            nBayes = GaussianNB()
            nBayes = nBayes.fit( X_train_new , y_train)
            accuracy = nBayes.score(X_val_new, y_test)
            print ("Naive Bayes Accuracy ", accuracy)

    if choice == 41:
        # works
        # Feature Extraction and Usage of Secondary Model
        inceptionV3Model= tf.keras.applications.InceptionV3(weights = 'imagenet',include_top =False, input_shape =(width, height,depth))
        inceptionV3Model.trainable = False 
        
        print(inceptionV3Model.summary())
       
        X_train_new = inceptionV3Model.predict(X_train)
        X_train_new = X_train_new.reshape(X_train_new.shape[0], -1)
        X_val_new = inceptionV3Model.predict(X_test)
        print("X_val_new b4 reshaping ", X_val_new)
        X_val_new = X_val_new.reshape(X_val_new.shape[0], -1)
         
        secondary_model = 'random_forest'
        
        if(secondary_model == 'random_forest'):
            print("Secondary Model - Random Forest ")
            model = RandomForestClassifier(200)
            model.fit(X_train_new, y_train)
            # evaluate the model
           
            predY = model.predict(X_val_new)
            #accuracy on the images
            print ("Images - Random Forest Accuracy ", metrics.accuracy_score(predY, y_test))
        
            
        #name of the video, label in X_val_new
        #collect all the images, group by all images with the same first name and compute the fraction: if at least 3 of the 11 frames are fake, then the video is fake
         # storing the images and their class in a dataframe
         
        # print("train.head() ", train.head(), train.shape )
        # print("y_test ", y_test, y_test.shape )
        # print("predY ", predY, predY.shape )
        # print("predY[:,0] ", predY[:,0])   #this a series
        # print("X_val_new ", X_val_new,X_val_new.shape )
        
        
        # pred_data_frame = train.copy(deep=True)
        # video_names = []
        # image_names = train['image']
        
        
        
        # for i in range(len(image_names)):
        #     #get the video name from the frame e.g.  aagfhgtpmv.mp4_frame0.jpg
        #     video_names.append(image_names[i].split("_")[0])
            
        # pred_data_frame['video'] =  video_names
        # print("pred_data_frame.head() ", pred_data_frame.head())
        # pred_data_frame['pred_image_fake'] = predY[:,0]
        # pred_data_frame['pred_image_real'] = predY[:,1]
        
        
        
        # pred_video_label1 = []        
        # # #sort the df based on video names
        # # pred_data_frame = pred_data_frame.sort_values(by=['video'])
        # pred_video_label = pred_data_frame.groupby(['video'])['pred_image_label'].count()
        # print(pred_video_label.head())


        # print ("Video Classification Accuracy ", metrics.accuracy_score(predY, y_test))

        # if(secondary_model == 'naive_bayes'):
        #     print("Secondary Model - Using Naive Bayes")
        #     nBayes = GaussianNB()
        #     nBayes = nBayes.fit( X_train_new , y_train)
        #     accuracy = nBayes.score(X_val_new, y_test)
        #     print ("Naive Bayes Accuracy ", accuracy)

            
    if choice == 5:
        
        #lstm
        model = Sequential()
        model.add(LSTM(256,dropout=0.2,input_shape=(train_data.shape[1],train_data.shape[2])))
        model.add(Dense(1024, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(5, activation='softmax'))
        sgd = SGD(learning_rate=0.00005, decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
        #model.load_weights('video_1_LSTM_1_512.h5')
        callbacks = [ EarlyStopping(monitor='val_loss', patience=10, verbose=0), ModelCheckpoint('video_1_LSTM_1_1024.h5', monitor='val_loss', save_best_only=True, verbose=0) ]
        nb_epoch = 500
        model.fit(train_data, train_labels, validation_data=(validation_data, validation_labels), batch_size=batch_size, epochs=nb_epoch, callbacks=callbacks, shuffle=True, verbose=1)
        
        return model
    
    if choice ==6:
        #ensemble         
        vggModel= tf.keras.applications.VGG16 (weights = 'imagenet',include_top =False, input_shape =(128, 128,3))
        model1 = tf.keras.models.Sequential()
        model1.add (vggModel)
        model1.add(tf.keras.layers.Flatten())
        model1.add(tf.keras.layers.Dropout (0.5))
        model1.add(tf.keras.layers.Dense (256, 'relu'))
        model1.add(tf.keras.layers.Dense (17, activation='softmax'))

        inceptionv3model= tf.keras.applications.InceptionV3(weights = 'imagenet',include_top =False, input_shape =(128, 128,3))
    
        model2 = tf.keras.models.Sequential()
        model2.add(inceptionv3model)
        model2.add(tf.keras.layers.Flatten())
        model2.add(tf.keras.layers.Dropout (0.5))
        model2.add(tf.keras.layers.Dense (256, 'relu'))
        model2.add(tf.keras.layers.Dense (17, activation='softmax'))

       
        model_name = 'knn'
        if(model_name == 'randomforest'):
            model = RandomForestClassifier(200)
            model.fit(featuresTrain, trainY)
            # evaluate the model
            results = model.predict(featuresVal)
            print (metrics.accuracy_score(results, testY))
    
       
        if(model_name == 'knn'):
            print("using knn")
            knn = KNeighborsClassifier(n_neighbors=3)
            knn.fit(featuresTrain, trainY)
            knn.predict(featuresVal)
            results = knn.predict(featuresVal)
            print (metrics.accuracy_score(results, testY))
    
      
        if(model_name == 'naive_bayes'):
            print("Using Naive Bayes")
            
            nBayes = GaussianNB()
            nBayes = nBayes.fit( featuresTrain , trainY)
            accuracy = nBayes.score(featuresVal, testY)
            print ("Naive Bayes Accuracy ", accuracy)
    
        
        if(model_name == 'svm'):
            print("Using SVM")
           
            svc = SVC(gamma='auto')
            svc = svc.fit(featuresTrain, trainY)
    #         accuracy = svc.score(test_features, test_labels)
            accuracy = evaluate(svc, featuresVal, testY)
            print ("SVM Accuracy ", accuracy)      
        # resnet50model = tf.keras.applications.resnet50(weights = 'imagenet',include_top =False, input_shape =(128, 128,3))
        # model3 = tf.keras.models.Sequential()
        # model3.add(resnet50model)
        # model3.add(tf.keras.layers.Flatten())
        # model3.add(tf.keras.layers.Dropout (0.5))
        # model3.add(tf.keras.layers.Dense (256, 'relu'))
        # model3.add(tf.keras.layers.Dense (17, activation='softmax'))
    
         # Find the probabilities of all 17 classes in each instance of test data - should be 340 *17 
        predicted_vals1 = model1.predict(testX)
        print("predicted_vals1 shape ", predicted_vals1.shape )
        print("predicted_vals1 ", predicted_vals1 )
    
        predicted_vals2 = model2.predict(testX)
        print("predicted_vals2 shape ", predicted_vals2.shape )
        print("predicted_vals2 ", predicted_vals2 )
        
    
        # predicted_vals3 = model3.predict(testX)
        # print("predicted_vals3 shape ", predicted_vals3.shape )
        # print("predicted_vals3 ", predicted_vals3 )
    
        # Element-wise addition sums the per-class probabilities of each image across
        # models; with two models in the ensemble, the sum is multiplied by 1/2 to average
        predY_sum = predicted_vals1 + predicted_vals2
        element_wise_sum_avg = predY_sum * (1 / 2)
    
        # Now doing np.argmax
    
        predY = np.argmax(element_wise_sum_avg, axis =1) 
    
        print("predY ", predY)
    
        print("Checking shapes of testY and predY ", testY.shape, " ", predY.shape)
    
        accuracy = accuracy_score(testY, predY)
    
        print(accuracy)
    
    if choice == 7:
        resnet101model = tf.keras.applications.ResNet101(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
        print(resnet101model.summary())
   
        featuresTrain = resnet101model.predict(trainX)
        featuresTrain = featuresTrain.reshape(featuresTrain.shape[0], -1)
        featuresVal = resnet101model.predict(testX)
        featuresVal = featuresVal.reshape(featuresVal.shape[0], -1)
Example #24
def main():
    # df = combine_datasets()
    df = pd.read_csv('./data/combined.csv', index_col=0)
    # df.fillna(-1, inplace=True)
    # df = df.drop(df[~df['certificate'].isin(['G', 'PG', 'PG-13', 'R', 'Not Rated'])].index)
    # df = add_award_points(df)

    # Data preprocessing/encoding
    df = df.drop(['movie', 'movie_id', 'synopsis', 'genre'], axis=1)
    df['popularity'] = 1 / np.array(df['popularity']) * 100
    df = pd.get_dummies(df, columns=['certificate'])
    cols = df.columns.tolist()
    cols = cols[df.columns.get_loc('oscar_animated') +
                1:] + cols[:df.columns.get_loc('oscar_animated') + 1]
    df = df[cols]
    df = df.reset_index(drop=True)
    splitIndex = df.index[df['year'] == 2018][0]
    df = df.drop(['year'], axis=1)

    # Splits data into training and testing sets
    oscarStart = df.columns.get_loc('oscar_best_picture')
    x = df.iloc[:, :oscarStart].values
    y = df.iloc[:, oscarStart:].values
    y[(y > 0) & (y < 1)] = 0.5  # winner is 1, nominee is 0.5, nothing is 0
    xTrain, xTest = x[:splitIndex], x[splitIndex:]
    yTrain, yTest = y[:splitIndex], y[splitIndex:]

    # Checks how imbalanced the data is
    unique, counts = np.unique(yTrain, return_counts=True)
    print(dict(zip(unique, counts)))

    # Scales inputs to avoid one variable having more weight than another
    sc = StandardScaler()
    xTrain = sc.fit_transform(xTrain)
    xTest = sc.transform(xTest)

    modelType = 'neuralnetwork'
    predictCategory = True
    if modelType == 'randomforest':
        model = RandomForestClassifier(random_state=21)
        model.fit(xTrain, yTrain)
        yPred = model.predict(xTest)
        p = np.where(yPred == 2)
        v = np.where(yTest == 2)

    elif modelType == 'neuralnetwork':
        if not predictCategory:
            # One hot encoding for softmax activation function
            trainTargets = []
            for i in yTrain:
                if 1 in i:
                    trainTargets.append([1, 0, 0])
                elif 0.5 in i:
                    trainTargets.append([0, 1, 0])
                else:
                    trainTargets.append([0, 0, 1])
            yTrain = np.array(trainTargets)
            testTargets = []
            for i in yTest:
                if 1 in i:
                    testTargets.append([1, 0, 0])
                elif 0.5 in i:
                    testTargets.append([0, 1, 0])
                else:
                    testTargets.append([0, 0, 1])
            yTest = np.array(testTargets)

            model = Sequential()
            model.add(Dense(256, input_dim=xTrain.shape[1]))
            model.add(Activation('relu'))
            model.add(Dropout(0.2))
            model.add(Dense(3))
            model.add(Activation('softmax'))
            model.compile(optimizer=Adam(learning_rate=0.01),
                          loss='categorical_crossentropy',
                          metrics=['mse'])

            # Inverse-frequency class weights; np.unique sorted the raw labels
            # as [0, 0.5, 1], so counts[2] counts winners, counts[1] nominees
            # and counts[0] non-nominees
            classWeights = {
                0: counts.sum() / counts[2],
                1: counts.sum() / counts[1],
                2: counts.sum() / counts[0]
            }
            model.fit(xTrain,
                      yTrain,
                      epochs=512,
                      batch_size=32,
                      class_weight=classWeights)
        else:
            # One hot encoding for softmax activation function
            trainTargets = [[] for i in range(0, 6)]
            for i in yTrain:
                for idx, j in enumerate(i):
                    if j == 1:  # winner
                        trainTargets[idx].append([1, 0, 0])
                    elif j == 0.5:  # nominee
                        trainTargets[idx].append([0, 1, 0])
                    else:  # loser/nothing
                        trainTargets[idx].append([0, 0, 1])
            yTrain = [np.array(i) for i in trainTargets]
            testTargets = [[] for i in range(0, 6)]
            for i in yTest:
                for idx, j in enumerate(i):
                    if j == 1:  # winner
                        testTargets[idx].append([1, 0, 0])
                    elif j == 0.5:  # nominee
                        testTargets[idx].append([0, 1, 0])
                    else:  # loser/nothing
                        testTargets[idx].append([0, 0, 1])
            yTest = [np.array(i) for i in testTargets]
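            # Sketch (not from the original): the two encoding loops above can
            # be collapsed into a vectorized helper. Assumes targets use
            # 1 = winner, 0.5 = nominee, 0 = nothing; the name is illustrative.
            def encode_targets(y):
                from tensorflow.keras.utils import to_categorical
                classes = np.where(y == 1, 0, np.where(y == 0.5, 1, 2))
                return [to_categorical(classes[:, i], num_classes=3)
                        for i in range(classes.shape[1])]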

            if os.path.exists('best.h5'):
                model = load_model('best.h5')
            else:
                inputs = Input(shape=(xTrain.shape[1], ))
                x = Dense(128, activation='relu')(inputs)
                x = BatchNormalization()(x)
                x = Dropout(0.2)(x)
                output1 = Dense(3, activation='softmax')(x)
                output2 = Dense(3, activation='softmax')(x)
                output3 = Dense(3, activation='softmax')(x)
                output4 = Dense(3, activation='softmax')(x)
                output5 = Dense(3, activation='softmax')(x)
                output6 = Dense(3, activation='softmax')(x)
                model = Model(inputs=inputs,
                              outputs=[
                                  output1, output2, output3, output4, output5,
                                  output6
                              ])
                model.compile(optimizer=Adam(learning_rate=0.01),
                              loss='categorical_crossentropy')

                classWeights = {
                    0: counts.sum() / counts[2],
                    1: counts.sum() / counts[1],
                    2: counts.sum() / counts[0]
                }
                model.fit(xTrain,
                          yTrain,
                          epochs=512,
                          batch_size=32,
                          class_weight=classWeights)
                # model.save('best.h5')

        # Training accuracy (put training data back in) and testing accuracy
        compute_model_accuracies(predictCategory, '(TRAINING)', model, xTrain,
                                 yTrain, splitIndex)
        compute_model_accuracies(predictCategory, '(TESTING)', model, xTest,
                                 yTest, splitIndex)
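# compute_model_accuracies is defined elsewhere in this project. A plausible
# sketch of the per-category case (an assumption, not the original helper):
# argmax each softmax head and report the share of exact matches.
# predictCategory and splitIndex are unused in this simplified sketch.
def compute_model_accuracies(predictCategory, tag, model, x, y, splitIndex):
    import numpy as np
    preds = model.predict(x)
    if not isinstance(preds, list):  # single-output model
        preds, y = [preds], [y]
    for idx, (p, t) in enumerate(zip(preds, y)):
        acc = np.mean(np.argmax(p, axis=1) == np.argmax(t, axis=1))
        print(tag, 'output', idx, 'accuracy:', acc)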
testY = to_categorical(testY, num_classes=len(labels) + 1)

# Random forest baseline on the same features
model = RandomForestClassifier(criterion='gini',
                               max_depth=138,
                               max_features='sqrt',
                               n_estimators=1)
model.fit(trainX, trainY)
y_pred = model.predict(testX)
print('accuracy RF %s' % metrics.accuracy_score(testY, y_pred))

# neural network

model = Sequential()
model.add(
    Embedding(vocab_size,
              output_dim=1500,
              input_length=max_len,
              trainable=True))
model.add(Bidirectional(CuDNNLSTM(128, return_sequences=False)))  # in TF2, plain LSTM uses the cuDNN kernel automatically
model.add(Dropout(0.1))
model.add(Dense(units=1024, activation='relu'))
model.add(Dropout(0.3))

model.add(Dense(len(labels) + 1))
model.add(Activation("softmax"))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

epochs = 10
checkpoint = ModelCheckpoint('models/WWII_names.h5f',
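# The line above is truncated by the example boundary. A typical completion
# (the remaining arguments are assumptions, not recovered from the source):
# checkpoint = ModelCheckpoint('models/WWII_names.h5f',
#                              monitor='val_loss',
#                              save_best_only=True,
#                              verbose=1)
# model.fit(trainX, trainY, epochs=epochs, callbacks=[checkpoint])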
Example #26
        index1.append(i)

import random
index = list(range(len(df)))
# train_index takes an 80% random sample from each class (a stratified split)
train_index = random.sample(index0, int(0.8 * len(index0))) + random.sample(
    index1, int(0.8 * len(index1)))
# test_index holds the remaining samples, used as the test set
test_index = []
for i in index:
    if i not in train_index:
        test_index.append(i)
print(len(train_index))

model = Sequential()
model.add(Dense(50, input_dim=len(df[0]), activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')
model.fit(df[train_index], label[train_index], epochs=1000, batch_size=20)

# predict_classes was removed in recent Keras; threshold the sigmoid output
pred = (model.predict(df[test_index]) > 0.5).astype(int).reshape(len(test_index))
print(pred)

k = 0
for i in range(len(pred)):
    if pred[i] == label[test_index][i]:
        k = k + 1
print(k / len(test_index))
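# Equivalent to the accuracy loop above, via a vectorized comparison
import numpy as np
print(np.mean(pred == label[test_index]))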

#model.save_weights('E:\...\my_model_weights.h5')
Example #27
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(random_state=0, verbose=3)
model.fit(X_train, df_train['label'].values)

# Random forest accuracy on the test set
y_prediction = model.predict(X_test)
print("\naccuracy",
      np.sum(y_prediction == df_test['label'].values) / float(len(df_test)))

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

start = time()

model = Sequential()
model.add(Dense(512, input_shape=(784, )))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(X_train, y_train_onehot)

print('\ntime taken %s seconds' % str(time() - start))
Example #28
neural_data = np.loadtxt('stats_noheader.csv', delimiter=',')

# split into input (X) and output (Y) variables
X_neural = neural_data[:, 0:47]
Y_neural = neural_data[:, 47]

# split into 75% for train and 25% for test
X_train, X_test, y_train, y_test = train_test_split(X_neural,
                                                    Y_neural,
                                                    test_size=0.25,
                                                    random_state=7)

# create model
model = Sequential()
model.add(
    Dense(12, input_dim=47, kernel_initializer='uniform', activation='relu'))
model.add(Dense(8, kernel_initializer='uniform', activation='relu'))
model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))

# compile model
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# fit the model
history = model.fit(X_train,
                    y_train,
                    validation_data=(X_test, y_test),
                    epochs=100,
                    batch_size=10)
# list all data in history
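# Sketch of the truncated continuation (an assumption based on the comment
# above): inspect the recorded keys and plot accuracy per epoch.
import matplotlib.pyplot as plt
print(history.history.keys())
plt.plot(history.history['accuracy'])  # key is 'acc' in older Keras versions
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()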
Example #29
# SVC best estimator
svc = grid_svc.best_estimator_

# DecisionTree Classifier
tree_params = {"criterion": ["gini", "entropy"],
               "max_depth": list(range(2, 4, 1)),
               "min_samples_leaf": list(range(5, 7, 1))}
grid_tree = GridSearchCV(DecisionTreeClassifier(), tree_params)
grid_tree.fit(X_train, Y_train)

# tree best estimator
tree_clf = grid_tree.best_estimator_


model = Sequential()
model.add(Dense(128, kernel_initializer="uniform", input_dim=13, activation="relu"))
model.add(Dense(64, kernel_initializer="uniform", activation="relu"))
model.add(Dense(1, kernel_initializer="uniform", activation="sigmoid"))
model.compile(loss="binary_crossentropy", metrics=["accuracy"], optimizer="adam")
model.summary()
history = model.fit(X_train, Y_train, epochs=100, batch_size=100)


plt.plot(history.history['loss'])
#plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper left')  # add 'test' back if val_loss is plotted
plt.show()
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(criterion="gini", random_state=0)
cm_random_forest = evaluate_classifier(classifier, X_train, y_train)

#Kernel SVM Classifier - RBF - 94% accuracy on test set. Linear - 96% accuracy on test set
from sklearn.svm import SVC
classifier = SVC(kernel="linear", random_state=0)
cm_svm = evaluate_classifier(classifier, X_train, y_train)
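# evaluate_classifier is defined elsewhere in this project. A plausible sketch
# (an assumption, not the original): cross-validated predictions summarized
# as a confusion matrix.
def evaluate_classifier(classifier, X, y, cv=10):
    from sklearn.model_selection import cross_val_predict
    from sklearn.metrics import confusion_matrix
    y_pred = cross_val_predict(classifier, X, y, cv=cv)
    return confusion_matrix(y, y_pred)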

#Neural Network
from keras.layers import Dense
from keras.models import Sequential
from keras.layers import Dropout

classifier = Sequential()
classifier.add(
    Dense(50, input_dim=100, activation="relu", kernel_initializer="uniform"))
classifier.add(Dropout(0.1))
classifier.add(Dense(50, activation="relu", kernel_initializer="uniform"))
classifier.add(Dropout(0.1))
classifier.add(Dense(6, activation="softmax", kernel_initializer="uniform"))
classifier.compile(optimizer="adam",
                   loss="categorical_crossentropy",
                   metrics=["accuracy"])
classifier.fit(X_train_pca, y_train, batch_size=25, epochs=100)
y_pred = classifier.predict(X_test_pca)
y_prediction = np.argmax(y_pred, axis=1)
y_test = np.argmax(y_test, axis=1)

#y_prediction = np.array([1,2,3,4,5,6])

from sklearn.metrics import confusion_matrix
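# The snippet ends here; the natural next step compares the class indices
# computed above:
cm = confusion_matrix(y_test, y_prediction)
print(cm)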