Example #1
# imports required by this snippet (not shown in the original)
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from tensorflow import keras


class my_classification_model:
    def __init__(self, model_name, no_class):
        self.model_name = model_name
        self.no_class = no_class
        if model_name == 'lr':
            self.model = LogisticRegression()
        elif model_name == 'sv':
            self.model = SVC()
        elif model_name == 'rf':
            self.model = RandomForestClassifier()
        elif model_name == 'dnn1':
            self.model = keras.Sequential([
                keras.layers.Dense(128, activation='relu'),
                keras.layers.Dense(self.no_class, activation='softmax')
            ])
            self.model.compile(optimizer='adam',
                               loss='sparse_categorical_crossentropy',
                               metrics=['accuracy'])
        elif model_name == 'dnn2':
            self.model = keras.Sequential([
                keras.layers.Dense(256, activation='relu'),
                keras.layers.Dense(128, activation='relu'),
                keras.layers.Dense(self.no_class, activation='softmax')
            ])
            self.model.compile(optimizer='adam',
                               loss='sparse_categorical_crossentropy',
                               metrics=['accuracy'])

    def fit(self, X_train, y_train):
        if self.model_name in ['lr', 'sv', 'rf']:
            self.model.fit(X_train, y_train)
        if self.model_name in ['dnn1', 'dnn2']:
            self.model.fit(X_train, y_train, epochs=150, verbose=0)

    def predict(self, X_test):
        if self.model_name in ['lr', 'sv', 'rf']:
            return self.model.predict(X_test)
        if self.model_name in ['dnn1', 'dnn2']:
            return np.argmax(self.model.predict(X_test), axis=1)

    def predict_s(self, X_test):
        if self.model_name == 'sv':
            return self.model.predict(X_test)
        if self.model_name in ['lr', 'rf']:
            return self.model.predict_proba(X_test) @ np.arange(self.no_class)
        if self.model_name in ['dnn1', 'dnn2']:
            return self.model.predict(X_test) @ np.arange(self.no_class)
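A minimal usage sketch for the wrapper above, assuming a toy scikit-learn dataset (load_iris, the variable names, and the train/test split below are illustrative, not part of the original):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = my_classification_model('rf', no_class=3)
clf.fit(X_train, y_train)
labels = clf.predict(X_test)    # hard class labels
scores = clf.predict_s(X_test)  # expected class index under the predicted probabilities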
Example #2
def main():
    
    width = 128
    height = 128
    depth = 3
    classes = 2
    NUM_EPOCHS = 50
    
    
    # initialize the optimizer
    opt = tf.keras.optimizers.SGD(learning_rate=0.01)
    
    project_dir = "deepfake-detection-challenge"
    #train_metadata, train_videos, labels, originals = load_json(project_dir)
    train_sub_dir = "/train_sample_videos/"
    dest_train_1 = '/train_1/'
    #break_to_frames_train(project_dir, train_videos, labels, width, height)
    
    #test_video_names, test_videos = load_test_videos(project_dir)
    test_sub_dir = "/test_videos/"
    dest_test_1 = '/test_1/'
    #break_to_frames_test(project_dir, test_videos, width, height)
  
    
    train_new_csv = make_dataframe_train(project_dir)
    test_new_csv = make_dataframe_test(project_dir)
    train_new_csv = '/train_new.csv'
    
    X_train, y_train, X_test, y_test, train, y_train_original, y_test_original = get_Xy(project_dir, train_new_csv, width, height, depth)
    
    #Normalization
    X_train = X_train.astype("float")/ 255.0
    X_test = X_test.astype ("float")/ 255.0
    
    #One hot encode y
    
     
    choice = 4
    
    if choice == 1:  #not working 
        base_model = vgg16Model(X_train, X_test, width, height, depth, classes)
        
        # checkpointing to save the weights of best model
        mcp_save = tf.keras.callbacks.ModelCheckpoint('weight.hdf5', save_best_only=True, monitor='val_loss', mode='min')
        # compiling the model
        base_model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])
        # training the model
        H = base_model.fit(X_train, y_train, epochs=NUM_EPOCHS, validation_data=(X_test, y_test), callbacks=[mcp_save], batch_size=128)
        print("Base Model - Test Data Loss and Accuracy: ", base_model.evaluate(X_test, y_test))
        
        print("Final Plot ")
        plotAccLoss(H, NUM_EPOCHS)
        
    if choice == 2: 
        # Feature Extraction and Usage of Secondary Model
        vggModel = tf.keras.applications.VGG16(weights='imagenet', include_top=False, input_shape=(width, height, depth))
        print(vggModel.summary())
       
        X_train_new = vggModel.predict(X_train)
        X_train_new = X_train_new.reshape(X_train_new.shape[0], -1)
        X_val_new = vggModel.predict(X_test)
        X_val_new = X_val_new.reshape(X_val_new.shape[0], -1)
         
        secondary_model = 'random_forest'
        
        if (secondary_model == 'random_forest'):
            print("Secondary Model - Random Forest ")
            model = RandomForestClassifier(n_estimators=200)
            model.fit(X_train_new, y_train)
            # evaluate the model
            results = model.predict(X_val_new)
            print ("Random Forest Accuracy ", metrics.accuracy_score(results, y_test))
    
        if(secondary_model == 'naive_bayes'):
            print("Secondary Model - Using Naive Bayes")
            nBayes = GaussianNB()
            nBayes = nBayes.fit( X_train_new , y_train)
            accuracy = nBayes.score(X_val_new, y_test)
            print ("Naive Bayes Accuracy ", accuracy)
      
    if choice == 3: 
        # not working
        # FineTuning 
        inceptionV3Model = tf.keras.applications.InceptionV3(weights='imagenet', include_top=False, input_shape=(width, height, depth))
        inceptionV3Model.trainable = False

        model = tf.keras.models.Sequential()
        model.add(inceptionV3Model)
        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dropout(0.5))

        model.add(tf.keras.layers.Dense(256, activation='relu'))
        # softmax (not sigmoid) matches the sparse_categorical_crossentropy loss below
        model.add(tf.keras.layers.Dense(classes, activation='softmax'))
        print(model.summary())
        NUM_EPOCHS = 50
        opt = tf.keras.optimizers.SGD(learning_rate=0.001)
        model.compile(loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
      
        H = model.fit(X_train, y_train, epochs=NUM_EPOCHS, batch_size=32, validation_data=(X_test, y_test))
      
        plotAccLoss(H, NUM_EPOCHS)
      
        print ("\n Phase B  - Fine Tune Fully Connected Layer and Selected Convolutional Layers \n")
        inceptionV3Model.trainable = True
        trainableFlag = False
        for layer in inceptionV3Model.layers:
            # NOTE: 'block4_conv1' is a VGG16 layer name; InceptionV3 layers are named
            # differently (e.g. 'mixed7'), which is likely why this branch is marked "not working"
            if layer.name == 'block4_conv1':
                trainableFlag = True
            layer.trainable = trainableFlag
        opt = tf.keras.optimizers.SGD(learning_rate=0.00001)
        model.compile(loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
        print(model.summary())
      
        H = model.fit(X_train, y_train, epochs=NUM_EPOCHS, batch_size=32, validation_data=(X_test, y_test))
        print("Final Plot ")
        plotAccLoss(H, NUM_EPOCHS)


    if choice == 4:
        # works
        # Feature Extraction and Usage of Secondary Model
        inceptionV3Model = tf.keras.applications.InceptionV3(weights='imagenet', include_top=False, input_shape=(width, height, depth))
        inceptionV3Model.trainable = False
        
        print(inceptionV3Model.summary())
       
        X_train_new = inceptionV3Model.predict(X_train)
        X_train_new = X_train_new.reshape(X_train_new.shape[0], -1)
        X_val_new = inceptionV3Model.predict(X_test)
        X_val_new = X_val_new.reshape(X_val_new.shape[0], -1)
         
        secondary_model = 'random_forest'
        
        if(secondary_model == 'random_forest'):
            print("Secondary Model - Random Forest ")
            model = RandomForestClassifier(n_estimators=200)
            model.fit(X_train_new, y_train)
            # evaluate the model
            accuracy = evaluate(model, X_val_new, y_test)
            # results = model.predict(X_val_new)
            # print ("Random Forest Accuracy ", metrics.accuracy_score(results, y_test))
            print("Random Forest Accuracy ", accuracy)
    
        if(secondary_model == 'naive_bayes'):
            print("Secondary Model - Using Naive Bayes")
            nBayes = GaussianNB()
            nBayes = nBayes.fit( X_train_new , y_train)
            accuracy = nBayes.score(X_val_new, y_test)
            print ("Naive Bayes Accuracy ", accuracy)

    if choice == 41:
        # works
        # Feature Extraction and Usage of Secondary Model
        inceptionV3Model = tf.keras.applications.InceptionV3(weights='imagenet', include_top=False, input_shape=(width, height, depth))
        inceptionV3Model.trainable = False
        
        print(inceptionV3Model.summary())
       
        X_train_new = inceptionV3Model.predict(X_train)
        X_train_new = X_train_new.reshape(X_train_new.shape[0], -1)
        X_val_new = inceptionV3Model.predict(X_test)
        print("X_val_new b4 reshaping ", X_val_new)
        X_val_new = X_val_new.reshape(X_val_new.shape[0], -1)
         
        secondary_model = 'random_forest'
        
        if(secondary_model == 'random_forest'):
            print("Secondary Model - Random Forest ")
            model = RandomForestClassifier(n_estimators=200)
            model.fit(X_train_new, y_train)
            # evaluate the model
           
            predY = model.predict(X_val_new)
            #accuracy on the images
            print ("Images - Random Forest Accuracy ", metrics.accuracy_score(predY, y_test))
        
            
        # name of the video, label in X_val_new
        # collect all the images, group by all images with the same first name (the video), and count
        # the fakes: if at least 3 of the 11 frames are fake, then the video is fake
        # storing the images and their class in a dataframe
         
        # print("train.head() ", train.head(), train.shape )
        # print("y_test ", y_test, y_test.shape )
        # print("predY ", predY, predY.shape )
        # print("predY[:,0] ", predY[:,0])   #this a series
        # print("X_val_new ", X_val_new,X_val_new.shape )
        
        
        # pred_data_frame = train.copy(deep=True)
        # video_names = []
        # image_names = train['image']
        
        
        
        # for i in range(len(image_names)):
        #     #get the video name from the frame e.g.  aagfhgtpmv.mp4_frame0.jpg
        #     video_names.append(image_names[i].split("_")[0])
            
        # pred_data_frame['video'] =  video_names
        # print("pred_data_frame.head() ", pred_data_frame.head())
        # pred_data_frame['pred_image_fake'] = predY[:,0]
        # pred_data_frame['pred_image_real'] = predY[:,1]
        
        
        
        # pred_video_label1 = []        
        # # #sort the df based on video names
        # # pred_data_frame = pred_data_frame.sort_values(by=['video'])
        # pred_video_label = pred_data_frame.groupby(['video'])['pred_image_label'].count()
        # print(pred_video_label.head())


        # print ("Video Classification Accuracy ", metrics.accuracy_score(predY, y_test))

        # if(secondary_model == 'naive_bayes'):
        #     print("Secondary Model - Using Naive Bayes")
        #     nBayes = GaussianNB()
        #     nBayes = nBayes.fit( X_train_new , y_train)
        #     accuracy = nBayes.score(X_val_new, y_test)
        #     print ("Naive Bayes Accuracy ", accuracy)

            
    if choice == 5:
        
        #lstm
        model = Sequential()
        model.add(LSTM(256,dropout=0.2,input_shape=(train_data.shape[1],train_data.shape[2])))
        model.add(Dense(1024, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(5, activation='softmax'))
        sgd = SGD(learning_rate=0.00005, decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
        #model.load_weights('video_1_LSTM_1_512.h5')
        callbacks = [EarlyStopping(monitor='val_loss', patience=10, verbose=0),
                     ModelCheckpoint('video_1_LSTM_1_1024.h5', monitor='val_loss', save_best_only=True, verbose=0)]
        nb_epoch = 500
        model.fit(train_data, train_labels, validation_data=(validation_data, validation_labels),
                  batch_size=batch_size, epochs=nb_epoch, callbacks=callbacks, shuffle=True, verbose=1)
        
        return model
    
    if choice ==6:
        #ensemble         
        vggModel = tf.keras.applications.VGG16(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
        model1 = tf.keras.models.Sequential()
        model1.add(vggModel)
        model1.add(tf.keras.layers.Flatten())
        model1.add(tf.keras.layers.Dropout(0.5))
        model1.add(tf.keras.layers.Dense(256, activation='relu'))
        model1.add(tf.keras.layers.Dense(17, activation='softmax'))

        inceptionv3model = tf.keras.applications.InceptionV3(weights='imagenet', include_top=False, input_shape=(128, 128, 3))

        model2 = tf.keras.models.Sequential()
        model2.add(inceptionv3model)
        model2.add(tf.keras.layers.Flatten())
        model2.add(tf.keras.layers.Dropout(0.5))
        model2.add(tf.keras.layers.Dense(256, activation='relu'))
        model2.add(tf.keras.layers.Dense(17, activation='softmax'))

       
        model_name = 'knn'
        if(model_name == 'randomforest'):
            model = RandomForestClassifier(n_estimators=200)
            model.fit(featuresTrain, trainY)
            # evaluate the model
            results = model.predict(featuresVal)
            print (metrics.accuracy_score(results, testY))
    
       
        if(model_name == 'knn'):
            print("using knn")
            knn = KNeighborsClassifier(n_neighbors=3)
            knn.fit(featuresTrain, trainY)
            results = knn.predict(featuresVal)
            print (metrics.accuracy_score(results, testY))
    
      
        if(model_name == 'naive_bayes'):
            print("Using Naive Bayes")
            
            nBayes = GaussianNB()
            nBayes = nBayes.fit( featuresTrain , trainY)
            accuracy = nBayes.score(featuresVal, testY)
            print ("Naive Bayes Accuracy ", accuracy)
    
        
        if(model_name == 'svm'):
            print("Using SVM")
           
            svc = SVC(gamma='auto')
            svc = svc.fit(featuresTrain, trainY)
    #         accuracy = svc.score(test_features, test_labels)
            accuracy = evaluate(svc, featuresVal, testY)
            print ("SVM Accuracy ", accuracy)      
        # resnet50model = tf.keras.applications.resnet50(weights = 'imagenet',include_top =False, input_shape =(128, 128,3))
        # model3 = tf.keras.models.Sequential()
        # model3.add(resnet50model)
        # model3.add(tf.keras.layers.Flatten())
        # model3.add(tf.keras.layers.Dropout (0.5))
        # model3.add(tf.keras.layers.Dense (256, 'relu'))
        # model3.add(tf.keras.layers.Dense (17, activation='softmax'))
    
        # Find the probabilities of all 17 classes in each instance of test data - should be 340 * 17
        predicted_vals1 = model1.predict(testX)
        print("predicted_vals1 shape ", predicted_vals1.shape )
        print("predicted_vals1 ", predicted_vals1 )
    
        predicted_vals2 = model2.predict(testX)
        print("predicted_vals2 shape ", predicted_vals2.shape )
        print("predicted_vals2 ", predicted_vals2 )
        
    
        # predicted_vals3 = model3.predict(testX)
        # print("predicted_vals3 shape ", predicted_vals3.shape )
        # print("predicted_vals3 ", predicted_vals3 )
    
        # element-wise addition helps, as we want to add the probabilities of each class
        # for each image, then take the average; two models are used here, so the sum is multiplied by 1/2
        predY_sum = predicted_vals1 + predicted_vals2
        element_wise_sum_avg = predY_sum * (1 / 2)
    
        # Now take the argmax over the class axis
        predY = np.argmax(element_wise_sum_avg, axis=1)
    
        print("predY ", predY)
    
        print("Checking shapes of testY and predY ", testY.shape, " ", predY.shape)
    
        accuracy = accuracy_score(testY, predY)
    
        print(accuracy)
    
    if choice == 7:
        resnet101model = tf.keras.applications.ResNet101(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
        print(resnet101model.summary())
   
        featuresTrain = resnet101model.predict(trainX)
        featuresTrain = featuresTrain.reshape(featuresTrain.shape[0], -1)
        featuresVal = resnet101model.predict(testX)
        featuresVal = featuresVal.reshape(featuresVal.shape[0], -1)
Example #3
print(padded_test)
#%%
#RandomForest model fitting
model = RandomForestClassifier(n_estimators=100)
model.fit(padded_train, y_train)
#%%
y_pred = model.predict(padded_test)
acc = accuracy_score(y_pred, y_test)
print(acc * 100, "%")

#%%
# assumed imports for this snippet (not shown in the original)
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding

vocab_size = 50_000
# one_hot hashes the words of each text into integer ids in [1, vocab_size)
one_hots = [one_hot(text, vocab_size) for text in X_train]
print(one_hots)
# %%
padded = pad_sequences(one_hots, padding='post', maxlen=5)
print(padded)
# %%
model = Sequential()
model.add(Embedding(vocab_size, 50))
model.compile("adam", "mse")
# %%
predictions = model.predict(padded)
# %%
# each of the 5 padded positions maps to a 50-dimensional embedding vector,
# so the shape is (len(X_train), 5, 50)
predictions.shape

Example #4
def main():
    print("......... Welcome to SBA Loan Data Analysis ....... ")
    print()
    ip1 = int(input("What do you want to do : \n 1) Prediction \n or \n 2) Analyze the data..?? \n\n "))
    
    if ip1 == 1:
        print("Menu :\n \
            1)Random forest \n \
            2)Decision Tree \n \
            3)Naives Bayes \n \
            4)SVM \n \
            5)XG Boost \n \
            6)KNN \n \
            7)Keras Neural Network ")  
        ip2 = int(input("Enter a value from Above Menu : "))
        
        # Importing the libraries
        import numpy as np
        import pandas as pd
        
        # Importing the dataset
        D7aFY1991_FY1999 = pd.read_csv('D:/CDAC_DATA/CDAC_PROJECT/15-1-MachineL/7aFY1991_FY1999_1.csv')
        D7aFY2000_FY2009 = pd.read_csv('D:/CDAC_DATA/CDAC_PROJECT/15-1-MachineL/7aFY2000_FY2009_1.csv')
        D7aFY2010_Present = pd.read_csv('D:/CDAC_DATA/CDAC_PROJECT/15-1-MachineL/7aFY2010_Present_1.csv')
                
        # merge the dataframes (DataFrame.append was removed from pandas; use pd.concat)
        
        Data_7a = pd.concat([D7aFY1991_FY1999, D7aFY2000_FY2009, D7aFY2010_Present])
        
        #create sample data
        #Data_sample_7a = Data_7a.sample(frac = 0.1,random_state = 0)
        Data_sample_7a = Data_7a
        
        Data_sample_7a = Data_sample_7a.iloc[:,[4,6,9,11,12,14,16,17,18,19,20,24,25,26,28,29]].values
        
        #convert into pandas dataframe
        
        Data_sample_7a = pd.DataFrame(data=Data_sample_7a)
        
        # Taking care of missing data (sklearn's Imputer was replaced by SimpleImputer)
        
        from sklearn.impute import SimpleImputer
        imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
        Data_sample_7a.iloc[:,10:11] = imputer.fit_transform(Data_sample_7a.iloc[:,10:11])
        
        imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
        Data_sample_7a.iloc[:,8:9] = imputer.fit_transform(Data_sample_7a.iloc[:,8:9])
        
        Data_sample_7a = Data_sample_7a.dropna()
              
        #split data columns into dependent and independent variables
        
        X7a = Data_sample_7a.iloc[:,0:14].values #independent
        y7a = Data_sample_7a.iloc[:,[15]].values #dependent
        
        # =============================================================================
        #convert numpy objects to pandas dataframes
        pd_X7a = pd.DataFrame(data=X7a[0:,0:])
        pd_y7a = pd.DataFrame(data=y7a[0:,0:])
        # =============================================================================
        
        #encoding categorical data in independent variable
        pd_X7a=np.asarray(pd_X7a)#convert pandas dataframe into numpy array
        pd_X7a = encoder(pd_X7a)
        
        #getuser data and encode
        Userdata = getdata(pd_X7a)
        Userdata=np.asarray(Userdata)#convert pandas dataframe into numpy array
        Userdata = encoder(Userdata)
                
        #encoding dependent variable
        pd_y7a[0] = pd_y7a[0].replace(['PIF'],'0')
        pd_y7a[0] = pd_y7a[0].replace(['CANCLD','EXEMPT','CHGOFF','COMMIT'],'1')
        
        pd_y7a=np.asarray(pd_y7a)#convert pandas dataframe into numpy array
        
        # Splitting the dataset into the Training set and Test set
        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(pd_X7a, pd_y7a, test_size = 0.25, random_state = 0)
                
        # Feature Scaling of accuracy data
        (X_train, X_test) = scalingFunction(X_train, X_test)
        
        # Feature Scaling of user data
        from sklearn.preprocessing import StandardScaler
        sc = StandardScaler()
        Userdata = sc.fit_transform(Userdata)
        
                
        if ip2 == 1:
                
                # Fitting Random Forest Classification to the Training set
                from sklearn.ensemble import RandomForestClassifier
                classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
                classifier.fit(X_train, y_train)
                
                # Predicting the Test set results
                y_pred = classifier.predict(X_test)
                y_pred = pd.DataFrame(data=y_pred[0:]) #converting to data frame 
                
                # Making the Confusion Matrix
                from sklearn.metrics import confusion_matrix
                cm = confusion_matrix(y_test, y_pred)
                print("Confusion Matrix : \n")
                print(cm)
                print("Accuracy rate is : \n ")
                print((cm[0,0]+cm[1,1])/(len(y_pred)))
                
                
                # Predicting the user set results
                user_pred = classifier.predict(Userdata)
                converter(user_pred)
                print("The Loan will be : ")
                print(user_pred)
                
                
        elif ip2 == 2:
            
                # Fitting Decision Tree Classification to the Training set
                from sklearn.tree import DecisionTreeClassifier
                classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
                classifier.fit(X_train, y_train)
                
                # Predicting the Test set results
                y_pred = classifier.predict(X_test)
                
                # Making the Confusion Matrix
                from sklearn.metrics import confusion_matrix
                cm = confusion_matrix(y_test, y_pred)
                print("Confusion Matrix : \n")
                print(cm)
                print("Accuracy rate is : \n ")
                print((cm[0,0]+cm[1,1])/(len(y_pred)))
                
                
                # Predicting the user set results
                user_pred = classifier.predict(Userdata)
                converter(user_pred)
                print("The Loan will be : ")
                print(user_pred)
            
        elif ip2 == 3:
            
                # Fitting Naive Bayes to the Training set
                from sklearn.naive_bayes import GaussianNB
                classifier = GaussianNB()
                classifier.fit(X_train, y_train)
                
                # Predicting the Test set results
                y_pred = classifier.predict(X_test)
                
                # Making the Confusion Matrix
                from sklearn.metrics import confusion_matrix
                cm = confusion_matrix(y_test, y_pred)
                print("Confusion Matrix : \n")
                print(cm)
                print("Accuracy rate is : \n ")
                print((cm[0,0]+cm[1,1])/(len(y_pred)))
                
                
                # Predicting the user set results
                user_pred = classifier.predict(Userdata)
                converter(user_pred)
                print("The Loan will be : ")
                print(user_pred)
            
        elif ip2 == 4:
            # Fitting SVM to the Training set
                from sklearn.svm import SVC
                classifier = SVC(kernel = 'linear', random_state = 0)
                classifier.fit(X_train, y_train)
                
                # Predicting the Test set results
                y_pred = classifier.predict(X_test)
                
                # Making the Confusion Matrix
                from sklearn.metrics import confusion_matrix
                cm = confusion_matrix(y_test, y_pred)
                print("Confusion Matrix : \n")
                print(cm)
                print("Accuracy rate is : \n ")
                print((cm[0,0]+cm[1,1])/(len(y_pred)))
                
                
                # Predicting the user set results
                user_pred = classifier.predict(Userdata)
                converter(user_pred)
                print("The Loan will be : ")
                print(user_pred)
            
        elif ip2 == 5:
            # Fitting XGBoost to the Training set
                from xgboost import XGBClassifier
                classifier = XGBClassifier()
                classifier.fit(X_train, y_train)
                
                # Predicting the Test set results
                y_pred = classifier.predict(X_test)
                
                # Making the Confusion Matrix
                from sklearn.metrics import confusion_matrix
                cm = confusion_matrix(y_test, y_pred)
                print("Confusion Matrix : \n")
                print(cm)
                print("Accuracy rate is : \n ")
                print((cm[0,0]+cm[1,1])/(len(y_pred)))
                
                
                # Predicting the user set results
                user_pred = classifier.predict(Userdata)
                converter(user_pred)
                print("The Loan will be : ")
                print(user_pred)
            
        elif ip2 == 6:
            # Fitting K-NN to the Training set
                from sklearn.neighbors import KNeighborsClassifier
                classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
                classifier.fit(X_train, y_train)
                
                # Predicting the Test set results
                y_pred = classifier.predict(X_test)
                
                # Making the Confusion Matrix
                from sklearn.metrics import confusion_matrix
                cm = confusion_matrix(y_test, y_pred)
                print("Confusion Matrix : \n")
                print(cm)
                print("Accuracy rate is : \n ")
                print((cm[0,0]+cm[1,1])/(len(y_pred)))
                
                
                # Predicting the user set results
                user_pred = classifier.predict(Userdata)
                converter(user_pred)
                print("The Loan will be : ")
                print(user_pred)
            
        elif ip2 == 7:
            # Importing the Keras libraries and packages
                import keras
                from keras.models import Sequential
                from keras.layers import Dense
                
                # Initialising the ANN
                classifier = Sequential()
                
                # Adding the input layer and the first hidden layer
                classifier.add(Dense(units = 7, kernel_initializer = 'uniform', activation = 'relu', input_dim = 14))
                
                # Adding the second hidden layer
                classifier.add(Dense(units = 7, kernel_initializer = 'uniform', activation = 'relu'))
                
                # Adding the output layer
                classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
                
                # Compiling the ANN
                classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
                
                # Fitting the ANN to the Training set
                classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)
                
                # Part 3 - Making the predictions and evaluating the model
                
                # Predicting the Test set results
                y_pred = classifier.predict(X_test)
                y_pred = (y_pred > 0.5)
                
                # Making the Confusion Matrix
                from sklearn.metrics import confusion_matrix
                cm = confusion_matrix(y_test, y_pred)
                print("Confusion Matrix : \n")
                print(cm)
                print("Accuracy rate is : \n ")
                print((cm[0,0]+cm[1,1])/(len(y_pred)))
                
                
                # Predicting the user set results
                user_pred = classifier.predict(Userdata)
                user_pred = (user_pred > 0.5)
                converter(user_pred)
                print("The Loan will be : ")
                print(user_pred)
                
                      
            
    elif ip1 == 2:      
        print("Menu : \n \
        1)Business wise JobsSupported \n \
        2)Compare:Gross Aproval Vs SBA Aproval \n \
        3)DistOffice wise SBAapproval \n \
        4)GrossApproval Per LoanStatus \n \
        5)GrossApproval Per DeliveryMethod \n \
        6)JobsSupported per LoanStatus \n \
        7)SBAapproval Loan Status \n \
             ")
        ip2 = int(input("Enter a value from Above Menu : "))
        from PIL import Image
        # map each menu choice to its chart (raw strings avoid backslash-escape issues)
        images = {
            1: r'D:\CDAC_DATA\CDAC_PROJECT\ggwp\BusinnJobsSupp.png',
            2: r'D:\CDAC_DATA\CDAC_PROJECT\ggwp\Comp_GrossSBA.png',
            3: r'D:\CDAC_DATA\CDAC_PROJECT\ggwp\DistOff_wise_SBAappr.png',
            4: r'D:\CDAC_DATA\CDAC_PROJECT\ggwp\GrossAppLoanSt.png',
            5: r'D:\CDAC_DATA\CDAC_PROJECT\ggwp\GrossAppr_DeliveryMethod.png',
            6: r'D:\CDAC_DATA\CDAC_PROJECT\ggwp\JobsSuppLoanSt.png',
            7: r'D:\CDAC_DATA\CDAC_PROJECT\ggwp\SBAapprLoanSt.png',
        }
        if ip2 in images:
            img = Image.open(images[ip2])
            img.show()
Example #5
# Adding the second hidden layer
#classifier.add(Dense(units=150, kernel_initializer='uniform', activation='sigmoid'))

# Adding the third hidden layer
classifier.add(Dense(units=80, kernel_initializer='uniform', activation='relu'))

# Adding the fourth hidden layer
classifier.add(Dense(units=12, kernel_initializer='uniform', activation='sigmoid'))

# Adding the output layer
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

# Compiling the ANN
classifier.compile(optimizer='adam',
                   loss='binary_crossentropy',
                   metrics=['accuracy'])

# Fitting the ANN to the Training set
classifier.fit(X_train, y_train, batch_size=10, epochs=100)

# Part 3 - Making the predictions and evaluating the model

# Predicting the Test set results
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)

# Making the Confusion Matrix
cmANN = confusion_matrix(y_test, y_pred)

print('\n Confusion Matrix using ANN')
print(cmANN)
Example #6
# imports assumed for this snippet (only Dense was imported in the original)
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from keras.models import Sequential
from keras.layers import Dense

model = RandomForestClassifier()
cross_val_score(model, X, y_true)

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y_true,
                                                    test_size=0.3,
                                                    random_state=42)

import keras.backend as K

model = Sequential()
model.add(Dense(1, input_shape=(4, ), activation='sigmoid'))
model.compile(optimizer='Adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit(X_train,
                    y_train,
                    epochs=30,
                    verbose=2,
                    validation_split=0.1)

#Evaluate gives the accuracy and loss, while the model.predict gives the prediction that is basically the output for the given input
#model.evaluate gives the loss and accuracy for 0 and 1 index respectively
result = model.evaluate(X_test, y_test)

history = pd.DataFrame(history.history, index=history.epoch)
history.plot(ylim=(0, 1))
plt.title('The accuracy for the test set is {:.3f}'.format(result[1] * 100))
Example #7
# %tensorflow_version 2.x

# If you wish to use Tensorflow 1.X run the following line and then restart runtime
# %tensorflow_version 1.x 
# You'll need to change your import statements from tensorflow.keras to keras
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()

model.add(Dense(18, kernel_initializer = "uniform", activation = "relu", input_dim=16))
model.add(Dense(1, kernel_initializer = "uniform", activation = "sigmoid"))

model.compile(optimizer= "adam",loss = "binary_crossentropy",metrics = ["accuracy"])

# Display Model Summary and Show Parameters
model.summary()

# Start Training Our Classifier 
batch_size = 10
epochs = 50

history = model.fit(X_train,
                    y_train,
                    batch_size = batch_size,
                    epochs = epochs,
                    verbose = 1,
                    )
Example #8
def get_classifier(clf, input_shape=None):
    """
	This function returns a classifier object defined by clf
	INPUTS:
	@str		: a string indicating which classifier will be used
	@input_shape: in the case that a CNN model will be trained, the input shape
				  (it is necessary to build the model)
	OUTPUT
	@classifier : a classifier object
	"""
    if clf == 'SVM':
        classifier = svm.SVC(C=1e5, kernel='rbf', class_weight="balanced")
    elif clf == 'log_reg':
        classifier = linear_model.LogisticRegression(C=1e5,
                                                     class_weight="balanced")
    elif clf == 'rf':
        classifier = RandomForestClassifier(n_estimators=50,
                                            max_depth=10,
                                            class_weight="balanced")
    elif clf == 'boost':
        classifier = AdaBoostClassifier()
    elif clf == 'cnn':
        # CNN network to classify patches
        # (Keras 1 API: Convolution2D/border_mode correspond to Conv2D/padding in Keras 2)
        input_img = Input(shape=input_shape)
        x = input_img

        x = Convolution2D(32, 3, 3, border_mode='same')(x)
        x = LeakyReLU()(x)
        x = Convolution2D(32, 3, 3, border_mode='same')(x)
        x = LeakyReLU()(x)

        x = Convolution2D(32, 3, 3, border_mode='same')(x)
        x = LeakyReLU()(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

        x = Convolution2D(64, 3, 3, border_mode='same')(x)
        x = LeakyReLU()(x)
        x = Convolution2D(64, 3, 3, border_mode='same')(x)
        x = LeakyReLU()(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

        x = Convolution2D(128, 3, 3, border_mode='same')(x)
        x = LeakyReLU()(x)
        x = Convolution2D(128, 3, 3, border_mode='same')(x)
        x = LeakyReLU()(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

        f = x
        x = Flatten()(x)
        x = Dense(16)(x)
        x = LeakyReLU()(x)
        x = Dropout(0.5)(x)
        x = Dense(2)(x)
        o = Activation('softmax')(x)

        # model train
        classifier = Model(input_img, o)
        classifier.summary()
        classifier.compile(loss='categorical_crossentropy',
                           optimizer='Adadelta',
                           metrics=[fmeasure])

    elif clf == 'Unet':
        # unet network, https://github.com/jocicmarko/ultrasound-nerve-segmentation
        # (Keras 1 API: merge(mode='concat', concat_axis=1) became concatenate(axis=1) in Keras 2)
        inputs = Input(input_shape)
        conv1 = Convolution2D(32, 3, 3, activation='relu',
                              border_mode='same')(inputs)
        conv1 = Convolution2D(32, 3, 3, activation='relu',
                              border_mode='same')(conv1)
        pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

        conv2 = Convolution2D(64, 3, 3, activation='relu',
                              border_mode='same')(pool1)
        conv2 = Convolution2D(64, 3, 3, activation='relu',
                              border_mode='same')(conv2)
        pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

        conv3 = Convolution2D(128, 3, 3, activation='relu',
                              border_mode='same')(pool2)
        conv3 = Convolution2D(128, 3, 3, activation='relu',
                              border_mode='same')(conv3)
        pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

        conv4 = Convolution2D(256, 3, 3, activation='relu',
                              border_mode='same')(pool3)
        conv4 = Convolution2D(256, 3, 3, activation='relu',
                              border_mode='same')(conv4)
        pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

        conv5 = Convolution2D(512, 3, 3, activation='relu',
                              border_mode='same')(pool4)
        conv5 = Convolution2D(512, 3, 3, activation='relu',
                              border_mode='same')(conv5)

        up6 = merge([UpSampling2D(size=(2, 2))(conv5), conv4],
                    mode='concat',
                    concat_axis=1)
        conv6 = Convolution2D(256, 3, 3, activation='relu',
                              border_mode='same')(up6)
        conv6 = Convolution2D(256, 3, 3, activation='relu',
                              border_mode='same')(conv6)

        up7 = merge([UpSampling2D(size=(2, 2))(conv6), conv3],
                    mode='concat',
                    concat_axis=1)
        conv7 = Convolution2D(128, 3, 3, activation='relu',
                              border_mode='same')(up7)
        conv7 = Convolution2D(128, 3, 3, activation='relu',
                              border_mode='same')(conv7)

        up8 = merge([UpSampling2D(size=(2, 2))(conv7), conv2],
                    mode='concat',
                    concat_axis=1)
        conv8 = Convolution2D(64, 3, 3, activation='relu',
                              border_mode='same')(up8)
        conv8 = Convolution2D(64, 3, 3, activation='relu',
                              border_mode='same')(conv8)

        up9 = merge([UpSampling2D(size=(2, 2))(conv8), conv1],
                    mode='concat',
                    concat_axis=1)
        conv9 = Convolution2D(32, 3, 3, activation='relu',
                              border_mode='same')(up9)
        conv9 = Convolution2D(32, 3, 3, activation='relu',
                              border_mode='same')(conv9)

        conv10 = Convolution2D(1, 1, 1, activation='sigmoid')(conv9)

        classifier = Model(input=inputs, output=conv10)
        classifier.summary()
        classifier.compile(optimizer=Adam(lr=1e-3),
                           loss='binary_crossentropy',
                           metrics=[fmeasure])
    else:
        sys.exit("The classifier you chose is not implemented")

    return classifier
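A short usage sketch for get_classifier (illustrative; the patch shape is an assumption, and the channels-first ordering follows the concat_axis=1 convention used above):

clf = get_classifier('rf')                            # scikit-learn models need no input shape
cnn = get_classifier('cnn', input_shape=(3, 64, 64))  # the CNN needs the patch shape to build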
Example #9
            if args.classifier == "random_forest":
                final_model = RandomForestClassifier(n_estimators=100,
                                                     max_depth=32,
                                                     random_state=0,
                                                     n_jobs=-1,
                                                     verbose=True)
            elif args.classifier == "logistic_regression_keras":
                classes = 26
                final_model = Sequential()
                final_model.add(
                    Dense(classes,
                          activation='softmax',
                          kernel_regularizer=regularizers.l1(0.0000001),
                          input_shape=(293, )))
                final_model.compile(optimizer=optimizers.Adam(lr=0.01),
                                    loss='categorical_crossentropy',
                                    metrics=['accuracy'])
                final_model.fit(final_X,
                                to_categorical(final_Y),
                                epochs=100,
                                batch_size=32)
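                # Illustrative sketch (not in the original): the Keras model above
                # outputs a 26-way probability distribution per sample; argmax maps
                # it back to integer class ids for a quick training-set check.
                probs = final_model.predict(final_X)
                pred_labels = probs.argmax(axis=1)
                print("train accuracy:", (pred_labels == final_Y).mean())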
            elif args.classifier == "logistic_regression_scikit":
                final_model = LogisticRegression(penalty='l1',
                                                 C=1000,
                                                 multi_class="multinomial",
                                                 solver="saga",
                                                 max_iter=100,
                                                 verbose=True,
                                                 n_jobs=-1)
                final_model.fit(final_X, final_Y)
Example #10
# allAlgorithms = all_estimators(type_filter='regressor')

# print(allAlgorithms)
# print(len(allAlgorithms))
# print(type(allAlgorithms))

# for (name, algorithm) in allAlgorithms:

#     model = algorithm()
#     model.fit(x_train, y_train)
#     y_pred = model.predict(x_test)
#     print(name, "의 loss = ", r2_score(y_test, y_pred))
model = RandomForestClassifier()
model.fit(x1_train, y1_train)
# 5. Train the model
from keras.callbacks import EarlyStopping, TensorBoard
# td_hist = TensorBoard(log_dir='./graph',
#                       histogram_freq=0,
#                       write_graph=True,
#                       write_images=True)

early_stopping = EarlyStopping(monitor='loss', patience=60, mode='auto')
model.compile(loss='mae', 
              optimizer='adam', 
              metrics=['mse']) # adam is a safe default. # this is why the metric shows up in the training logs below.
model.fit(x1_train_scaled, y1_train, 
          epochs=10, 
          batch_size=10, 
          validation_split=0.2)  # the original snippet is cut off here; the call is closed minimally
Example #11
index = list(range(len(df)))
train_index = random.sample(index0, int(0.8 * len(index0))) + random.sample(
    index1,
    int(0.8 *
        len(index1)))  ## train_index is the index of the train data (randomly sample 80% of each class)
test_index = []  ## test_index is the index of the test data (the remaining samples serve as test samples)
for i in index:
    if i not in train_index:
        test_index.append(i)
print(len(train_index))

model = Sequential()
model.add(Dense(units=50, input_dim=len(df[0]), activation='relu'))
model.add(Dense(units=20, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')
model.fit(df[train_index], label[train_index], epochs=1000, batch_size=20)

pred = (model.predict(df[test_index]) > 0.5).astype(int).reshape(len(test_index))
print(pred)

k = 0
for i in range(len(pred)):
    if pred[i] == label[test_index][i]:
        k = k + 1
print(k / len(test_index))

#model.save_weights('E:\...\my_model_weights.h5')
# The neural network's accuracy is quite high, and the network does not need to be
# large; an overly large network tends to overfit and lose accuracy.
Example #12
def main():
    # df = combine_datasets()
    df = pd.read_csv('./data/combined.csv', index_col=0)
    # df.fillna(-1, inplace=True)
    # df = df.drop(df[~df['certificate'].isin(['G', 'PG', 'PG-13', 'R', 'Not Rated'])].index)
    # df = add_award_points(df)

    # Data preprocessing/encoding
    df = df.drop(['movie', 'movie_id', 'synopsis', 'genre'], axis=1)
    df['popularity'] = 1 / np.array(df['popularity']) * 100
    df = pd.get_dummies(df, columns=['certificate'])
    cols = df.columns.tolist()
    cols = cols[df.columns.get_loc('oscar_animated') + 1:] + cols[:df.columns.get_loc('oscar_animated') + 1]
    df = df[cols]
    df = df.reset_index(drop=True)
    splitIndex = df.index[df['year'] == 2018][0]
    df = df.drop(['year'], axis=1)

    # Splits data into training and testing sets
    oscarStart = df.columns.get_loc('oscar_best_picture')
    x = df.iloc[:, :oscarStart].values
    y = df.iloc[:, oscarStart:].values
    y[(y > 0) & (y < 1)] = 0.5  # winner is 1, nominee is 0.5, nothing is 0
    xTrain, xTest = x[:splitIndex], x[splitIndex:]
    yTrain, yTest = y[:splitIndex], y[splitIndex:]

    # Checks how imbalanced the data is
    unique, counts = np.unique(yTrain, return_counts=True)
    print(dict(zip(unique, counts)))

    # Scales inputs to avoid one variable having more weight than another
    sc = StandardScaler()
    xTrain = sc.fit_transform(xTrain)
    xTest = sc.transform(xTest)

    modelType = 'neuralnetwork'
    predictCategory = True
    if modelType == 'randomforest':
        model = RandomForestClassifier(random_state=21)
        model.fit(xTrain, yTrain)
        yPred = model.predict(xTest)
        p = np.where(yPred == 2)
        v = np.where(yTest == 2)

    elif modelType == 'neuralnetwork':
        if not predictCategory:
            # One hot encoding for softmax activation function
            trainTargets = []
            for i in yTrain:
                if 1 in i:
                    trainTargets.append([1, 0, 0])
                elif 0.5 in i:
                    trainTargets.append([0, 1, 0])
                else:
                    trainTargets.append([0, 0, 1])
            yTrain = np.array(trainTargets)
            testTargets = []
            for i in yTest:
                if 1 in i:
                    testTargets.append([1, 0, 0])
                elif 0.5 in i:
                    testTargets.append([0, 1, 0])
                else:
                    testTargets.append([0, 0, 1])
            yTest = np.array(testTargets)

            model = Sequential()
            model.add(Dense(256, input_dim=xTrain.shape[1]))
            model.add(Activation('relu'))
            model.add(Dropout(0.2))
            model.add(Dense(3))
            model.add(Activation('softmax'))
            model.compile(optimizer=Adam(learning_rate=0.01),
                          loss='categorical_crossentropy',
                          metrics=['mse'])

            classWeights = {
                0: counts.sum() / counts[2],
                1: counts.sum() / counts[1],
                2: counts.sum() / counts[0]
            }
            model.fit(xTrain,
                      yTrain,
                      epochs=512,
                      batch_size=32,
                      class_weight=classWeights)
        else:
            # One hot encoding for softmax activation function
            trainTargets = [[] for i in range(0, 6)]
            for i in yTrain:
                for idx, j in enumerate(i):
                    if j == 1:  # winner
                        trainTargets[idx].append([1, 0, 0])
                    elif j == 0.5:  # nominee
                        trainTargets[idx].append([0, 1, 0])
                    else:  # loser/nothing
                        trainTargets[idx].append([0, 0, 1])
            yTrain = [np.array(i) for i in trainTargets]
            testTargets = [[] for i in range(0, 6)]
            for i in yTest:
                for idx, j in enumerate(i):
                    if j == 1:  # winner
                        testTargets[idx].append([1, 0, 0])
                    elif j == 0.5:  # nominee
                        testTargets[idx].append([0, 1, 0])
                    else:  # loser/nothing
                        testTargets[idx].append([0, 0, 1])
            yTest = [np.array(i) for i in testTargets]

            if os.path.exists('best.h5'):
                model = load_model('best.h5')
            else:
                input = Input(shape=(xTrain.shape[1], ))
                x = Dense(128, activation='relu')(input)
                x = BatchNormalization()(x)
                x = Dropout(0.2)(x)
                output1 = Dense(3, activation='softmax')(x)
                output2 = Dense(3, activation='softmax')(x)
                output3 = Dense(3, activation='softmax')(x)
                output4 = Dense(3, activation='softmax')(x)
                output5 = Dense(3, activation='softmax')(x)
                output6 = Dense(3, activation='softmax')(x)
                model = Model(inputs=input,
                              outputs=[
                                  output1, output2, output3, output4, output5,
                                  output6
                              ])
                model.compile(optimizer=Adam(learning_rate=0.01),
                              loss='categorical_crossentropy')

                classWeights = {
                    0: counts.sum() / counts[2],
                    1: counts.sum() / counts[1],
                    2: counts.sum() / counts[0]
                }
                model.fit(xTrain,
                          yTrain,
                          epochs=512,
                          batch_size=32,
                          class_weight=classWeights)
                # model.save('best.h5')

        # Training accuracy (put training data back in) and testing accuracy
        compute_model_accuracies(predictCategory, '(TRAINING)', model, xTrain,
                                 yTrain, splitIndex)
        compute_model_accuracies(predictCategory, '(TESTING)', model, xTest,
                                 yTest, splitIndex)
Example #13
def tenfoldcrossvalidation(feature_map, id_truth_map, index, id_tweet_map):
	feature_map = dict(sorted(feature_map.items(), key=operator.itemgetter(1)))

	tweets = []
	truth = []
	keys = []

	for key, feature in feature_map.items():
		tweets.append(feature)
		truth.append(index[id_truth_map[key]])
		keys.append(key)

	accuracy = 0.0
	tp = 0.0
	tn = 0.0
	fp = 0.0
	fn = 0.0
	for i in range(10):
		tenth = len(tweets) // 10
		start = i * tenth
		end = (i + 1) * tenth
		test_index = range(start, end)
		train_index = [i for i in range(len(tweets)) if i not in test_index]
		train_tweets = []
		train_keys = []
		test_tweets = []
		test_keys = []
		train_truth = []
		test_truth = []
		
		for i in range(len(tweets)):
			if i in train_index:
				train_tweets.append(tweets[i])
				train_truth.append(truth[i])
				train_keys.append(keys[i])
			else:
				test_tweets.append(tweets[i])
				test_truth.append(truth[i])
				test_keys.append(keys[i])

		new_train_tweets = featureselection(train_tweets, train_tweets, train_truth)
		new_test_tweets = featureselection(test_tweets, train_tweets, train_truth)

		if sys.argv[1] == "rbfsvm":
			print "RBF kernel SVM"
			clf = svm.SVC(kernel='rbf', C=1000, gamma=0.0001)
			clf.fit(np.array(new_train_tweets), np.array(train_truth))
			test_predicted = clf.predict(np.array(new_test_tweets))
		elif sys.argv[1] == "randomforest":
		# # Using Random forest for classification.
			print 'Random forest'
			clf = RandomForestClassifier(n_estimators=10, max_depth=None)
			clf.fit(np.array(new_train_tweets), np.array(train_truth))
			test_predicted = clf.predict(np.array(new_test_tweets))
			# getaccuracy(test_predicted, test_truth)
		elif sys.argv[1] == "linearsvm":
		# # Using Linear svm for classification.
			print 'Linear SVM'
			clf = svm.LinearSVC(random_state=20)
			clf.fit(np.array(new_train_tweets), np.array(train_truth))
			test_predicted = clf.predict(np.array(new_test_tweets))
			# print "F.score:"
			# print(f1_score(test_predicted, test_truth, average="micro"))
			# print "Accuracy:"
			# print(accuracy_score(test_predicted, test_truth, normalize="False"))
			# getaccuracy(test_predicted, test_truth)
		# elif sys.argv[1] == "polysvm":
		
		# 	print 'Poly SVM'
		# 	clf = svm.SVC(kernel='poly')
		# 	clf.fit(np.array(new_train_tweets), np.array(train_truth))
		# 	test_predicted = clf.predict(np.array(new_test_tweets))

		elif sys.argv[1] == "nn":
		
			print 'Neural Network'
			clf = Sequential()
			clf.add(Dense(7460, activation='relu'))
			clf.add(Dense(5000, activation='relu'))
			clf.add(Dense(2000, activation='relu'))
			clf.add(Dense(500, activation='relu'))
			clf.add(Dense(1, activation='softmax'))
			clf.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
			clf.fit(np.array(new_train_tweets), np.array(train_truth), batch_size=64, epochs=10, validation_split=0.1)
			test_predicted = clf.predict(np.array(new_test_tweets))
			print(f1_score(test_predicted, test_truth, average="micro"))
		elif sys.argv[1]=="xgb":
			xgb_model = xgb.XGBClassifier(objective="binary:logistic")
			xgb_model.fit(np.array(new_train_tweets), np.array(train_truth))
			test_predicted = xgb_model.predict(np.array(new_test_tweets))

		accuracy += getaccuracy(test_predicted, test_truth)
		tp += gettp(test_predicted, test_truth)
		tn += gettn(test_predicted, test_truth)
		fp += getfp(test_predicted, test_truth)
		fn += getfn(test_predicted, test_truth)
		if(sys.argv[1]=="nn"):
			print accuracy
			# print tp, tn, fp, fn
			precision = tp/(tp+fp)
			recall = tp/(tp+fn)
			print "F-score:"
			print (2*precision*recall)/(precision + recall)
			break
	print accuracy/10.0
	# print tp, tn, fp, fn
	precision = tp/(tp+fp)
	recall = tp/(tp+fn)
	print "F-score:"
	print (2*precision*recall)/(precision + recall)
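The loop above re-implements 10-fold cross-validation by hand; a minimal sketch of the same evaluation using scikit-learn's KFold (feature selection and the classifier menu from above are omitted; X and y are assumed arrays):

from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

def tenfold_sklearn(X, y):
    X, y = np.asarray(X), np.asarray(y)
    accs, f1s = [], []
    for train_idx, test_idx in KFold(n_splits=10, shuffle=True, random_state=0).split(X):
        clf = RandomForestClassifier(n_estimators=10)
        clf.fit(X[train_idx], y[train_idx])
        pred = clf.predict(X[test_idx])
        accs.append(accuracy_score(y[test_idx], pred))
        f1s.append(f1_score(y[test_idx], pred, average="micro"))
    print("Accuracy:", np.mean(accs))
    print("F-score:", np.mean(f1s))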
Example #14
metrics = Metrics()
from sklearn.neural_network import MLPClassifier
model = MLPClassifier(hidden_layer_sizes=(200,), max_iter=500, alpha=0.0001,
                      solver='adam', verbose=10, random_state=0, tol=0.00000001, batch_size=100)


"""

model.add(Dense(8, input_dim=8, activation='relu'))
model.add(Dense(12, activation='relu'))
model.add(Dense(6, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

"""

#compile the model

"""

model.compile(loss='binary_crossentropy', optimizer='adagrad',metrics=['accuracy'])
model.summary()

"""

model.fit(X_train, y_train)
y_predict=model.predict(X_test)
print(accuracy_score(y_test, y_predict))

#print("Score of Neural Network--->", score[0])

Example #15
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=42)

import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD

K.clear_session()  # clear model from memory

model = Sequential()
model.add(Dense(1, input_shape=(4, ), activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

# train model
history = model.fit(X_train, y_train,
                    epochs=10)  # record history of training progress
result = model.evaluate(X_test, y_test)

# visualize the training process
historydf = pd.DataFrame(history.history, index=history.epoch)
historydf.plot(ylim=(0, 1))
plt.title("Test accuracy: {:3.1f} %".format(result[1] * 100), fontsize=15)

# ===================================
#   manually tune learning rate
# ===================================
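The banner above introduces a manual learning-rate sweep that is cut off in the original; a minimal sketch of one, reusing the tiny model from this example (the learning-rate grid is an assumption):

for lr in [0.001, 0.01, 0.1, 1.0]:  # assumed grid
    K.clear_session()
    model = Sequential()
    model.add(Dense(1, input_shape=(4, ), activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer=SGD(lr=lr),
                  metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=10, verbose=0)
    result = model.evaluate(X_test, y_test, verbose=0)
    print("lr = {:<6} test accuracy = {:.3f}".format(lr, result[1]))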
Example #16
def algorithm(method_A, OneVsRest, OneVsOne, randomized):

    print("Selecting algorithm...")
    print("      ")

    if method_A == "svm":

        print("Starting with " + method_A)
        print("      ")

        parameters_svm = {
            'kernel': ('linear', 'rbf'),
            'C': [1, 3, 10, 100],
            'gamma': [0.01, 0.001]
        }
        model = svm.SVC()
        model = search_par(randomized, model, parameters_svm)

    if method_A == "random_forest":

        print("Starting with " + method_A)
        print("      ")

        parameters_random = {
            "max_depth": [2, 3, None],
            "max_features": [2, 4, 6],
            "min_samples_split": [2, 4, 6],
            "min_samples_leaf": [2, 4, 6],
            "bootstrap": [True, False],
            "criterion": ["gini", "entropy"]
        }
        model = RandomForestClassifier(n_estimators=100)
        model = search_par(randomized, model, parameters_random)

    if method_A == "logistic":

        print("Starting with " + method_A)
        print("      ")

        parameters_logistic = {'C': [100, 1000], 'tol': [0.001, 0.0001]}
        model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
        model = search_par(randomized, model, parameters_logistic)

    if method_A == "neural_networks":

        print("Starting with " + method_A)
        print("      ")

        #model = MLPClassifier()

        model = Sequential()
        model.add(
            Dense(991, input_dim=179, init='normal')
        )  # number of features of the data +1 node for the bias term.
        model.add(Activation('relu'))
        model.add(Dropout(0.2))
        model.add(
            Dense(495, init='normal')
        )  #In sum, for most problems, one could probably get decent performance (even without a second optimization step) by setting the hidden layer configuration using just two rules: (i) number of hidden layers equals one; and (ii) the number of neurons in that layer is the mean of the neurons in the input and output layers.
        model.add(Activation('relu'))
        model.add(Dropout(0.5))
        # Output layer: a classifier needs a single node, unless softmax is
        # used, in which case there is one node per class label.
        model.add(Dense(99, kernel_initializer='normal'))
        model.add(Activation('softmax'))

        sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(loss='categorical_crossentropy',
                      optimizer=sgd,  # use the configured SGD optimizer
                      metrics=['accuracy'])

        # a Keras Sequential model is not a scikit-learn estimator, so the
        # One-vs-Rest / One-vs-One wrappers below are skipped
        OneVsRest = False
        OneVsOne = False

    if OneVsRest:

        print("Using OneVsRest ")
        print("      ")

        return OneVsRestClassifier(model)

    if OneVsOne:

        print("Using OneVsOne")
        print("      ")

        return OneVsOneClassifier(model)

    print("Algorithm selected: " + method_A)
    print("      ")

    return model
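
# A usage sketch (search_par is assumed to wrap GridSearchCV or
# RandomizedSearchCV, and X_train/y_train/X_test come from the caller):
clf = algorithm("random_forest", OneVsRest=False, OneVsOne=False,
                randomized=True)
clf.fit(X_train, y_train)
print(clf.predict(X_test[:5]))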
Example #17
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()

# Add the first hidden layer; input_dim=10 declares the 10-feature input
model.add(Dense(16, activation='relu', input_dim=10))

# Add a hidden layer
model.add(Dense(12, activation='relu'))

# Add another hidden layer
model.add(Dense(12, activation='relu'))

# Add another hidden layer
model.add(Dense(8, activation='relu'))

# Add the output layer; its 9 units correspond to the number of predicted classes
model.add(Dense(9, activation='softmax'))

# compile and train the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(xx,
          yy,
          epochs=100,
          validation_data=(val_x, pd.get_dummies(val_y).values))
# For evaluation too, val_y has to be converted to one-hot dummy variables
model.evaluate(val_x, pd.get_dummies(val_y).values)

model.summary()
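
# An alternative sketch: with sparse_categorical_crossentropy the integer
# labels can be fed directly, avoiding the get_dummies conversion
# (yy_int is a hypothetical integer-label version of yy):
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(xx, yy_int, epochs=100, validation_data=(val_x, val_y))
model.evaluate(val_x, val_y)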
Example #18
# (the start of this snippet is truncated; a RandomForestClassifier with
# these arguments is assumed)
model = RandomForestClassifier(class_weight='balanced',
                               n_estimators=50)
model.fit(x_train, y_train)

# add predictions to dataset
df['PREDICTIONS'] = model.predict(df['FEATURES'].values.tolist())

# train LSTM model
max_features = len(word_to_index)  # vocabulary size
maxlen = len(features[0])  # length of the (padded) index sequences
batch_size = 32
model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

x_train, x_test, y_train, y_test = (np.array(x_train), np.array(x_test),
                                    np.array(y_train), np.array(y_test))

# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=1,
          validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
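
# The fixed-length rows of `features` imply the index sequences were padded
# beforehand; a sketch of that preprocessing step (`sequences` and the
# length 80 are assumed names/values, not from the original):
from keras.preprocessing.sequence import pad_sequences
features = pad_sequences(sequences, maxlen=80)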
Example #19
# evaluate the previous model on the test set
y_prediction = model.predict(X_test)
print("\naccuracy",
      np.sum(y_prediction == df_test['label'].values) / float(len(y_test)))

from time import time

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

start = time()

model = Sequential()
model.add(Dense(512, input_shape=(784, )))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(X_train, y_train_onehot)

print('\ntime taken %s seconds' % str(time() - start))

y_prediction = model.predict_classes(X_test)
print("\n\naccuracy", np.sum(y_prediction == y_test) / float(len(y_test)))
Example #20
class Vorace_agent:

    initialState = None
    classifier = None
    history = None
    callbacks_list = None
    epochs = 40
    batch_size = 0

    def __init__(self,
                 typeP,
                 nClasses,
                 inputLayer=None,
                 batch_size=0,
                 callbacks_list=None,
                 n_classifiers=10):

        if typeP == 6:
            typeP = random.randint(0, 5)
        if typeP == 3:
            typeP = random.randint(0, 2)

        self.batch_size = batch_size
        self.callbacks_list = callbacks_list
        #print(typeP)
        if typeP == 0:
            self.classifier = Vorace_agent.getModel(nClasses, inputLayer)
            self.classifier = Model(inputLayer, self.classifier)
            if nClasses == 2:
                self.classifier.compile(loss='binary_crossentropy',
                                        metrics=['accuracy'],
                                        optimizer='adam')
            else:
                self.classifier.compile(loss='categorical_crossentropy',
                                        metrics=['accuracy'],
                                        optimizer='adam')
            self.initialState = self.classifier.get_weights()

        elif typeP == 1:
            if random.randint(0, 1) == 0:
                self.classifier = DecisionTreeClassifier(
                    criterion="gini",
                    max_depth=random.randint(5, 25),
                    random_state=0)
            else:
                self.classifier = DecisionTreeClassifier(
                    criterion="entropy",
                    max_depth=random.randint(5, 25),
                    random_state=0)
            self.initialState = clone(self.classifier)

        elif typeP == 2:
            # sample the SVM's C log-uniformly in [2^-5, 2^5]
            A = math.log(pow(2, -5))
            B = math.log(pow(2, 5))

            c_value = math.exp(random.uniform(A, B))

            if random.randint(0, 1) == 0:
                self.classifier = svm.SVC(kernel='rbf',
                                          C=c_value,
                                          gamma='auto',
                                          probability=True)
            else:
                A = 3
                B = 5

                degree = int(round(random.uniform(A, B)))
                #print("C: {}  DEGREE: {}".format(c_value, degree))
                self.classifier = svm.SVC(kernel='poly',
                                          degree=degree,
                                          C=c_value,
                                          gamma='auto',
                                          probability=True)
            self.initialState = clone(self.classifier)

        elif typeP == 4:

            value_lists = {
                'bootstrap': [True, False],
                'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
                'max_features': ['auto', 'sqrt'],
                'min_samples_leaf': [1, 2, 4],
                'min_samples_split': [2, 5, 10],
                'n_estimators': [10, 20, 50, 100, 200]
            }
            #'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]}

            params = {
                'bootstrap': random.choice(value_lists['bootstrap']),
                'max_depth': random.choice(value_lists['max_depth']),
                'max_features': random.choice(value_lists['max_features']),
                'min_samples_leaf': random.choice(value_lists['min_samples_leaf']),
                'min_samples_split': random.choice(value_lists['min_samples_split']),
                'n_estimators': random.choice(value_lists['n_estimators']),
            }

            self.classifier = RandomForestClassifier(**params)
            self.initialState = clone(self.classifier)
        elif typeP == 5:
            self.classifier = xgb.XGBClassifier(
                max_depth=random.randint(3, 25),
                n_estimators=n_classifiers,
                subsample=random.random(),
                colsample_bytree=random.random())
            self.initialState = clone(self.classifier)

    def reset(self):
        #print(type(self.classifier))
        if type(self.classifier) == Model:
            self.classifier.set_weights(self.initialState)
        else:
            self.classifier = clone(self.initialState)

    def fit(self, x, y, y_oneHot=None):
        if type(self.classifier) == Model:
            self.history = self.classifier.fit(x,
                                               y_oneHot,
                                               epochs=self.epochs,
                                               batch_size=self.batch_size,
                                               shuffle=True,
                                               callbacks=self.callbacks_list,
                                               verbose=0)
            self.history = self.history.history['acc'][-1]
        else:
            self.classifier.fit(x, y)
            y_pred = self.classifier.predict(x)
            self.history = metrics.accuracy_score(y, y_pred)

    def predict(self, x):

        if type(self.classifier) == Model:
            y_pred = self.classifier.predict(x)
        else:
            y_pred = self.classifier.predict_proba(x)

        return y_pred

    @staticmethod
    def getModel(nClass, inputLayer, nHLayers=4):

        n = random.randint(2, nHLayers)
        nInput = K.int_shape(inputLayer)[1]

        # hidden-layer widths are sampled log-uniformly in [16, 128]
        #A=math.log(nInput)
        #B=math.log(nInput**2)
        A = math.log(16)
        B = math.log(128)

        #print("A:"+str(A))
        #print("B:"+str(B))
        activation = ('relu', 'tanh')

        nNodes = int(round(math.exp(random.uniform(A, B))))
        act_fun = random.randint(0, len(activation) - 1)
        #print("nNodes:"+str(nNodes))
        x = Dense(nNodes, activation=activation[act_fun])(inputLayer)
        #print(K.int_shape(inputLayer)[1])

        for i in range(1, n):
            #nNodes = random.randint(nInput*2,nInput**2)
            nNodes = int(round(math.exp(random.uniform(A, B))))
            #print(nNodes)
            act_fun = random.randint(0, len(activation) - 1)
            #print(act_fun)
            x = Dense(nNodes, activation=activation[act_fun])(x)

        if nClass == 2:
            x = Dense(nClass, activation='sigmoid')(x)
        else:
            x = Dense(nClass, activation='softmax')(x)

        return x
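
# A usage sketch with synthetic data (shapes, names, and values below are
# illustrative assumptions, not from the original):
import numpy as np
from keras.layers import Input
from keras.utils import to_categorical

x = np.random.rand(100, 8)
y = np.random.randint(0, 3, size=100)
inp = Input(shape=(x.shape[1], ))
agent = Vorace_agent(typeP=0, nClasses=3, inputLayer=inp, batch_size=32)
agent.fit(x, y, y_oneHot=to_categorical(y, 3))
probs = agent.predict(x)  # class probabilities, shape (100, 3)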
Example #21
# DecisionTree Classifier
tree_params = {
    "criterion": ["gini", "entropy"],
    "max_depth": list(range(2, 4)),
    "min_samples_leaf": list(range(5, 7))
}
grid_tree = GridSearchCV(DecisionTreeClassifier(), tree_params)
grid_tree.fit(X_train, Y_train)

# tree best estimator
tree_clf = grid_tree.best_estimator_


model = Sequential()
model.add(Dense(128, kernel_initializer="uniform", input_dim=13, activation="relu"))
model.add(Dense(64, kernel_initializer="uniform", activation="relu"))
model.add(Dense(1, kernel_initializer="uniform", activation="sigmoid"))
model.compile(loss="binary_crossentropy", metrics=["accuracy"], optimizer="adam")
model.summary()
history = model.fit(X_train, Y_train, epochs=100, batch_size=100)


plt.plot(history.history['loss'])
#plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper left')  # only the training curve is plotted
plt.show()


model.evaluate(X_test, Y_test)
#Kernel SVM Classifier - RBF - 94% accuracy on test set. Linear - 96% accuracy on test set
from sklearn.svm import SVC
classifier = SVC(kernel="linear", random_state=0)
cm_svm = evaluate_classifier(classifier, X_train, y_train)

#Neural Network
from keras.layers import Dense
from keras.models import Sequential
from keras.layers import Dropout

classifier = Sequential()
classifier.add(
    Dense(50, input_dim=100, activation="relu", kernel_initializer="uniform"))
classifier.add(Dropout(0.1))
classifier.add(Dense(50, activation="relu", kernel_initializer="uniform"))
classifier.add(Dropout(0.1))
classifier.add(Dense(6, activation="softmax", kernel_initializer="uniform"))
classifier.compile(optimizer="adam",
                   loss="categorical_crossentropy",
                   metrics=["accuracy"])
classifier.fit(X_train_pca, y_train, batch_size=25, epochs=100)
y_pred = classifier.predict(X_test_pca)
y_prediction = np.argmax(y_pred, axis=1)
y_test = np.argmax(y_test, axis=1)


from sklearn.metrics import confusion_matrix
cm_nn = confusion_matrix(y_test, y_prediction)
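
# A quick follow-up sketch: rows of cm_nn are true classes, so the diagonal
# divided by the row sums gives per-class recall:
per_class_recall = cm_nn.diagonal() / cm_nn.sum(axis=1)
print(per_class_recall)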