Example #1
def scr2(X, y):
    print("**************************************")
    print("*     Classification Model           *")
    print("*    1-GaussianNB                    *")
    print("*    2- KMeans                       *")
    print("*    3- hierachie Clustering         *")
    print("*    4- Support Vector Machine       *")
    print("*    5- Multiclassification          *")
    print("*    6- Tree Classication            *")
    print("*    7- Return Main                  *")
    print("**************************************")
    var2 = int(input('Enter the model No\t'))
    if var2 == 1:
        gau(X, y)
    elif var2 == 2:
        kmeans(X, y)
    elif var2 == 3:
        hiclus(X, y)
    elif var2 == 4:
        svm(X, y)
    elif var2 == 5:
        multcla(X, y)
    elif var2 == 6:
        trea(X, y)
    elif var2 == 7:
        scr3(X, y)

    print("ctrl -D to exit")
    scr2(X, y)
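
The menu above dispatches on an integer choice through an if/elif chain; the same idea can be written as a table-driven dispatch. A minimal self-contained sketch (the lambda handlers are placeholders, not the gau/kmeans/hiclus/... functions from the example):

def run_menu(X, y):
    # Placeholder handlers standing in for the example's model functions.
    handlers = {
        1: lambda X, y: print("GaussianNB on", len(X), "rows"),
        2: lambda X, y: print("KMeans on", len(X), "rows"),
    }
    choice = int(input("Enter the model No\t"))
    handlers.get(choice, lambda X, y: print("Unknown choice"))(X, y)

run_menu([[0, 1], [1, 0]], [0, 1])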
Example #2
def main():

    df_preprocessed_data = process_data()
    neural_net(df_preprocessed_data)  #Done Setup
    decision_tree(df_preprocessed_data)  #Done Setup
    adaboost(df_preprocessed_data)  #Done Setup
    knn(df_preprocessed_data)  #Done Setup
    svm(df_preprocessed_data)  #Done Setup
Example #3
def main():

    df_submission, df_train, df_test = process_data()
    neural_net(df_submission, df_train, df_test)  # Done Setup
    decision_tree(df_submission, df_train, df_test)  # Done Setup
    adaboost(df_submission, df_train, df_test)  # Done Setup
    knn(df_submission, df_train, df_test)  # Done Setup
    svm(df_submission, df_train, df_test)  # Done Setup
Example #4
def main():
    datapath = '../data/'
    data = read_data(datapath)
    X, y = create_features(data)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=7641)
    nn(X_train, X_test, y_train, y_test)
    svm(X_train, X_test, y_train, y_test)
    dt(X_train, X_test, y_train, y_test)
    boost(X_train, X_test, y_train, y_test)
    knn(X_train, X_test, y_train, y_test)    
def imageProcess():

    try:
        img = cv2.imread('cropped.jpg', 0)
        img = cv2.medianBlur(img, 3)
        cv2.imwrite('medianimg.jpg', img)
        os.system('sudo cp medianimg.jpg /var/www/html/medianimg.jpg')
        cimg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        cv2.imwrite('grayimg.jpg', cimg)
        os.system('sudo cp grayimg.jpg /var/www/html/grayimg.jpg')
        eimg = cv2.Canny(cimg, 20, 50)
        cv2.imwrite('edgedimg.jpg', eimg)
        os.system('sudo cp edgedimg.jpg /var/www/html/edgedimg.jpg')

        output = img.copy()

        # cv2.HOUGH_GRADIENT replaces the legacy cv2.cv.CV_HOUGH_GRADIENT constant
        circles = cv2.HoughCircles(img,
                                   cv2.HOUGH_GRADIENT,
                                   1.25,
                                   45,
                                   param1=40,
                                   param2=26,
                                   minRadius=0,
                                   maxRadius=90)

        x1 = 0
        if circles is not None:
            #convert the (x, y) coordinates and radius of the circles to integers
            circles = np.round(circles[0, :]).astype("int")

            #loop over the (x, y) coordinates and radius of the circles
            for (x, y, r) in circles:
                x1 = x1 + 1
                diameter = (r * 2) / float(7.5)
                print(r)
                cv2.circle(output, (x, y), r, (0, 255, 0), 4)
                cv2.putText(output,
                            str(diameter)[:4], (x - 10, y + 5), 1, 1,
                            (255, 255, 255), 2)

        print "severity: ", x1, " detected"
        diam = (max(circles[:, 2]) * 2) / float(7.5)
        print "float: ", diam
        dArea = float(diam * diam * 3.1416 / 6)
        sArea = str(round(dArea, 2))
        maximum = str(round(diam, 2))
        area.set("Area: " + sArea + "mm^2")
        maxDiameter.set("Max Diameter: " + maximum + "mm")
        cv2.imwrite('output.jpg', output)
        os.system('sudo cp output.jpg /var/www/html/output.jpg')
        print circles[:, 2]
        svm(severity=x1, diameter=diam)

    except:
        result.set("SVM Result: ERROR: No Circle Detected")
Example #6
def start():
    data = pd.read_csv("projetData/red_wines.csv")
    data = clean(data)
    regressionLogistique(data)
    analyseDiscriminanteLineaire(data)
    analyseDiscriminanteQuadratique(data)
    svm(data)
    voisins(data)
    arbre(data)
    test_perceptron(data)
    return data
def runNTimes(times):
    for i in range(0, times):
        voting(evaluation_set)
        adaboost(evaluation_set)
        bagging(evaluation_set)
        stacking(evaluation_set)

        svm(evaluation_set)
        knn(evaluation_set)
        decisionTree(evaluation_set)

    calculateMeanStatistics(times)
Example #8
def main():

    # input = "../Data/glass.data"
    input = "glass.data"
    headers = [
        "Id", "RI", "Na", "Mg", "Al", "Si", "K", "Ca", "Ba", "Fe", "type"
    ]
    data = pd.read_csv(input, names=headers)
    data.drop(["Id"], axis=1, inplace=True)

    # if we'd like to plot all the results, set plot to true
    plot = False
    onePlot = True

    # list for each type of kernel
    kernels = ['linear', 'rbf', 'sigmoid', 'poly']

    if plot:
        onePlot = False
        timeVacc = pdf.PdfPages("Time_VS_Accuracy.pdf")

    # run each type of kernel with both 1v1 and 1vAll
    for k in kernels:
        ovoTime, ovoAccuracy = svm(data,
                                   kernel=k,
                                   classification='ovo',
                                   plot=plot,
                                   onePlot=onePlot)
        ovrTime, ovrAccuracy = svm(data,
                                   kernel=k,
                                   classification='ovr',
                                   plot=plot)
        onePlot = False
        print("*" * 300)

        if plot:
            # plot the time vs the accuracy and save to pdf
            fig = plotTimeVAccuracy(k, ovoTime, ovrTime, ovoAccuracy,
                                    ovrAccuracy)
            timeVacc.savefig(fig, bbox_inches='tight')
            plt.close(fig)

    if plot:
        timeVacc.close()

    # run each type of kernel with 1v1 where the classes are reweighted
    for k in kernels:
        ovoTime, ovoAccuracy = svm(data,
                                   kernel=k,
                                   classification='ovo',
                                   weighted=True,
                                   plot=plot)
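
The repo-local svm() helper used above is not shown on this page; as a stand-in, a minimal self-contained sketch of timing one-vs-one against one-vs-rest SVMs on synthetic multiclass data (make_classification replaces the glass dataset):

import time
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from sklearn.svm import SVC

# Synthetic stand-in for the glass data: 3 classes, 9 numeric features.
X, y = make_classification(n_samples=300, n_features=9, n_informative=6,
                           n_classes=3, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=0)

for name, clf in [('ovo', OneVsOneClassifier(SVC(kernel='rbf'))),
                  ('ovr', OneVsRestClassifier(SVC(kernel='rbf')))]:
    start = time.time()
    clf.fit(X_tr, y_tr)
    print(name, 'time:', round(time.time() - start, 4),
          'accuracy:', round(clf.score(X_te, y_te), 3))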
def manualSVM():
    try:
        diameter = entDiam.get()
        amount = entAmount.get()
        diam = float(diameter)
        amount = int(amount)
        dArea = float(diam * diam * 3.1416 / 6)
        sArea = str(round(dArea, 2))
        maximum = str(round(diam, 2))
        area.set("Area: " + sArea + "mm^2")
        maxDiameter.set("Max Diameter: " + maximum + "mm")
        svm(severity=amount, diameter=diam)
    except Exception:
        result.set("SVM Result: Input Error")
Example #10
def evaluation(chromosome):
	global best_val
	global valores
	global a
	global b
	global c
	global d
	global e
	global f
	code_comp = chromosome.getCompiledCode()
	features = eval(code_comp)
	cfeatures = len(features)
	matrix_final = reducir(matrix,features)
	evaluated_data = svm(matrix_final)
	total = len(matrix[0])-1
	alfa = 0.5
	beta = 0.4
	gama = 0.1
	valor = ((alfa*(1-evaluated_data[0]))+beta*(1-evaluated_data[1])+gama*(cfeatures/total))/3.0	
	if valor < best_val:
		best_val = valor
		valores = []
		valores.append(evaluated_data[0])
		valores.append(evaluated_data[1])
		valores.append(cfeatures)
	fitness.append(valor)
	print('AUC: ', evaluated_data[0], 'ACC: ', evaluated_data[1])
	return valor
Example #11
def run(file):
    data = pd.read_csv(file, encoding='latin-1')

    # filter stop words, punctuation and ignore capital letter
    f = feature_extraction.text.CountVectorizer(stop_words='english')
    X = f.fit_transform(data["message"])

    # split the data to test set and train set
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, data['label'], test_size=0.33)

    # sends the sets to the machine learning algorithms
    adaboost(X_train, X_test, y_train, y_test)
    svm(X_train, X_test, y_train, y_test)
    knn(X_train, X_test, y_train, y_test)
    decisionTree(X_train, X_test, y_train, y_test)
Example #12
def SVM_train_cross(train_x, train_y, validation, test, test_data):
    print("training data...")
    clf_pipe = make_pipeline(CountVectorizer(ngram_range=(1, 2)), RandomUnderSampler(), SVC(kernel='rbf', C=1))
    scores = cross_val_score(clf_pipe, train_x, train_y, cv=5)
    print("Model is fitted!")
    if validation:
        print(scores)
        print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
        y_pred = cross_val_predict(clf_pipe, train_x, train_y, cv=5)

        # Evaluation
        # classification report
        print("classification reports:", classification_report(train_y, y_pred))
        print("Finished!")
        # confusion matrix
        conf_mat = confusion_matrix(train_y, y_pred)
        print(conf_mat)
        plot_conf(conf_mat)
    if test:
        svm(test_data)
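
For reference, a minimal self-contained version of the cross-validated pipeline above, scikit-learn only (the imblearn RandomUnderSampler step is dropped) and with made-up toy texts standing in for train_x/train_y:

from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

texts = ["win money now", "meeting at noon", "free prize inside",
         "lunch tomorrow?", "claim your reward", "project status update"]
labels = [1, 0, 1, 0, 1, 0]  # made-up spam/ham labels

clf_pipe = make_pipeline(CountVectorizer(ngram_range=(1, 2)), SVC(kernel='rbf', C=1))
scores = cross_val_score(clf_pipe, texts, labels, cv=3)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))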
Example #13
def sup_vec(X_train, y_train):
    pipe = Pipeline([('tfidf', TfidfVectorizer()),
                     ('svm', svm(random_state=42))
                     ])

    parameter = {"tfidf__ngram_range": [(1, 1), (1, 2), (1, 3)],
                 "svm__kernel": ["linear", "poly", "rbf", "sigmoid"],
                 "svm__C": np.logspace(-3, 3, 7)
                 }
    model = GridSearchCV(pipe, param_grid=parameter, cv=5, verbose=1)
    model.fit(X_train, y_train)
    print(f"Best o score : {model.best_score_} with {model.best_params_}")
    return model.best_score_, model.best_params_
Example #14
def train(train_data_features, train_data_labels, classifier, hyperparameter):

    if classifier == 'lr':
        model = LogisticRegression(solver=str(hyperparameter),
                                   class_weight='balanced')
    elif classifier == 'rf':
        model = RandomForestClassifier(n_estimators=int(hyperparameter),
                                       class_weight='balanced')
    elif classifier == 'knn':
        model = KNeighborsClassifier(n_neighbors=int(hyperparameter))
    elif classifier == 'svm':
        model = svm(kernel=str(hyperparameter), class_weight='balanced')
    else:
        # Guard: an unrecognised classifier string would otherwise leave `model` unbound.
        raise ValueError("Unknown classifier: " + str(classifier))

    fit = model.fit(train_data_features, train_data_labels)
    return fit
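
A minimal usage sketch for a trainer like the one above, on made-up toy data (only the 'rf' branch is exercised, so only RandomForestClassifier needs to be in scope):

from sklearn.ensemble import RandomForestClassifier

X = [[0, 0], [0, 1], [1, 0], [1, 1]]  # toy features
y = [0, 0, 1, 1]                      # toy labels
fit = train(X, y, classifier='rf', hyperparameter=10)
print(fit.predict([[1, 1]]))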
def main(scores):
    output = DataFrame(data = [])
    for i in scores.keys():
        #print(i)
        #print(scores.get(i))
        value = scores.get(i)
        if value >= 4 :
            prediction = "Positive"
        elif 2 < value < 4 :
            prediction = str(svm(i, data))
            #print(logit(i, data))
        elif 0 < value <= 2 :
            prediction = str(logit(i, data, 0.8))
        elif value <= 0 :
            prediction = "Negative"
        else:
            prediction = "Not calculated"
        print(str(i) + ' ' + prediction)
        row = Series(data = [i, value, prediction])
        output = concat([output, row], axis = 1)
    output = output.T
    output.to_csv("consensus_predictions.txt", sep = '\t')
Example #16
    def _init(self, X, Y):
        if self.bias:
            self.X = np.append(X, np.ones((np.shape(X)[0], 1)), axis=1)
        else:
            self.X = X
        self.Y = np.ravel(Y)
        self.l_index = np.ravel(self.Y != -1)
        self.u_index = ~self.l_index

        if self.estimator is None:
            self.estimator = [
                RandomForestClassifier(),
                LogisticRegression(),
                svm()
            ]

        # Initialize the classifiers
        for est in self.estimator:
            indices = random.sample(
                list(compress(range(len(X)), self.l_index)),
                int(np.sum(self.l_index) * 1))
            est.fit(self.X[indices], self.Y[indices])
Example #17
def main():
    X_train, X_test, y_train, y_test = split()

    #    y_predicted = knn_clasify(X_train, X_test, y_train)
    #    knn_efficiency(y_predicted, y_test)

    linear_regression(X_train, X_test, y_train, y_test)

    y_predicted = logistic_regression(X_train, X_test, y_train)
    logistic_regression_efficiency(y_predicted, y_test)

    y_predicted = svm(X_train, X_test, y_train)
    svm_efficiency(y_predicted, y_test)

    k = 1
    while k < 10:
        y_predicted = svm_kernal(X_train, X_test, y_train, k)
        svm_kernal_efficiency(y_predicted, y_test, k)
        k = k + 1

    y_predicted = svm_rbf(X_train, X_test, y_train)
    svm_rbf_efficiency(y_predicted, y_test)
Example #18
def main():
    # keyword_id clicks conv cost date group hour imps match_type month monthday pos weekday
    data = np.genfromtxt("data.csv", delimiter=",", dtype=None)[1:]

    target = data[:, 2].astype(np.float)  # conv
    data = scipy.delete(data, 2, 1)

    skf = cross_validation.StratifiedKFold(target, n_folds=10)
    for train_index, test_index in skf:
        X_train, X_test = data[train_index], data[test_index]
        y_train, y_test = target[train_index], target[test_index]

        X_train, X_test = preprocess(X_train, X_test)
        train_data = np.column_stack([X_train, y_train])
        train_data = np.array([np.array(x) for x in set(tuple(x) for x in train_data)])
        X_train = train_data[:, :-1]
        y_train = train_data[:, -1]
        # predicted_test = random_forest_regressor(X_train,y_train,X_test)
        predicted_test = svm(X_train, y_train, X_test)
        _, _, non_zero_clicks = get_non_zero_clicks(X_test, y_test)
        predicted_test[non_zero_clicks == False] = 0
        model_eval(y_test, predicted_test)
Пример #19
0
def main(argv):
    if FLAGS.dataset == 'toy':
        train_X, train_y, test_X, test_y, num_classes = get_toy_dataset()
    elif FLAGS.dataset == 'mnist':
        train_X, train_y, test_X, test_y, num_classes = get_mnist()

    train_pred = None

    if FLAGS.method == 'knn':
        pred = knn(train_X, train_y, test_X)
    elif FLAGS.method == 'svm':
        train_pred, pred = svm(train_X, train_y, test_X)
    elif FLAGS.method == 'tree':
        pred = tree(train_X, train_y, test_X)
    elif FLAGS.method == 'boosting':
        pred = boosting(train_X, train_y, test_X)
    elif FLAGS.method == 'nn':
        train_pred, pred = nn(train_X, train_y, test_X, num_classes)

    if train_pred is not None:
        print('Train Accuracy: %f' % compute_accuracy(train_pred, train_y))

    print('Accuracy: %f' % compute_accuracy(pred, test_y))
def my_ensemble(X_train, X_test, y_train, y_test):
    y_pred = []
    votes = []
    
    X_train_pca, X_test_pca = preprocess_PCA(X_train, X_test, True, 100)
    votes.append(svm(X_train_pca, X_test_pca, y_train))
    votes.append(mog(X_train_pca, X_test_pca, y_train, 10))
    votes.append(ensemble(X_train, X_test, y_train))
    
    print "*** SVM ***"
    show_metrics(votes[0], y_test)
    print "*** mog ***"
    show_metrics(votes[1], y_test)
    print "*** ensemble ***"
    show_metrics(votes[2], y_test)
    print "*** my_ensemble ***"
    
    for i in range(len(X_test)):
        if votes[1][i] == votes[2][i]:
            y_pred.append(votes[1][i])
        else:
            y_pred.append(votes[0][i])
    
    return y_pred
#%%
# ______________  Naive_Bayes  _______________-

sk = SklearnClassifier(MultinomialNB())
sk.train(train_feats)
acc_Naive_Bayes = accuracy(sk, test_feats)
print("Naive Bayes Accuracy: ", acc_Naive_Bayes)

# ______________  K-Neighbors  _______________-

sk_knn = SklearnClassifier(KNeighborsClassifier())
sk_knn.train(train_feats)
acc_knn = accuracy(sk_knn, test_feats)
print("K-NN Accuracy: ", acc_knn)

# ______________  Regression  _______________-
#%%
sk_reg = SklearnClassifier(LogisticRegression())
sk_reg.train(train_feats)
acc_reg = accuracy(sk_reg, test_feats)
print("Regression Accuracy: ", acc_reg)

# ______________  SVM  _______________-
#%%
sk_svm = SklearnClassifier(svm())
sk_svm.train(train_feats)
acc_svm = accuracy(sk_svm, test_feats)
print("SVM Accuracy: ", acc_svm)

#%%
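
The sk_svm line above wraps a repo-local svm alias; a minimal self-contained variant using scikit-learn's SVC and tiny made-up NLTK-style feature sets:

from nltk.classify.scikitlearn import SklearnClassifier
from nltk.classify.util import accuracy
from sklearn.svm import SVC

# Tiny made-up feature sets in NLTK's (feature_dict, label) format.
train_feats = [({"good": True}, "pos"), ({"bad": True}, "neg"),
               ({"great": True}, "pos"), ({"awful": True}, "neg")]
test_feats = [({"good": True}, "pos"), ({"awful": True}, "neg")]

sk_svm2 = SklearnClassifier(SVC())
sk_svm2.train(train_feats)
print("SVM Accuracy:", accuracy(sk_svm2, test_feats))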
Example #22
def train_model(classifier, train_path, test_path, type_classification, train=True, validation=True, test=True,
                cross_validation=False):
    # collect train data
    print("reading train set...")
    if type_classification == "T":
        # read titles and their label
        train_x, train_y = collect_titles(train_path)
    elif type_classification == "TB":
        # read whole document
        train_x, train_y = collect_documents(train_path)
    elif type_classification == "TBW":
        # weighted title and body
        train_x, train_y = collect_weighted_doc(train_path)
    else:
        print("wrong argument")
    # if test:
    print("loading test data...")
    test_data, reference = collect_test_documents(test_path)

    # split data
    if not cross_validation:
        print("spliting the train set...")

        train_data, validate_data, train_target, validate_target = train_test_split(train_x, train_y, test_size=0.4,
                                                                                    random_state=0)
        # Naive bayes classifier
        if classifier == "NB":
            # train data set
            if train:
                print("training data...")
                naive_bayes_train(train_data, train_target)
            # validate validation set
            if validation:
                print("evaluating data...")
                naive_bayes_evaluate(validate_data, validate_target)
            # test data
            if test:
                print("testing data...")
                naive_bayes(test_data, reference)
                print("results are written in: \Results\Prediction.xlsx")

        # SVM classifier
        if classifier == "SVM":
            # train data set
            if train:
                print("training data...")
                svm_train(train_data, train_target)
            # validate validation set
            if validation:
                print("evaluating data...")
                svm_evaluate(validate_data, validate_target)
            # test data
            if test:
                print("testing data...")
                svm(test_data, reference)
                print("results are written in: \Results\Prediction.xlsx")

        # Logistic regression
        if classifier == "LR":
            # train data set
            if train:
                print("training data...")
                train_logistic_regression(train_data, train_target)
            # validate validation set
            if validation:
                print("evaluating data...")
                validate_logistic_regression(validate_data, validate_target)
            # test data
            if test:
                print("testing data...")
                logistic_regression(test_data, reference)
                print("results are written in: \Results\Prediction.xlsx")
    # using cross validation
    else:
        if classifier == "NB":
            naive_bayse_cross(train_x, train_y, validation, test, test_data)

        if classifier == "SVM":
            SVM_train_cross(train_x, train_y, validation, test, test_data)
Example #23
from sklearn import datasets
def svm():
#	iris = datasets.load_iris()
#	digits = datasets.load_digits()

	from sklearn import svm
	X = [[0, 0], [1, 1]]
	y = [0, 1]
	clf = svm.SVC()
	clf.fit(X, y)  
	print(clf)
	print(clf.predict([[2., 2.]]))
	print(clf)

	book = open_workbook('Adomain_Substrate.xls')
	worksheet = book.sheet_by_name('Adomain_Substrate')

	

	print(worksheet)

	#SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,
	#gamma=0.0, kernel='rbf', max_iter=-1, probability=False, random_state=None,
	#shrinking=True, tol=0.001, verbose=False)




if __name__ == "__main__":
	svm()
 	#sys.exit(main())
def pca_svm(X_train, X_test, y_train, use_unlabelled=True, pca_components=150, fraction=2):
    X_train, X_test = preprocess_PCA(X_train, X_test, use_unlabelled, pca_components, fraction)
    return svm(X_train, X_test, y_train)
              max_iter=-1,
              probability=False,
              random_state=None,
              shrinking=True,
              tol=0.001,
              verbose=False)
    scaleX(xVal)
    scaleX(xTest)
    #xVal, xTest = pca2(xVal, xTest)
    #print xVal[0]

    #print xVal[0]

    #print yVal
    clf.fit(xVal, yVal)
    yTest2 = []
    ans = 0
    yTest = (clf.predict(xVal))
    print('Classification on Training Data')
    print(yTest)
    yTest2 = (clf.predict(xTest))
    print('Classification on Test Data')
    print(yTest2)
    return yTest2


trainData = loadDataSet('training.csv')
testData = loadDataSet('test.csv')
ytrainData = getY()
ans = svm(trainData, ytrainData, testData)
def repeatedCrossValidation(normal_set, anom_set, k, repetitions):
    k_fold = KFold(n_splits=k)

    # Repeat the process K = 5 times
    original_set = normal_set + anom_set

    voting_statistics = []
    adaboost_statistics = []
    bagging_statistics = []
    stacking_statistics = []
    svm_statistics = []
    knn_statistics = []
    dt_statistics = []

    voting_time = []
    adaboost_time = []
    bagging_time = []
    stacking_time = []
    svm_time = []
    knn_time = []
    dt_time = []
    mlp_time = []

    technique_name = [
        '- Voting -', '- AdaBoost -', '- Bagging -', '- Stacking -', '- SVM -',
        '- KNN -', '- DT -', '- MLP -'
    ]
    j = 0

    for i in range(repetitions):
        shuffle(original_set)

        for train_indices, test_indices in k_fold.split(original_set):
            train_set = []
            train_set_classification = []
            test_set = []

            normal_flows_in_evaluation_set = 0
            anom_flows_in_evaluation_set = 0

            # Split out the training set and its corresponding labels
            for index in train_indices:
                train_set.append(original_set[index])

                if original_set[index] in anom_set:
                    train_set_classification.append(1)
                else:
                    train_set_classification.append(0)

            # Split out the evaluation set
            for index in test_indices:
                test_set.append(original_set[index])

                # Count the number of anomalous and normal flows in the
                # evaluation set (used for statistics)
                if original_set[index] in anom_set:
                    anom_flows_in_evaluation_set = anom_flows_in_evaluation_set + 1
                else:
                    normal_flows_in_evaluation_set = normal_flows_in_evaluation_set + 1

            # Training
            classifier_knn.fit(train_set, train_set_classification)
            classifier_svm.fit(train_set, train_set_classification)
            classifier_dt.fit(train_set, train_set_classification)
            classifier_mlp.fit(train_set, train_set_classification)

            # Evaluation
            start_time = time.time()
            predictions = voting(train_set, train_set_classification,
                                 test_set[0:20])
            time_spent = time.time() - start_time
            voting_time.append(time_spent)

            start_time = time.time()
            predictions = adaboost(train_set, train_set_classification,
                                   test_set[0:20])
            time_spent = time.time() - start_time
            adaboost_time.append(time_spent)

            start_time = time.time()
            predictions = bagging(train_set, train_set_classification,
                                  test_set[0:20])
            time_spent = time.time() - start_time
            bagging_time.append(time_spent)

            start_time = time.time()
            predictions = stacking(train_set, train_set_classification,
                                   test_set[0:20])
            time_spent = time.time() - start_time
            stacking_time.append(time_spent)

            start_time = time.time()
            predictions = svm(test_set[0:20])
            time_spent = time.time() - start_time
            svm_time.append(time_spent)

            start_time = time.time()
            predictions = knn(test_set[0:20])
            time_spent = time.time() - start_time
            knn_time.append(time_spent)

            start_time = time.time()
            predictions = decisionTree(test_set[0:20])
            time_spent = time.time() - start_time
            dt_time.append(time_spent)

            start_time = time.time()
            predictions = neuralNetwork(test_set[0:1])
            time_spent = time.time() - start_time
            mlp_time.append(time_spent)

    getTimeMeasurementsPerInstance(voting_time, adaboost_time, bagging_time,
                                   stacking_time, svm_time, knn_time, dt_time,
                                   mlp_time)
def main():
    # Read data
    df = pd.read_table('../titanic/train.csv', sep=",")
    # print(df.head())

    # ------------------------------------------------------------------------------------------

    # Store 'in sample' and  'out of sample' errors: arrays for result df
    E_in = []
    E_out = []
    Model_name = []
    Model_id = []
    iFeatures = []
    Features = []

    # ------------------------------------------------------------------------------------------
    # Preprocessing data
    
    # Set all features
    df['Age'].fillna(0, inplace=True)
    df['Pclass'].fillna(0, inplace=True)
    df['Fare'].fillna(0., inplace=True)
    df['SibSp'].fillna(0., inplace=True)
    df['Parch'].fillna(0., inplace=True)

    df['Sex'].fillna('no', inplace=True)
    df['sex_'] = df['Sex'].map( {'female': 0, 'male': 1, 'no': 2} ).astype(int)

    df['Embarked'].fillna('N', inplace=True)
    df['embarked_'] = df['Embarked'].map( {'N': 0., 'C': 1., 'S': 2., 'Q': 3.} ).astype(float)


    # -------------------------------------------------
    # Slightly more advanced feature extraction

    df['Cabin'].fillna('no', inplace=True)
    def prep_cabin(row):
        res = 0
        if row.lower().find('a')>=0:
            return 1.
        elif row.lower().find('b')>=0:
            return 2.
        elif row.lower().find('c')>=0:
            return 3.
        elif row.lower().find('d')>=0:
            return 4.
        elif row.lower().find('e')>=0:
            return 5.
        elif row.lower().find('f')>=0:
            return 6.
        elif row.lower().find('g')>=0:
            return 7.
        elif row.lower().find('h')>=0:
            return 8.
        return res
    
    df['cabin_'] = df['Cabin'].apply(lambda r: prep_cabin(r))
    #print(df[['Cabin','cabin_']].head(20))
    #exit()

    df['Name'].fillna('no', inplace=True)
    def prep_name(row):
        res = 0
        if row.lower().find('miss.')>=0:
            return 1.
        elif row.lower().find('mrs.')>=0:
            return 2.
        elif row.lower().find('mr.')>=0:
            return 3.
        elif row.lower().find('master')>=0:
            return 4.
        return res
    
    df['name_'] = df['Name'].apply(lambda r: prep_name(r))
    #print(df[['Name','name_']].head(20))
    #exit()

    # -----


    


    # ---
    # Metrics for knn:
    from sklearn import preprocessing

    #scaler = preprocessing.StandardScaler().fit_transform(df['Fare'].values)
    scale = preprocessing.MinMaxScaler(feature_range=(0,1)).fit_transform(df['Fare'].astype(float).values)
    df['fare_'] = pd.Series(scale)
    #print(df[['Fare','fare_']].head(20))
    
    # ---
    scale = preprocessing.MinMaxScaler(feature_range=(0,1)).fit_transform(df['Age'].astype(float).values)
    df['age_'] = pd.Series(scale)
    #print(df[['Age','age_']].head(20))
    #exit()

    # ---
    #scale = preprocessing.MinMaxScaler(feature_range=(0,1)).fit_transform(df['Pclass'].astype(int).values)
    scale = preprocessing.MinMaxScaler(feature_range=(0,1)).fit_transform(df['Pclass'].astype(float).values)
    df['pclass_'] = pd.Series(scale)
    #print(df[['Pclass','pclass_']].head(20))
    
    # ---
    scale = preprocessing.MinMaxScaler(feature_range=(0,1)).fit_transform(df['Parch'].astype(float).values)
    df['parch_'] = pd.Series(scale)
    #print(df[['Parch','parch_']].head(20))

    # ---
    scale = preprocessing.MinMaxScaler(feature_range=(0,1)).fit_transform(df['SibSp'].astype(float).values)
    df['sibsp_'] = pd.Series(scale)
    #print(df[['SibSp','sibsp_']].head(20))

    # ---
    scale = preprocessing.MinMaxScaler(feature_range=(0,1)).fit_transform(df['embarked_'].astype(float).values)
    df['embarked_'] = pd.Series(scale)
    #print(df[['Embarked','embarked_']].head(20))

    # ---
    scale = preprocessing.MinMaxScaler(feature_range=(0,1)).fit_transform(df['cabin_'].astype(float).values)
    df['cabin_'] = pd.Series(scale)
    print(df[['Cabin','cabin_']].head(20))

    # ---
    scale = preprocessing.MinMaxScaler(feature_range=(0,1)).fit_transform(df['name_'].astype(float).values)
    df['name_'] = pd.Series(scale)
    #print(df[['Name','name_']].head(20))

    #exit()

    # ---

    #feature_names = np.array(['sex_', 'Fare'])  #['Pclass', 'Fare', ])
    #feature_names = np.array(['sex_', 'Fare', 'Age', 'Parch', 'SibSp'])  #['Pclass', 'Fare', ])
    #feature_names = np.array(['sex_', 'fare_', 'Age', 'Parch', 'SibSp'])  #['Pclass', 'Fare', ])
    #feature_names = np.array(['sex_', 'fare_', 'age_', 'Pclass', 'Parch', 'SibSp'])  #['Pclass', 'Fare', ])
    #feature_names = np.array(['sex_', 'fare_', 'age_', 'pclass_', 'Parch', 'SibSp'])  #['Pclass', 'Fare', ])
    #feature_names = np.array(['sex_', 'fare_', 'age_', 'pclass_', 'parch_', 'SibSp'])  #['Pclass', 'Fare', ])
    #feature_names = np.array(['sex_', 'fare_', 'age_', 'pclass_', 'parch_', 'SibSp','embarked_'])  #['Pclass', 'Fare', ])
    #feature_names = np.array(['sex_', 'Fare', 'Age', 'Pclass', 'Parch', 'SibSp','embarked_'])  #['Pclass', 'Fare', ])
    #feature_names = np.array(['sex_', 'fare_', 'pclass_', 'parch_', 'sibsp_']) 
    #feature_names = np.array(['sex_', 'fare_', 'pclass_']) 
    #feature_names = np.array(['sex_', 'fare_']) 
    #feature_names = np.array(['sex_', 'fare_', 'age_', 'pclass_', 'parch_', 'sibsp_','embarked_'])  
    ###feature_names = np.array(['sex_', 'fare_', 'age_', 'pclass_', 'parch_', 'sibsp_'])  
    #feature_names = np.array(['sex_', 'fare_', 'pclass_','name_','cabin_']) 
    feature_names = np.array(['sex_', 'fare_', 'age_', 'pclass_', 'name_', 'cabin_', 'parch_', 'sibsp_'])  
    
    label_name = ['Survived']

    # Generate feature combinations
    print(feature_names)
    fc = []
    flen = len(feature_names)
    for fl in np.arange(flen)+1:
        c = itertools.combinations(feature_names, fl)
        # check:
        for s in c:
            #print(s)
            fc.append(np.array(s))

            
    Feature_list = fc
    lFl = len(fc)
    #for f in Feature_list:
    #    print(f) 

    #exit()

    feat_c = 0
    # Run over all feature combinations on different classifiers
    for fn in Feature_list:
        # KNN model: Run over some NN 
        for nn in (1,5,10,15):
            e_in,e_out = knn(df,label_name,fn,lFl,feat_c,n_neighbors=nn)
            Model_name.append('KNN k'+str(nn))
            E_in.append(e_in)
            E_out.append(e_out)
            iFeatures.append(feat_c)
            Features.append(fn)

        # DTree
        e_in,e_out = dtree(df,label_name,fn,lFl,feat_c)
        Model_name.append('DTree')
        E_in.append(e_in)
        E_out.append(e_out)
        iFeatures.append(feat_c)
        Features.append(fn)

        # SVM
        #e_in,e_out = svm(df,label_name,fn,lFl,feat_c,C=10.)
        e_in,e_out = svm(df,label_name,fn,lFl,feat_c,C=1000.)
        #e_in,e_out = svm(df,label_name,fn,lFl,feat_c,C=100000.)
        Model_name.append('SVM')
        E_in.append(e_in)
        E_out.append(e_out)
        iFeatures.append(feat_c)
        Features.append(fn)

        # RF
        e_in,e_out = rf(df,label_name,fn,lFl,feat_c,n_estimators=100)
        #e_in,e_out = rf(df,label_name,fn,lFl,feat_c,n_estimators=1000)
        Model_name.append('RF')
        E_in.append(e_in)
        E_out.append(e_out)
        iFeatures.append(feat_c)
        Features.append(fn)

        # http://scikit-learn.org/stable/modules/ensemble.html#adaboost
        # 200 estimators looked better
        # adaboost
        e_in,e_out = adaboost(df,label_name,fn,lFl,feat_c,n_estimators=200)
        Model_name.append('adaboost')
        E_in.append(e_in)
        E_out.append(e_out)
        iFeatures.append(feat_c)
        Features.append(fn)

        feat_c += 1
        pass

    # ------------------------------------------------------------------------------------------
    # Fill results dataframe
    
    Model_id = np.arange(len(E_in))
    #print(Feature_list)
    #print(Model_id)
    #print(Model_name)
    #print(E_in)
    #print(E_out)

    #modeldf = pd.DataFrame({'Name': Model_name, 'E_in': E_in, 'E_out': E_out, 'iFeatures': iFeatures},index=Model_id)
    modeldf = pd.DataFrame({'Name': Model_name, 'E_in': E_in, 'E_out': E_out, 'Features': Features},index=Model_id)

    # Sort by best performing models
    modeldf.sort_values(by=['E_out', 'E_in'], ascending=[True, True], inplace=True)
    #print(modeldf.head())
    
    modeldf.to_csv('result_brute_test.csv')
    
    # ------------------------------------------------------------------------------------------
    # Print out best performing models
    #nbest = 10
    ##best_model = modeldf.ix[modeldf['E_out'].argmin()]
    #best_model = modeldf.ix[modeldf['E_out'].argsort().values[:nbest]]

    print(modeldf.head(20)) # print best models
    #print(best_model)
    #for i in best_model['iFeatures'].values:
    #    print(i,Feature_list[i])

    # ------------------------------------------------------------------------------------------
    # Plot performance vs models
    plt.rc('text', usetex=True)
    line_ein, = plt.plot(Model_id, np.array(E_in) * 100., label=r'$E_{in}$ ("in sample")')
    line_eout, = plt.plot(Model_id, np.array(E_out) * 100., label=r'$E_{out}$ ("out of sample")')
    plt.title('')
    plt.xlabel('Model Id')
    plt.ylabel(r'Error Rate (\%)')
    plt.legend(handles=[line_ein, line_eout])
def forecast():

	global selected_algorithm, selected_optimizer, image_name
	selected_algorithm = request.form.get('alist')
	selected_optimizer = request.form.get('olist')
	image_name = "default"

	chart_list=glob.glob("static/images/*.png")
	for chart in chart_list:
  		os.remove(chart)

	selected_file = request.form.get('flist')
	if selected_file == "default":
		#selected_algorithm = " "
		filelist = os.listdir( "D:\\uploads" )
		return render_template('upload2.html', filenames = filelist, image_name= image_name, selected_algorithm=selected_algorithm, selected_optimizer=selected_optimizer, error = True)
	print(selected_file)

	file_path = "data\\" + selected_file
	print("file path - {}".format(file_path))
	energyData=pd.read_csv(file_path,header=0)

	def lstm(energyData):

		energyData.head()

		energyData=energyData.drop(['REGION'],axis=1)

		energyData=energyData.drop(['PERIODTYPE'],axis=1)
		energyData=energyData.drop(['RRP( Regional reference price)'],axis=1)
		#energyData=energyData.drop(['TOTALDEMAND'],axis=1)

		energyData=energyData.rename(columns={"RRP( Regional reference price)" : "RRP"})

		energyData.tail()

		def univariate_data(dataset, start_index, end_index, history_size, target_size):
		  data = []
		  labels = []

		  start_index = start_index + history_size
		  if end_index is None:
		    end_index = len(dataset) - target_size

		  for i in range(start_index, end_index):
		    indices = range(i-history_size, i)
		    # Reshape data from (history_size,) to (history_size, 1)
		    data.append(np.reshape(dataset[indices], (history_size, 1)))
		    labels.append(dataset[i+target_size])
		  return np.array(data), np.array(labels)

		TRAIN_SPLIT = 1339

		tf.random.set_random_seed(13)

		uni_data = energyData['TOTALDEMAND']
		uni_data.index = energyData['SETTLEMENTDATE']
		uni_data.head()
		uni_data = uni_data.values
		uni_train_mean = uni_data[:TRAIN_SPLIT].mean()
		uni_train_std = uni_data[:TRAIN_SPLIT].std()

		uni_data = (uni_data-uni_train_mean)/uni_train_std

		univariate_past_history = 20
		univariate_future_target = 0

		x_train_uni, y_train_uni = univariate_data(uni_data, 0, TRAIN_SPLIT,
		                                           univariate_past_history,
		                                           univariate_future_target)
		x_val_uni, y_val_uni = univariate_data(uni_data, TRAIN_SPLIT, None,
		                                       univariate_past_history,
		                                       univariate_future_target)
		print(x_train_uni.shape[-2:])

		print ('Single window of past history')
		print (x_train_uni[0])
		print ('\n Target Demand to predict')
		print (y_train_uni[0])

		from keras.layers import LSTM
		simple_lstm_model = Sequential()
		simple_lstm_model.add(LSTM(50, input_shape=x_train_uni.shape[-2:]))
		simple_lstm_model.add(Dense(1))

		simple_lstm_model.compile(optimizer='adam', loss='mae')

		print(simple_lstm_model.predict(x_val_uni).shape)

		EVALUATION_INTERVAL = 200
		EPOCHS = 10

		simple_lstm_model.fit(x_train_uni,y_train_uni, epochs=EPOCHS,
		                      steps_per_epoch=EVALUATION_INTERVAL,
		                      validation_data=(x_val_uni,y_val_uni), validation_steps=50)

		y_pred=simple_lstm_model.predict(x_val_uni)

		plt.plot(y_val_uni, color = 'red', label = 'Actual Energy Demand')
		plt.plot(y_pred, color = 'blue', label = 'Predicted Energy Demand')
		plt.title('Energy demand- Actual vs Predicted ')
		plt.ylabel('Demand')
		plt.legend()

		val = random.randrange(1,500)
		global image_name
		image_name = "chart"+str(val)+'.png'
		plt.savefig(os.path.join(os.getcwd(), 'static/images',image_name), format='png')

		from sklearn.metrics import mean_squared_error
		from math import sqrt

		rms = sqrt(mean_squared_error(y_val_uni, y_pred))
		global RMSE
		RMSE=rms
		print(rms)

	def lstm2(energyData):

		energyData.head()

		energyData=energyData.drop(['REGION'],axis=1)

		energyData=energyData.drop(['PERIODTYPE'],axis=1)
		energyData=energyData.drop(['RRP( Regional reference price)'],axis=1)
		#energyData=energyData.drop(['TOTALDEMAND'],axis=1)

		energyData=energyData.rename(columns={"RRP( Regional reference price)" : "RRP"})

		energyData.tail()

		def univariate_data(dataset, start_index, end_index, history_size, target_size):
		  data = []
		  labels = []

		  start_index = start_index + history_size
		  if end_index is None:
		    end_index = len(dataset) - target_size

		  for i in range(start_index, end_index):
		    indices = range(i-history_size, i)
		    # Reshape data from (history_size,) to (history_size, 1)
		    data.append(np.reshape(dataset[indices], (history_size, 1)))
		    labels.append(dataset[i+target_size])
		  return np.array(data), np.array(labels)

		TRAIN_SPLIT = 1339

		tf.random.set_random_seed(13)

		uni_data = energyData['TOTALDEMAND']
		uni_data.index = energyData['SETTLEMENTDATE']
		uni_data.head()
		uni_data = uni_data.values
		uni_train_mean = uni_data[:TRAIN_SPLIT].mean()
		uni_train_std = uni_data[:TRAIN_SPLIT].std()

		uni_data = (uni_data-uni_train_mean)/uni_train_std

		univariate_past_history = 20
		univariate_future_target = 0

		x_train_uni, y_train_uni = univariate_data(uni_data, 0, TRAIN_SPLIT,
		                                           univariate_past_history,
		                                           univariate_future_target)
		x_val_uni, y_val_uni = univariate_data(uni_data, TRAIN_SPLIT, None,
		                                       univariate_past_history,
		                                       univariate_future_target)
		print(x_train_uni.shape[-2:])

		print ('Single window of past history')
		print (x_train_uni[0])
		print ('\n Target Demand to predict')
		print (y_train_uni[0])

		from keras.layers import LSTM
		simple_lstm_model = Sequential()
		simple_lstm_model.add(LSTM(50, input_shape=x_train_uni.shape[-2:]))
		simple_lstm_model.add(Dense(1))

		simple_lstm_model.compile(optimizer=selected_optimizer, loss='mae')

		print(simple_lstm_model.predict(x_val_uni).shape)

		EVALUATION_INTERVAL = 200
		EPOCHS = 10

		simple_lstm_model.fit(x_train_uni,y_train_uni, epochs=EPOCHS,
		                      steps_per_epoch=EVALUATION_INTERVAL,
		                      validation_data=(x_val_uni,y_val_uni), validation_steps=50)

		y_pred=simple_lstm_model.predict(x_val_uni)

		plt.plot(y_val_uni, color = 'red', label = 'Actual Energy Demand')
		plt.plot(y_pred, color = 'blue', label = 'Predicted Energy Demand')
		plt.title('Energy demand- Actual vs Predicted ')
		plt.ylabel('Demand')
		plt.legend()

		val = random.randrange(1,500)
		global image_name
		image_name = "chart"+str(val)+'.png'
		plt.savefig(os.path.join(os.getcwd(), 'static/images',image_name), format='png')

		from sklearn.metrics import mean_squared_error
		from math import sqrt

		rms = sqrt(mean_squared_error(y_val_uni, y_pred))
		global RMSE
		RMSE=rms
		print(rms)

	def Gradient_booster(energyData):

		df = energyData[['SETTLEMENTDATE', 'TOTALDEMAND']]

		df['SETTLEMENTDATE'] =  pd.to_datetime(df['SETTLEMENTDATE'], dayfirst=True)

		print(df.head)
		print (df.dtypes)

		from sklearn import preprocessing
		minmaxscaler=preprocessing.MinMaxScaler()

		array_y=np.array(df['TOTALDEMAND'])

		normalized_y=minmaxscaler.fit_transform(array_y.reshape(-1,1))

		X = np.array(df['SETTLEMENTDATE'])
		X=X.astype(float)

		from sklearn.model_selection import train_test_split
		xTrain, xTest, yTrain, yTest = train_test_split(X, normalized_y, test_size = 0.2, random_state = 0)

		from sklearn.ensemble import GradientBoostingRegressor
		gbrt=GradientBoostingRegressor(n_estimators=150,learning_rate=0.02,subsample=.5,max_depth=8)
		gbrt.fit(xTrain.reshape(-1, 1), yTrain.reshape(-1, 1))

		y_pred=gbrt.predict(xTest.reshape(-1, 1))

		plt.figure(figsize=(9,6))
		plt.plot(minmaxscaler.inverse_transform(yTest.reshape(-1,1)), color='red', label = 'Actual Energy Demand')
		plt.plot(minmaxscaler.inverse_transform(y_pred.reshape(-1,1)), color='blue', label = 'Predicted Energy Demand')
		plt.title('Energy demand- Actual vs Predicted ')
		plt.ylabel('Demand')
		plt.legend()

		val = random.randrange(1,500)
		global image_name
		image_name = "chart"+str(val)+'.png'
		plt.savefig(os.path.join(os.getcwd(), 'static/images',image_name), format='png')

		from sklearn.metrics import mean_squared_error
		from math import sqrt
		Grms = sqrt(mean_squared_error(yTest, y_pred))
		print("Root Mean Square error : {}".format(Grms))
		global RMSE
		RMSE=Grms

		from sklearn.metrics import mean_absolute_error
		mean_absolute_error(yTest, y_pred)

	def decision_tree(energyData):
		df = energyData[['SETTLEMENTDATE', 'TOTALDEMAND']]
		df['SETTLEMENTDATE'] =  pd.to_datetime(df['SETTLEMENTDATE'], dayfirst=True)

		from sklearn import preprocessing
		minmaxscaler=preprocessing.MinMaxScaler()
		array_y=np.array(df['TOTALDEMAND'])
		normalized_y=minmaxscaler.fit_transform(array_y.reshape(-1,1))

		X = np.array(df['SETTLEMENTDATE'])
		X=X.astype(float)

		from sklearn.model_selection import train_test_split
		xTrain, xTest, yTrain, yTest = train_test_split(X, normalized_y, test_size = 0.2, random_state = 0)

		from sklearn.tree import DecisionTreeRegressor
		regressor = DecisionTreeRegressor(random_state = 0, max_depth=75,min_samples_split=2,
		                           max_leaf_nodes=None)
		regressor.fit(xTrain.reshape(-1,1), yTrain.reshape(-1,1))
		y_predict=regressor.predict(xTest.reshape(-1,1))

		plt.figure(figsize=(9,6))
		plt.plot(minmaxscaler.inverse_transform(yTest.reshape(-1,1)), color='red', label = 'Actual Energy Demand')
		plt.plot(minmaxscaler.inverse_transform(y_predict.reshape(-1,1)), color='blue', label = 'Predicted Energy Demand')
		plt.title('Energy demand- Actual vs Predicted ')
		plt.ylabel('Demand')
		plt.legend()

		val = random.randrange(1,500)
		global image_name
		image_name = "chart"+str(val)+'.png'
		plt.savefig(os.path.join(os.getcwd(), 'static/images',image_name), format='png')

		from math import sqrt
		Drms= sqrt(mean_squared_error(yTest, y_predict))
		print("Root Mean Square Error : {}".format(Drms))

		global RMSE
		RMSE=Drms

		from sklearn.metrics import mean_absolute_error
		mean_absolute_error(yTest, y_predict)

	def svm(file_path):
		fontsize = 18
		data_file = pd.read_csv(file_path,parse_dates=True, index_col=1)
		data_file=data_file.drop(['REGION'],axis=1)
		data_file=data_file.drop(['PERIODTYPE'],axis=1)

		## Set weekends and holidays to 1, otherwise 0
		data_file['Atypical_Day'] = np.zeros(len(data_file['TOTALDEMAND']))

		# Weekends
		data_file['Atypical_Day'][(data_file.index.dayofweek==5)|(data_file.index.dayofweek==6)] = 1

		data_file.head(50)

		# Create new column for each hour of day, assign 1 if index.hour is corresponding hour of column, 0 otherwise

		for i in range(0,48):
		    data_file[i] = np.zeros(len(data_file['TOTALDEMAND']))
		    data_file[i][data_file.index.hour==i] = 1

		# Example 3am
		data_file[3][:6]

		# Add historic usage to each X vector

		# Set number of hours prediction is in advance
		n_hours_advance = 1

		# Set number of historic hours used
		n_hours_window = 6


		for k in range(n_hours_advance,n_hours_advance+n_hours_window):

		    data_file['TOTALDEMAND_t-%i'% k] = np.zeros(len(data_file['TOTALDEMAND']))


		for i in range(n_hours_advance+n_hours_window,len(data_file['TOTALDEMAND'])):

		    for j in range(n_hours_advance,n_hours_advance+n_hours_window):

		        data_file['TOTALDEMAND_t-%i'% j][i] = data_file['TOTALDEMAND'][i-j]


		# Define training and testing periods
		train_start = '1-march-2019'
		train_end = '23-march-2019'
		test_start = '24-march-2019'
		test_end = '1-april-2019'

		# Split up into training and testing sets (still in Pandas dataframes)

		X_train_df = data_file[train_start:train_end]
		y_train_df = data_file['TOTALDEMAND'][train_start:train_end]

		X_test_df = data_file[test_start:test_end]
		y_test_df = data_file['TOTALDEMAND'][test_start:test_end]

		N_train = len(X_train_df[0])
		print ('Number of observations in the training set: ', N_train)

		# Numpy arrays for sklearn
		X_train = np.array(X_train_df)
		X_test = np.array(X_test_df)
		y_train = np.array(y_train_df)
		y_test = np.array(y_test_df)

		from sklearn import preprocessing as pre
		from sklearn import svm
		scaler = pre.StandardScaler().fit(X_train)
		X_train_scaled = scaler.transform(X_train)
		X_test_scaled = scaler.transform(X_test)

		SVR_model = svm.SVR(kernel='rbf',C=100,gamma=.001).fit(X_train_scaled,y_train)
		print ('Testing R^2 =', round(SVR_model.score(X_test_scaled,y_test),3))

		# Use SVR model to calculate predicted next-hour usage
		predict_y_array = SVR_model.predict(X_test_scaled)

		# Put it in a Pandas dataframe for ease of use
		predict_y = pd.DataFrame(predict_y_array,columns=['TOTALDEMAND'])
		predict_y.index = X_test_df.index

		### Plot daily total kWh over testing period
		y_test_barplot_df = pd.DataFrame(y_test_df,columns=['TOTALDEMAND'])
		y_test_barplot_df['Predicted'] = predict_y['TOTALDEMAND']

		fig = plt.figure(figsize=[11,7])
		ax = fig.add_subplot(111)
		y_test_barplot_df.plot(kind='line',ax=ax,color=['red','blue'])
		ax.grid(False)
		ax.set_ylabel('Electricity Demand(kWh)', fontsize=fontsize)
		ax.set_xlabel('')
		# Pandas/Matplotlib bar graphs convert xaxis to floats, so need a hack to get datetimes back
		ax.set_xticklabels([dt.strftime('%b %d') for dt in y_test_df.index.to_pydatetime()],rotation=0, fontsize=fontsize)
		plt.title('Energy demand- Actual vs Predicted ')
		plt.legend(fontsize=fontsize)

		val = random.randrange(1,500)
		global image_name
		image_name = "chart"+str(val)+'.png'
		plt.savefig(os.path.join(os.getcwd(), 'static/images',image_name), format='png')

		from sklearn.metrics import mean_squared_error
		from math import sqrt

		SvmRms = sqrt(mean_squared_error(y_test_df,predict_y))
		global RMSE
		RMSE=SvmRms

		print("Root Mean Square Error : {}".format(SvmRms))

	if selected_algorithm == "LSTM":
		lstm(energyData)
	elif selected_algorithm == "Gradient_Booster":
		Gradient_booster(energyData)
	elif selected_algorithm == "Decision_Tree":
		decision_tree(energyData)
	elif selected_algorithm == "SVM":
		svm(file_path)
	elif selected_algorithm == "LSTM2":
		lstm2(energyData)
	return redirect('/')
            y_pred = clf.predict(x_test)
            # print y_pred
            f1.append(f1_score(y_test, y_pred, average='weighted'))
            # print sum(int(f1))
        f_average.append((sum(f1) / (len(f1))))

    print(f_average)

    C=[0.01,0.1,1,10,100]

    plt.title("SVM")
    plt.plot(C,f_average)

    plt.show()
    return f_average
f_average_svm=svm()

def gini():
    classifiers_DT = [DecisionTreeClassifier(criterion="gini", max_leaf_nodes=2),DecisionTreeClassifier(criterion="gini", max_leaf_nodes=5),
                      DecisionTreeClassifier(criterion="gini", max_leaf_nodes=10),
                      DecisionTreeClassifier(criterion="gini", max_leaf_nodes=20)]
    DT_name = "Decision Tree"
    f1 = []
    f_average=[]

    for name, clf in zip(DT_name, classifiers_DT):
        for train_index ,test_index in kf.split(df):
            x_train,x_test=X[train_index],X[test_index]
            y_train,y_test=Y[train_index],Y[test_index]

        # Prepare the plot for this classifier
Example #30
    print("Accuracy Test Data:", metrics.accuracy_score(y_train, y_hat))
    print("Accuracy Test Data:", metrics.accuracy_score(y_test, y_pred))

    print("Precision:", metrics.precision_score(y_test, y_pred))

    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))

    y_pred_rf = svclassifier.predict_proba(X_test)[:, 1]
    fpr_rf, tpr_rf, thresholds_rf = roc_curve(y_test, y_pred_rf)
    return fpr_rf, tpr_rf, thresholds_rf


fpr_keras, tpr_keras, thresholds_keras = neural_net()
auc_neural = auc(fpr_keras, tpr_keras)
fpr_rf, tpr_rf, thresholds_rf = svm()
auc_SVM = auc(fpr_rf, tpr_rf)


def roc_cruve():
    plt.figure(1)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(fpr_keras,
             tpr_keras,
             label='Neural Net (area = {:.3f})'.format(auc_neural))
    plt.plot(fpr_rf, tpr_rf, label='SVM (area = {:.3f})'.format(auc_SVM))
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve')
    plt.legend(loc='best')
    plt.show()
Example #31
    fig.suptitle('Predicted versus actual labels',
                 fontsize=14,
                 fontweight='bold')

    # Show the plot
    plt.show()


if __name__ == '__main__':

    # Preprocess data
    #
    # shift the distribution of each attribute to have a mean of zero and a standard deviation of one (unit variance)
    #
    from sklearn.preprocessing import scale

    # Apply `scale()` to the `digits` data
    data = scale(digits.data)

    #
    # Split data into training and test sets
    #
    from sklearn.model_selection import train_test_split

    # Split the `digits` data into training and test sets
    X_train, X_test, y_train, y_test, images_train, images_test = train_test_split(
        data, digits.target, digits.images, test_size=0.25, random_state=42)

    # Use either k_means() or svm()
    svm()
Example #32
def main():
    print("Starting application..\n")
    traffic = normalize(filename_adj)
    print("1 - Display the entropy list")
    print("2 - Display the information gain list")
    print("3 - Display the chi-squared list")
    print("4 - Display the ReliefF list")
    selection = int(input("Enter your selection: "))
    #num_features = input("Enter the number of features to select: ")
    if (selection == 1):
        ent_list(traffic)
    elif (selection == 2):
        #sorted_gain_list = ig_list(traffic)
        sorted_gain_list = [
            'land', 'urgent', 'wrong_fragment', 'rerror_rate',
            'srv_rerror_rate', 'count', 'dst_host_rerror_rate',
            'dst_host_srv_rerror_rate', 'duration', 'flag', 'serror_rate',
            'srv_serror_rate', 'dst_host_serror_rate',
            'dst_host_srv_serror_rate', 'diff_srv_rate', 'same_srv_rate',
            'dst_host_same_srv_rate', 'srv_diff_host_rate',
            'dst_host_diff_srv_rate', 'dst_host_srv_count',
            'dst_host_srv_diff_host_rate', 'dst_host_count', 'protocol_type',
            'srv_count', 'dst_host_same_src_port_rate', 'dst_bytes', 'service',
            'src_bytes'
        ]
        print("Features selected using Information Gain.")
    elif (selection == 3):
        #sorted_chi2_list = chi2_list()
        print("Features selected using Chi-squared.")
        sorted_chi2_list = [
            'dst_host_same_srv_rate', 'count', 'rerror_rate',
            'srv_rerror_rate', 'same_srv_rate', 'wrong_fragment',
            'dst_host_rerror_rate', 'diff_srv_rate',
            'dst_host_srv_rerror_rate', 'dst_host_diff_srv_rate',
            'serror_rate', 'srv_serror_rate', 'dst_host_serror_rate',
            'dst_host_srv_serror_rate', 'flag', 'dst_host_same_src_port_rate',
            'protocol_type', 'srv_diff_host_rate',
            'dst_host_srv_diff_host_rate', 'dst_host_srv_count', 'service',
            'land', 'urgent', 'dst_host_count', 'srv_count', 'duration',
            'src_bytes', 'dst_bytes'
        ]
    elif (selection == 4):
        #X,Y = create_dataframe()
        #obj = reliefF()
        #sorted_reliefF_list = obj.fit_transform(X, Y)
        sorted_reliefF_list = [
            'srv_count', 'src_bytes', 'dst_bytes', 'dst_host_count',
            'srv_diff_host_rate', 'dst_host_srv_diff_host_rate',
            'dst_host_same_src_port_rate', 'dst_host_srv_count',
            'dst_host_diff_srv_rate', 'dst_host_same_srv_rate',
            'same_srv_rate', 'diff_srv_rate', 'duration',
            'dst_host_srv_rerror_rate', 'dst_host_rerror_rate',
            'dst_host_srv_serror_rate', 'dst_host_serror_rate', 'service',
            'serror_rate', 'srv_serror_rate', 'srv_rerror_rate', 'rerror_rate',
            'flag', 'count', 'protocol_type', 'land', 'urgent',
            'wrong_fragment'
        ]
        print("Features selected using reliefF.")
    else:
        print("Invalid selection")
    print("\n")
    print("*****************")
    print("Classification")
    print("*****************")
    print("1 - Naive Bayes ")
    print("2 - SVM")
    print("3 - Decision Tree")
    print("4 - Random Forest")
    clx_selection = int(input("Enter your selection: "))
    book = xlwt.Workbook(encoding="utf-8")
    sheet1 = book.add_sheet("Sheet 1")
    sheet1.write(0, 2, "Accuracy Values")
    for i in range(1, 2):
        num_features = i
        print("Number of features selected - ", i)
        if (selection == 2 and clx_selection == 1):
            print("Classifying using Naive Bayes...")
            accuracy = naive_bayes(sorted_gain_list, num_features)
            sheet1.write(i, 2, accuracy)
        elif (selection == 3 and clx_selection == 1):
            print("Classifying using Naive Bayes...")
            accuracy = naive_bayes(sorted_chi2_list, num_features)
            sheet1.write(i, 5, accuracy)
        elif (selection == 4 and clx_selection == 1):
            print("Classifying using Naive Bayes...")
            accuracy = naive_bayes(sorted_reliefF_list, num_features)
            sheet1.write(i, 2, accuracy)

        elif (selection == 2 and clx_selection == 2):
            print("Classifying using SVM...")
            accuracy = svm(sorted_gain_list, num_features)
            sheet1.write(i, 3, accuracy)
        elif (selection == 3 and clx_selection == 2):
            print("Classifying using SVM...")
            accuracy = svm(sorted_chi2_list, num_features)
            sheet1.write(i, 3, accuracy)
        elif (selection == 4 and clx_selection == 2):
            print("Classifying using SVM...")
            accuracy = svm(sorted_reliefF_list, num_features)
            sheet1.write(i, 3, accuracy)

        elif (selection == 2 and clx_selection == 3):
            print("Classifying using Decision Trees...")
            accuracy = decision_tree(sorted_gain_list, num_features)
            sheet1.write(i, 3, accuracy)
        elif (selection == 3 and clx_selection == 3):
            print("Classifying using Decision Trees...")
            accuracy = decision_tree(sorted_chi2_list, num_features)
            sheet1.write(i, 3, accuracy)
        elif (selection == 4 and clx_selection == 3):
            print("Classifying using Decision Trees...")
            accuracy = decision_tree(sorted_reliefF_list, num_features)
            sheet1.write(i, 3, accuracy)

        elif (selection == 2 and clx_selection == 4):
            print("Classifying using Random Forest...")
            accuracy = randomForest(sorted_gain_list, num_features)
            sheet1.write(i, 6, accuracy)
        elif (selection == 3 and clx_selection == 4):
            print("Classifying using Random Forest...")
            accuracy = randomForest(sorted_chi2_list, num_features)
            sheet1.write(i, 6, accuracy)
        elif (selection == 4 and clx_selection == 4):
            print("Classifying using Random Forest...")
            accuracy = randomForest(sorted_reliefF_list, num_features)
            sheet1.write(i, 6, accuracy)

        else:
            print("Invalid Selection")
        print("End")
    book.save("results.xls")
Example #33
0
    return scores.mean()


def svm(X, y):
    from sklearn import svm
    scores = cross_val_score(svm.SVC(C=1, kernel='linear'),
                             X,
                             y,
                             cv=5,
                             scoring='accuracy')
    #     print(scores)
    return scores.mean()


if __name__ == '__main__':
    # load data
    data = read_csv('/Users/Bian/Desktop/mockData/StatiscalAnalysis.csv',
                    delimiter=",",
                    skiprows=0)
    data = data.to_numpy()
    X = data[:, 2:]
    y = data[:, 0].astype(int)
    # dimension reduction via LDA
    lda = LDA(n_components=2)
    X = lda.fit_transform(X, y)
    # compute model accuracy
    print('Decision tree accuracy: {0}'.format(decisionTree(X, y)))
    print('KNN accuracy: {0}'.format(knn(X, y)))
    print('Logistic regression accuracy: {0}'.format(logistic(X, y)))
    print('SVM accuracy: {0}'.format(svm(X, y)))
Example #34
0
#SVM

from sklearn import svm  # import the model library
from sklearn.model_selection import GridSearchCV

svc = svm.SVC()
parameters = {
    'kernel': ('linear', 'poly', 'rbf', 'sigmoid'),
    'degree': [1, 3, 5, 7],
    'gamma': ('scale', 'auto')
}

clf = GridSearchCV(svc, parameters)
clf.fit(x_train, y_train)
print(clf.best_params_)

model = svm.SVC(gamma='scale', degree=1, kernel='linear',
                C=1)  # setting model parameters
print("test accuracy: {} ".format(
    model.fit(x_train, y_train).score(x_test, y_test))
      )  # accuracy of the fitted model on the testing set
print("train accuracy: {} ".format(
    model.fit(x_train, y_train).score(x_train, y_train))
      )  # accuracy of the fitted model on the training set

#0.9801
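
# A minimal alternative sketch (assuming the same x_train/x_test/y_train/y_test as
# above): GridSearchCV refits the best parameter combination on the training data
# by default (refit=True), so the tuned estimator can be reused directly instead
# of re-fitting a hand-copied SVC.
best_model = clf.best_estimator_
print("best CV accuracy: {}".format(clf.best_score_))
print("test accuracy: {}".format(best_model.score(x_test, y_test)))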

#XGBoost

from xgboost import XGBClassifier
model = XGBClassifier()
parameters = {
    'max_depth': [2, 4, 6, 8, 10],
Example #35
0
def main():
    st.title("ABC Corp..")
    st.title("Automated Machine Learning Web (POC)")
    file_name = '' + datetime.now().strftime("%d%b%Y_%H%M%S%f") + '.csv'
    data_file = './DataDump/file' + file_name
    file_bytes = st.file_uploader("Upload a file")
    data_load_state = st.text("Upload your data")

    try:
        if file_bytes is not None:
            with open(data_file, mode='w', newline='') as f:
                print(file_bytes.getvalue().strip('\r\n'), file=f)
                data_load_state.text("Upload....Done!")
            #dataDF = pd.read_csv(data_file)

    except FileNotFoundError:
        st.error('File not found.')

    st.header("Data Exploration")
    st.sidebar.header("Data Exploration")

    X = ""
    y = ""
    X_train = ''
    X_test = ''
    y_train = ''
    y_test = ''
    y_pred = ''

    @st.cache
    def load_data():
        data = pd.read_csv(data_file)
        # st.write(data.head())
        return data

    if st.sidebar.checkbox("Show Data HEAD or TAIL"):
        select_option = st.radio("Select option", ['HEAD', 'TAIL'])
        if select_option == 'HEAD':
            st.write(load_data().head())
        elif select_option == "TAIL":
            st.write(load_data().tail())

    if st.sidebar.checkbox("Show Full Data"):
        st.write(load_data())
        data_load_state.text("Loading data....Done!")

    if st.sidebar.checkbox("Data Info"):
        st.text("Data Shape")
        st.write(load_data().shape)
        st.text("Data Columns")
        st.write(load_data().columns)
        st.text("Data Type")
        st.write(load_data().dtypes)
        st.text("Count of NaN values")
        st.write(load_data().isnull().any().sum())

    st.markdown("Select Target Column")
    try:
        if file_bytes is not None:
            all_columns = load_data().columns
            dataDF = load_data()
            target = st.selectbox("Select", all_columns)
            if dataDF[target].dtype == "object":
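                # LabelEncoder maps string class labels to integer codes so the
                # models below always see a numeric target column.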
                label_encoder = LabelEncoder()
                dataDF[target] = label_encoder.fit_transform(dataDF[target])

    except:
        st.markdown('File not found.')

    st.markdown("Auto Discard Columns")
    try:
        if file_bytes is not None:
            for column in dataDF:
                if dataDF[column].nunique() == dataDF.shape[0]:
                    dataDF.drop([column], axis=1, inplace=True)
            for column in dataDF:
                if 'name' in column.lower():
                    dataDF.drop([column], axis=1, inplace=True)

            st.text("Data Columns")
            st.write(dataDF.columns)
            st.text("Count of NaN values")
            st.write(dataDF.isnull().any().sum())
    except:
        st.markdown('File not found.')

    st.markdown("Preprocess Object Type Columns")
    try:
        if file_bytes is not None:
            obj_df = dataDF.select_dtypes(include=['object']).copy()
            dataDF = dataDF.select_dtypes(exclude=['object'])
            try:
                one_hot = pd.get_dummies(obj_df)  # ,drop_first=True)
            except Exception as e:
                print("There has been an exception: ", e)
                one_hot = pd.DataFrame()

            dataDF = pd.concat([one_hot, dataDF], axis=1)
    except:
        st.markdown('File not found.')

    sc = StandardScaler()  # note: not used below; MinMaxScaler handles feature scaling
    st.header("Split DataSet into Train and Test")
    st.sidebar.header("Split DataSet into Train and Test")

    st.markdown("Split")
    try:
        if file_bytes is not None:
            #print(dataDF.dtypes)
            X = dataDF.drop([target], axis=1)
            # X = X.apply(normalize)
            y = dataDF[target]
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=20)
    except:
        st.markdown('File not found.')

    st.markdown("Normalize Columns")
    try:
        if file_bytes is not None:
            from sklearn.preprocessing import MinMaxScaler
            norm = MinMaxScaler()
            X_train = norm.fit_transform(X_train)
            X_test = norm.transform(X_test)
            X = norm.transform(X)
    except:
        st.markdown('File not found.')

    if st.sidebar.checkbox("Show X_test,X_train,y_test,y_train"):
        st.write("X_train")
        st.write(X_train)
        st.write(X_train.shape)
        st.write("X_test")
        st.write(X_test)
        st.write(X_test.shape)
        st.write("y_train")
        st.write(y_train)
        st.write(y_train.shape)
        st.write("y_test")
        st.write(y_test)
        st.write(y_test.shape)

    def gradBoost(X, y):
        from sklearn.ensemble import GradientBoostingClassifier
        gradientBoosting = GradientBoostingClassifier()
        gradientBoosting.fit(X, y)
        return gradientBoosting

    def randForest(X, y):
        from sklearn.ensemble import RandomForestClassifier
        randomForest = RandomForestClassifier()
        randomForest.fit(X, y)
        return randomForest

    def svm(X, y):
        from sklearn import svm
        clf = svm.SVC()
        clf.fit(X, y)
        return clf

    def xgb(X, y):
        import xgboost as xgboost
        xg_reg = xgboost.XGBRegressor()
        xg_reg.fit(X, y)
        return xg_reg

    def linearReg(X, y):
        from sklearn.linear_model import LinearRegression
        lineReg = LinearRegression()
        lineReg.fit(X, y)
        return lineReg

    def lassoReg(X, y):
        from sklearn.linear_model import Lasso
        lasso = Lasso(alpha=0.01)
        lasso.fit(X, y)
        return lasso

    st.subheader("ML Algorithms")
    try:
        if file_bytes is not None:
            st.write("Available algorithms are:")
            st.write(
                "Binary Classification: GB Classifier, RF Classifier, SVM")
            st.write("Regression: OLS, XGB, Lasso Regression")
            if dataDF[target].nunique() == 2:
                st.header("Using Binary Classification Algorithms")
                GB = gradBoost(X_train, y_train)
                st.write(
                    'Accuracy of Gradient Boosting classifier on test set: {:.2f}'
                    .format(GB.score(X_test, y_test)))
                RF = randForest(X_train, y_train)
                st.write(
                    'Accuracy of Random Forest classifier on test set: {:.2f}'.
                    format(RF.score(X_test, y_test)))
                SVM = svm(X_train, y_train)
                st.write(
                    'Accuracy of SVM classifier on test set: {:.2f}'.format(
                        SVM.score(X_test, y_test)))
            elif dataDF[target].nunique() / dataDF[target].count() < .1:
                st.header("Using Multi-Class Classification Algorithms")
                GB = gradBoost(X_train, y_train)
                st.write(
                    'Accuracy of Gradient Boosting classifier on test set: {:.2f}'
                    .format(GB.score(X_test, y_test)))
                st.write(classification_report(y_test, GB.predict(X_test)))
                RF = randForest(X_train, y_train)
                st.write(
                    'Accuracy of Random Forest classifier on test set: {:.2f}'.
                    format(RF.score(X_test, y_test)))
                st.write(classification_report(y_test, RF.predict(X_test)))
            else:
                st.header("Using Regression Algorithms")
                from sklearn.metrics import mean_squared_error, r2_score
                LReg = linearReg(X_train, y_train)
                st.write(
                    'R-squared value for Linear Regression predictor on test set: {:.2f}%'
                    .format(100 * r2_score(y_test, LReg.predict(X_test))))
                XGB = xgb(X_train, y_train)
                st.write(
                    'R-squared value for eXtreme Gradient Boosting Regression predictor on test set: {:.2f}%'
                    .format(100 * r2_score(y_test, XGB.predict(X_test))))
                LassReg = lassoReg(X_train, y_train)
                st.write(
                    'R-squared value for Lasso Regression predictor on test set: {:.2f}%'
                    .format(100 * r2_score(y_test, LassReg.predict(X_test))))
    except:
        st.markdown('File not found.')

    st.header("Run Prediction on Test Set")
    st.subheader("Select Desired Algorithm")
    try:
        if file_bytes is not None:
            if dataDF[target].nunique() == 2:
                selectML = st.selectbox("Select", [
                    'Gradient Boosting classifier', 'Random Forest classifier',
                    'SVM classifier'
                ])
                if selectML == 'Gradient Boosting classifier':
                    dML = GB
                elif selectML == 'Random Forest classifier':
                    dML = RF
                elif selectML == 'SVM classifier':
                    dML = SVM
            elif dataDF[target].nunique() / dataDF[target].count() < .1:
                selectML = st.selectbox("Select", [
                    'Gradient Boosting classifier', 'Random Forest classifier'
                ])
                if selectML == 'Gradient Boosting classifier':
                    dML = GB
                elif selectML == 'Random Forest classifier':
                    dML = RF
            else:
                selectML = st.selectbox("Select", [
                    'Linear Regression predictor',
                    'eXtreme Gradient Boosting Regression predictor',
                    'Lasso Regression predictor'
                ])
                if selectML == 'Linear Regression predictor':
                    dML = LReg
                elif selectML == 'eXtreme Gradient Boosting Regression predictor':
                    dML = XGB
                elif selectML == 'Lasso Regression predictor':
                    dML = LassReg

            data_test = './DataDump/file' + datetime.now().strftime(
                "%d%b%Y_%H%M%S%f") + '.csv'
            file_test = st.file_uploader("Upload test file")
            try:
                if file_test is not None:
                    with open(data_test, mode='w', newline='') as f:
                        print(file_test.getvalue().strip('\r\n'), file=f)
                        data_load_state.text("Upload....Done!")
                    dataDF1 = pd.read_csv(data_test)
            except FileNotFoundError:
                st.error('File not found.')
    except:
        st.markdown('File not found.')

    st.subheader("PREDICT")
    try:
        if file_bytes is not None:

            for column in dataDF1:
                if dataDF1[column].nunique() == dataDF1.shape[0]:
                    dataDF1.drop([column], axis=1, inplace=True)
            for column in dataDF1:
                if 'name' in column.lower():
                    dataDF1.drop([column], axis=1, inplace=True)

            obj_df1 = dataDF1.select_dtypes(include=['object']).copy()
            dataDF1 = dataDF1.select_dtypes(exclude=['object'])
            try:
                one_hot1 = pd.get_dummies(obj_df1)  # ,drop_first=True)
            except Exception as e:
                print("There has been an exception: ", e)
                one_hot1 = pd.DataFrame()

            dataDF1 = pd.concat([one_hot1, dataDF1], axis=1)

            X1 = dataDF1.drop([target], axis=1)
            y1 = dataDF1[target]
            X1 = norm.transform(X1)

            st.write('Accuracy of Selected Algorithm on test Dataset: {:.2f}'.
                     format(dML.score(X1, y1)))
    except:
        st.markdown('File not found.')
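
# To try the Streamlit app locally (assuming this script is saved as, e.g., app.py):
#   streamlit run app.py
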
def main():

    data0 = pd.read_csv('data.csv')
    data1 = pd.read_csv('data1.csv')
    data2 = pd.read_csv('data2.csv')
    data3 = pd.read_csv('data3.csv')
    #frames = [data0, data1]
    #data = pd.concat(frames)
    # shuffle the data
    # print(list(data.columns))
    #print('Shuffling Data')
    data0 = data0.sample(frac=1).reset_index(drop=True)
    data1 = data1.sample(frac=1).reset_index(drop=True)
    data2 = data2.sample(frac=1).reset_index(drop=True)
    data3 = data3.sample(frac=1).reset_index(drop=True)

    # Calculate length of 30% data for testing
    val_text = len(data0.index) * (30.0 / 100.0)
    val1_text = len(data1.index) * (30.0 / 100.0)
    val2_text = len(data2.index) * (30.0 / 100.0)
    val3_text = len(data3.index) * (30.0 / 100.0)

    # divide training and test data
    test_data0 = data0.tail(int(val_text)).reset_index(drop=True)
    training_data0 = data0.head(len(data0.index) - int(val_text))
    test_data1 = data1.tail(int(val1_text)).reset_index(drop=True)
    training_data1 = data1.head(len(data1.index) - int(val1_text))
    test_data2 = data2.tail(int(val2_text)).reset_index(drop=True)
    training_data2 = data2.head(len(data2.index) - int(val2_text))
    test_data3 = data3.tail(int(val3_text)).reset_index(drop=True)
    training_data3 = data3.head(len(data3.index) - int(val3_text))
    #test_data = data.tail(1).reset_index(drop=True)
    #training_data = data.head(1)

    # set labels
    # labels for svm
    # print(list(training_data.columns))
    training_labels0 = training_data0['labels'].values
    test_labels0 = test_data0['labels'].values
    training_labels1 = training_data1['labels'].values
    test_labels1 = test_data1['labels'].values
    training_labels2 = training_data2['labels'].values
    test_labels2 = test_data2['labels'].values
    training_labels3 = training_data3['labels'].values
    test_labels3 = test_data3['labels'].values
    # labels for tf classifiers

    training_features_final0 = []
    test_features_final0 = []
    training_features_final1 = []
    test_features_final1 = []
    training_features_final2 = []
    test_features_final2 = []
    training_features_final3 = []
    test_features_final3 = []

    print("Reading Data")
    training_features_final0 = readData(training_data0)
    test_features_final0 = readData(test_data0)
    training_features_final1 = readData(training_data1)
    test_features_final1 = readData(test_data1)
    training_features_final2 = readData(training_data2)
    test_features_final2 = readData(test_data2)
    training_features_final3 = readData(training_data3)
    test_features_final3 = readData(test_data3)

    training_features_final = np.concatenate(
        (training_features_final0, training_features_final1, training_features_final2, training_features_final3), axis=0)
    test_features_final = np.concatenate(
        (test_features_final0, test_features_final1, test_features_final2, test_features_final3), axis=0)
    training_labels = np.concatenate(
        (training_labels0, training_labels1, training_labels2, training_labels3), axis=0)
    test_labels = np.concatenate(
        (test_labels0, test_labels1, test_labels2, test_labels3), axis=0)
    training_X = np.array(training_labels)
    test_X = np.array(test_labels)
    training_X = np.reshape(training_X, (len(training_X), 1))
    test_X = np.reshape(test_X, (len(test_X), 1))

    print("Reshaping Data")
    train_cnn = reshapeData(training_features_final)
    test_cnn = reshapeData(test_features_final)
    training_features_final = reshapeList(training_features_final)
    test_features_final = reshapeList(test_features_final)
    n_dim = training_features_final.shape[1]

    while(True):

        print('Please choose any one of the options:')
        print('Press 1 for Support Vector Machine')
        print('Press 2 for Logistic Regression')
        print('Press 3 for Naive Bayes Classifier')
        print('Press 4 for Neural Network')
        print('Press 5 for Convolutional Neural Network')
        print('Press 6 to Exit')
        print('Press 7 for all')

        choice = int(input())
        valid_choice = False

        if choice == 6:
            break

        if choice == 1 or choice == 7:
            '''
            ###SVM classifier using sklearn
            '''
            valid_choice = True
            svm(training_features_final, training_labels,
                test_features_final, test_labels)

        if choice == 2 or choice == 7:
            '''
            ###Logistic Regression using TensorFlow
            '''
            valid_choice = True
            #print("Initiating Logistics Regression")
            learning_rate = 0.1
            training_epochs = 28

            X = tf.placeholder(tf.float32, [None, n_dim])
            Y = tf.placeholder(tf.float32, [None, 1])
            W = tf.Variable(tf.ones([n_dim, 2]))

            logReg(training_features_final, training_X, test_features_final,
                   test_X, learning_rate, training_epochs, X, Y, W)

        if choice == 4 or choice == 7:
            valid_choice = True
            #print("Initialising Neural Network")
            neuralNetKeras(training_features_final, training_X,
                           test_features_final, test_labels, n_dim)

        if choice == 5 or choice == 7:
            valid_choice = True
            #print("Initialising convolutional neural network ")
            cnnKeras(train_cnn, training_X,
                     test_cnn, test_X, n_dim)

        if choice == 3 or choice == 7:
            valid_choice = True
            #print("Initialising Naive Bayes")
            naiveBayes(training_features_final, training_labels,
                       test_features_final, test_labels)

        if not valid_choice:
            print('Wrong Choice, Please Choose Again')

        if choice == 7:
            break

    sys.exit()