def KNN(xTrain, xTest, yTrain, yTest):
    """Grid-search a k-nearest-neighbours classifier and report on it.

    Every odd ``n_neighbors`` value from 1 to 29 is tried with 5-fold
    cross-validation on the training split.  The winning value and the
    wall-clock duration of the search are printed, then the fitted search
    object and the test split are handed to ``generateReport``.
    """
    print("---KNN report---")
    # A step of 2 starting at 1 yields exactly the odd values 1, 3, ..., 29.
    odd_neighbor_counts = list(np.arange(1, 30, 2))
    search = GridSearchCV(
        estimator=KNeighborsClassifier(),
        param_grid={'n_neighbors': odd_neighbor_counts},
        cv=5,
        n_jobs=CPU_CORES,
    )
    started = time.time()
    search.fit(xTrain, yTrain)
    duration = time.time() - started
    print("best number of neighbors:{}".format(
        search.best_params_['n_neighbors']))
    print("training time: {}s".format(duration))
    generateReport(search, xTest, yTest)
def NeuralNets(xTrain, xTest, yTrain, yTest):
    """Grid-search the initial learning rate of an MLP classifier.

    A single-hidden-layer (640 units) ``MLPClassifier`` trained with plain
    SGD (constant learning rate, no Nesterov momentum, early stopping) is
    tuned over ``learning_rate_init`` in {0.001, 0.01, 0.1} using 5-fold
    cross-validation on the training split.  The best learning rate and
    the wall-clock duration of the search are printed, then the fitted
    search object and the test split are handed to ``generateReport``.
    """
    print("---Neural Nets report---")
    # Same values as the former 10e-4 / 10e-3 / 10e-2, written so the
    # exponent matches the magnitude (10e-4 is 0.001, not 0.0001).
    param_grid = {'learning_rate_init': [1e-3, 1e-2, 1e-1]}
    # The optimiser keyword is ``solver``; ``algorithm`` only existed in
    # pre-release scikit-learn and raises TypeError on released versions.
    clf = MLPClassifier(hidden_layer_sizes=(640, ),
                        solver='sgd',
                        early_stopping=True,
                        nesterovs_momentum=False,
                        learning_rate='constant')
    CV = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5)
    t = time.time()
    CV.fit(xTrain, yTrain)
    elapsed_time = time.time() - t
    print("best learning_rate_init " +
          "{0:.3f}".format(CV.best_params_['learning_rate_init']))
    print("training time: " + str(elapsed_time) + 's')
    generateReport(CV, xTest, yTest)
def RandomForest(xTrain, xTest, yTrain, yTest):
    """Grid-search ``max_features`` for a 100-tree random forest.

    Candidate values are taken from a fixed list and limited to those
    strictly smaller than the length of the first training row.  The
    search uses 5-fold cross-validation on the training split; the best
    ``max_features`` and the wall-clock duration of the search are
    printed, then the fitted search object and the test split are handed
    to ``generateReport``.
    """
    print("---Random Forest report---")
    forest = RandomForestClassifier(n_estimators=100)
    # Width of one sample; assumes xTrain supports integer row indexing.
    row_width = len(xTrain[0])
    candidates = [
        count for count in (1, 2, 4, 6, 8, 12, 16, 20) if count < row_width
    ]
    search = GridSearchCV(
        estimator=forest,
        param_grid={'max_features': candidates},
        cv=5,
        n_jobs=CPU_CORES,
    )
    started = time.time()
    search.fit(xTrain, yTrain)
    duration = time.time() - started
    print("best number of max_features:{}".format(
        search.best_params_['max_features']))
    print("training time: {}s".format(duration))
    generateReport(search, xTest, yTest)
def BoostedDecisionTree(xTrain, xTest, yTrain, yTest):
    """Grid-search the number of estimators of an AdaBoost classifier.

    ``n_estimators`` is tried at every power of two from 2 to 2048 with
    5-fold cross-validation on the training split.  The best value and
    the wall-clock duration of the search are printed, then the fitted
    search object and the test split are handed to ``generateReport``.
    """
    print("---Boosted Decision Tree report---")
    # 2**1 .. 2**11  ->  2, 4, 8, ..., 2048
    estimator_counts = [2 ** exponent for exponent in range(1, 12)]

    search = GridSearchCV(
        estimator=AdaBoostClassifier(),
        param_grid={'n_estimators': estimator_counts},
        cv=5,
        n_jobs=CPU_CORES,
    )
    started = time.time()
    search.fit(xTrain, yTrain)
    duration = time.time() - started
    print("best number of n_estimators:{}".format(
        search.best_params_['n_estimators']))
    print("training time: {}s".format(duration))
    generateReport(search, xTest, yTest)
def linearSVC(xTrain, xTest, yTrain, yTest):
    """Grid-search the regularisation strength ``C`` of a ``LinearSVC``.

    ``C`` is tried over {0.1, 1, 2, ..., 7} with 5-fold cross-validation
    on the training split.  The best ``C`` and the wall-clock duration of
    the search are printed, then the fitted search object and the test
    split are handed to ``generateReport``.
    """
    print("---LinearSVM report---")

    # Unlike SVC, LinearSVC has no "-1 means unlimited" convention for
    # max_iter: a negative value is rejected by current scikit-learn and
    # gives the solver no iterations on older releases.  Use the library
    # default of 1000 instead.
    svc = LinearSVC(C=1.0,
                    class_weight=None,
                    max_iter=1000,
                    random_state=None,
                    tol=0.001,
                    verbose=False)

    param_grid = {'C': [0.1, 1, 2, 3, 4, 5, 6, 7]}

    CV = GridSearchCV(svc, param_grid=param_grid, cv=5, n_jobs=CPU_CORES)
    t = time.time()
    CV.fit(xTrain, yTrain)
    elapsed_time = time.time() - t
    print("best number of C:" + str(CV.best_params_['C']))
    print("training time: " + str(elapsed_time) + 's')
    generateReport(CV, xTest, yTest)
def XGBoost(xTrain, xTest, yTrain, yTest):
    """Grid-search the number of estimators of an XGBoost classifier.

    All four inputs are first converted to NumPy arrays.  ``n_estimators``
    is then tried at every power of two from 2 to 2048 with 5-fold
    cross-validation on the training split.  The best value and the
    wall-clock duration of the search are printed, then the fitted search
    object and the (converted) test split are handed to
    ``generateReport``.
    """
    # Convert everything up front so the search and the report both work
    # on arrays.
    xTrain, xTest, yTrain, yTest = (
        np.array(data) for data in (xTrain, xTest, yTrain, yTest)
    )

    print("---XGBoost report---")
    # 2**1 .. 2**11  ->  2, 4, 8, ..., 2048
    estimator_counts = [2 ** exponent for exponent in range(1, 12)]

    search = GridSearchCV(
        estimator=xgb.XGBClassifier(),
        param_grid={'n_estimators': estimator_counts},
        cv=5,
    )
    started = time.time()
    search.fit(xTrain, yTrain)
    duration = time.time() - started
    print("best number of n_estimators:{}".format(
        search.best_params_['n_estimators']))
    print("training time: {}s".format(duration))
    generateReport(search, xTest, yTest)
def SVM(xTrain, xTest, yTrain, yTest):
    """Grid-search the regularisation strength ``C`` of a linear-kernel SVC.

    ``C`` is tried over {0.1, 1, 2, ..., 7} with 5-fold cross-validation
    on the training split.  The best ``C`` and the wall-clock duration of
    the search are printed, then the fitted search object and the test
    split are handed to ``generateReport``.
    """
    print("---SVM with Linear kernel (LibSVM) report---")

    # Base estimator configuration; C here is only the starting value and
    # is overridden by the grid below.
    svc_settings = {
        'C': 1.0,
        'cache_size': 200,
        'class_weight': None,
        'coef0': 0.0,
        'gamma': 'auto',
        'kernel': 'linear',
        'max_iter': -1,
        'probability': False,
        'random_state': None,
        'shrinking': True,
        'tol': 0.001,
        'verbose': False,
    }
    base_model = svm.SVC(**svc_settings)

    c_values = [0.1, 1, 2, 3, 4, 5, 6, 7]
    search = GridSearchCV(base_model,
                          param_grid={'C': c_values},
                          cv=5,
                          n_jobs=CPU_CORES)
    started = time.time()
    search.fit(xTrain, yTrain)
    duration = time.time() - started
    print("best number of C:{}".format(search.best_params_['C']))
    print("training time: {}s".format(duration))
    generateReport(search, xTest, yTest)