示例#1
0
import pandas
import matplotlib.pyplot as plt

from sklearn import linear_model
from sklearn import model_selection
from sklearn import svm

# Load dataset.
# Fix: `pandas` and `plt` were used below without ever being imported.
names = ['QIN', 'TIN', 'pHIN', 'CondIN', 'CODIN', 'SSIN', 'BOD5IN']
dataset = pandas.read_csv('aritma.csv', names=names)

# Split-out validation dataset.
# NOTE(review): x keeps every column except the last, so y (column index 1,
# 'TIN') is also contained inside x — confirm this overlap is intentional.
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 1].values
test_rate = 0.25
xTrain, xTest, yTrain, yTest = model_selection.train_test_split(
    x, y, test_size=test_rate, random_state=0)

# (name, estimator) pairs to compare on the same split.
classifiers = [('SVM', svm.SVR()), ('SGDRegressor', linear_model.SGDRegressor()),
               ('PassiveAggressiveRegressor', linear_model.PassiveAggressiveRegressor())]

for name, model in classifiers:
    clf = model
    clf.fit(xTrain, yTrain)
    yPredicted = clf.predict(xTest)
    plt.figure()
    print("yPredicted", yPredicted)
    # NOTE: the title reports the TRAIN score even though the plot shows
    # the test-set predictions.
    plt.title(name + ' Test' + ' Score: ' + str(clf.score(xTrain, yTrain)))
    plt.plot(yPredicted, color='red', label='predicted', marker='.')
    plt.plot(yTest, color='blue', label='Actual', marker='*')
    plt.legend()
    plt.savefig(name + '_Test' + '_plot.svg', dpi=300)
    plt.close()  # fix: release the figure so figures don't accumulate in memory
    # Fix: context manager closes the log file even if write() raises.
    with open('modeller.txt', 'a') as file:
        file.write("\n\nModel Adı: {}, \n {}".format(name, model))
示例#2
0
                        X_train=train,
                        y=target,
                        X_test=test,
                        nfolds=5,
                        seed=rnd,
                        category="classifier",
                        filename="PasAggC",
                        setused=setused,
                        tag="2")

#%%

# Level 2 Score:

# Level-2 blend: Passive-Aggressive regression on the stacked features.
# Fix: the `n_iter` constructor argument was deprecated in scikit-learn 0.19
# and removed in 0.21; `max_iter` is the supported name for the epoch budget.
clf = linear_model.PassiveAggressiveRegressor(max_iter=100,
                                              random_state=rnd,
                                              verbose=0)

# blend_proba (project helper defined elsewhere) fits `clf` out-of-fold on
# `train`/`target` and writes blended predictions under filename "PasAggR".
model_sum = blend_proba(clf=clf,
                        X_train=train,
                        y=target,
                        X_test=test,
                        nfolds=5,
                        seed=rnd,
                        category="regressor",
                        filename="PasAggR",
                        setused=setused,
                        tag="1")

#%%
示例#3
0
    def generate_prediction(cls, race):
        """Generate a prediction for the specified race.

        Looks up (or builds and caches) a predictor for the race's "segment"
        (entry conditions + track condition), then applies it to the race's
        seeds to produce an ordered list of runner-number groups.

        :param race: race object; supports item access (``race['_id']``,
            ``race['entry_conditions']``, ``race['track_condition']``),
            plus ``race.meet`` and ``race.seeds`` (project types — exact
            shapes not visible in this excerpt).
        :return: dict with keys race_id, earliest_date, prediction_version,
            seed_version, results, score, train_seeds, test_seeds, estimator.
            The last five stay None when no usable predictor exists.
        """

        # Result skeleton; the None fields are filled in only when a usable
        # predictor is found below.
        prediction = {
            'race_id': race['_id'],
            'earliest_date': cls.get_earliest_date(),
            'prediction_version': cls.PREDICTION_VERSION,
            'seed_version': Seed.SEED_VERSION,
            'results': None,
            'score': None,
            'train_seeds': None,
            'test_seeds': None,
            'estimator': None
        }

        predictor = None
        generate_predictor = False

        # Cache key: the race's entry conditions plus its track condition.
        segment = tuple(race['entry_conditions']) + tuple(
            [race['track_condition']])
        # Cache protocol: a None entry means "another thread is currently
        # generating the predictor for this segment"; a missing entry means
        # nobody has tried yet.
        with cls.predictor_cache_lock:
            if segment in cls.predictor_cache:
                predictor = cls.predictor_cache[segment]
            else:
                cls.predictor_cache[segment] = None
                generate_predictor = True

        if generate_predictor:

            # Historical races with identical conditions that started before
            # this race's meet date form the training pool.
            similar_races = pyracing.Race.find({
                'entry_conditions':
                race['entry_conditions'],
                'track_condition':
                race['track_condition'],
                'start_time': {
                    '$lt': race.meet['date']
                }
            })
            # Need at least 1/TEST_SIZE races so the test split is non-empty.
            if len(similar_races) >= (1 / cls.TEST_SIZE):

                try:

                    # NOTE(review): `cross_validation` is the pre-0.18
                    # sklearn module (removed in 0.20); modern code would use
                    # sklearn.model_selection.train_test_split.
                    train_races, test_races = cross_validation.train_test_split(
                        similar_races, test_size=cls.TEST_SIZE)

                    # Seeds without a recorded result are excluded from both
                    # the training and the test set.
                    train_X = []
                    train_y = []
                    for train_race in train_races:
                        for seed in train_race.seeds:
                            if seed['result'] is not None:
                                train_X.append(seed.normalized_data)
                                train_y.append(seed['result'])

                    test_X = []
                    test_y = []
                    for test_race in test_races:
                        for seed in test_race.seeds:
                            if seed['result'] is not None:
                                test_X.append(seed.normalized_data)
                                test_y.append(seed['result'])

                    predictor = {
                        'classifier': None,
                        'score': None,
                        'train_seeds': len(train_y),
                        'test_seeds': len(test_y),
                        'estimator': None
                    }
                    # LinearSVR setup: dual formulation only when there are
                    # fewer samples than features.
                    # NOTE(review): raises IndexError when train_X is empty —
                    # possible if no seed in the split has a result.
                    dual = len(train_X) < len(train_X[0])
                    kernel = 'linear'
                    loss = 'epsilon_insensitive'
                    if not dual:
                        loss = 'squared_epsilon_insensitive'
                    # Try every candidate estimator and keep the one with the
                    # best score on the held-out test seeds.
                    for estimator in (
                            linear_model.BayesianRidge(),
                            linear_model.ElasticNet(),
                            linear_model.LinearRegression(),
                            linear_model.LogisticRegression(),
                            linear_model.OrthogonalMatchingPursuit(),
                            linear_model.PassiveAggressiveRegressor(),
                            linear_model.Perceptron(), linear_model.Ridge(),
                            linear_model.SGDRegressor(),
                            svm.SVR(kernel=kernel),
                            svm.LinearSVR(dual=dual,
                                          loss=loss), svm.NuSVR(kernel=kernel),
                            tree.DecisionTreeRegressor(),
                            tree.ExtraTreeRegressor()):
                        logging.debug(
                            'Trying {estimator} for {segment}'.format(
                                estimator=estimator.__class__.__name__,
                                segment=segment))

                        try:
                            # Feature selection is driven by the same
                            # estimator that is then fitted on the reduced
                            # feature set.
                            classifier = pipeline.Pipeline([
                                ('feature_selection',
                                 feature_selection.SelectFromModel(
                                     estimator, 'mean')),
                                ('regression', estimator)
                            ])
                            classifier.fit(train_X, train_y)
                            score = classifier.score(test_X, test_y)

                            # Keep the first successful fit, or any later fit
                            # with a strictly better score.
                            if predictor['classifier'] is None or predictor[
                                    'score'] is None or score > predictor[
                                        'score']:
                                logging.debug(
                                    'Using {estimator} ({score}) for {segment}'
                                    .format(
                                        estimator=estimator.__class__.__name__,
                                        score=score,
                                        segment=segment))
                                predictor['classifier'] = classifier
                                predictor['score'] = score
                                predictor[
                                    'estimator'] = estimator.__class__.__name__

                        # Deliberately broad: any estimator that fails to fit
                        # or score is logged and skipped so the sweep can
                        # continue with the remaining candidates.
                        except BaseException as e:
                            logging.debug(
                                'Caught exception while trying {estimator} for {segment}: {exception}'
                                .format(estimator=estimator.__class__.__name__,
                                        segment=segment,
                                        exception=e))
                            continue

                    cls.predictor_cache[segment] = predictor

                except:

                    # On any failure drop the in-progress marker so a later
                    # call can retry, then propagate the original error.
                    del cls.predictor_cache[segment]
                    raise

            else:

                # Not enough similar races: remove the marker so a later call
                # (with more history) may try again.
                del cls.predictor_cache[segment]

        else:

            # Another thread is generating this segment's predictor: poll the
            # cache until a value appears or the entry is deleted (KeyError).
            # NOTE(review): the sleep runs even after a successful read, and
            # the loop spins (with 10 s naps) for as long as the entry stays
            # None — confirm this wait-for-peer behavior is intended.
            while predictor is None:
                try:
                    predictor = cls.predictor_cache[segment]
                    time.sleep(10)
                except KeyError:
                    break

        if predictor is not None:

            # A negative score means the ranking is inverted: sort results in
            # reverse and report the magnitude as the prediction score.
            reverse = False
            if 'score' in predictor and predictor['score'] is not None:
                reverse = predictor['score'] < 0
                prediction['score'] = abs(predictor['score'])

            if 'classifier' in predictor and predictor[
                    'classifier'] is not None:
                # Group runner numbers by their raw predicted value, then
                # emit the groups in score order (possibly reversed).
                raw_results = {}
                for seed in race.seeds:
                    raw_result = predictor['classifier'].predict(
                        numpy.array(seed.normalized_data).reshape(1, -1))[0]
                    if raw_result is not None:
                        if not raw_result in raw_results:
                            raw_results[raw_result] = []
                        raw_results[raw_result].append(seed.runner['number'])
                for key in sorted(raw_results.keys(), reverse=reverse):
                    if prediction['results'] is None:
                        prediction['results'] = []
                    prediction['results'].append(
                        sorted([number for number in raw_results[key]]))

            if 'train_seeds' in predictor:
                prediction['train_seeds'] = predictor['train_seeds']

            if 'test_seeds' in predictor:
                prediction['test_seeds'] = predictor['test_seeds']

            if 'estimator' in predictor:
                prediction['estimator'] = predictor['estimator']

        return prediction
示例#4
0
        regression(linear_model.ARDRegression()),
        regression(linear_model.BayesianRidge()),
        regression(linear_model.ElasticNet(random_state=RANDOM_SEED)),
        regression(linear_model.ElasticNetCV(random_state=RANDOM_SEED)),
        regression(linear_model.HuberRegressor()),
        regression(linear_model.Lars()),
        regression(linear_model.LarsCV()),
        regression(linear_model.Lasso(random_state=RANDOM_SEED)),
        regression(linear_model.LassoCV(random_state=RANDOM_SEED)),
        regression(linear_model.LassoLars()),
        regression(linear_model.LassoLarsCV()),
        regression(linear_model.LassoLarsIC()),
        regression(linear_model.LinearRegression()),
        regression(linear_model.OrthogonalMatchingPursuit()),
        regression(linear_model.OrthogonalMatchingPursuitCV()),
        regression(linear_model.PassiveAggressiveRegressor(
            random_state=RANDOM_SEED)),
        regression(linear_model.Ridge(random_state=RANDOM_SEED)),
        regression(linear_model.RidgeCV()),
        regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)),

        # Logistic Regression
        classification(linear_model.LogisticRegression(
            random_state=RANDOM_SEED)),
        classification(linear_model.LogisticRegressionCV(
            random_state=RANDOM_SEED)),
        classification(linear_model.RidgeClassifier(random_state=RANDOM_SEED)),
        classification(linear_model.RidgeClassifierCV()),
        classification(linear_model.SGDClassifier(random_state=RANDOM_SEED)),

        classification_binary(linear_model.LogisticRegression(
示例#5
0
def run_simple_model(train_x, train_y, dev_x, dev_y, test_x, test_y, model_type, out_dir=None, class_weight=None):
    """Train the sklearn model named by *model_type* and report test accuracy.

    Parameters
    ----------
    train_x, train_y : training features / labels.
    dev_x, dev_y : accepted for interface compatibility; currently unused.
    test_x, test_y : test features / labels.
    model_type : str
        Key selecting the estimator (see ``factories`` below).
    out_dir : str or None
        When given, rounded test predictions are written to
        ``<out_dir>/preds/best_test_pred.txt``.
    class_weight : forwarded to the classifiers that accept it.

    Returns
    -------
    float
        Accuracy on the test set (predictions are rounded first).

    Raises
    ------
    NotImplementedError
        If *model_type* is not a known model name.
    """
    from sklearn import datasets, neighbors, linear_model, svm

    totalTime = 0

    # One zero-argument factory per supported model name; replaces the
    # original 40-branch if/elif chain.  Lambdas keep the attribute lookup
    # lazy, so names that only exist in old scikit-learn releases (noted
    # below) don't break the other entries on modern versions.
    factories = {
        'ARDRegression': lambda: linear_model.ARDRegression(),
        'BayesianRidge': lambda: linear_model.BayesianRidge(),
        'ElasticNet': lambda: linear_model.ElasticNet(),
        'ElasticNetCV': lambda: linear_model.ElasticNetCV(),
        'HuberRegressor': lambda: linear_model.HuberRegressor(),
        'Lars': lambda: linear_model.Lars(),
        'LarsCV': lambda: linear_model.LarsCV(),
        'Lasso': lambda: linear_model.Lasso(),
        'LassoCV': lambda: linear_model.LassoCV(),
        'LassoLars': lambda: linear_model.LassoLars(),
        'LassoLarsCV': lambda: linear_model.LassoLarsCV(),
        'LassoLarsIC': lambda: linear_model.LassoLarsIC(),
        'LinearRegression': lambda: linear_model.LinearRegression(),
        'LogisticRegression': lambda: linear_model.LogisticRegression(class_weight=class_weight),
        'LogisticRegressionCV': lambda: linear_model.LogisticRegressionCV(class_weight=class_weight),
        'MultiTaskLasso': lambda: linear_model.MultiTaskLasso(),
        'MultiTaskElasticNet': lambda: linear_model.MultiTaskElasticNet(),
        'MultiTaskLassoCV': lambda: linear_model.MultiTaskLassoCV(),
        'MultiTaskElasticNetCV': lambda: linear_model.MultiTaskElasticNetCV(),
        'OrthogonalMatchingPursuit': lambda: linear_model.OrthogonalMatchingPursuit(),
        'OrthogonalMatchingPursuitCV': lambda: linear_model.OrthogonalMatchingPursuitCV(),
        'PassiveAggressiveClassifier': lambda: linear_model.PassiveAggressiveClassifier(class_weight=class_weight),
        'PassiveAggressiveRegressor': lambda: linear_model.PassiveAggressiveRegressor(),
        'Perceptron': lambda: linear_model.Perceptron(class_weight=class_weight),
        # NOTE(review): RandomizedLasso / RandomizedLogisticRegression were
        # removed from scikit-learn (>= 0.21); these entries only work on
        # old releases.
        'RandomizedLasso': lambda: linear_model.RandomizedLasso(),
        'RandomizedLogisticRegression': lambda: linear_model.RandomizedLogisticRegression(),
        'RANSACRegressor': lambda: linear_model.RANSACRegressor(),
        'Ridge': lambda: linear_model.Ridge(),
        'RidgeClassifier': lambda: linear_model.RidgeClassifier(class_weight=class_weight),
        'RidgeClassifierCV': lambda: linear_model.RidgeClassifierCV(class_weight=class_weight),
        'RidgeCV': lambda: linear_model.RidgeCV(),
        'SGDClassifier': lambda: linear_model.SGDClassifier(class_weight=class_weight),
        'SGDRegressor': lambda: linear_model.SGDRegressor(),
        'TheilSenRegressor': lambda: linear_model.TheilSenRegressor(),
        # NOTE(review): the entries below are module-level *functions*, not
        # estimator classes; calling them with no arguments raises, so these
        # branches cannot succeed.  Kept only for interface compatibility
        # with the original chain.
        'lars_path': lambda: linear_model.lars_path(),
        'lasso_path': lambda: linear_model.lasso_path(),
        'lasso_stability_path': lambda: linear_model.lasso_stability_path(),
        'logistic_regression_path': lambda: linear_model.logistic_regression_path(class_weight=class_weight),
        'orthogonal_mp': lambda: linear_model.orthogonal_mp(),
        'orthogonal_mp_gram': lambda: linear_model.orthogonal_mp_gram(),
        'LinearSVC': lambda: svm.LinearSVC(class_weight=class_weight),
        'SVC': lambda: svm.SVC(class_weight=class_weight, degree=3),
    }

    startTrainTime = time()
    logger.info("Start training...")
    if model_type not in factories:
        raise NotImplementedError('Model not implemented')
    model = factories[model_type]().fit(train_x, train_y)
    logger.info("Finished training.")
    endTrainTime = time()
    trainTime = endTrainTime - startTrainTime
    logger.info("Training time : %d seconds" % trainTime)

    logger.info("Start predicting train set...")
    train_pred_y = model.predict(train_x)
    logger.info("Finished predicting train set.")
    logger.info("Start predicting test set...")
    test_pred_y = model.predict(test_x)
    logger.info("Finished predicting test set.")
    endTestTime = time()
    testTime = endTestTime - endTrainTime
    logger.info("Testing time : %d seconds" % testTime)
    totalTime += trainTime + testTime

    # Round predictions so regressor outputs can be scored with accuracy.
    train_pred_y = np.round(train_pred_y)
    test_pred_y = np.round(test_pred_y)

    # Fix: out_dir defaults to None but was concatenated unconditionally,
    # crashing with a TypeError; only write when a directory was supplied.
    if out_dir is not None:
        np.savetxt(out_dir + '/preds/best_test_pred' + '.txt', test_pred_y, fmt='%i')

    # Fix: compute the test accuracy once instead of twice.
    train_acc = accuracy_score(train_y, train_pred_y)
    test_acc = accuracy_score(test_y, test_pred_y)
    logger.info('[TRAIN] Acc: %.3f' % train_acc)
    logger.info('[TEST]  Acc: %.3f' % test_acc)

    return test_acc
def predict(train_list, train_result, test_list, method_list, **kwargs):
    """Fit one model chosen from *method_list* and predict on *test_list*.

    Despite the plural name, only the FIRST method matched by the fixed
    if/elif priority order below is used — not every entry of method_list.
    Classification-style branches work on binned targets and convert the
    predicted bin indices back to values; regression branches fit the raw
    targets directly.

    :param train_list: training inputs.
    :param train_result: training targets (possibly multi-output).
    :param test_list: inputs to predict for.
    :param method_list: container of method names; tested with ``in``.
    :param kwargs: must include ``from_bins_idx`` / ``to_bins_idx`` (value
        <-> bin-index converters) plus the hyper-parameters used by the
        selected branch (e.g. ``k``, ``max_depth``, ``n_estimators``,
        ``n_jobs``, ``C``, ``epsilon``, ``par_C``, ``par_eps``, ``nthread``).
    :return: numpy array wrapping the single prediction that was produced.
    """

    # Fit `model` once per output column and stack per-column predictions
    # into a (n_samples, n_outputs) array.
    def fit_predict_each_output(model, target):
        __predict_result = []
        for idx in range(np.size(target, 1)):
            model.fit(train_list, target[:, idx])
            __predict_result.append(model.predict(test_list))
        return np.transpose(np.asarray(__predict_result))

    # Single fit/predict for models that handle multi-output natively.
    def fit_predict(model, target):
        model.fit(train_list, target)
        return model.predict(test_list)

    from_bins_idx = kwargs["from_bins_idx"]
    to_bins_idx = kwargs["to_bins_idx"]
    _binned_train_result = to_bins_idx(train_result)

    _predict_result = []
    if "current" in method_list:
        # NOTE(review): this branch is broken — `__predict_result` is local
        # to fit_predict_each_output and undefined here (NameError), and the
        # second rbm.fit call refits the RBM on the test inputs instead of
        # transforming them.  Left as-is because the intent is unclear.
        rbm = neural_network.BernoulliRBM(n_components=512, verbose=False, n_iter=100, learning_rate=1e-2, random_state=0)
        rbm.fit(train_list)
        rbm.fit(test_list)
        _predict_result.append(np.transpose(np.asarray(__predict_result)))
    elif "knn" in method_list:
        _ = knn_predict(train_list, _binned_train_result, test_list, k=kwargs["k"])
        _predict_result.append(from_bins_idx(np.asarray(_, dtype=int)))
    elif "dt" in method_list:
        _ = fit_predict(tree.DecisionTreeClassifier(max_depth=kwargs["max_depth"]), _binned_train_result)
        _predict_result.append(from_bins_idx(np.asarray(_, dtype=int)))
    elif "rf" in method_list:
        _ = fit_predict(ensemble.RandomForestClassifier(n_estimators=kwargs["n_estimators"], max_depth=kwargs["max_depth"], n_jobs=kwargs["n_jobs"]), _binned_train_result)
        _predict_result.append(from_bins_idx(np.asarray(_, dtype=int)))
    elif "average" in method_list:
        _ = average_predict(train_result, test_list)
        _predict_result.append(from_bins_idx(np.asarray(_, dtype=int)))
    elif "adaboost" in method_list:
        _ = fit_predict_each_output(ensemble.AdaBoostClassifier(), _binned_train_result)
        _predict_result.append(from_bins_idx(np.asarray(_, dtype=int)))
    elif "ridge" in method_list:
        _ = fit_predict_each_output(linear_model.RidgeClassifier(), _binned_train_result)
        _predict_result.append(from_bins_idx(np.asarray(_, dtype=int)))
    elif "linear" in method_list:
        _predict_result.append(fit_predict_each_output(linear_model.LinearRegression(), train_result))
    elif "huber" in method_list:
        _predict_result.append(fit_predict_each_output(linear_model.HuberRegressor(), train_result))
    elif "theilsen" in method_list:
        _predict_result.append(fit_predict_each_output(linear_model.TheilSenRegressor(), train_result))
    elif "lasso" in method_list:
        _predict_result.append(fit_predict_each_output(linear_model.Lasso(), train_result))
    elif "par" in method_list:
        _predict_result.append(fit_predict_each_output(linear_model.PassiveAggressiveRegressor(C=kwargs["par_C"], epsilon=kwargs["par_eps"]), train_result))
    elif "ridge_reg" in method_list:
        _predict_result.append(fit_predict_each_output(linear_model.Ridge(), train_result))
    elif "dt_reg" in method_list:
        _predict_result.append(fit_predict(tree.DecisionTreeRegressor(max_depth=kwargs["max_depth"]), train_result))
    elif "rf_reg" in method_list:
        _predict_result.append(fit_predict(ensemble.RandomForestRegressor(max_depth=kwargs["max_depth"], n_jobs=kwargs['n_jobs'], n_estimators=kwargs['n_estimators']), train_result))
    elif "xgboost" in method_list:
        _predict_result.append(fit_predict_each_output(xgb.XGBClassifier(max_depth=kwargs["max_depth"], n_estimators=kwargs['n_estimators'], nthread=kwargs["nthread"]), _binned_train_result))
    elif "xgboost_reg" in method_list:
        _predict_result.append(fit_predict_each_output(xgb.XGBRegressor(max_depth=kwargs["max_depth"], n_estimators=kwargs['n_estimators'], nthread=kwargs["nthread"]), train_result))
    elif "svr" in method_list:
        _predict_result.append(fit_predict_each_output(svm.SVR(C=kwargs["C"], epsilon=kwargs["epsilon"]), train_result))
    elif "linear_svr" in method_list:
        _predict_result.append(fit_predict_each_output(svm.LinearSVR(C=kwargs["C"], epsilon=kwargs["epsilon"]), train_result))
    else:
        assert False, "invalid method"
    return np.asarray(_predict_result)
示例#7
0
# Continuation of a regression benchmark script: `regr`, `trainingInstances`,
# `trainingLables`, `testInstances` and `predictedRewardsList` are defined
# earlier in the file (not visible in this excerpt).
regr.fit(trainingInstances, trainingLables)

predictedRewards = regr.predict(testInstances)
predictedRewardsList[2].append(predictedRewards)
# printer.PrintProsentageOfFail(predictedRewards, actualDuration)

# Fourth Test - Passive Aggressive
# ------------------------------
testName4Reg = "Fourth Test - Passive Aggressive"
# All constructor arguments are spelled out explicitly; most match the
# sklearn defaults, except tol=0.002 and n_iter_no_change=100.
regr = linear_model.PassiveAggressiveRegressor(C=1.0,
                                               fit_intercept=True,
                                               max_iter=1000,
                                               tol=0.002,
                                               early_stopping=False,
                                               validation_fraction=0.1,
                                               n_iter_no_change=100,
                                               shuffle=True,
                                               verbose=0,
                                               loss='epsilon_insensitive',
                                               epsilon=0.1,
                                               random_state=None,
                                               warm_start=False,
                                               average=False)
regr.fit(trainingInstances, trainingLables)

predictedRewards = regr.predict(testInstances)
predictedRewardsList[3].append(predictedRewards)
# printer.PrintProsentageOfFail(predictedRewards, actualDuration)

# Fifth Test - DecisionTree
# --------------------------
# Candidate regressors r3..r16 for the ensemble comparison; the inline
# remarks record the author's empirical observations.
testName5Reg = "Fifth Test - DecisionTree"
r3 = ensemble.AdaBoostRegressor(random_state=0,
                                loss='linear',
                                learning_rate=3.0,
                                n_estimators=700)
r4 = ensemble.GradientBoostingRegressor()
r5 = ensemble.BaggingRegressor()  # overfitting
r6 = ensemble.ExtraTreesRegressor()  # overfitting
# NOTE(review): the `normalize=` argument was removed from BayesianRidge,
# ARDRegression and Lasso in scikit-learn 1.2 — these three constructors
# will fail on modern releases.
r7 = linear_model.BayesianRidge(normalize=True)
r8 = linear_model.ARDRegression(normalize=True)
r9 = linear_model.HuberRegressor()
r10 = linear_model.Lasso(random_state=0, selection='cyclic', normalize=False)
r11 = svm.LinearSVR(random_state=0,
                    loss='squared_epsilon_insensitive',
                    dual=True)
r12 = gaussian_process.GaussianProcessRegressor()  # overfitting
r13 = linear_model.PassiveAggressiveRegressor()  # takes okayisch time
r14 = linear_model.RANSACRegressor()  # overfitting?
r15 = linear_model.SGDRegressor(shuffle=True,
                                penalty='l1',
                                loss='squared_epsilon_insensitive',
                                learning_rate='invscaling',
                                epsilon=0.1,
                                early_stopping=False,
                                average=True)
r16 = linear_model.TheilSenRegressor()  # eher Verschlechterung ("rather a degradation")
# r17 = neural_network.MLPRegressor()

# #Unoptimized
# r1 = linear_model.LinearRegression()
# r2 = ensemble.RandomForestRegressor(max_depth=3, min_samples_split=2, random_state=0, n_estimators=700)
# r3 = ensemble.AdaBoostRegressor(random_state=0, n_estimators=100)
示例#9
0
# Continuation of a regressor-comparison script: `acc`, `ARDReg`,
# `features_train`/`labels_train` and `features_test`/`labels_test` are
# defined earlier in the file (not visible in this excerpt).
print ("\n\nARD Regression 4: accuracy=", acc, "\ncoef_=", ARDReg.coef_ )


############ Stochastic Gradient Descent Regressor ############
# Hyper-parameters below were picked via the (commented-out) grid search.
#paramDict = {'loss': ['squared_loss', 'epsilon_insensitive', 'squared_epsilon_insensitive'], 'penalty': ['l2', 'elasticnet', None], 'alpha': [.00001, .0001 ], 'epsilon': [.1, .05, .01], 'learning_rate': ['optimal', 'invscaling'] }
# NOTE(review): written against an old scikit-learn — the 'squared_loss'
# spelling was renamed to 'squared_error' and removed in sklearn 1.2.
SGDReg = linear_model.SGDRegressor(alpha=1e-05, penalty='l2', loss='squared_loss', learning_rate='invscaling', epsilon=0.01)
SGDReg.fit(features_train, labels_train)
pred = SGDReg.predict(features_test)
acc = SGDReg.score(features_test, labels_test)
print ("\n\nSGD Regression: accuracy=", acc, "\ncoef_=", SGDReg.coef_ )
####### BestScore= 0.787316849183 with these best Parameters= {'alpha': 1e-05, 'penalty': 'l2', 'loss': 'squared_loss', 'learning_rate': 'invscaling', 'epsilon': 0.01}


############ Passive-Aggressive Regressor  ############
#paramDict = {'C': [.1, 1, 10, 25, 50], 'epsilon': [.02, .05, .01, .001], 'n_iter': [3, 5, 10, 20], 'loss': ['epsilon_insensitive', 'squared_epsilon_insensitive'] }
# NOTE(review): `n_iter` was deprecated in sklearn 0.19 and removed in 0.21
# (replaced by `max_iter`); this call only works on old releases.
passReg =  linear_model.PassiveAggressiveRegressor(C=25, loss='epsilon_insensitive', n_iter=10, epsilon=0.02)
passReg.fit(features_train, labels_train)
pred = passReg.predict(features_test)
acc = passReg.score(features_test, labels_test)
print ("\n\npass Regression 3: accuracy=", acc, "\ncoef_=", passReg.coef_ )
########   BestScore= 0.79440546561  with these best Parameters= {'C': 50, 'loss': 'epsilon_insensitive', 'n_iter': 10, 'epsilon': 0.02}


############ Polynomial  ############
# Polynomial regression: expand features to degree 2 / 3, then fit an
# ordinary least-squares model without a separate intercept term.
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
pipe2 = Pipeline([('poly', PolynomialFeatures(degree=2)), ('linear', linear_model.LinearRegression(fit_intercept=False))])
pipe3 = Pipeline([('poly', PolynomialFeatures(degree=3)), ('linear', linear_model.LinearRegression(fit_intercept=False))])
pipe2.fit(features_train, labels_train)
pred = pipe2.predict(features_test)
acc = pipe2.score(features_test, labels_test)
示例#10
0
def regress_sys(folder, all_videos, y, training_size, have_output=True):
    """
    Uses regression techniques to select the best tracking parameters.
    Regression against intensities of input images.

    Parameters
    ----------
    folder: string
        Remote (S3) folder containing the per-pup image subfolders.
    all_videos: list
        Contains prefixes of video filenames of entire video set to be
        tracked.  Training dataset will be some subset of these videos.
    y: numpy array
        Contains manually acquired quality levels using Trackmate for the
        files contained in the training dataset.
    training_size: int
        Number of files in training dataset.
    have_output: boolean
        If you have already acquired the quality values (y) for the
        training dataset, set to True.  If False, it will output the files
        the user will need to acquire quality values for.

    Returns
    -------
    regress_object: list of sklearn regression objects.
        Contains list of regression objects assembled from the training
        datasets.  Uses the mean, 10th percentile, 90th percentile, and
        standard deviation intensities to predict the quality parameter
        in Trackmate.  Returns None when have_output is False (the caller
        must first acquire quality values for the printed files).
    """

    # Pick training videos pseudo-randomly (reseeded per index so the
    # selection is reproducible).
    tprefix = []
    for i in range(0, training_size):
        random.seed(i + 1)
        # Fix: randint is inclusive at both ends, so the original
        # randint(0, len(all_videos)) could index one past the end.
        tprefix.append(all_videos[random.randint(0, len(all_videos) - 1)])
        if have_output is False:
            print("Get parameters for: {}".format(tprefix[i]))

    if have_output is True:
        # Define descriptors: mean, std, 10th and 90th percentile intensity
        # of the first frame of each training image.
        descriptors = np.zeros((training_size, 4))
        counter = 0
        for name in tprefix:
            pup = name.split('_')[0]
            local_im = name + '.tif'
            remote_im = "{}/{}/{}".format(folder, pup, local_im)
            aws.download_s3(remote_im, local_im)
            test_image = sio.imread(local_im)
            descriptors[counter, 0] = np.mean(test_image[0, :, :])
            descriptors[counter, 1] = np.std(test_image[0, :, :])
            descriptors[counter, 2] = np.percentile(test_image[0, :, :], 10)
            # Fix: use frame 0 like the other three descriptors (the
            # original sliced [0:, :, :], i.e. the whole stack, here only).
            descriptors[counter, 3] = np.percentile(test_image[0, :, :], 90)
            counter = counter + 1

        # Define regression techniques
        X = descriptors
        classifiers = [
            svm.SVR(),
            linear_model.SGDRegressor(),
            linear_model.BayesianRidge(),
            linear_model.LassoLars(),
            linear_model.ARDRegression(),
            linear_model.PassiveAggressiveRegressor(),
            linear_model.TheilSenRegressor(),
            linear_model.LinearRegression()
        ]

        # Fit every regressor on the same descriptor matrix.
        regress_object = []
        for item in classifiers:
            clf = item
            regress_object.append(clf.fit(X, y))

        return regress_object
# Columns copied from the unnormalized base dataset into the prediction
# report, listed in the exact order they must appear in the output CSV.
_BASE_LOOKUP_COLUMNS = [
    'Entry_Position', 'Peak_Position', 'Total_Weeks', 'danceability',
    'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness',
    'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms',
    'time_signature', 'Movies_TV_feature_count', 'Oscars_won',
    'Artist_lifetime_grammy_achievement', 'Artist_grammy_wins',
    'Artist_grammy_nominations', 'artist popularity', 'TopSongsArtist',
    'TopSongsArtist10', 'TopSongsArtist100', 'Entry_Year',
    'days_before_charting', 'Age_Percentage_15_30'
]


def _build_regressor(reg_type):
    """Instantiate the estimator selected by *reg_type*, or None if unknown."""
    factories = {
        'dt': lambda: DecisionTreeRegressor(max_depth=2),
        'lin': linear_model.LinearRegression,
        'ridge': lambda: linear_model.Ridge(alpha=1500.0),
        'lasso': lambda: linear_model.Lasso(alpha=10.0),
        'bayridge': linear_model.BayesianRidge,
        'sgd': lambda: linear_model.SGDRegressor(loss='huber'),
        'lars': lambda: linear_model.Lars(n_nonzero_coefs=np.inf),
        'pasagv': lambda: linear_model.PassiveAggressiveRegressor(
            random_state=0),
        'kernelridge': kernel_ridge.KernelRidge,
        'svr': svm.SVR,
        'kneigh': lambda: neighbors.KNeighborsRegressor(algorithm='kd_tree'),
        'gauss': gaussian_process.GaussianProcessRegressor,
        'gbr': lambda: GradientBoostingRegressor(
            n_estimators=760, max_depth=4, min_samples_split=3,
            learning_rate=0.026, loss='huber'),
        'ran': lambda: RandomForestRegressor(n_estimators=300, max_depth=8),
        'et': ExtraTreesRegressor,
    }
    factory = factories.get(reg_type)
    return None if factory is None else factory()


def _copy_base_columns(result_df, base_df):
    """Fill each lookup column of *result_df* from *base_df*, keyed by title.

    Mirrors the original copy-paste blocks exactly: a song missing from
    base_df raises KeyError, and when base_df contains duplicate titles the
    last occurrence wins.
    """
    for col in _BASE_LOOKUP_COLUMNS:
        by_title = {row.Title: row[col] for _, row in base_df.iterrows()}
        for i, row in result_df.iterrows():
            result_df.loc[i, col] = by_title[row.Song]


def run_specific_combination(test_frame, reg_type, column_list):
    """Train one regressor on the chosen feature columns and report results.

    Parameters
    ----------
    test_frame : pandas.DataFrame
        Full dataset; must contain 'Endurance_Score', 'Title' and 'Artist'.
    reg_type : str
        Short code selecting the estimator (see ``_build_regressor``).
    column_list : list of str
        Columns (features plus 'Title'/'Artist') kept for the experiment.

    Side effects: prints the R2 score and RMSE on the 20% held-out split and
    writes '<path_final_csv>/predicted_finaldata.csv'. Unknown *reg_type*
    values are a silent no-op, as in the original.
    """
    target_feature = test_frame['Endurance_Score']
    test_df = test_frame.filter(column_list, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        test_df,
        target_feature.values.reshape(-1, 1),
        test_size=0.20,
        random_state=0)

    regr = _build_regressor(reg_type)
    if regr is None:
        return

    # 'Title'/'Artist' are row identifiers, not features; exclude them from
    # fitting and prediction (X_test keeps them for the report below).
    x_train_frame = X_train.drop(columns=['Title', 'Artist'])
    regr.fit(x_train_frame, y_train.ravel())
    x_test_frame = X_test.drop(columns=['Title', 'Artist'])
    y_pred = regr.predict(x_test_frame)

    # mean_squared_error returns the MSE; the square root is taken only for
    # the printed RMSE (the old local was misleadingly named `rmse`).
    mse = mean_squared_error(y_test, y_pred)
    score = r2_score(y_test, y_pred)
    print("R2-score: {}, RMSE: {}".format(score, math.sqrt(mse)))

    # Assemble the report frame; column creation order fixes the CSV layout.
    result_df = pd.DataFrame(columns=[
        'Song', 'Artist', 'Endurance_Score', 'Predicted_Endurance_Score'
    ])
    result_df['Song'] = X_test['Title']
    result_df['Artist'] = X_test['Artist']
    for col in _BASE_LOOKUP_COLUMNS:
        result_df[col] = ""
    result_df['Endurance_Score'] = y_test.ravel()
    result_df['Predicted_Endurance_Score'] = y_pred

    base_df = pd.read_csv('{0}/{1}.csv'.format(path_final_csv,
                                               'final_unnormalized_dataset'),
                          encoding='latin-1')
    _copy_base_columns(result_df, base_df)

    result_df.to_csv('{0}/{1}.csv'.format(path_final_csv,
                                          'predicted_finaldata'),
                     index=False)
示例#12
0
explained_variances, mean_absolute_errors, mean_squared_errors, mean_squared_log_errors, median_absolute_errors, r2_scores = update_list(explained_variances, mean_absolute_errors, mean_squared_errors, mean_squared_log_errors, median_absolute_errors, r2_scores)

##################################################
##          Passive-aggressive algorithms       ##
##################################################
'''
The passive-aggressive algorithms are a family of algorithms
for large-scale learning. They are similar to the Perceptron
in that they do not require a learning rate. However,
contrary to the Perceptron, they include a regularization parameter C.

Example:
http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf
'''
# Fit a passive-aggressive regressor, cross-validate predictions on the
# held-out split, and persist the fitted model with pickle.
try:
	pa_regr = linear_model.PassiveAggressiveRegressor(random_state=0)
	pa_regr.fit(X_train, y_train)
	predictions = cross_val_predict(pa_regr, X_test, y_test, cv=6)
	# Context manager guarantees the pickle file is closed even on error.
	with open('pa_regr.pickle', 'wb') as f:
		pickle.dump(pa_regr, f)
except Exception:
	# Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
	print('error - PASSIVE-AGGRESSIVE')

# get stats 
modeltypes.append('passive-agressive algorithm')
explained_variances, mean_absolute_errors, mean_squared_errors, mean_squared_log_errors, median_absolute_errors, r2_scores = update_list(explained_variances, mean_absolute_errors, mean_squared_errors, mean_squared_log_errors, median_absolute_errors, r2_scores)

##################################################
##                   RANSAC                     ## 
##################################################
示例#13
0
    'ardregression': lm.ARDRegression(),
    'bayesianridge': lm.BayesianRidge(),
    'elasticnet': lm.ElasticNet(),
    'elasticnetcv': lm.ElasticNetCV(),
    'huberregression': lm.HuberRegressor(),
    'lars': lm.Lars(),
    'larscv': lm.LarsCV(),
    'lasso': lm.Lasso(),
    'lassocv': lm.LassoCV(),
    'lassolars': lm.LassoLars(),
    'lassolarscv': lm.LassoLarsCV(),
    'lassolarsic': lm.LassoLarsIC(),
    'linearregression': lm.LinearRegression(),
    'orthogonalmatchingpursuit': lm.OrthogonalMatchingPursuit(),
    'orthogonalmatchingpursuitcv': lm.OrthogonalMatchingPursuitCV(),
    'passiveagressiveregressor': lm.PassiveAggressiveRegressor(),
    'ridge': lm.Ridge(),
    'ridgecv': lm.RidgeCV(),
    'sgdregressor': lm.SGDRegressor(),
    'theilsenregressor': lm.TheilSenRegressor(),
    'decisiontreeregressor': DecisionTreeRegressor(),
    'randomforestregressor': RandomForestRegressor(),
    'adaboostregressor': AdaBoostRegressor(),
    'baggingregressor': BaggingRegressor(),
    'extratreeregressor': ExtraTreeRegressor(),
    'linearsvr': LinearSVR(),
    'nusvr': NuSVR(),
    'svr': SVR(),
    }

示例#14
0
])
# All candidate models are evaluated on the same 15 swath/POCA features;
# the list was duplicated verbatim for every call, so hoist it once.
SWATH_FEATURE_COLUMNS = [
    "Coh_Swath", "Coh_SwathOverPoca", "DayInYear_Swath", "Dist_SwathToPoca",
    "Heading_Swath", "LeadEdgeS_Poca", "LeadEdgeW_Poca",
    "PhaseConfidence_Swath", "PhaseSSegment_Swath", "PowerScaled_Swath",
    "PowerScaled_SwathOverPoca", "SampleNb_Swath",
    "SampleNb_SwathMinusLeadEdgeS", "Phase_Swath", "Phase_SwathOverPoca"
]
# Each call receives its own copy, exactly like the original literals did,
# in case superfunk mutates the list it is given.
results2 = superfunk(linear_model.RANSACRegressor(), ndata[0],
                     list(SWATH_FEATURE_COLUMNS))
results3 = superfunk(linear_model.BayesianRidge(), ndata[0],
                     list(SWATH_FEATURE_COLUMNS))
results4 = superfunk(linear_model.PassiveAggressiveRegressor(), ndata[0],
                     list(SWATH_FEATURE_COLUMNS))
results5 = superfunk(linear_model.SGDRegressor(), ndata[0],
                     list(SWATH_FEATURE_COLUMNS))
results6 = superfunk(svm.SVR(), ndata[0], [
    "Coh_Swath", "Coh_SwathOverPoca", "DayInYear_Swath", "Dist_SwathToPoca",
# Ridge regression, with and without built-in cross-validation over alphas.
testRegressor(train, linear_model.Ridge(), target, 'RidgeRegression')
testRegressor(train,
              linear_model.RidgeCV(alphas=[0.01, 0.1, 1.0, 2, 4, 8, 16, 32]),
              target, 'RidgeRegressionCV')

# L1-regularised linear models.
testRegressor(train, linear_model.Lasso(), target, 'Lasso')
testRegressor(train, linear_model.LassoLars(), target, 'LassoLars')

testRegressor(train, OrthogonalMatchingPursuit(), target, 'OMP')

# Stochastic gradient descent.
testRegressor(train, linear_model.SGDRegressor(loss='squared_loss'), target,
              'SGDRegressor squared loss')

# Bayesian approaches.
testRegressor(train, linear_model.BayesianRidge(), target,
              'BayesianRidgeRegression')
# testRegressor(train, ARDRegression(), target, 'ARDRegression')  # disabled

# Passive-aggressive regression, one run per supported loss.
for pa_loss, pa_label in [
        ('epsilon_insensitive', 'PassiveAggressiveRegressor'),
        ('squared_epsilon_insensitive',
         'PassiveAggressiveRegressor squared loss')]:
    testRegressor(train,
                  linear_model.PassiveAggressiveRegressor(loss=pa_loss),
                  target, pa_label)

# Support vector machines, one run per kernel.
for kernel in ('poly', 'rbf', 'sigmoid'):
    testRegressor(train, svm.SVR(kernel=kernel), target, 'SVM ' + kernel)

# k-nearest-neighbour regression over a range of neighbourhood sizes.
for k in (1, 2, 3, 4, 8, 16):
    testRegressor(train, neighbors.KNeighborsRegressor(n_neighbors=k), target,
                  'NearestNeighbor %d' % k)
# Hold out the final 10% of the samples for evaluation.
target_train = target[:int(.9 * n_samples)]
data_test = data[int(.9 * n_samples):]
target_test = target[int(.9 * n_samples):]

# Score every candidate estimator on the held-out split; one line of output
# per model, in a fixed order.
print('# Classification scores:')
scored_estimators = [
    ('KNN', neighbors.KNeighborsClassifier()),
    ('linear_model.ElasticNet', linear_model.ElasticNet()),
    ('linear_model.ElasticNetCV', linear_model.ElasticNetCV()),
    ('linear_model.Lars', linear_model.Lars()),
    ('linear_model.Lasso', linear_model.Lasso()),
    ('linear_model.LassoCV', linear_model.LassoCV()),
    ('linear_model.LassoLars', linear_model.LassoLars()),
    ('linear_model.LassoLarsIC', linear_model.LassoLarsIC()),
    ('linear_model.LinearRegression', linear_model.LinearRegression()),
    ('linear_model.LogisticRegression', linear_model.LogisticRegression()),
    ('linear_model.OrthogonalMatchingPursuit',
     linear_model.OrthogonalMatchingPursuit()),
    ('linear_model.PassiveAggressiveClassifier',
     linear_model.PassiveAggressiveClassifier()),
    ('linear_model.PassiveAggressiveRegressor',
     linear_model.PassiveAggressiveRegressor()),
    ('linear_model.Perceptron', linear_model.Perceptron()),
    ('linear_model.Ridge', linear_model.Ridge()),
    ('linear_model.RidgeClassifier', linear_model.RidgeClassifier()),
    ('linear_model.RidgeClassifierCV', linear_model.RidgeClassifierCV()),
    ('linear_model.RidgeCV', linear_model.RidgeCV()),
    ('linear_model.SGDClassifier', linear_model.SGDClassifier()),
    ('linear_model.SGDRegressor', linear_model.SGDRegressor()),
    ('naive_bayes.MultinomialNB', naive_bayes.MultinomialNB()),
    ('lda.LDA', lda.LDA()),
    ('svm.SVR', svm.SVR()),
    ('svm.SVC', svm.SVC(kernel='linear')),
    ('svm.LinearSVC', svm.LinearSVC()),
]
for label, estimator in scored_estimators:
    fitted = estimator.fit(data_train, target_train)
    print('%s: %f' % (label, fitted.score(data_test, target_test)))
示例#17
0
def train_passive_aggressive_regressor():
    """Return model properties and a passive-aggressive regressor instance.

    The properties flag the model as a regression algorithm that supports
    online (incremental) learning.
    """
    properties = mp.ModelProperties(regression=True, online=True)
    estimator = linear_model.PassiveAggressiveRegressor()
    return properties, estimator
示例#18
0
    def models(self) -> Dict[str, LinearModel]:
        """Return a name → freshly constructed scikit-learn estimator mapping.

        Every call builds new estimator instances, so callers may fit the
        returned models without affecting later calls.

        NOTE(review): despite the ``Dict[str, LinearModel]`` annotation, some
        entries (Perceptron, RidgeClassifier, RidgeClassifierCV,
        SGDClassifier) are classifiers rather than regressors — confirm that
        callers tolerate this mix.
        """
        return {
            "LinearRegression":
            linear_model.LinearRegression(
            ),  # LinearRegression([…])	Ordinary least squares Linear Regression.
            "ARDRegression":
            linear_model.ARDRegression(
            ),  #  ARDRegression([n_iter, tol, …])	Bayesian ARD regression.
            "BayesianRidge":
            linear_model.BayesianRidge(
            ),  # BayesianRidge([n_iter, tol, …])	Bayesian ridge regression.
            "HuberRegressor":
            linear_model.HuberRegressor(
            ),  # HuberRegressor([epsilon, …])	Linear regression model that is robust to outliers.
            "OrthogonalMatchingPursuitCV":
            linear_model.OrthogonalMatchingPursuitCV(
                cv=5
            ),  # OrthogonalMatchingPursuitCV([…])	Cross-validated Orthogonal Matching Pursuit model (OMP).
            "Perceptron":
            linear_model.Perceptron(
                max_iter=1000, tol=1e-3
            ),  # Perceptron([penalty, alpha, …])	Read more in the User Guide.
            "RANSACRegressor":
            linear_model.RANSACRegressor(
            ),  # RANSACRegressor([…])	RANSAC (RANdom SAmple Consensus) algorithm.
            "SGDRegressor":
            linear_model.SGDRegressor(
                max_iter=1000, tol=1e-3
            ),  # SGDRegressor([loss, penalty, …])	Linear model fitted by minimizing a regularized empirical loss with SGD
            "TheilSenRegressor":
            linear_model.TheilSenRegressor(
            ),  # TheilSenRegressor([…])	Theil-Sen Estimator: robust multivariate regression model.
            "PassiveAggressiveRegressor":
            linear_model.PassiveAggressiveRegressor(
                max_iter=1000, tol=1e-3
            ),  # PassiveAggressiveRegressor([C, …])	Passive Aggressive Regressor
            "Lars":
            linear_model.Lars(
                eps=0.01
            ),  # Lars([fit_intercept, verbose, …])	Least Angle Regression model a.k.a.
            "LarsCV":
            linear_model.LarsCV(
                cv=5, eps=0.01
            ),  # LarsCV([fit_intercept, …])	Cross-validated Least Angle Regression model.
            "Lasso":
            linear_model.Lasso(
                alpha=1, max_iter=1000
            ),  # Lasso([alpha, fit_intercept, …])	Linear Model trained with L1 prior as regularizer (aka the Lasso)
            "LassoCV":
            linear_model.LassoCV(
                cv=5
            ),  # LassoCV([eps, n_alphas, …])	Lasso linear model with iterative fitting along a regularization path.
            "LassoLars":
            linear_model.LassoLars(
                eps=0.01
            ),  # LassoLars([alpha, …])	Lasso model fit with Least Angle Regression a.k.a.
            "LassoLarsCV":
            linear_model.LassoLarsCV(
                cv=5, eps=0.01, max_iter=100
            ),  # LassoLarsCV([fit_intercept, …])	Cross-validated Lasso, using the LARS algorithm.
            "LassoLarsIC":
            linear_model.LassoLarsIC(
                eps=0.01
            ),  # LassoLarsIC([criterion, …])	Lasso model fit with Lars using BIC or AIC for model selection
            "Ridge":
            linear_model.Ridge(
            ),  # Ridge([alpha, fit_intercept, …])	Linear least squares with l2 regularization.
            "RidgeClassifier":
            linear_model.RidgeClassifier(
            ),  # RidgeClassifier([alpha, …])	Classifier using Ridge regression.
            "RidgeClassifierCV":
            linear_model.RidgeClassifierCV(
                cv=5
            ),  # RidgeClassifierCV([alphas, …])	Ridge classifier with built-in cross-validation.
            "RidgeCV":
            linear_model.RidgeCV(
                cv=5
            ),  # RidgeCV([alphas, …])	Ridge regression with built-in cross-validation.
            "SGDClassifier":
            linear_model.SGDClassifier(
                max_iter=1000, tol=1e-3
            ),  # SGDClassifier([loss, penalty, …])	Linear classifiers (SVM, logistic regression, a.o.) with SGD training.
            "ElasticNet":
            linear_model.ElasticNet(
            ),  # linear_model.ElasticNet([alpha, l1_ratio, …])	Linear regression with combined L1 and L2 priors as regularizer.
            "ElasticNetCV":
            linear_model.ElasticNetCV(
                cv=5
            ),  # linear_model.ElasticNetCV([l1_ratio, eps, …])	Elastic Net model with iterative fitting along a regularization path.

            ### Ignore These
            # "LogisticRegression":           linear_model.LogisticRegression(),                    # LogisticRegression([penalty, …])	Logistic Regression (aka logit, MaxEnt) classifier.
            # "LogisticRegressionCV":         linear_model.LogisticRegressionCV(cv=5),              # LogisticRegressionCV([Cs, …])	Logistic Regression CV (aka logit, MaxEnt) classifier.
            # "MultiTaskLasso":               linear_model.MultiTaskLasso(),                        # MultiTaskLasso([alpha, …])	Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.
            # "MultiTaskElasticNet":          linear_model.MultiTaskElasticNet(),                   # MultiTaskElasticNet([alpha, …])	Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer
            # "MultiTaskLassoCV":             linear_model.MultiTaskLassoCV(cv=5),                  # MultiTaskLassoCV([eps, …])	Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.
            # "MultiTaskElasticNetCV":        linear_model.MultiTaskElasticNetCV(cv=5),             # MultiTaskElasticNetCV([…])	Multi-task L1/L2 ElasticNet with built-in cross-validation.
            # "OrthogonalMatchingPursuit":    linear_model.OrthogonalMatchingPursuit(),             # OrthogonalMatchingPursuit([…])	Orthogonal Matching Pursuit model (OMP)
            # "PassiveAggressiveClassifier":  linear_model.PassiveAggressiveClassifier(),           # PassiveAggressiveClassifier([…])	Passive Aggressive Classifier

            ### Normalization seems to make the score worse!
            # NOTE(review): the `normalize` keyword used below was deprecated and
            # later removed from scikit-learn estimators — these would need a
            # preprocessing StandardScaler if ever re-enabled.
            # "LinearRegressionNormalize":    linear_model.LinearRegression(normalize=True),          # LinearRegression([…])	Ordinary least squares Linear Regression.
            # "RidgeCVNormalize":             linear_model.RidgeCV(cv=5, normalize=True),             # RidgeCV([alphas, …])	Ridge regression with built-in cross-validation.
            # "LassoLarsNormalize":           linear_model.LassoLars(eps=0.01, normalize=True),       # LassoLars([alpha, …])	Lasso model fit with Least Angle Regression a.k.a.
            # "LassoLarsICNormalize":         linear_model.LassoLarsIC(eps=0.01, normalize=True),     # LassoLarsIC([criterion, …])	Lasso model fit with Lars using BIC or AIC for model selection
            # "ARDRegressionNormalize":       linear_model.ARDRegression(normalize=True),             #  ARDRegression([n_iter, tol, …])	Bayesian ARD regression.
            # "BayesianRidgeNormalize":       linear_model.BayesianRidge(normalize=True),             # BayesianRidge([n_iter, tol, …])	Bayesian ridge regression.
        }
示例#19
0
def model_comparison():
    """Cross-validate a suite of regressors and rank them by RMSE.

    Loads the training data, applies the preprocessing pipeline, scores each
    candidate estimator with 10-fold shuffle-split cross-validation (60/30
    train/test split, intentionally leaving out 10% each run), then writes
    the comparison table to 'mla_comparison.csv' and prints it.
    """
    data, target = load_train()

    pipeline = create_pipeline()

    data = pipeline.fit_transform(data)

    MLA = [
        # Ensemble methods
        ensemble.AdaBoostRegressor(),
        ensemble.BaggingRegressor(),
        ensemble.ExtraTreesRegressor(),
        ensemble.GradientBoostingRegressor(),
        ensemble.RandomForestRegressor(),

        # Gaussian processes
        gaussian_process.GaussianProcessRegressor(),

        # GLM
        linear_model.PassiveAggressiveRegressor(),
        linear_model.Ridge(),
        linear_model.Lasso(),
        linear_model.ElasticNet(),
        linear_model.SGDRegressor(),

        # Nearest neighbor
        neighbors.KNeighborsRegressor(),

        # SVM
        svm.SVR(),
        svm.NuSVR(),
        svm.LinearSVR(),

        # Trees
        tree.DecisionTreeRegressor(),
        tree.ExtraTreeRegressor(),

        # Gradient-boosting libraries:
        # http://xgboost.readthedocs.io/en/latest/model.html
        XGBRegressor(),
        lgb.LGBMRegressor()
    ]

    # ShuffleSplit is an alternative to a single train_test_split: run each
    # model 10x with a 60/30 split, leaving 10% out every time.
    cv_split = model_selection.ShuffleSplit(n_splits=10, test_size=.3,
                                            train_size=.6, random_state=0)

    # Comparison table; extra columns are added on first assignment below.
    MLA_columns = ['MLA Name', 'MLA Parameters', 'MLA Train Accuracy Mean',
                   'MLA Test Accuracy Mean']
    MLA_compare = pd.DataFrame(columns=MLA_columns)

    # The scorer is loop-invariant, so build it once outside the loop.
    rmse_scorer = make_scorer(rmse)

    for row_index, alg in enumerate(MLA):
        # Record name and hyper-parameters.
        MLA_compare.loc[row_index, 'MLA Name'] = alg.__class__.__name__
        MLA_compare.loc[row_index, 'MLA Parameters'] = str(alg.get_params())

        # return_train_score=True is required on scikit-learn >= 0.21, where
        # cross_validate stopped returning 'train_score' by default; without
        # it the 'MLA Train Accuracy Mean' lookup below raises KeyError.
        cv_results = model_selection.cross_validate(
            alg, data, target, cv=cv_split, scoring=rmse_scorer,
            return_train_score=True)

        MLA_compare.loc[row_index, 'MLA Time'] = cv_results['fit_time'].mean()
        MLA_compare.loc[row_index,
                        'MLA Train Accuracy Mean'] = cv_results['train_score'].mean()
        MLA_compare.loc[row_index,
                        'MLA Test Accuracy Mean'] = cv_results['test_score'].mean()
        # For an unbiased sample, +/-3 standard deviations from the mean
        # captures ~99.7% of outcomes — roughly the worst case to expect.
        MLA_compare.loc[row_index,
                        'MLA Test Accuracy 3*STD'] = cv_results['test_score'].std() * 3

    # Sort ascending by mean test score (RMSE: lower is better) and persist.
    MLA_compare.sort_values(by=['MLA Test Accuracy Mean'], inplace=True)
    MLA_compare.to_csv('mla_comparison.csv', index=True)
    print(MLA_compare)
def optimalRegression(x_train, x_test, y_train, y_test):
    """Fit a suite of regression models, rank them by cross-validated
    mean absolute error on the test split, and return the two best.

    Parameters
    ----------
    x_train, x_test : array-like
        Feature matrices for fitting and evaluation.
    y_train, y_test : array-like
        Target vectors for fitting and evaluation.

    Returns
    -------
    tuple
        ``(model1, name1, model2, name2)`` — the best and second-best
        estimator objects and their display names by MAE.
    """
    # MAE per candidate model; NaN marks a model that failed to fit/score.
    mean_absolute_errors = []

    # Candidate regression models.
    # NOTE(review): the `normalize=` keyword was removed from Ridge and
    # LinearRegression in scikit-learn 1.2; passing it raised a TypeError
    # during list construction, so it is dropped here.
    models = [
        linear_model.LinearRegression(),
        linear_model.Ridge(fit_intercept=True,
                           alpha=0.0,
                           random_state=0),
        linear_model.Lasso(alpha=0.1),
        linear_model.ElasticNet(),
        linear_model.Lars(n_nonzero_coefs=1),
        linear_model.LassoLars(),
        linear_model.OrthogonalMatchingPursuit(),
        # liblinear is required for an L1 penalty; the default lbfgs
        # solver rejects penalty='l1' and the model would always fail.
        linear_model.LogisticRegression(C=1.0, penalty='l1', tol=1e-6,
                                        solver='liblinear'),
        linear_model.SGDRegressor(),
        MLPRegressor(solver='lbfgs'),
        linear_model.PassiveAggressiveRegressor(random_state=0),
        linear_model.RANSACRegressor(),
        linear_model.TheilSenRegressor(random_state=42),
        linear_model.HuberRegressor(fit_intercept=True,
                                    alpha=0.0,
                                    max_iter=100),
        Pipeline([('poly', PolynomialFeatures(degree=5, include_bias=False)),
                  ('linreg', linear_model.LinearRegression())])
    ]

    # Display names, kept in the same order as `models`.
    names = [
        'Linear_Regression', 'Ridge_Regression', 'Lasso', 'Elastic_Net',
        'Least_Angle_Regression', 'LARS_Lasso', 'Orthogonal_Matching_Pursuit',
        'Logistic_Regression', 'Stochastic_Gradient_Descent',
        'Perceptron_Algorithms', 'Passive-aggressive_Algorithms', 'RANSAC',
        'Theil_SEN', 'Huber_Regression', 'Polynomial_Regression'
    ]

    for model in models:
        try:
            model.fit(x_train, y_train)
            # Ranking metric: 5-fold cross-validated predictions on the
            # test split, scored against y_test.
            predictions = cross_val_predict(model, x_test, y_test, cv=5)
            mean_absolute_errors.append(
                metrics.mean_absolute_error(y_test, predictions))
        except Exception:
            # Was a bare `except:` appending the string 'n/a'; a string in
            # a float column makes sort_values raise TypeError on mixed
            # types.  NaN sorts to the bottom by default instead.
            mean_absolute_errors.append(float('nan'))

    df = pd.DataFrame({
        'Model_reference': models,
        'Model_name': names,
        'Mean_absolute_err': mean_absolute_errors
    })

    # NaN (failed) models land at the bottom (na_position defaults to 'last').
    df.sort_values(by='Mean_absolute_err', ascending=True, inplace=True)
    df = df.reset_index(drop=True)

    print(df[['Model_name', 'Mean_absolute_err']])
    print("Optimal model is " + str(df['Model_name'][0]) + " with error " +
          str(df['Mean_absolute_err'][0]))
    print("Second Optimal model is " + str(df['Model_name'][1]) +
          " with error " + str(df['Mean_absolute_err'][1]))

    name1 = str(df['Model_name'][0])
    name2 = str(df['Model_name'][1])
    model1 = None
    model2 = None

    # Recover the fitted estimator objects matching the two best names.
    for model, name in zip(models, names):
        if name == str(df['Model_name'][0]):
            model1 = model
        elif name == str(df['Model_name'][1]):
            model2 = model

    return model1, name1, model2, name2
# The discount series could optionally be denoised with a wavelet
# transform before splitting, e.g.:
#   Y = pd.Series(denoise_signal(Y))
#   X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=1 - 365/len(df), shuffle=False)
# Instead, load the pre-computed regression train/test split from disk.
X_train = load('Data/regression_train_X.npy', allow_pickle=True)
X_test = load('Data/regression_test_X.npy', allow_pickle=True)
Y_train = load('Data/regression_train_y.npy', allow_pickle=True)
Y_test = load('Data/regression_test_y.npy', allow_pickle=True)

## train SVM
# Candidate estimator classes, instantiated with default hyper-parameters.
# Kept in the same order as the `name` labels below so the two lists can
# be zipped downstream.
regressors = [cls() for cls in (
    svm.SVR,
    # linear_model.SGDRegressor,  (commented out in the original)
    linear_model.BayesianRidge,
    linear_model.LassoLars,
    linear_model.ARDRegression,
    linear_model.PassiveAggressiveRegressor,
    linear_model.TheilSenRegressor,
    linear_model.LinearRegression,
)]

# Display labels, one per entry of `regressors`.
name = [
    'svm.SVR',
    #        'SGDRegressor',
    'BayesianRidge',
    'LassoLars',
    'ARDRegression',
    'PassiveAggressiveRegressor',
    'TheilSenRegressor',
    'LinearRegression'
]
示例#22
0
def get_regression_estimators(r, regression_models):
    """Instantiate the sklearn ``linear_model`` estimator class named *r*.

    On success a new estimator (default hyper-parameters) is stored under
    key *r* in *regression_models*; an unsupported name leaves the dict
    untouched and prints a diagnostic.

    Parameters
    ----------
    r : str
        Class name of the estimator, e.g. ``'Ridge'`` or ``'SGDRegressor'``.
    regression_models : dict
        Mapping updated in place with ``{r: estimator}``.
    """
    # Whitelist of supported linear_model class names.  Every branch of the
    # original 30-way if/elif chain called the class with no arguments, so
    # a getattr lookup on the module is behaviorally identical.
    supported = {
        'ARDRegression', 'BayesianRidge', 'ElasticNet', 'ElasticNetCV',
        'HuberRegressor', 'Lars', 'LarsCV', 'Lasso', 'LassoCV', 'LassoLars',
        'LassoLarsCV', 'LassoLarsIC', 'LinearRegression',
        'LogisticRegression', 'LogisticRegressionCV', 'MultiTaskElasticNet',
        'MultiTaskElasticNetCV', 'MultiTaskLasso', 'MultiTaskLassoCV',
        'OrthogonalMatchingPursuit', 'OrthogonalMatchingPursuitCV',
        'PassiveAggressiveClassifier', 'PassiveAggressiveRegressor',
        'Perceptron', 'RANSACRegressor', 'Ridge', 'RidgeClassifier',
        'RidgeClassifierCV', 'RidgeCV', 'SGDClassifier', 'SGDRegressor',
        'TheilSenRegressor',
    }
    if r in supported:
        regression_models[r] = getattr(linear_model, r)()
    else:
        print(
            r +
            " is an unsupported regression type. Check if you have misspelled the name."
        )
示例#23
0
def regress_sys(folder,
                all_videos,
                yfit,
                training_size,
                randselect=True,
                trainingdata=None,
                frame=0,
                have_output=True,
                download=True,
                bucket_name='ccurtis.data'):
    """Uses regression based on image intensities to select tracking parameters.

    This function uses regression methods from the scikit-learn module to
    predict the lower quality cutoff values for particle filtering in TrackMate
    based on the intensity distributions of input images. Currently only uses
    a single frame of videos for analysis, and is limited to predicting
    quality values.

    In practice, users will run regress_sys twice in different modes to build
    a regression system. First, set have_output to False. Function will return
    list of randomly selected videos to include in the training dataset. The
    user should then manually track particles using the Trackmate GUI, and enter
    these values in during the next round as the input yfit variable.

    Parameters
    ----------
    folder : str
        S3 directory containing video files specified in all_videos.
    all_videos: list of str
        Contains prefixes of video filenames of entire video set to be
        tracked.  Training dataset will be some subset of these videos.
    yfit: numpy.ndarray
        Contains manually acquired quality levels using Trackmate for the
        files contained in the training dataset.
    training_size : int
        Number of files in training dataset.
    randselect : bool
        If True, will randomly select training videos from all_videos.
        If False, will use trainingdata as input training dataset.
    trainingdata : list of str, optional
        Optional manually selected prefixes of video filenames to be
        used as training dataset.  Defaults to an empty list.
    frame : int
        Index of the video frame whose intensity statistics are used.
    have_output: bool
        If you have already acquired the quality values (yfit) for the
        training dataset, set to True.  If False, it will output the files
        the user will need to acquire quality values for.
    download : bool
        If True, images are first downloaded from S3 before reading.
    bucket_name : str
        S3 bucket containing videos to be downloaded for regression
        calculations.

    Returns
    -------
    regress_object : list of sklearn.svm.classes.
        Contains list of regression objects assembled from the training
        datasets.  Uses the mean, 10th percentile, 90th percentile, and
        standard deviation intensities to predict the quality parameter
        in Trackmate.
    tprefix : list of str
        Contains randomly selected images from all_videos to be included in
        training dataset.

    """
    # Avoid the shared mutable default argument; behavior is unchanged.
    if trainingdata is None:
        trainingdata = []

    if randselect:
        tprefix = []
        for i in range(0, training_size):
            # Seed per iteration so the selection is reproducible.
            random.seed(i + 1)
            # BUGFIX: random.randint is inclusive on BOTH ends, so the old
            # upper bound len(all_videos) could raise IndexError.
            tprefix.append(all_videos[random.randint(0, len(all_videos) - 1)])
            if have_output is False:
                print("Get parameters for: {}".format(tprefix[i]))
    else:
        tprefix = trainingdata

    if have_output is True:
        # Define descriptors: one row of four intensity statistics per video.
        # Sized by len(tprefix) rather than training_size, since with
        # randselect=False the user-supplied trainingdata may differ in length.
        descriptors = np.zeros((len(tprefix), 4))
        counter = 0
        for name in tprefix:
            local_im = name + '.tif'
            remote_im = "{}/{}".format(folder, local_im)
            if download:
                aws.download_s3(remote_im, local_im, bucket_name=bucket_name)
            test_image = sio.imread(local_im)
            descriptors[counter, 0] = np.mean(test_image[frame, :, :])
            descriptors[counter, 1] = np.std(test_image[frame, :, :])
            descriptors[counter, 2] = np.percentile(test_image[frame, :, :],
                                                    10)
            descriptors[counter, 3] = np.percentile(test_image[frame, :, :],
                                                    90)
            counter = counter + 1

        # Define regression techniques and fit each on the descriptors.
        xfit = descriptors
        classifiers = [
            svm.SVR(),
            linear_model.SGDRegressor(),
            linear_model.BayesianRidge(),
            linear_model.LassoLars(),
            linear_model.ARDRegression(),
            linear_model.PassiveAggressiveRegressor(),
            linear_model.TheilSenRegressor(),
            linear_model.LinearRegression()
        ]

        regress_object = []
        for item in classifiers:
            clf = item
            regress_object.append(clf.fit(xfit, yfit))

        return regress_object

    else:
        return tprefix
    def __init__(self):
        """Authenticate with Twitter, compute daily net tweet sentiment for
        the past week, merge it with NOAA climate data, and fit a suite of
        regression models on the combined dataset.
        """
        # keys and tokens from the Twitter Dev Console
        # SECURITY(review): credentials are hard-coded in source and are
        # therefore effectively public.  They should be revoked and loaded
        # from environment variables or a secrets store instead.
        consumer_key = 'wELRpStXm3ClfLm1bmFNnHylH'
        consumer_secret = 'FHpTU0BBClgULhOMFrp2QyjaMcFg9LDWaNO2buyTQJ0WUtxyvW'
        access_token = '1236399499565608961-UtDzGjrLbcRevxCJRX2gAIv9s5HIhV'
        access_token_secret = 'MscQlrcL0vtGPBxct09tXTVxgwQD70UnOxEs0bY19X7yD'

        # attempt authentication 
        try: 
            # create OAuthHandler object 
            self.auth = OAuthHandler(consumer_key, consumer_secret) 
            # set access token and secret 
            self.auth.set_access_token(access_token, access_token_secret) 
            # create tweepy API object to fetch tweets 
            self.api = tweepy.API(self.auth) 
        except:
            # NOTE(review): the bare except only prints; on failure self.api
            # is left unset and later get_tweets calls will raise.
            print("Error: Authentication Failed")

        # creating object of TwitterClient Class 
        # api = TwitterClient()
        # calling function to get tweets
        # Accumulators start with a header label so they line up with the
        # dataset columns below (the label is stripped via [1:] later).
        wSent = ["WSENT"]
        aSent = ["ASENT"]

        # Score the five days from 3 to 7 days ago, one batch of up to 100
        # weather-related and 100 general tweets per day near Omaha, NE.
        for index in range(3,8):
            day = datetime.date.today() - datetime.timedelta(days = index)
            wTweets = self.get_tweets(query = 'weather', count = 100, geocode='41.2565,-96.05,5mi', until=day)
            aTweets = self.get_tweets(query = '', count = 100, geocode='41.2565,-96.05,5mi', until = day)

            # Net positive sentiment: fraction positive minus fraction negative.
            # NOTE(review): raises ZeroDivisionError if a query returns no
            # tweets for a day — confirm get_tweets never returns [].
            ptweets = [tweet for tweet in wTweets if tweet['sentiment'] == 'positive']
            ntweets = [tweet for tweet in wTweets if tweet['sentiment'] == 'negative']
            netPosSent = (len(ptweets)/len(wTweets)) - (len(ntweets)/len(wTweets))

            wSent.append(netPosSent)

            ptweets = [tweet for tweet in aTweets if tweet['sentiment'] == 'positive']
            ntweets = [tweet for tweet in aTweets if tweet['sentiment'] == 'negative']
            netPosSent = (len(ptweets)/len(aTweets)) - (len(ntweets)/len(aTweets))

            aSent.append(netPosSent)
        
        # print(wSent)
        # print(aSent)

        # NOAA Climate Data Online order (presumably daily station records);
        # station/name/date columns are dropped to keep numeric features only.
        url = "https://www.ncei.noaa.gov/orders/cdo/2069913.csv"

        dataset = pandas.read_csv(url)
        dataset = dataset.drop(['STATION', 'NAME', 'DATE'], axis = 1)
        # Attach the per-day weather sentiment, skipping the "WSENT" label.
        # NOTE(review): assumes the CSV has exactly 5 rows to match wSent[1:]
        # — verify against the downloaded order.
        dataset['WSENT'] = wSent[1:]
        # dataset['ASENT'] = aSent[1:]
        dataset = dataset.dropna()
        # print(dataset.shape)

        # Regressors to try, all with default hyper-parameters.
        classifiers = [
            svm.SVR(),
            linear_model.SGDRegressor(),
            linear_model.BayesianRidge(),
            linear_model.LassoLars(),
            linear_model.ARDRegression(),
            linear_model.PassiveAggressiveRegressor(),
            linear_model.TheilSenRegressor(),
            linear_model.LinearRegression()
        ]

        # Predict sentiment from weather features; prediction data is the
        # same as the training data here (in-sample predictions only).
        trainingData   = dataset.drop(['WSENT'], axis=1)
        trainingScores = dataset['WSENT']
        predictionData = dataset.drop(['WSENT'], axis=1)

        # NOTE(review): `clf` is deliberately module-global; the last fitted
        # model (LinearRegression) is what remains visible after __init__.
        global clf

        for item in classifiers:
            # print(item)
            clf = item
            clf.fit(trainingData, trainingScores)
            print(clf.predict(predictionData),'\n')