Python split_dataset 예제들, utilities.split_dataset Python 예제들

예제 #1

0

파일 보기

파일: going_training_set_tests.py 프로젝트: maithuvenkatesh/University-Project

def main():
    """Run ``going_class`` on the born-05 and then the born-98 training split.

    Both CSV files are parsed for horses and for races, each horse list is
    passed through ``split_dataset``, and the first element of each split is
    handed to ``going_class``.
    """
    csv_98 = './../Data/born98.csv'
    csv_05 = './../Data/born05.csv'

    born_98 = HorseParserNoHandicaps(csv_98).horses
    born_05 = HorseParserNoHandicaps(csv_05).horses

    # The race tables are parsed as well, although nothing below reads them.
    race_table_98 = RaceParserNoHandicaps(csv_98).races
    race_table_05 = RaceParserNoHandicaps(csv_05).races

    # Only the first element of each split is used; the second is discarded.
    train_98, _ = split_dataset(born_98)
    train_05, _ = split_dataset(born_05)

    # The 2005 cohort is analysed before the 1998 one.
    going_class(train_05)
    going_class(train_98)

예제 #2

0

파일 보기

파일: going_training_set_tests.py 프로젝트: cheungsingyi/University-Project

def main():
    """Run ``going_class`` on the born-05 and then the born-98 training split.

    Both CSV files are parsed for horses and for races, each horse list is
    passed through ``split_dataset``, and the first element of each split is
    handed to ``going_class``.
    """
    csv_98 = './../Data/born98.csv'
    csv_05 = './../Data/born05.csv'

    born_98 = HorseParserNoHandicaps(csv_98).horses
    born_05 = HorseParserNoHandicaps(csv_05).horses

    # The race tables are parsed as well, although nothing below reads them.
    race_table_98 = RaceParserNoHandicaps(csv_98).races
    race_table_05 = RaceParserNoHandicaps(csv_05).races

    # Only the first element of each split is used; the second is discarded.
    train_98, _ = split_dataset(born_98)
    train_05, _ = split_dataset(born_05)

    # The 2005 cohort is analysed before the 1998 one.
    going_class(train_05)
    going_class(train_98)

예제 #3

0

파일 보기

def main():
    """Report the training-set size and run every analysis for each cohort.

    The born-98 and born-05 CSV files are parsed, each horse list is split
    with ``split_dataset``, and the same sequence of analysis functions is
    applied to the first element of each split.
    """
    def run_analyses(training_horses):
        # One fixed sequence shared by both cohorts; goings_vs_speed is
        # deliberately not part of it.
        rating_vs_speed(training_horses)
        prize_money_vs_speed(training_horses)
        odds_vs_speed(training_horses)
        age_vs_speed(training_horses)
        no_of_runners_vs_speed(training_horses)
        race_class_vs_speed(training_horses)
        weight_vs_speed(training_horses)
        jockeys_claim_vs_speed(training_horses)
        place_vs_speed(training_horses)
        distance_vs_speed(training_horses)
        comptime_vs_speed(training_horses)
        stall_vs_speed(training_horses)
        rating_vs_odds(training_horses)

    born_98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    born_05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    # Only the first element of each split is analysed here.
    train_98, _ = split_dataset(born_98)
    train_05, _ = split_dataset(born_05)

    print('HorsesBorn98 Training Set:')
    print('No. of horses: ' + str(len(train_98)))
    run_analyses(train_98)

    print('')

    print('HorsesBorn05 Training Set:')
    print('No. of horses: ' + str(len(train_05)))
    run_analyses(train_05)

예제 #4

0

파일 보기

파일: training_set_tests.py 프로젝트: maithuvenkatesh/University-Project

def main():
    """Print the training-set sizes and run ``jockeys_claim_test`` per cohort.

    The born-98 and born-05 CSV files are parsed, each horse list is split
    with ``split_dataset``, and ``jockeys_claim_test`` is called on the first
    element of each split.

    NOTE(review): this excerpt ends inside a triple-quoted string that is
    opened on its last line and never closed, so the tail of the function is
    not visible here.
    """
    #horse_parser_98 = HorseParserNoHandicaps('./../Data/born98.csv')
    #horse_parser_05 = HorseParserNoHandicaps('./../Data/born05.csv')

    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    # The second element of each split is not used in the visible code.
    horses_train_98, horses_test_98 = split_dataset(horses98)
    horses_train_05, horses_test_05 = split_dataset(horses05)

    print 'HorsesBorn98 Training Set:'
    print 'No. of horses: ' + str(len(horses_train_98))

    jockeys_claim_test(horses_train_98)

    # The string literal below is a disabled block of analysis calls; as a
    # bare expression statement it has no effect at runtime.
    '''
    rating_vs_speed(horses_train_98)
    prize_money_vs_speed(horses_train_98)  
    odds_vs_speed(horses_train_98)
    age_vs_speed(horses_train_98)
    no_of_runners_vs_speed(horses_train_98)
    race_class_vs_speed(horses_train_98)
    weight_vs_speed(horses_train_98)
    jockeys_claim_vs_speed(horses_train_98)
    place_vs_speed(horses_train_98)
    distance_vs_speed(horses_train_98)
    comptime_vs_speed(horses_train_98)
    stall_vs_speed(horses_train_98)
    #goings_vs_speed(horses_train_98)
    '''
    print ''

    print 'HorsesBorn05 Training Set:'
    print 'No. of horses: ' + str(len(horses_train_05))

    jockeys_claim_test(horses_train_05)
    
    # NOTE(review): the string opened on the next line is not terminated
    # within this excerpt.
    '''

예제 #5

0

파일 보기

파일: demo_multivariant.py 프로젝트: juliali/AITechCampus

def main():
    """Train a model on the housing data, predict on the test split, print the RMSE ratio.

    The data is loaded with a fixed list of columns passed to ``load_data``
    as the columns to ignore, preprocessed with the "normalize" mode, split
    with ``split_dataset``, and the model is written to / read from a file
    under ``output``.
    """
    saved_model = os.path.join("output", "linear_regression_model_mv.sav")
    housing_csv = os.path.join('input', 'housing.csv')

    skipped_columns = ['ZN', 'CHAS', 'NOX', 'RM', 'DIS', 'RAD', 'TAX', 'PIRATIO', 'B', 'LSTAT']
    features, targets = load_data(housing_csv, False, skipped_columns)
    features = preprocess(features, "normalize")

    X_train, y_train, X_test, y_test = split_dataset(features, targets)

    train(X_train, y_train, saved_model)
    predictions = predict(X_test, saved_model)

    print("rmse ratio:", calculate_rmse_ration(y_test, predictions))

예제 #6

0

파일 보기

파일: gradient_descent_multivariant.py 프로젝트: juliali/AITechCampus

def main(input_path, output_path, ignored_columns, preprocess_type,
         training_data_rate, step_length, threshold_rate, max_loop_num,
         dynamic_step):
    """Echo the run settings, train on the input data and report the RMSE ratio.

    The settings are printed first, then the data is loaded, preprocessed
    and split; a threshold is derived with ``gen_threshold``; the model is
    trained and written to ``output_path``; predictions for the test split
    are scored with ``calculate_rmse_ration``; finally the wall-clock
    duration measured with ``datetime.now()`` is printed.

    Returns:
        None.
    """
    # --- settings banner -------------------------------------------------
    for label, value in (("input:", input_path), ("output:", output_path)):
        print(label, value)
    print("\n")
    if ignored_columns is not None:
        print("ignored_columns:", ignored_columns)
    print("\n")
    for label, value in (("preprocess_type:", preprocess_type),
                         ("training_data_rate:", training_data_rate)):
        print(label, value)
    print("\n")
    for label, value in (("threshold_rate:", threshold_rate),
                         ("max_loop_num:", max_loop_num),
                         ("step_length:", step_length)):
        print(label, value)
    print("dynamic stepping ..." if dynamic_step else "static stepping ...")
    print("\n")

    # --- timed pipeline --------------------------------------------------
    started_at = datetime.now()

    features, targets = load_data(input_path, True, ignored_columns)
    features = preprocess(features, preprocess_type)

    X_train, y_train, X_test, y_test = split_dataset(features, targets,
                                                     training_data_rate)

    # The threshold is derived from the full target set, not just y_train.
    stop_threshold = gen_threshold(targets, threshold_rate)

    train(X_train, y_train, output_path, step_length, stop_threshold,
          max_loop_num, dynamic_step)

    predictions = predict(output_path, X_test)

    rmse_ratio = calculate_rmse_ration(y_test, predictions)
    print("rmse ratio (rmse / y_mean) is:", rmse_ratio, "\n")

    finished_at = datetime.now()
    print("execution duration:", finished_at - started_at, "\n")

예제 #7

0

파일 보기

파일: lsm_multivariant.py 프로젝트: juliali/AITechCampus

def main():
    """Fit with ``lsm`` on the housing data and print the RMSE ratio on the test split.

    The data is loaded with a fixed list of columns passed to ``load_data``
    as the columns to ignore, preprocessed with the "normalize" mode and
    split with ``split_dataset``. ``lsm`` is given a CSV path under
    ``output`` and ``predict`` is given the same path.

    Returns:
        None.
    """
    skipped_columns = [
        'ZN', 'CHAS', 'NOX', 'RM', 'DIS', 'RAD', 'TAX', 'PIRATIO', 'B', 'LSTAT'
    ]
    housing_csv = os.path.join('input', 'housing.csv')

    features, targets = load_data(housing_csv, True, skipped_columns)
    features = preprocess(features, "normalize")

    X_train, y_train, X_test, y_test = split_dataset(features, targets)

    result_csv = os.path.join('output', 'lsm_multivariant.csv')
    lsm(X_train, y_train, result_csv)

    predictions = predict(result_csv, X_test)
    print("rmse ratio:", calculate_rmse_ration(y_test, predictions))

예제 #8

0

파일 보기

def run_experiments_without_cross_validation(model_names, features_to_use):
    """Train and evaluate each requested model on a single train/test split.

    The 'pasokh' features are loaded and split through ``utilities``; every
    model named in ``model_names`` is then fitted, scored with
    ``evaluate_model`` and run through ``evaluate_summarizer``.

    Args:
        model_names: iterable of model keys. Recognised keys are 'dtr',
            'linear', 'svm', 'dummy', 'ideal' and 'nb'; any other key is
            reported and skipped.
        features_to_use: feature selection passed through to the
            ``utilities`` helpers and to ``evaluate_summarizer``.

    Returns:
        ``(rouge_scores, model_results)`` of the last model evaluated, or
        ``(None, None)`` when no recognised model was requested.
    """
    dataset_features = utilities.load_features('pasokh')
    features, targets, labels, documents, all_vec = utilities.split_dataset(dataset_features, features_to_use, 0.40)

    X_normal = np.array(all_vec)

    utilities.normalize_dataset(X_normal, features_to_use, 'learn')

    X_train = np.array(features['train'])
    X_test = np.array(features['test'])
    y_train = np.array(targets['train'])
    y_test = np.array(targets['test'])
    labels_train = np.array(labels['train'])
    labels_test = np.array(labels['test'])

    print("Dataset size: {}".format(len(X_normal)))
    # Balancing is currently disabled: the "balanced" set is the raw training set.
    X_balanced, y_balanced, labels_balanced = X_train, y_train, labels_train
    print("Train set size: {}".format(len(X_balanced)))
    print("Number of True/False labels: {}/{}".format(sum(labels_balanced), sum(1 for i in labels_balanced if not i)))
    print("Test set size: {}".format(len(X_test)))
    print("Number of True/False labels: {}/{}".format(sum(labels_test), sum(1 for i in labels_test if not i)))
    print("Used features: {}".format(len(X_balanced[0])))

    dataset_json = json.loads(utilities.read_file('resources/pasokh/all.json'))

    # Defined up front so the return statement is valid even when the loop
    # never evaluates a model (empty or entirely unknown ``model_names``).
    rouge_scores = None
    model_results = None

    for model_type in model_names:
        print('**********************' + model_type + '**********************')
        # Reset per model: previously a classifier ('nb') earlier in the list
        # left the flag False for every model that followed it.
        is_regressor = True
        if model_type == 'dtr':
            regr = tree.DecisionTreeRegressor()
            regr = regr.fit(X_balanced, y_balanced)
        elif model_type == 'linear':
            regr = linear_model.LinearRegression(normalize=True)
            regr.fit(X_balanced, y_balanced)
            print('Coefficients: \n', regr.coef_)
        elif model_type == 'svm':
            regr = SVR(verbose=True, epsilon=0.00001, gamma='auto', tol=.00001)
            regr.fit(X_balanced, y_balanced)
            print('Coefficients: \n', regr.get_params())
        elif model_type == 'dummy':
            regr = RndRegressor()
        elif model_type == 'ideal':
            from IdealRegressor import IdealRegressor
            # Constructed from the training split, then fitted on the test split.
            regr = IdealRegressor(X_train, y_train)
            regr.fit(X_test, y_test)
        elif model_type == 'nb':
            from sklearn.naive_bayes import ComplementNB

            # Classifier: trained on the boolean labels instead of the targets.
            regr = ComplementNB(alpha=1)
            regr.fit(X_train, labels_train)
            is_regressor = False
        else:
            print("Regression type is undefined:" + model_type)
            continue

        model_results = evaluate_model(regr, X_test, X_balanced, y_test, y_balanced, labels_test, labels_balanced, is_regressor)

        print('Summarizing dataset and evaluating Rouge...')
        rouge_scores = evaluate_summarizer(regr, dataset_json, features_to_use, True)
        utilities.print_rouges(rouge_scores)
        print('*****************************************************************************')
    return rouge_scores, model_results

예제 #9

0

파일 보기

파일: regression_model_one.py 프로젝트: maithuvenkatesh/University-Project

def main():
    """Fit and report a linear regression for the born-98 and born-05 horses.

    For each cohort the horses are split with ``split_dataset``, turned into
    feature/target lists with ``compute_vector``, cross-validated (5 folds,
    MSE scoring), fitted, and scored on the test split. ``ell1`` and ``ell2``
    are computed on the training split.

    Fix: the born-05 ``ell2`` was previously computed with the born-98 model;
    each cohort now always uses its own fitted model.
    """
    def build_vectors(horses):
        # compute_vector yields a (feature vector, target) pair per horse.
        X, y = [], []
        for h in horses:
            v, s = compute_vector(h)
            X.append(v)
            y.append(s)
        return X, y

    def print_counts(heading, X, y):
        print(heading)
        print(len(X))
        print(len(y))
        print('')

    def evaluate(banner, horses):
        # Full pipeline for one cohort; output order matches the original script.
        print(banner)
        horses_train, horses_test = split_dataset(horses)

        X_train, y_train = build_vectors(horses_train)
        print_counts('No. of instances in training set:', X_train, y_train)

        X_test, y_test = build_vectors(horses_test)
        print_counts('No. of instances in testing set:', X_test, y_test)

        regr = linear_model.LinearRegression(fit_intercept=True)

        cv_scores = cross_validation.cross_val_score(
            regr, np.array(X_train), np.array(y_train),
            scoring='mean_squared_error', cv=5)

        print('5-fold CV scores using MSE:')
        print(cv_scores)
        print('')

        # NOTE(review): the value labelled "SD" is twice the standard deviation.
        print('Mean of scores: ' + str(cv_scores.mean()))
        print('SD of scores: ' + str(cv_scores.std() * 2))
        print('')

        regr.fit(np.array(X_train), np.array(y_train))

        print('Coefficients:')
        print(regr.coef_)
        print('')

        print('Intercept: ')
        print(regr.intercept_)
        print('')

        y_pred = regr.predict(X_test)

        print('Mean squared error:')
        print(mean_squared_error(y_test, y_pred))
        print('')

        print('Mean absolute error:')
        print(mean_absolute_error(y_test, y_pred))
        print('')

        # ell1 and ell2 compare the actual training targets with this
        # cohort's own predictions on the training set.
        print('ell1:')
        print(ell1(y_train, regr.predict(X_train)))
        print('')

        print('ell2:')
        print(ell2(y_train, regr.predict(X_train)))
        print('')

    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    # The race tables are parsed as before, although nothing below reads them.
    races98 = RaceParserNoHandicaps('./../Data/born98.csv').races
    races05 = RaceParserNoHandicaps('./../Data/born05.csv').races

    evaluate(' HorsesBorn98 Dataset ', horses98)
    evaluate(' HorsesBorn05 Dataset ', horses05)

예제 #10

0

파일 보기

def main():
    """Fit and report a linear regression for the born-98 and born-05 horses.

    For each cohort the horses are split with ``split_dataset``, turned into
    feature/target lists with ``compute_vector``, cross-validated (5 folds,
    ``scoring='r2'``), fitted, and scored on the test split with MSE, MAE,
    ``r2_score`` and ``r1``.

    Fixes:
      * ``r1`` for the born-05 cohort was called as ``r1(pred, test)`` while
        the born-98 cohort and every other metric use ``(test, pred)``; both
        cohorts now use ``(test, pred)``.
      * The cross-validation heading said "MSE" although the scores are
        produced with ``scoring='r2'``.
    """
    def build_vectors(horses):
        # compute_vector yields a (feature vector, target) pair per horse.
        X, y = [], []
        for h in horses:
            v, s = compute_vector(h)
            X.append(v)
            y.append(s)
        return X, y

    def print_counts(heading, X, y):
        print(heading)
        print(len(X))
        print(len(y))
        print('')

    def evaluate(banner, horses):
        # Full pipeline for one cohort.
        print(banner)
        horses_train, horses_test = split_dataset(horses)

        X_train, y_train = build_vectors(horses_train)
        print_counts('No. of instances in training set:', X_train, y_train)

        X_test, y_test = build_vectors(horses_test)
        print_counts('No. of instances in testing set:', X_test, y_test)

        regr = linear_model.LinearRegression(fit_intercept=True)

        cv_scores = cross_validation.cross_val_score(
            regr,
            np.array(X_train),
            np.array(y_train),
            scoring='r2',
            cv=5)

        print('5-fold CV scores using R2:')
        print(cv_scores)
        print('')

        # NOTE(review): the value labelled "SD" is twice the standard deviation.
        print('Mean of scores: ' + str(cv_scores.mean()))
        print('SD of scores: ' + str(cv_scores.std() * 2))
        print('')

        regr.fit(np.array(X_train), np.array(y_train))

        print('Coefficients:')
        print(regr.coef_)
        print('')

        print('Intercept: ')
        print(regr.intercept_)
        print('')

        y_pred = regr.predict(X_test)

        print('Mean squared error:')
        print(mean_squared_error(y_test, y_pred))
        print('')

        print('Mean absolute error:')
        print(mean_absolute_error(y_test, y_pred))
        print('')

        print('R2 Score:')
        print(r2_score(y_test, y_pred))
        print('')

        # Actual values first, predictions second, like the other metrics.
        print('1-R1 Score:')
        print(r1(y_test, y_pred))
        print('')

    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    evaluate(' HorsesBorn98 Dataset ', horses98)
    evaluate(' HorsesBorn05 Dataset ', horses05)

예제 #11

0

파일 보기

파일: learn.py 프로젝트: Hrezaei/SummBot

def learn_models(model_names, features_to_use):
    """Train, evaluate and export each requested model on the CNN dataset.

    This version splits original texts in dataset for evaluating summaries:
    only the documents listed in the test part of the split are passed to
    ``evaluate_summarizer``.

    Args:
        model_names: iterable of model keys. Recognised keys are 'dtr',
            'linear', 'svm', 'dummy', 'ideal' and 'nb'; any other key is
            reported and skipped.
        features_to_use: list of feature names; passed through to the
            ``utilities`` helpers and to ``evaluate_summarizer``.

    Returns:
        ``(rouge_scores, model_results)`` of the last model evaluated, or
        ``(None, None)`` when no recognised model was requested.
    """
    dataset_features = utilities.load_features('CNN')
    features, targets, labels, documents, all_vec = utilities.split_dataset(
        dataset_features, features_to_use, 0.28, 'CNN')

    X_normal = np.array(all_vec)

    utilities.normalize_dataset(X_normal, features_to_use, 'learn')

    X_train = np.array(features['train'])
    X_test = np.array(features['test'])
    y_train = np.array(targets['train'])
    y_test = np.array(targets['test'])
    labels_train = np.array(labels['train'])
    labels_test = np.array(labels['test'])

    print("Dataset size: {}".format(len(all_vec)))

    X_balanced, y_balanced, labels_balanced = utilities.balance_dataset(
        X_train, y_train, labels_train, 1)
    print("Used features: " + ','.join(features_to_use))
    print("Train set size: {}".format(len(X_balanced)))
    print("Number of True/False labels: {}/{}".format(
        sum(labels_balanced), sum(1 for i in labels_balanced if not i)))
    print("Test set size: {}".format(len(X_test)))
    print("Number of True/False labels: {}/{}".format(
        sum(labels_test), sum(1 for i in labels_test if not i)))
    print("Used features: {}".format(len(X_balanced[0])))

    dataset_json = json.loads(
        utilities.read_file('resources/CNN/documents.json'))
    # Keep only the test documents, re-keyed by integer id.
    test_documents = {int(key): dataset_json[key] for key in documents['test']}

    # Defined up front so the return statement is valid even when the loop
    # never evaluates a model (empty or entirely unknown ``model_names``).
    rouge_scores = None
    model_results = None

    for model_type in model_names:
        print('**********************' + model_type + '**********************')
        # Reset per model: previously 'dummy' or 'nb' earlier in the list
        # left the flag False for every model that followed.
        is_regressor = True
        if model_type == 'dtr':
            regr = tree.DecisionTreeRegressor(criterion='friedman_mse')
            regr = regr.fit(X_balanced, y_balanced)
            print(regr.get_params())
            export_name = 'dtr'
        elif model_type == 'linear':
            regr = linear_model.LinearRegression()
            regr.fit(X_balanced, y_balanced)
            print('Coefficients: \n', regr.coef_)
            export_name = 'linear'
        elif model_type == 'svm':
            regr = SVR(kernel='rbf',
                       degree=7,
                       verbose=False,
                       epsilon=0.000001,
                       gamma='scale',
                       tol=.0000001,
                       shrinking=True)
            regr.fit(X_balanced, y_balanced)
            print('Coefficients: \n', regr.get_params())
            export_name = 'svm'
        elif model_type == 'dummy':
            regr = RndRegressor()
            export_name = 'dummy'
            is_regressor = False
        elif model_type == 'ideal':
            from IdealRegressor import IdealRegressor
            # Constructed from the training split, then fitted on the test split.
            regr = IdealRegressor(X_train, y_train)
            regr.fit(X_test, y_test)
            export_name = 'ideal'
        elif model_type == 'nb':
            from sklearn.naive_bayes import ComplementNB

            # Classifier: trained on the boolean labels of the unbalanced
            # training split instead of the regression targets.
            regr = ComplementNB(alpha=0.015)
            regr.fit(X_train, labels_train)
            is_regressor = False
            export_name = 'nb'
        else:
            print("Regression type is undefined:" + model_type)
            continue

        model_results = Learn.evaluate_model(regr, X_test, X_balanced, y_test,
                                             y_balanced, labels_test,
                                             labels_balanced, is_regressor)

        print('Summarizing dataset and evaluating Rouge...')

        rouge_scores = evaluate_summarizer(regr, test_documents,
                                           features_to_use, True)
        utilities.print_rouges(rouge_scores)
        utilities.export_model(regr, export_name)
        print(
            '*****************************************************************************'
        )
    return rouge_scores, model_results

예제 #12

0

파일 보기

파일: regression_past_performance_turf.py 프로젝트: maithuvenkatesh/University-Project

def main():
    """Fit and report a linear regression for the born-98 and born-05 horses.

    For each cohort the horses are split with ``split_dataset``, turned into
    feature/target lists with ``compute_vector``, fitted, and scored on the
    test split with MSE, MAE, ``r2_score`` and ``r1``.

    Fix: ``r1`` for the born-05 cohort was called as ``r1(pred, test)`` while
    the born-98 cohort and every other metric use ``(test, pred)``; both
    cohorts now use ``(test, pred)``.
    """
    def build_vectors(horses):
        # compute_vector yields a (feature vector, target) pair per horse.
        X, y = [], []
        for h in horses:
            v, s = compute_vector(h)
            X.append(v)
            y.append(s)
        return X, y

    def print_counts(heading, X, y):
        print(heading)
        print(len(X))
        print(len(y))
        print('')

    def evaluate(banner, horses):
        # Full pipeline for one cohort.
        print(banner)
        horses_train, horses_test = split_dataset(horses)

        X_train, y_train = build_vectors(horses_train)
        print_counts('No. of instances in training set:', X_train, y_train)

        X_test, y_test = build_vectors(horses_test)
        print_counts('No. of instances in testing set:', X_test, y_test)

        regr = linear_model.LinearRegression(fit_intercept=True)
        regr.fit(np.array(X_train), np.array(y_train))

        print('Coefficients:')
        print(regr.coef_)
        print('')

        print('Intercept: ')
        print(regr.intercept_)
        print('')

        y_pred = regr.predict(X_test)

        print('Mean squared error:')
        print(mean_squared_error(y_test, y_pred))
        print('')

        print('Mean absolute error:')
        print(mean_absolute_error(y_test, y_pred))
        print('')

        print('R2 Score:')
        print(r2_score(y_test, y_pred))
        print('')

        # Actual values first, predictions second, like the other metrics.
        print('1-R1 Score:')
        print(r1(y_test, y_pred))
        print('')

    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    evaluate(' HorsesBorn98 Dataset ', horses98)
    evaluate(' HorsesBorn05 Dataset ', horses05)

예제 #13

0

파일 보기

파일: svr_baseline.py 프로젝트: maithuvenkatesh/University-Project

def main():
    """Train a linear-kernel SVR on the born-98 horses and print test metrics.

    Both CSV files are parsed for horses and races, but only the born-98
    horses are split, vectorised with ``compute_vector`` and used for
    training and evaluation.
    """
    def to_vectors(horses):
        # One compute_vector call per horse, in order; each yields a
        # (feature vector, target) pair.
        pairs = [compute_vector(h) for h in horses]
        return [v for v, _ in pairs], [s for _, s in pairs]

    def show(heading, value):
        print(heading)
        print(value)
        print('')

    born_98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    born_05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    # Parsed as in the rest of the project's scripts; unused below.
    race_table_98 = RaceParserNoHandicaps('./../Data/born98.csv').races
    race_table_05 = RaceParserNoHandicaps('./../Data/born05.csv').races

    print('HorsesBorn98 Dataset')
    train_horses, test_horses = split_dataset(born_98)

    X_train, y_train = to_vectors(train_horses)
    print('No. of instances in training set:')
    print(len(X_train))
    print(len(y_train))
    print('')

    X_test, y_test = to_vectors(test_horses)
    print('No. of instances in testing set:')
    print(len(X_test))
    print(len(y_test))
    print('')

    print('Create SVR object')
    model = SVR(kernel='linear', C=1e3)

    print('Training SVR')
    model.fit(X_train, y_train)

    print('Predicting')
    y_pred = model.predict(X_test)

    # The model's own score() is printed first, then the standalone metrics.
    show('Variance score:', model.score(X_test, y_test))
    show('Mean absolute error:', mean_absolute_error(y_test, y_pred))
    show('Explained variance:', explained_variance_score(y_test, y_pred))
    show('Mean squared error:', mean_squared_error(y_test, y_pred))
    show('R2 score:', r2_score(y_test, y_pred))

예제 #14

0

파일 보기

파일: regression_two.py 프로젝트: maithuvenkatesh/University-Project

def _build_xy(horses):
    """Convert horses into parallel feature and target lists.

    Calls ``compute_vector`` once per horse, in order; the first element of
    each returned pair goes into ``X`` and the second into ``y``.

    Returns:
        A ``(X, y)`` tuple of lists with one entry per horse.
    """
    X, y = [], []
    for horse in horses:
        features, target = compute_vector(horse)
        X.append(features)
        y.append(target)
    return X, y


def _fit_and_report_linear_regression(horses):
    """Split ``horses``, fit a linear regression and print its test metrics.

    The printed output (labels, order, blank separator lines) is the same
    sequence the script has always produced for a single dataset.
    """
    train_horses, test_horses = split_dataset(horses)

    X_train, y_train = _build_xy(train_horses)
    print('No. of instances in training set:')
    print(len(X_train))
    print(len(y_train))
    print('')

    X_test, y_test = _build_xy(test_horses)
    print('No. of instances in testing set:')
    print(len(X_test))
    print(len(y_test))
    print('')

    model = linear_model.LinearRegression()
    model.fit(X_train, y_train)

    print('Coefficients:')
    print(model.coef_)
    print('')

    # NOTE(review): for scikit-learn regressors ``score`` returns R^2 rather
    # than explained variance, so this presumably duplicates the 'R2 score'
    # below. The label is kept so the script's output does not change.
    print('Variance score:')
    print(model.score(X_test, y_test))
    print('')

    # Predict once and reuse the result for every metric.
    y_pred = model.predict(X_test)

    print('Mean absolute error:')
    print(mean_absolute_error(y_test, y_pred))
    print('')

    print('Explained variance:')
    print(explained_variance_score(y_test, y_pred))
    print('')

    print('Mean squared error:')
    print(mean_squared_error(y_test, y_pred))
    print('')

    print('R2 score:')
    print(r2_score(y_test, y_pred))
    print('')


def main():
    """Fit and evaluate a linear regression on the born-98 and born-05 horses.

    Both datasets go through the same split / vectorise / fit / report
    pipeline, born-98 first.
    """
    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    # NOTE(review): the race data is parsed but never used below; the calls
    # are kept in case the parsers have side effects.
    races98 = RaceParserNoHandicaps('./../Data/born98.csv').races
    races05 = RaceParserNoHandicaps('./../Data/born05.csv').races

    # HorsesBorn98 Dataset
    _fit_and_report_linear_regression(horses98)

    # HorsesBorn05 Dataset
    _fit_and_report_linear_regression(horses05)

예제 #15

0

파일 보기

def _build_xy(horses):
    """Convert horses into parallel feature and target lists.

    Calls ``compute_vector`` once per horse, in order; the first element of
    each returned pair goes into ``X`` and the second into ``y``.

    Returns:
        A ``(X, y)`` tuple of lists with one entry per horse.
    """
    X, y = [], []
    for horse in horses:
        features, target = compute_vector(horse)
        X.append(features)
        y.append(target)
    return X, y


def _fit_and_report_with_r1(title, horses):
    """Print ``title``, then split, fit and report on one dataset.

    Fits ``linear_model.LinearRegression(fit_intercept=True)`` on the
    training split and prints coefficients, intercept and four error
    metrics computed on the test split.
    """
    print(title)
    train_horses, test_horses = split_dataset(horses)

    X_train, y_train = _build_xy(train_horses)
    print('No. of instances in training set:')
    print(len(X_train))
    print(len(y_train))
    print('')

    X_test, y_test = _build_xy(test_horses)
    print('No. of instances in testing set:')
    print(len(X_test))
    print(len(y_test))
    print('')

    model = linear_model.LinearRegression(fit_intercept=True)
    model.fit(np.array(X_train), np.array(y_train))

    print('Coefficients:')
    print(model.coef_)
    print('')

    print('Intercept: ')
    print(model.intercept_)
    print('')

    # Predict using the testing set
    y_pred = model.predict(X_test)

    print('Mean squared error:')
    print(mean_squared_error(y_test, y_pred))
    print('')

    print('Mean absolute error:')
    print(mean_absolute_error(y_test, y_pred))
    print('')

    print('R2 Score:')
    print(r2_score(y_test, y_pred))
    print('')

    # Always pass (actual, predicted), matching every other metric call here.
    # The born-05 section used to pass the arguments the other way round.
    # NOTE(review): r1 is defined elsewhere - confirm it expects this order.
    print('1-R1 Score:')
    print(r1(y_test, y_pred))
    print('')


def main():
    """Fit and evaluate a linear regression on the born-98 and born-05 horses.

    Both datasets go through the same pipeline, born-98 first, each preceded
    by its dataset heading.
    """
    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    _fit_and_report_with_r1(' HorsesBorn98 Dataset ', horses98)
    _fit_and_report_with_r1(' HorsesBorn05 Dataset ', horses05)

예제 #16

0

파일 보기

파일: regression_one.py 프로젝트: cheungsingyi/University-Project

def _build_xy(horses):
    """Convert horses into parallel feature and target lists.

    Calls ``compute_vector`` once per horse, in order; the first element of
    each returned pair goes into ``X`` and the second into ``y``.

    Returns:
        A ``(X, y)`` tuple of lists with one entry per horse.
    """
    X, y = [], []
    for horse in horses:
        features, target = compute_vector(horse)
        X.append(features)
        y.append(target)
    return X, y


def _fit_and_report_linear_regression(horses):
    """Split ``horses``, fit a linear regression and print its test metrics.

    The printed output (labels, order, blank separator lines) is the same
    sequence the script has always produced for a single dataset.
    """
    train_horses, test_horses = split_dataset(horses)

    X_train, y_train = _build_xy(train_horses)
    print('No. of instances in training set:')
    print(len(X_train))
    print(len(y_train))
    print('')

    X_test, y_test = _build_xy(test_horses)
    print('No. of instances in testing set:')
    print(len(X_test))
    print(len(y_test))
    print('')

    model = linear_model.LinearRegression()
    model.fit(X_train, y_train)

    print('Coefficients:')
    print(model.coef_)
    print('')

    # NOTE(review): for scikit-learn regressors ``score`` returns R^2 rather
    # than explained variance, so this presumably duplicates the 'R2 score'
    # below. The label is kept so the script's output does not change.
    print('Variance score:')
    print(model.score(X_test, y_test))
    print('')

    # Predict once and reuse the result for every metric.
    y_pred = model.predict(X_test)

    print('Mean absolute error:')
    print(mean_absolute_error(y_test, y_pred))
    print('')

    print('Explained variance:')
    print(explained_variance_score(y_test, y_pred))
    print('')

    print('Mean squared error:')
    print(mean_squared_error(y_test, y_pred))
    print('')

    print('R2 score:')
    print(r2_score(y_test, y_pred))
    print('')


def main():
    """Fit and evaluate a linear regression on the born-98 and born-05 horses.

    Both datasets go through the same split / vectorise / fit / report
    pipeline, born-98 first.
    """
    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    # NOTE(review): the race data is parsed but never used below; the calls
    # are kept in case the parsers have side effects.
    races98 = RaceParserNoHandicaps('./../Data/born98.csv').races
    races05 = RaceParserNoHandicaps('./../Data/born05.csv').races

    # HorsesBorn98 Dataset
    _fit_and_report_linear_regression(horses98)

    # HorsesBorn05 Dataset
    _fit_and_report_linear_regression(horses05)

예제 #17

0

파일 보기

파일: svr_baseline.py 프로젝트: cheungsingyi/University-Project

def main():
    """Train a linear-kernel SVR on the born-98 horses and print test metrics.

    Parses the horse and race CSV files, splits the born-98 horses with
    ``split_dataset``, turns each horse into a (vector, target) pair via
    ``compute_vector``, fits an ``SVR`` on the training split and prints its
    score plus several regression metrics for the test split.
    """
    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    # NOTE(review): horses05, races98 and races05 are not read anywhere in
    # the visible part of this function.
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    races98 = RaceParserNoHandicaps('./../Data/born98.csv').races
    races05 = RaceParserNoHandicaps('./../Data/born05.csv').races

    print 'HorsesBorn98 Dataset'
    horses_train_98, horses_test_98 = split_dataset(horses98)

    # Build the training features (X) and targets (y), one entry per horse.
    horses_98_X_train = []
    horses_98_y_train = []
    for h in horses_train_98:
        v, s = compute_vector(h)
        horses_98_X_train.append(v)
        horses_98_y_train.append(s)

    print 'No. of instances in training set:'
    print len(horses_98_X_train)
    print len(horses_98_y_train)
    print ''

    # Build the test features and targets the same way.
    horses_98_X_test = []
    horses_98_y_test = []
    for h in horses_test_98:
        v, s = compute_vector(h)
        horses_98_X_test.append(v)
        horses_98_y_test.append(s)

    print 'No. of instances in testing set:'
    print len(horses_98_X_test)
    print len(horses_98_y_test)
    print ''

    print 'Create SVR object'
    # Create svr object
    svr98 = SVR(kernel='linear', C=1e3)  #, gamma=0.1)

    print 'Training SVR'
    # Train the model using the training sets
    svr98.fit(horses_98_X_train, horses_98_y_train)

    print 'Predicting'
    # Predictions are computed once and reused by every metric below.
    horses_98_y_pred = svr98.predict(horses_98_X_test)

    # Estimator's own score on the test split.
    # NOTE(review): if SVR is scikit-learn's, score() returns R^2 (not
    # explained variance) and should match the 'R2 score' printed below.
    print 'Variance score:'
    print svr98.score(horses_98_X_test, horses_98_y_test)
    print ''

    # Each metric below is called as metric(actual, predicted).
    print 'Mean absolute error:'
    print mean_absolute_error(horses_98_y_test, horses_98_y_pred)
    print ''

    print 'Explained variance:'
    print explained_variance_score(horses_98_y_test, horses_98_y_pred)
    print ''

    print 'Mean squared error:'
    print mean_squared_error(horses_98_y_test, horses_98_y_pred)
    print ''

    print 'R2 score:'
    print r2_score(horses_98_y_test, horses_98_y_pred)
    print ''