def cv(data, target, multivariant=False):
    """Hold out 20% of the data and fit a linear model on the remainder.

    With ``multivariant=False`` a plain (single-target) linear regression is
    run without plotting; otherwise a multivariant fit with the
    Ridge-regression cost function is used.  Uses a fixed random_state so the
    split is reproducible.
    """
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        data, target, test_size=0.2, random_state=0)
    if multivariant is False:
        linear_regression(x_train, x_test, y_train, y_test, plot=False)
        return
    linear_regression_multivariant(
        x_train, x_test, y_train, y_test, cost_fun='Ridge_Regression')
Пример #2
0
def cv(data, target, multivariant=False):
    """Hold out 10% of the data and fit linear models on the remainder.

    With ``multivariant=False`` a plain linear regression is run without
    plotting; otherwise every multivariant cost function is tried in turn
    (OLS, Ridge, Bayesian, SVR, KNN).  Fixed random_state keeps the split
    reproducible.
    """
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        data, target, test_size=0.1, random_state=10)
    if multivariant is False:
        linear_regression(x_train, x_test, y_train, y_test, plot=False)
        return
    # Same call order as spelling each invocation out by hand.
    for cost in ("ordinary_least_squares", "Ridge_Regression",
                 "Bayesian_Regression", "SVR", "KNN_Reg"):
        linear_regression_multivariant(
            x_train, x_test, y_train, y_test, cost_fun=cost)
Пример #3
0
def avg(training_file, submission_file, output_file):
    """Train regressors on chunk/hour/weekday averages and fill a submission.

    Reads the training file, builds average-based features, trains models,
    then replaces every '0' placeholder in the submission rows with a model
    prediction and writes the completed submission to output_file.
    (Python 2 code — note the print statement.)
    """
    data = utilities.read_file(training_file)

    # Split raw rows into train/CV partitions by chunk, then pull targets.
    train_data, cv_data = preprocess.get_train_cv_data_by_chunk(data)
    targets_train, targets_cv = preprocess.get_train_cv_targets(
        train_data, cv_data)

    # Average lookup maps keyed by chunk, hour and weekday (global and
    # per-chunk), computed from the training partition only.
    (chunk_avg, hour_avg_by_chunk, weekday_avg_by_chunk,
     hour_avg, weekday_avg) = feature_extraction.get_avg_maps(train_data)

    x_train_all, x_cv_all = feature_extraction.get_x_by_avg(
            train_data, cv_data, chunk_avg, hour_avg_by_chunk,
             weekday_avg_by_chunk, hour_avg, weekday_avg)

    # NOTE(review): the linear-regression classifiers are immediately
    # overwritten by the random-forest ones below — only the random forest
    # models are used for prediction. Confirm whether the first call is
    # kept for its console output/side effects or is dead code.
    clfs = regression.linear_regression(
        x_train_all, x_cv_all, targets_train, targets_cv)
    clfs = regression.random_forest(
        x_train_all, x_cv_all, targets_train, targets_cv)

    print 'Filling submission file...'
    sub_data = utilities.read_file(submission_file, True)
    # Row 0 is presumably a header — rows are processed from index 1.
    for i in range(1, len(sub_data)):
        chunk_id = sub_data[i][1]
        hour = sub_data[i][3]
        # Weekday is unknown for submission rows; an empty string is passed
        # so only chunk/hour averages drive the features — TODO confirm.
        weekday = ''
        all_features = feature_extraction.get_features(
            chunk_id, weekday, hour, chunk_avg, hour_avg_by_chunk,
            weekday_avg_by_chunk, hour_avg, weekday_avg)

        # Columns 5.. are the target columns; '0' marks a value to predict.
        # One classifier per target column (clfs[j - 5]).
        for j in range(5, len(sub_data[i])):
            if sub_data[i][j] == '0':
                feature = []
                for f in all_features:
                    feature.append(f[j - 5])
                sub_data[i][j] = clfs[j - 5].predict([feature])[0]

    utilities.write_file(output_file, sub_data)
Пример #4
0
import regression as reg
import load_data as ld


# Build a sample file from the airfoil data set (the meaning of the
# arguments — presumably a sample size/fraction of 10 — is defined by
# load_data.create_sample; confirm there), then split it into train/test
# sets. createSets' 75 looks like a 75/25 percentage split — TODO confirm.
# File names 'train_set.csv'/'test_set.csv' appear to be fixed by createSets.
ld.create_sample('airfoil_self_noise_.csv','new_file.csv', 10)
ld.createSets('new_file.csv', 75)
# Fit regression coefficients on the training split, then evaluate the
# resulting model on the held-out test split.
b_vector = reg.linear_regression('train_set.csv')
error = reg.test("test_set.csv", b_vector)
Пример #5
0
		#print(data_list[i][0:-2])
		character.append(data_list[i][:-1])
		label.append(data_list[i][-1])
	return np.array(character), np.array(label)


if __name__ == '__main__':
	# Path to the Boston housing data file (the directory name in the path
	# is Chinese for "Boston housing price data set").
	file_path = '/Users/yuqishi/Documents/machine_learning/data/波士顿房价数据集/housing_data.txt'
	# Row index at which the data is split into train/test — presumably the
	# first 450 rows become the training set; confirm against get_Data.
	split = 450

	# Obtain the training samples and the test samples.
	train_data, test_data = get_Data(file_path, split)

	# Obtain the feature and label arrays of the training samples.
	train_X, train_Y = split_data(train_data)

	# Obtain the feature and label arrays of the test samples.
	test_X, test_Y = split_data(test_data)

	# Train on the training set to obtain the weight parameters w.
	w = linear_regression(train_X, train_Y)
	print(predict(test_X, test_Y, w))