Exemplo n.º 1
0
def main():
    """Run the single-variable gradient-descent demo on the salary data.

    Reads the ``experience`` and ``salary`` columns from the salary CSV in
    the ``input`` directory, calls ``train`` with a fixed step, a generated
    threshold and an iteration cap, predicts for the same inputs, plots the
    outcome and prints the RMSE ratio.
    """
    model_path = os.path.join('output', 'gradient_descent.csv')
    salary_table = pd.read_csv(os.path.join('input', 'salary.csv'))

    experience = salary_table['experience']
    salary = salary_table['salary']

    # Each setting is echoed right after it is chosen so the console output
    # records the configuration of the run.
    step = 0.005
    print("step:", step)

    threshold = gen_threshold(salary, 0.001)
    print("threshold:", threshold)

    max_loop_num = 1000
    print("max_loop_num:", max_loop_num)

    # model_path is handed to train() and then to predict(); presumably the
    # fitted parameters are persisted there -- confirm against those helpers.
    train(experience, salary, model_path, step, threshold, max_loop_num)
    predicted = predict(model_path, experience)

    draw_graph(experience, salary, predicted)

    # The metric is fed (n, 1) column vectors.
    salary_column = np.array(salary).reshape(len(salary), 1)
    predicted_column = np.array(predicted).reshape(len(predicted), 1)

    rmse_ratio = calculate_rmse_ration(salary_column, predicted_column)
    print("rmse ratio:", rmse_ratio)
Exemplo n.º 2
0
def main():
    """Train on the first salary samples, test on the rest, report and plot.

    Reads ``experience`` / ``salary`` from the salary CSV in the ``input``
    directory, splits both at a fixed index, calls ``train`` and ``predict``
    with a model file path under ``output``, prints the RMSE ratio of the
    held-out predictions and draws them.
    """
    model_file_path = os.path.join("output", "linear_regression_model.sav")

    frame = pd.read_csv(os.path.join('input', 'salary.csv'))
    experiences = np.array(frame['experience'])
    salaries = np.array(frame['salary'])

    # Samples before this index are used for training, the remainder for
    # testing. The original note describes the test set as "the last 4"
    # samples, which holds only for an 11-row file -- TODO confirm.
    split_at = 7

    # Features are reshaped to a single column (n, 1).
    X_train = experiences[:split_at].reshape(-1, 1)
    X_test = experiences[split_at:].reshape(-1, 1)

    # Targets (the true values for the features) are split at the same index.
    y_train, y_test = salaries[:split_at], salaries[split_at:]

    train(X_train, y_train, model_file_path)
    y_predicted = predict(X_test, model_file_path)

    rmse_ratio = calculate_rmse_ration(y_test, y_predicted)
    print("rmse ratio:", rmse_ratio)

    draw_graph(X_test, y_test, y_predicted)
Exemplo n.º 3
0
def main():
    """Run the multi-variable regression demo on the housing data.

    Loads the housing CSV with a list of columns to ignore, normalizes the
    features, splits them into train/test parts, calls ``train`` and
    ``predict`` with a model file path under ``output`` and prints the RMSE
    ratio of the test predictions.
    """
    # NOTE(review): 'PIRATIO' may be a misspelling of 'PTRATIO'; it has to
    # match the CSV header, so confirm against the file before changing it.
    ignored_columns = [
        'ZN', 'CHAS', 'NOX', 'RM', 'DIS', 'RAD', 'TAX', 'PIRATIO', 'B',
        'LSTAT',
    ]
    housing_csv = os.path.join('input', 'housing.csv')
    model_file_path = os.path.join("output", "linear_regression_model_mv.sav")

    features, targets = load_data(housing_csv, False, ignored_columns)
    features = preprocess(features, "normalize")

    X_train, y_train, X_test, y_test = split_dataset(features, targets)

    train(X_train, y_train, model_file_path)
    y_predicted = predict(X_test, model_file_path)

    rmse_ratio = calculate_rmse_ration(y_test, y_predicted)
    print("rmse ratio:", rmse_ratio)
def main(input_path, output_path, ignored_columns, preprocess_type,
         training_data_rate, step_length, threshold_rate, max_loop_num,
         dynamic_step):
    """Run one configurable train/predict/evaluate cycle and time it.

    The settings are printed first, then the data is loaded, preprocessed
    and split, ``train`` and ``predict`` are called, and the RMSE ratio and
    the elapsed wall-clock time are printed.

    Args:
        input_path: Passed to ``load_data`` as the data source.
        output_path: Passed to ``train`` and afterwards to ``predict``.
        ignored_columns: Passed to ``load_data``; echoed only when not None.
        preprocess_type: Passed to ``preprocess`` together with the features.
        training_data_rate: Passed to ``split_dataset``.
        step_length: Passed to ``train``.
        threshold_rate: Passed to ``gen_threshold`` together with the targets.
        max_loop_num: Passed to ``train``.
        dynamic_step: Passed to ``train``; its truthiness also selects which
            stepping message is printed.
    """
    # --- echo the configuration -------------------------------------------
    print("input:", input_path)
    print("output:", output_path)
    print("\n")
    if ignored_columns is not None:
        print("ignored_columns:", ignored_columns)
    print("\n")
    print("preprocess_type:", preprocess_type)
    print("training_data_rate:", training_data_rate)
    print("\n")
    print("threshold_rate:", threshold_rate)
    print("max_loop_num:", max_loop_num)
    print("step_length:", step_length)
    print("dynamic stepping ..." if dynamic_step else "static stepping ...")
    print("\n")

    # --- timed section: everything from loading to evaluation -------------
    started_at = datetime.now()

    features, targets = load_data(input_path, True, ignored_columns)
    features = preprocess(features, preprocess_type)

    X_train, y_train, X_test, y_test = split_dataset(
        features, targets, training_data_rate)

    # The threshold is derived from the full target set, not only the
    # training part.
    threshold = gen_threshold(targets, threshold_rate)

    train(X_train, y_train, output_path, step_length, threshold,
          max_loop_num, dynamic_step)
    predictions = predict(output_path, X_test)

    rmse_ratio = calculate_rmse_ration(y_test, predictions)
    print("rmse ratio (rmse / y_mean) is:", rmse_ratio, "\n")

    elapsed = datetime.now() - started_at
    print("execution duration:", elapsed, "\n")
Exemplo n.º 5
0
def main():
    """Run the ``lsm`` demo on the salary data.

    Reads ``experience`` / ``salary`` from the salary CSV in the ``input``
    directory, calls ``lsm`` with an output path, predicts for the same
    inputs, plots the result and prints the RMSE ratio.
    """
    result_path = os.path.join('output', 'lsm.csv')
    salary_table = pd.read_csv(os.path.join('input', 'salary.csv'))

    experience = salary_table['experience']
    salary = salary_table['salary']

    # result_path is handed to lsm() and then to predict(); presumably the
    # fitted parameters are persisted there -- confirm against those helpers.
    lsm(experience, salary, result_path)
    predicted = predict(result_path, experience)

    draw_graph(experience, salary, predicted)

    # Only the true values are reshaped to an (n, 1) column here; the
    # predictions are passed to the metric exactly as predict() returned them.
    salary_column = np.array(salary).reshape(len(salary), 1)
    rmse_ratio = calculate_rmse_ration(salary_column, predicted)
    print("rmse ratio:", rmse_ratio)
Exemplo n.º 6
0
def main():
    """Run the ``lsm`` demo on the housing data.

    Loads the housing CSV with a list of columns to ignore, normalizes the
    features, splits them into train/test parts, calls ``lsm`` on the
    training part, predicts for the test part and prints the RMSE ratio.
    """
    # NOTE(review): 'PIRATIO' may be a misspelling of 'PTRATIO'; it has to
    # match the CSV header, so confirm against the file before changing it.
    ignored_columns = [
        'ZN', 'CHAS', 'NOX', 'RM', 'DIS', 'RAD', 'TAX', 'PIRATIO', 'B',
        'LSTAT',
    ]
    housing_csv = os.path.join('input', 'housing.csv')

    features, targets = load_data(housing_csv, True, ignored_columns)
    features = preprocess(features, "normalize")

    X_train, y_train, X_test, y_test = split_dataset(features, targets)

    result_path = os.path.join('output', 'lsm_multivariant.csv')

    lsm(X_train, y_train, result_path)
    y_predicted = predict(result_path, X_test)

    rmse_ratio = calculate_rmse_ration(y_test, y_predicted)
    print("rmse ratio:", rmse_ratio)