def test_linear_regression():

    # Create a data frame
    d = {
        "y": pd.Series(
            [95, 85, 80, 75, 70, 65, 60, 55, 50, 45], index=["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
        ),
        "x1": pd.Series(
            [85, 95, 70, 65, 70, 60, 64, 60, 51, 49], index=["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
        ),
        "x2": pd.Series(
            [10, 8.8, 8.4, 7.5, 7.4, 7.2, 7.0, 6.4, 5.3, 4], index=["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
        ),
    }

    df = pd.DataFrame(d)

    beta1, pvalues1 = linear_regression(df, "y", "x1")
    beta2, pvalues2 = linear_regression(df, "y", "x2")
    beta3, pvalues3 = linear_regression(df, "y", "x1", "x2")

    expected_beta1 = np.array([0.69128736, 1.00610931])  # Calculated by hand
    expected_p1 = np.array([0.95669000991385234, 0.00082441892685309844])  # Calculated by hand
    expected_beta2 = np.array([2.92830189, 9.03773585])  # Calculated by hand
    expected_p2 = np.array([0.64660353670191761, 1.2010523101013017e-05])  # Calculated by hand
    expected_beta3 = np.array([-0.01554384, 0.20355359, 7.55525124])  # Calculated by hand
    expected_p3 = np.array([0.99826544217405555, 0.37237722050579208, 0.0049816157477362418])  # Calculated by hand

    assert_almost_equal(expected_beta1, beta1)
    assert_almost_equal(expected_p1, pvalues1)
    assert_almost_equal(expected_beta2, beta2)
    assert_almost_equal(expected_p2, pvalues2)
    assert_almost_equal(expected_beta3, beta3)
    assert_almost_equal(expected_p3, pvalues3)
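
Note: the linear_regression helper exercised by this test is not shown on this page. Below is a minimal sketch of a compatible implementation, assuming statsmodels is available and that the intercept comes first in the returned coefficient and p-value arrays; it is an illustration, not necessarily the original author's code.

import pandas as pd
import statsmodels.api as sm

def linear_regression(df, y_col, *x_cols):
    # Ordinary least squares of y_col on x_cols, with an intercept term.
    X = sm.add_constant(df[list(x_cols)])
    results = sm.OLS(df[y_col], X).fit()
    # Intercept first, then one entry per predictor, matching the test above.
    return results.params.values, results.pvalues.values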
Example #3
def run_linear(train, test):
    print("RUNNING LINEAR")
    # Enrich the training frame (e.g. fill in missing ages)
    train = add_information(train)
    train.to_csv("test.csv")  # dump the enriched training set for inspection
    model = linear_regression(train)

    test = add_information(test)
    return evaluate_linear_regression(model, test)
Example #4
def brute_force(data):
    for col1 in data.columns:
        X = data[col1]
        if not isinstance(X.iloc[1], str):  # skip string-valued columns
            for col2 in data.columns:
                y = data[col2]
                if not isinstance(y.iloc[1], str) and col1 != col2:
                    y_p = linear_regression(X, y)
                    plot_graph(data, X, y, y_p)
Example #5
def performing_algorithm(X, y, X_test):
    """
    :param X: Matrix
    :param y: Matrix
    :param X_test: Matrix
    :return: Prediction of chosen algorithm
    """
    if args.algorithm == "linear_regression":
        return linear_regression(X, y, X_test)
    elif args.algorithm == "decision_tree":
        return decision_tree(X, y, X_test)
    elif args.algorithm == "SVM":
        return SVM(X, y, X_test)
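
Note: performing_algorithm reads a module-level args object that is not shown here. A hypothetical sketch of the kind of argparse wiring it assumes; the flag name and choices are guesses based on the branches above.

import argparse

# Hypothetical CLI setup providing the module-level `args` used by performing_algorithm.
parser = argparse.ArgumentParser()
parser.add_argument("--algorithm",
                    choices=["linear_regression", "decision_tree", "SVM"],
                    default="linear_regression")
args = parser.parse_args()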
Example #6
def choose_function(data):
    choice = int(
        input("Choose a function (#0 linear regression, #1 brute force): "))
    if choice == 0:
        print("Linear regression chosen.")
        X, y = columns_selection(data)
        y_p = linear_regression(X, y)
        plot_graph(data, X, y, y_p)
    elif choice == 1:
        print("Brute force chosen.")
        brute_force(data)
    else:
        choose_function(data)
Example #7
def plot_correlation(folder, lambda_alpha):

    file_path = os.path.join(folder,'gompertz','individualParameters','estimatedIndividualParameters.txt')
    df = pd.read_csv(file_path, sep = ',')
    alpha = df['alpha0_mode']
    beta = df['beta_mode']

    lr = linear_regression(beta.values, alpha.values, 1)
    fig, ax, fig_text = plot_regression_line(beta, alpha, lr.params, np.min([lr.rsquared, 0.99]), np.max([1e-5,np.min(lr.pvalues)]))
    ax.plot([beta.min(), beta.max()], lambda_alpha * np.ones(2), 'k:', label=r'$\lambda$ = ' + str(lambda_alpha))
    ax.legend(loc=4, fontsize=13)
    figname=os.path.join(folder, 'correlation.pdf')
    fig.savefig(figname, dpi = 1000, format = 'pdf', bbox_inches='tight')
    fig_text.savefig(os.path.join(folder, 'correlation_box.pdf'), dpi = 1000, format = 'pdf', bbox_inches='tight')
Example #8
def age_prediction(matrix, column):
    """
    :param matrix: Matrix
    :param column: Int
    :return: None; missing ages in matrix are filled in place with predicted values
    """
    train_age = []
    test_age = []
    index = []
    for line in range(len(matrix)):
        if pd.isnull(matrix[line][column]):
            test_age.append(matrix[line])
            index.append(line)
        else:
            train_age.append(matrix[line])

    test_age = np.array(test_age)
    train_age = np.array(train_age)

    X_age = np.delete(train_age, [column], 1)
    X_age = X_age.astype(float)
    y_age = train_age[:, column]
    y_age = y_age.astype(int)
    X_test_age = np.delete(test_age, [column], 1)
    X_test_age = X_test_age.astype(float)

    predicted_age = linear_regression(X_age, y_age, X_test_age)

    # Clamp nonsensical negative predictions to a minimum age of 1
    for line in range(len(predicted_age)):
        if predicted_age[line] < 0:
            predicted_age[line] = 1

    var = 0
    for line in range(len(matrix)):
        if pd.isnull(matrix[line][column]):
            matrix[line][column] = predicted_age[var]
            var += 1
Example #9
data_each = []

for i in all_subjects:
	data_each.append(load_data(i, data_dir))

for i in range(len(all_subjects)):
	data_each[i]['ratio'] = data_each[i]['gain'] / data_each[i]['loss']



###############################
#  Perform linear regression  #
###############################

data = all_data

# Run the linear_regression function to get the summary
beta1, pvalues1 = linear_regression(data, 'RT', 'gain', 'loss')

beta2, pvalues2 = linear_regression(data, 'RT', 'ratio')

beta3, pvalues3 = linear_regression(data, 'RT', 'diff')


#######################
#        Plot         #
#######################


# Plot the simple regression
# Since the ratio is the most significant predictor

y = data['RT']
Example #10
############### Name: Shubham Pareek ############
############### UBID: spareek        ############

from logistic_regression import *
from linear_regression import *
from neural_network import *
from preprocessing import *

X1, y1 = get_feature_matrix(data='hod', method='concatenate')
X2, y2 = get_feature_matrix(data='hod', method='subtract')
X3, y3 = get_feature_matrix(data='gsc', method='concatenate')
X4, y4 = get_feature_matrix(data='gsc', method='subtract')

logistic_regression(X1, y1)
logistic_regression(X2, y2)
logistic_regression(X3, y3)
logistic_regression(X4, y4)

linear_regression(X1, y1)
linear_regression(X2, y2)
linear_regression(X3, y3)
linear_regression(X4, y4)

neural_network(X1, y1)
neural_network(X2, y2)
neural_network(X3, y3)
neural_network(X4, y4)
Example #11
data_each = []

for i in all_subjects:
	data_each.append(load_data(i, data_dir))

for i in range(len(all_subjects)):
	data_each[i]['ratio'] = data_each[i]['gain'] / data_each[i]['loss']



########################
#  Perform regression  #
########################

data = all_data

# Run the linear_regression function to get the summary
linear_regression(data, 'RT', 'gain', 'loss')

linear_regression(data, 'RT', 'ratio')

linear_regression(data, 'RT', 'diff')


#######################
#        Plot         #
#######################



np.random.seed(42)

x = dataset.data
y = dataset.target

indices = np.random.permutation(len(x))
test_size = 100

x_train = x[indices[:-test_size]]
y_train = y[indices[:-test_size]]

x_test = x[indices[-test_size:]]
y_test = y[indices[-test_size:]]

regr = linear_regression()
regr.fit(x_train, y_train)

print("Coeffs: ", regr.beta[1:])
print("Intercept: ", regr.beta[0])
print("R2: ", regr.score(x_test, y_test))

train_pred = regr.predict(x_train)
test_pred = regr.predict(x_test)

min_val = min(min(train_pred), min(test_pred))
max_val = max(max(train_pred), max(test_pred))

# Residual = prediction - truth (e.g. y_pred = 10, y = 12 gives -2)
plt.scatter(train_pred, train_pred - y_train, color="blue", s=40)
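
Note: the snippet above treats linear_regression as a small estimator class with fit, predict, score, and a beta vector whose first entry is the intercept; that class is not shown on this page. A minimal sketch of a compatible implementation using plain least squares with NumPy (an assumption, not the original class):

import numpy as np

class linear_regression:
    """Ordinary least squares via numpy.linalg.lstsq (hypothetical sketch)."""

    def fit(self, X, y):
        # Prepend a column of ones so beta[0] is the intercept.
        Xb = np.column_stack([np.ones(len(X)), X])
        self.beta, *_ = np.linalg.lstsq(Xb, y, rcond=None)
        return self

    def predict(self, X):
        Xb = np.column_stack([np.ones(len(X)), X])
        return Xb @ self.beta

    def score(self, X, y):
        # Coefficient of determination R^2 on the given data.
        residual = y - self.predict(X)
        ss_res = np.sum(residual ** 2)
        ss_tot = np.sum((y - np.mean(y)) ** 2)
        return 1.0 - ss_res / ss_tot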