def test_linear_regression():
    # Create a data frame
    d = {
        "y": pd.Series(
            [95, 85, 80, 75, 70, 65, 60, 55, 50, 45],
            index=["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
        ),
        "x1": pd.Series(
            [85, 95, 70, 65, 70, 60, 64, 60, 51, 49],
            index=["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
        ),
        "x2": pd.Series(
            [10, 8.8, 8.4, 7.5, 7.4, 7.2, 7.0, 6.4, 5.3, 4],
            index=["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
        ),
    }
    df = pd.DataFrame(d)

    beta1, pvalues1 = linear_regression(df, "y", "x1")
    beta2, pvalues2 = linear_regression(df, "y", "x2")
    beta3, pvalues3 = linear_regression(df, "y", "x1", "x2")

    # Reference values calculated by hand
    expected_beta1 = np.array([0.69128736, 1.00610931])
    expected_p1 = np.array([0.95669000991385234, 0.00082441892685309844])
    expected_beta2 = np.array([2.92830189, 9.03773585])
    expected_p2 = np.array([0.64660353670191761, 1.2010523101013017e-05])
    expected_beta3 = np.array([-0.01554384, 0.20355359, 7.55525124])
    expected_p3 = np.array([0.99826544217405555, 0.37237722050579208, 0.0049816157477362418])

    assert_almost_equal(expected_beta1, beta1)
    assert_almost_equal(expected_p1, pvalues1)
    assert_almost_equal(expected_beta2, beta2)
    assert_almost_equal(expected_p2, pvalues2)
    assert_almost_equal(expected_beta3, beta3)
    assert_almost_equal(expected_p3, pvalues3)
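# The test above assumes a helper with the signature
# linear_regression(df, y_name, *x_names) that returns (params, pvalues),
# with the intercept first. A minimal sketch of such a helper, assuming a
# statsmodels OLS fit with an added constant (not necessarily the
# implementation under test):
import pandas as pd
import statsmodels.api as sm


def linear_regression(df, y_name, *x_names):
    """Regress df[y_name] on df[x_names] plus an intercept via OLS.

    Returns the fitted coefficients (intercept first) and their p-values.
    """
    X = sm.add_constant(df[list(x_names)])
    results = sm.OLS(df[y_name], X).fit()
    return results.params.values, results.pvalues.values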
def run_linear(train, test):
    print("RUNNING LINEAR")
    # Convert it to another dataframe with ages
    train = add_information(train)
    train.to_csv("test.csv")
    model = linear_regression(train)
    test = add_information(test)
    return evaluate_linear_regression(model, test)
def brute_force(data):
    for col1 in data.columns:
        X = data[col1]
        # Crude type check: skip string-valued columns
        if not isinstance(X[1], str):
            for col2 in data.columns:
                y = data[col2]
                if not isinstance(y[1], str) and col1 != col2:
                    y_p = linear_regression(X, y)
                    plot_graph(data, X, y, y_p)
def performing_algorithm(X, y, X_test):
    """
    :param X: training feature matrix
    :param y: training target vector
    :param X_test: test feature matrix
    :return: predictions of the chosen algorithm on X_test
    """
    if args.algorithm == "linear_regression":
        return linear_regression(X, y, X_test)
    elif args.algorithm == "decision_tree":
        return decision_tree(X, y, X_test)
    elif args.algorithm == "SVM":
        return SVM(X, y, X_test)
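# performing_algorithm() above and age_prediction() below both call
# linear_regression(X, y, X_test) and expect it to return predictions for
# X_test. A minimal sketch of such a wrapper, assuming scikit-learn's
# LinearRegression (the project's actual wrapper may differ):
from sklearn.linear_model import LinearRegression


def linear_regression(X, y, X_test):
    """Fit ordinary least squares on (X, y) and predict targets for X_test."""
    model = LinearRegression()
    model.fit(X, y)
    return model.predict(X_test)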
def choose_function(data):
    choice = int(input("Choose a function: #0 linear regression, #1 brute force: "))
    if choice == 0:
        print("Linear regression chosen.")
        X, y = columns_selection(data)
        y_p = linear_regression(X, y)
        plot_graph(data, X, y, y_p)
    elif choice == 1:
        print("Brute force chosen.")
        brute_force(data)
    else:
        choose_function(data)
def plot_correlation(folder, lambda_alpha):
    file_path = os.path.join(
        folder, "gompertz", "individualParameters", "estimatedIndividualParameters.txt"
    )
    df = pd.read_csv(file_path, sep=",")
    alpha = df["alpha0_mode"]
    beta = df["beta_mode"]

    lr = linear_regression(beta.values, alpha.values, 1)
    fig, ax, fig_text = plot_regression_line(
        beta,
        alpha,
        lr.params,
        np.min([lr.rsquared, 0.99]),
        np.max([1e-5, np.min(lr.pvalues)]),
    )
    ax.plot(
        [beta.min(), beta.max()],
        lambda_alpha * np.ones(2),
        "k:",
        label=r"$\lambda$ = " + str(lambda_alpha),  # np.str is removed in NumPy >= 1.24
    )
    ax.legend(loc=4, fontsize=13)

    figname = os.path.join(folder, "correlation.pdf")
    fig.savefig(figname, dpi=1000, format="pdf", bbox_inches="tight")
    fig_text.savefig(
        os.path.join(folder, "correlation_box.pdf"), dpi=1000, format="pdf", bbox_inches="tight"
    )
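# plot_correlation() expects linear_regression(x, y, 1) to return an object
# exposing .params, .rsquared and .pvalues, i.e. something shaped like a
# statsmodels results object. A minimal sketch under that assumption, treating
# the third argument as the polynomial degree of the fit (an assumption, not
# confirmed by the snippet):
import numpy as np
import statsmodels.api as sm


def linear_regression(x, y, degree=1):
    """Regress y on polynomial terms of x (plus an intercept) via OLS."""
    X = np.column_stack([x ** d for d in range(1, degree + 1)])
    X = sm.add_constant(X)
    return sm.OLS(y, X).fit()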
def age_prediction(matrix, column):
    """
    :param matrix: 2-D array of samples
    :param column: index of the age column
    :return: None; missing ages in `matrix` are filled in place with predictions
    """
    train_age = []
    test_age = []
    index = []
    for line in range(len(matrix)):
        if pd.isnull(matrix[line][column]):
            test_age.append(matrix[line])
            index.append(line)
        else:
            train_age.append(matrix[line])

    test_age = np.array(test_age)
    train_age = np.array(train_age)

    # np.float / np.int were removed in NumPy >= 1.24; use the builtins
    X_age = np.delete(train_age, [column], 1).astype(float)
    y_age = train_age[:, column].astype(int)
    X_test_age = np.delete(test_age, [column], 1).astype(float)

    predicted_age = linear_regression(X_age, y_age, X_test_age)

    # Clamp negative predictions to a minimum age of 1
    for line in range(len(predicted_age)):
        if predicted_age[line] < 0:
            predicted_age[line] = 1

    # Write the predictions back into the rows that had missing ages
    var = 0
    for line in range(len(matrix)):
        if pd.isnull(matrix[line][column]):
            matrix[line][column] = predicted_age[var]
            var += 1
data_each.append(load_data(i, data_dir))
# data_each.append(load_data(i))

for i in range(len(all_subjects)):
    data_each[i]['ratio'] = data_each[i]['gain'] / data_each[i]['loss']

##############################
# Perform linear regression  #
##############################
data = all_data

# Run the linear_regression function to get the summary
beta1, pvalues1 = linear_regression(data, 'RT', 'gain', 'loss')
beta2, pvalues2 = linear_regression(data, 'RT', 'ratio')
beta3, pvalues3 = linear_regression(data, 'RT', 'diff')

#######################
#        Plot         #
#######################
# Plot the simple regression, since the ratio is the most significant predictor
y = data['RT']
############### Name: Shubham Pareek ############
############### UBID: spareek        ############
from logistic_regression import *
from linear_regression import *
from neural_network import *
from preprocessing import *

X1, y1 = get_feature_matrix(data='hod', method='concatenate')
X2, y2 = get_feature_matrix(data='hod', method='subtract')
X3, y3 = get_feature_matrix(data='gsc', method='concatenate')
X4, y4 = get_feature_matrix(data='gsc', method='subtract')

logistic_regression(X1, y1)
logistic_regression(X2, y2)
logistic_regression(X3, y3)
logistic_regression(X4, y4)

linear_regression(X1, y1)
linear_regression(X2, y2)
linear_regression(X3, y3)
linear_regression(X4, y4)

neural_network(X1, y1)
neural_network(X2, y2)
neural_network(X3, y3)
neural_network(X4, y4)
data_each = []
for i in all_subjects:
    data_each.append(load_data(i, data_dir))

for i in range(len(all_subjects)):
    data_each[i]['ratio'] = data_each[i]['gain'] / data_each[i]['loss']

#######################
# Perform regression  #
#######################
data = all_data

# Run the linear_regression function to get the summary
linear_regression(data, 'RT', 'gain', 'loss')
linear_regression(data, 'RT', 'ratio')
linear_regression(data, 'RT', 'diff')

#######################
#        Plot         #
#######################
np.random.seed(42)

x = dataset.data
y = dataset.target

indices = np.random.permutation(len(x))
test_size = 100
x_train = x[indices[:-test_size]]
y_train = y[indices[:-test_size]]
x_test = x[indices[-test_size:]]
y_test = y[indices[-test_size:]]

regr = linear_regression()
regr.fit(x_train, y_train)

print("Coeffs: ", regr.beta[1:])
print("Intercept: ", regr.beta[0])
print("R2: ", regr.score(x_test, y_test))

train_pred = regr.predict(x_train)
test_pred = regr.predict(x_test)

min_val = min(min(train_pred), min(test_pred))
max_val = max(max(train_pred), max(test_pred))

# Residuals are y_pred - y, e.g. y_pred = 10, y = 12 gives -2
plt.scatter(train_pred, train_pred - y_train, color="blue", s=40)
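# The snippet above treats linear_regression as a class with fit/predict/score
# methods and a beta attribute whose first entry is the intercept. A minimal
# sketch of such an estimator, assuming an ordinary least-squares fit via
# np.linalg.lstsq (the original class may be implemented differently):
import numpy as np


class linear_regression:
    """Ordinary least squares with an explicit intercept column."""

    def __init__(self):
        self.beta = None

    def _design(self, X):
        # Prepend a column of ones so beta[0] is the intercept
        X = np.asarray(X, dtype=float)
        return np.hstack([np.ones((X.shape[0], 1)), X])

    def fit(self, X, y):
        self.beta, *_ = np.linalg.lstsq(
            self._design(X), np.asarray(y, dtype=float), rcond=None
        )
        return self

    def predict(self, X):
        return self._design(X) @ self.beta

    def score(self, X, y):
        # Coefficient of determination R^2 on (X, y)
        y = np.asarray(y, dtype=float)
        residual = y - self.predict(X)
        return 1.0 - (residual @ residual) / ((y - y.mean()) @ (y - y.mean()))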