def mark(self):
    blocks = []
    xy_lim = get_xy_lim(self.points)  # [x_min, x_max, y_min, y_max]
    self.origin = [xy_lim[0], xy_lim[2]]
    row = int(ceil((xy_lim[3] - xy_lim[2]) / self.resolution))
    col = int(ceil((xy_lim[1] - xy_lim[0]) / self.resolution))
    reg = LinearRegressor()
    for i in range(row):
        row_blocks = []
        for j in range(col):
            position = [i, j]
            cordinate = self.pos2cordinate(position)
            points = filter_points(self.points,
                                   cordinate[0], cordinate[0] + self.resolution,
                                   cordinate[1], cordinate[1] + self.resolution)
            if len(points) < self.LOWEST_POINTS_COUNT:
                # very few points in the block, param is None
                row_blocks.append(Block(None, points, position))
            else:
                reg.process(points)
                param = reg.get_parameters()[0]
                slope = k2slope(param[0])
                row_blocks.append(Block(param, points, position))
        blocks.append(row_blocks)
    return blocks
def predict(self, this_dict):
    first_variable_value = this_dict[self.first_variable]
    for key in self.df.columns:
        if key not in (self.first_variable, self.dependent_variable):
            exponent = float(key.split('^')[1])
            value = first_variable_value**exponent
            this_dict[key] = value
    linear_regressor = LinearRegressor(self.df, self.dependent_variable)
    return linear_regressor.predict(this_dict)
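# A minimal usage sketch of the polynomial predict() above (not part of the original
# code), assuming the wrapper was built from a DataFrame with columns like
# ['x', 'x^2', 'x^3', 'y'], first_variable 'x' and dependent_variable 'y'.  It only
# illustrates the column-expansion step; the final call is still delegated to
# LinearRegressor.predict.
this_dict = {'x': 2.0}
for key in ['x^2', 'x^3']:  # every column except the first and dependent variables
    exponent = float(key.split('^')[1])
    this_dict[key] = this_dict['x'] ** exponent
# this_dict is now {'x': 2.0, 'x^2': 4.0, 'x^3': 8.0}, ready for the linear model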
def calculate_coefficients(self):
    df = self.dataframe
    dep_vals = df.data_dict[self.dependent_variable]
    new_dep_vals = [math.log(self.upper_bound / y - 1) for y in dep_vals]
    df.data_dict[self.dependent_variable] = new_dep_vals
    return LinearRegressor(df, self.dependent_variable).calculate_coefficients()
def calc_coefficients(self):
    df_transform = {key: self.df.data_dict[key] for key in self.df.data_dict}
    df_transform[self.dv] = [
        math.log((self.up_bound / i) - 1) for i in df_transform[self.dv]
    ]
    df_transform = DataFrame(df_transform, self.df.columns)
    linear_reg = LinearRegressor(df_transform, self.dv)
    return linear_reg.coefficients
def generate(self, points):
    divider = PointsDivider()
    controller = RegressionController()
    controller.set_points(points)
    controller.set_parts(divider, 0.5)
    reg = LinearRegressor()
    controller.fit(reg)
    result = []
    for part, param, ends in zip(controller.parts, controller.parameters,
                                 controller.intersections):
        if not part.isolated:
            result.extend(
                self.generate_for_line(param[0], ends,
                                       self.get_linewidth(part.points, param[0])))
    return result
def __init__(self, dataframe, upperbound, dependent_variable):
    self.upperbound = upperbound
    self.dependent_variable = dependent_variable

    # avoid taking log of infinity below: replace zero values with a small positive value
    dataframe.data_dict[dependent_variable] = [
        0.1 if value == 0 else value
        for value in dataframe.data_dict[dependent_variable]
    ]

    # logistic transform of the dependent variable: y -> ln(upperbound / y - 1)
    dependent_variable_column = dataframe.columns.index(dependent_variable)
    dependent_list = [
        math.log(self.upperbound / value - 1)
        for value in dataframe.data_dict[dependent_variable]
    ]

    # rename the dependent column to mark it as transformed
    dependent_transformed = dependent_variable + "_transformed"
    new_columns = list(dataframe.columns)
    new_columns[dependent_variable_column] = dependent_transformed

    # switch out the old dependent variable list for the transformed one
    transformed_data_dict = dataframe.data_dict
    transformed_data_dict[dependent_transformed] = dependent_list
    del transformed_data_dict[dependent_variable]

    # create a DataFrame from the new data dict and fit a linear regressor on it
    transformed_dataframe = DataFrame(transformed_data_dict, new_columns)
    linear_regressor = LinearRegressor(transformed_dataframe, dependent_transformed)
    self.coefficients = linear_regressor.coefficients
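# A quick numeric check of the transform used above (a standalone sketch, not part of
# the original code): with upperbound = 10, a value y = 5 sits exactly at half the
# ceiling, so ln(upperbound / y - 1) = ln(1) = 0; values near the ceiling map to large
# negative numbers and values near 0 map to large positive numbers, which is why an
# ordinary linear fit on the transformed values behaves like a logistic fit.
import math

upperbound = 10
for y in (0.1, 2.5, 5, 9.9):
    print(y, math.log(upperbound / y - 1))
# 0.1 -> 4.595..., 2.5 -> 1.098..., 5 -> 0.0, 9.9 -> -4.595...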
def main():
    start, end, num_points = -1.8, 2.0, 20
    x = np.linspace(start, end, num_points)
    y_truth = data_generators.ground_truth(x)
    y_generators_dict = {
        "not disturbed": data_generators.data_values,
        "disturbed": data_generators.data_values_disturbed
    }
    model = LinearRegressor()
    for name, generator in y_generators_dict.items():
        y_target = generator(x)
        b_0_l2, b_1_l2 = model.fit(x, y_target, 'l2')
        y_pred_l2 = model.eval(x)
        b_0_l1, b_1_l1 = model.fit(x, y_target, 'l1')
        y_pred_l1 = model.eval(x)
        print(name)
        print(f"b_0_l2 = {b_0_l2} ; b_1_l2 = {b_1_l2}\n"
              f"b_0_l1 = {b_0_l1} ; b_1_l1 = {b_1_l1}")
        plt.plot(x, y_target, 'ko', mfc='none')
        plt.plot(x, y_truth)
        plt.plot(x, y_pred_l2)
        plt.plot(x, y_pred_l1)
        plt.legend(('Sample', 'Model', 'Least squares (L2)',
                    'Least absolute deviations (L1)'))
        plt.xlabel('x')
        plt.ylabel('y')
        plt.savefig(f"{PATH_PLOTS}{name}.png")
        plt.show()
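# A minimal sketch of what fit(x, y, 'l2') presumably computes for a straight line
# y = b_0 + b_1 * x (the actual LinearRegressor implementation is not shown here):
# the ordinary least-squares slope is cov(x, y) / var(x) and the intercept is
# mean(y) - b_1 * mean(x).  The 'l1' fit has no closed form and is typically done by
# iterative optimization, which is why it is only named here, not sketched.
import numpy as np

def ols_line(x, y):
    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
    b_1 = np.sum((x - x.mean()) * (y - y.mean())) / np.sum((x - x.mean()) ** 2)
    b_0 = y.mean() - b_1 * x.mean()
    return b_0, b_1

# e.g. ols_line([0, 1, 2, 3], [1, 3, 5, 7]) returns (1.0, 2.0)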
                data_dict[str(y)].append(new_data[x][y])
            else:
                data_dict[str(y)] = [new_data[x][y]]
    return data_dict


polynomial_data = [(0.0, 4.0), (0.2, 8.9), (0.4, 17.2), (0.6, 28.3), (0.8, 41.6),
                   (1.0, 56.5), (1.2, 72.4), (1.4, 88.7), (1.6, 104.8), (1.8, 120.1),
                   (2.0, 134.0), (2.2, 145.9), (2.4, 155.2), (2.6, 161.3), (2.8, 163.6),
                   (3.0, 161.5), (3.2, 154.4), (3.4, 141.7), (3.6, 122.8), (3.8, 97.1),
                   (4.0, 64.0), (4.2, 22.9), (4.4, -26.8), (4.6, -85.7), (4.8, -154.4)]

trigonometry_data = [(0.0, 7.0), (0.2, 5.6), (0.4, 3.56), (0.6, 1.23), (0.8, -1.03),
                     (1.0, -2.89), (1.2, -4.06), (1.4, -4.39), (1.6, -3.88), (1.8, -2.64),
                     (2.0, -0.92), (2.2, 0.95), (2.4, 2.63), (2.6, 3.79), (2.8, 4.22),
                     (3.0, 3.8), (3.2, 2.56), (3.4, 0.68), (3.6, -1.58), (3.8, -3.84),
                     (4.0, -5.76), (4.2, -7.01), (4.4, -7.38), (4.6, -6.76), (4.8, -5.22)]

data_dict = transform_polynomial_data(polynomial_data, 3)
df = DataFrame(data_dict)
regression = LinearRegressor(df, "4")
print("polynomial_data" + str(regression.coefficients))

data_dict = transform_trigonometric_data(trigonometry_data)
df = DataFrame(data_dict)
regression = LinearRegressor(df, "4")
print("trigonometry_data" + str(regression.coefficients))
                           (4.8, -5.22)],
                          columns=['x', 'y'])

# add different columns
df = df.apply_add('x', lambda x: math.sin(x), 'sin(x)')
df = df.apply_add('x', lambda x: math.cos(x), 'cos(x)')
df = df.apply_add('x', lambda x: math.sin(2 * x), 'sin(2*x)')
df = df.apply_add('x', lambda x: math.cos(2 * x), 'cos(2*x)')

# save x and y values
x_values = list(df.data_dict['x'])
y_values = list(df.data_dict['y'])

# delete x values
df = df.del_column('x')

# find coefficients
linear_regressor = LinearRegressor(df, dependent_variable='y')
coefficients = linear_regressor.coefficients
print(coefficients)


def apply_function(x, function):
    return function(x)


print('x_values')
print(x_values)
print('x_values length', len(x_values))

new_y_values = []
for x in x_values:
    new_y_values.append(
        apply_function(
}
df = DataFrame(data_dict, column_order=['beef', 'pb', 'condiments'])
assert df.columns == ['beef', 'pb', 'condiments'], 'Wrong columns'

df = df.create_dummy_variables()
df = df.append_pairwise_interactions()
df = df.append_columns({
    'constant': [1 for _ in range(len(data_dict['rating']))],
    'rating': data_dict['rating']
})
assert df.columns == [
    'beef', 'pb', 'mayo', 'jelly', 'beef_pb', 'beef_mayo', 'beef_jelly',
    'pb_mayo', 'pb_jelly', 'mayo_jelly', 'constant', 'rating'
], 'Wrong Columns'

linear_regressor = LinearRegressor(df, prediction_column='rating')
coeffs = {
    'beef': 0.25,
    'pb': 0.4,
    'mayo': -1.25,
    'jelly': 1.5,
    'beef_pb': -0.21,
    'beef_mayo': 1.05,
    'beef_jelly': -0.85,
    'pb_mayo': -0.65,
    'pb_jelly': 0.65,
    'mayo_jelly': -3.25,
    'constant': 2.1875
}
assert linear_regressor.coefficients == coeffs, 'Wrong Coeffs'
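# A minimal sketch of what append_pairwise_interactions presumably adds (the DataFrame
# implementation is not shown here): for every pair of existing columns it appends a new
# column named 'a_b' holding the element-wise product.  Written against plain dicts so it
# runs standalone; the helper name pairwise_interactions is hypothetical.
from itertools import combinations

def pairwise_interactions(data_dict, columns):
    out = dict(data_dict)
    for a, b in combinations(columns, 2):
        out[f'{a}_{b}'] = [x * y for x, y in zip(data_dict[a], data_dict[b])]
    return out

# e.g. pairwise_interactions({'beef': [1, 0], 'pb': [0, 1]}, ['beef', 'pb'])
# adds 'beef_pb': [0, 0]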
data = [(0.0, 7.0), (0.2, 5.6), (0.4, 3.56), (0.6, 1.23), (0.8, -1.03),
        (1.0, -2.89), (1.2, -4.06), (1.4, -4.39), (1.6, -3.88), (1.8, -2.64),
        (2.0, -0.92), (2.2, 0.95), (2.4, 2.63), (2.6, 3.79), (2.8, 4.22),
        (3.0, 3.8), (3.2, 2.56), (3.4, 0.68), (3.6, -1.58), (3.8, -3.84),
        (4.0, -5.76), (4.2, -7.01), (4.4, -7.38), (4.6, -6.76), (4.8, -5.22)]

columns = ['y', 'sin(x)', 'cos(x)', 'sin(2x)', 'cos(2x)']
new_data = [[y, math.sin(x), math.cos(x), math.sin(2 * x), math.cos(2 * x)]
            for (x, y) in data]
df = DataFrame.from_array(new_data, columns)
regressor = LinearRegressor(df, 'y')
print(regressor.coefficients)
'''
import matplotlib.pyplot as plt
plt.style.use('bmh')

x_points = []
predicted_points = []
x = 0
while x <= 5:
    data_dict = {'sin(x)': math.sin(x),
                 'cos(x)': math.cos(x),
                 'sin(2x)': math.sin(2 * x),
                 'cos(2x)': math.cos(2 * x)}
    x_points.append(x)
    predicted_points.append(regressor.predict(data_dict))
    x += 0.1
replace_none = (lambda element: avg_age if element is None else element)
df = df.apply('Age', replace_none)

sibsp_0 = df.apply('SibSp',
                   (lambda element: 1 if element == 0 else 0)).data_dict['SibSp']
df = df.add_data('SibSp=0', sibsp_0, df.columns.index('SibSp') + 1)
df = df.apply('Parch', (lambda element: 1 if element == 0 else 0), 'Parch=0')
df = df.create_dummy_variables(initial_key='CabinType', add_on='CabinType=')
df = df.create_dummy_variables(initial_key='Embarked', add_on='Embarked=')

df_1 = df.select_columns(['Sex'])
titanic_reg_1 = LinearRegressor(df_1, 'Survived')
'''
df_1 = df.select_columns(['Sex'])
titanic_reg_1 = LinearRegressor(df_1, 'Survived')

df_1 = df.select_columns(['Sex'])
titanic_reg_1 = LinearRegressor(df_1, 'Survived')

df_1 = df.select_columns(['Sex'])
titanic_reg_1 = LinearRegressor(df_1, 'Survived')

df_1 = df.select_columns(['Sex'])
import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor

df = DataFrame.from_array(
    [[0, 0, 1], [1, 0, 2], [2, 0, 4], [4, 0, 8], [6, 0, 9],
     [0, 2, 2], [0, 4, 5], [0, 6, 7], [0, 8, 6]],
    columns=['slices of roast beef', 'tbsp of peanut butter', 'rating'])
regressor = LinearRegressor(df, dependent_variable='rating')
print(regressor.coefficients)
print(regressor.predict({
    'slices of roast beef': 5,
    'tbsp of peanut butter': 0
}))
print(regressor.predict({
    'slices of roast beef': 5,
    'tbsp of peanut butter': 5
}))
"PassengerId", "Survived", "Pclass", "Surname", "Sex", "Age", "SibSp", "SibSp=0", "Parch=0", "TicketType", "TicketNumber", "Fare", "CabinType=A", "CabinType=B", "CabinType=C", "CabinType=D", "CabinType=E", "CabinType=F", "CabinType=G", "CabinType=None", "CabinType=T", "CabinNumber", "Embarked=C", "Embarked=None", "Embarked=Q", "Embarked=S" ] new_df = DataFrame(new_dict, new_columns) training_df = new_df.select_rows([i for i in range(501)]) testing_df = new_df.select_rows( [i for i in range(501, len(new_df.to_array()))]) print('\nSex:') sex_df = training_df.select(['Sex', 'Survived']) test_sex_df = testing_df.select(['Sex', 'Survived']) survival_regressor = LinearRegressor(sex_df, 'Survived') print(survival_regressor.coefficients) correct_classifications = 0 for i in range(len(training_df.data_dict['Sex'])): prediction = round( survival_regressor.predict( {var: sex_df.data_dict[var][i] for var in sex_df.columns[:-1]})) if prediction > 1: prediction = 1 if prediction == sex_df.data_dict['Survived'][i]: correct_classifications += 1 print('train accuracy:', correct_classifications / len(training_df.data_dict['Sex']))
                   [], ['mayo'], ['jelly'], ['mayo', 'jelly']],
}
df = DataFrame(data_dict)

print("\n Testing columns of DataFrame")
assert df.columns == ['beef', 'pb',
                      'condiments'], 'Incorrect columns for DataFrame'
print(" passed")

df = df.create_dummy_variables()
df = df.append_pairwise_interactions()
df = df.append_columns({
    'constant': [1 for _ in range(len(data_dict['beef']))],
    'rating': [1, 1, 4, 0, 4, 8, 1, 0, 5, 0, 9, 0, 0, 0, 0, 0]
})

linear_regressor = LinearRegressor(df, prediction_column='rating')

print("\n Testing Coefficients of LinearRegressor")
assert linear_regressor.coefficients == {
    'beef': 0.25, 'pb': 0.4, 'mayo': -1.25, 'jelly': 1.5,
    'beef_pb': -0.21, 'beef_mayo': 1.05, 'beef_jelly': -0.85,
    'pb_mayo': -0.65, 'pb_jelly': 0.65, 'mayo_jelly': -3.25,
    'constant': 2.19
}, 'Incorrect coefficients for Linear Regressor'
print(" passed")

print("\n Testing Gathering all inputs of LinearRegressor")
assert linear_regressor.gather_all_inputs({
    'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1
}) == {
    'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1,
    'beef_pb': 25, 'beef_mayo': 5, 'beef_jelly': 5,
    'pb_mayo': 5, 'pb_jelly': 5, 'mayo_jelly': 1, 'constant': 1
}, 'Incorrect gather_all_inputs'
print(" passed")

print("\n Testing Prediction #1")
assert linear_regressor.predict({
    'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1
}) == -1.81, 'Incorrect prediction from linear regressor'
print(" passed")

print("\n Testing Prediction #2")
assert linear_regressor.predict({
    'beef': 0,
import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

dataset = [(0.0, 4.0), (0.2, 8.9), (0.4, 17.2), (0.6, 28.3), (0.8, 41.6),
           (1.0, 56.5), (1.2, 72.4), (1.4, 88.7), (1.6, 104.8), (1.8, 120.1),
           (2.0, 134.0), (2.2, 145.9), (2.4, 155.2), (2.6, 161.3), (2.8, 163.6),
           (3.0, 161.5), (3.2, 154.4), (3.4, 141.7), (3.6, 122.8), (3.8, 97.1),
           (4.0, 64.0), (4.2, 22.9), (4.4, -26.8), (4.6, -85.7), (4.8, -154.4)]

new_columns = ['x', 'x^2', 'x^3', 'y']
new_dataset = [(pair[0], pair[0]**2, pair[0]**3, pair[1]) for pair in dataset]
df = DataFrame.from_array(new_dataset, new_columns)
polynomial_regressor = LinearRegressor(df, 'y')
polynomial_regressor_coefficients = polynomial_regressor.coefficients
print("polynomial_regressor_coefficients:", polynomial_regressor_coefficients)
     [5, 0, ['jelly'], 1], [5, 0, ['mayo', 'jelly'], 0],
     [0, 5, [], 5], [0, 5, ['mayo'], 0], [0, 5, ['jelly'], 9],
     [0, 5, ['mayo', 'jelly'], 0], [5, 5, [], 0], [5, 5, ['mayo'], 0],
     [5, 5, ['jelly'], 0], [5, 5, ['mayo', 'jelly'], 0]],
    columns=['beef', 'pb', 'condiments', 'rating'])

df = df.create_dummy_variables('condiments')
df = df.create_interaction_terms('beef', 'pb')
df = df.create_interaction_terms('beef', 'mayo')
df = df.create_interaction_terms('beef', 'jelly')
df = df.create_interaction_terms('pb', 'mayo')
df = df.create_interaction_terms('pb', 'jelly')
df = df.create_interaction_terms('mayo', 'jelly')

lin_df = DataFrame(df.data_dict, df.columns)
linear_regressor = LinearRegressor(lin_df, dependent_variable='rating')
print('linear_regressor')
'''
print(linear_regressor.predict({'beef': 8, 'pb': 0, 'mayo': 1, 'jelly': 0,
                                'beef * pb': 0, 'beef * mayo': 8, 'beef * jelly': 0,
                                'pb * mayo': 0, 'pb * jelly': 0, 'mayo * jelly': 0}))
print(linear_regressor.predict({'beef': 0, 'pb': 4, 'mayo': 0, 'jelly': 1,
                                'beef * pb': 0, 'beef * mayo': 0, 'beef * jelly': 0,
                                'pb * mayo': 0, 'pb * jelly': 4, 'mayo * jelly': 0}))
print(linear_regressor.predict({'beef': 0, 'pb': 4, 'mayo': 1, 'jelly': 0,
                                'beef * pb': 0, 'beef * mayo': 0, 'beef * jelly': 0,
                                'pb * mayo': 4, 'pb * jelly': 0, 'mayo * jelly': 0}))
print(linear_regressor.predict({'beef': 8, 'pb': 4, 'mayo': 1, 'jelly': 0,
                                'beef * pb': 32, 'beef * mayo': 8, 'beef * jelly': 0,
                                'pb * mayo': 4, 'pb * jelly': 0, 'mayo * jelly': 0}))
print(linear_regressor.predict({'beef': 8, 'pb': 0, 'mayo': 1, 'jelly': 1,
                                'beef * pb': 0, 'beef * mayo': 8, 'beef * jelly': 8,
                                'pb * mayo': 0, 'pb * jelly': 0, 'mayo * jelly': 1}))
'''

df = DataFrame.from_array(
    [[0, 0, [], 1], [0, 0, ['mayo'], 1], [0, 0, ['jelly'], 4],
     [0, 0, ['mayo', 'jelly'], 0], [5, 0, [], 4], [5, 0, ['mayo'], 8],
     [5, 0, ['jelly'], 1], [5, 0, ['mayo', 'jelly'], 0], [0, 5, [], 5],
     [0, 5, ['mayo'], 0], [0, 5, ['jelly'], 9], [0, 5, ['mayo', 'jelly'], 0],
     [5, 5, [], 0], [5, 5, ['mayo'], 0], [5, 5, ['jelly'], 0],
     [5, 5, ['mayo', 'jelly'], 0]],
import sys
sys.path.append('src')
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor
from matrix import Matrix
from dataframe import DataFrame

df = DataFrame.from_array(
    [[0, 0, 1], [1, 0, 2], [2, 0, 4], [4, 0, 8], [6, 0, 9],
     [0, 2, 2], [0, 4, 5], [0, 6, 7], [0, 8, 6]],
    columns=['slices of roast beef', 'tablespoons of peanut butter', 'rating'])
regressor = LinearRegressor(df, dependent_variable='rating')
print(regressor.coefficients)
print(regressor.predict({
    'slices of roast beef': 5,
    'tablespoons of peanut butter': 0
}))
print(regressor.predict({
    'slices of roast beef': 5,
    'tablespoons of peanut butter': 5
}))

df = DataFrame.from_array(
    [[0, 0, 1], [1, 0, 2], [2, 0, 4], [4, 0, 8], [6, 0, 9],
     [0, 2, 2], [0, 4, 5], [0, 6, 7], [0, 8, 6], [2, 2, 0], [3, 4, 0]],
    columns=['beef', 'pb', 'rating'])
            guess = 1
        else:
            guess = 0
        classifications.append((testing_passenger_ids[index], guess))
    return classifications


print('\n')
ratings = [[survived] for index, survived in enumerate(survived_people)
           if index in dataframe_indices]
dataframe.append_columns({'constant': [1 for _ in dataframe_indices]})

linear_regressor = LinearRegressor(dataframe, ratings,
                                   prediction_column='Survived')
linear_regressor.solve_coefficients()
linear_regressor_classifications = get_classifications(linear_regressor,
                                                       testing_dataframe)
for row in linear_regressor_classifications:
    print(row)

print('\n')
logistic_regressor = LogisticRegressor(dataframe, ratings,
                                       prediction_column='Survived')
logistic_regressor.solve_coefficients()
logistic_regressor_classifications = get_classifications(
    logistic_regressor, testing_dataframe)
for row in logistic_regressor_classifications:
import sys
sys.path.append('src')
from linear_regressor import LinearRegressor
from matrix import Matrix
from dataframe import DataFrame

df = DataFrame.from_array([[1, 0.2], [2, 0.25], [3, 0.5]],
                          columns=['hours worked', 'progress'])
regressor = LinearRegressor(df, dependent_variable='progress')

# print('Testing attribute coefficients...')
# assert [round(i, 5) for i in regressor.coefficients] == [0.01667, 0.15]
# print('PASSED')

# print('Testing method predict...')
# assert round(regressor.predict({'hours worked': 4}), 5) == 0.61667
# print('PASSED')

# Assignment 40
df = DataFrame.from_array(
    [[0, 0, 0.1], [1, 0, 0.2], [0, 2, 0.5], [4, 5, 0.6]],
    columns=['scoops of chocolate', 'scoops of vanilla', 'taste rating'])
regressor = LinearRegressor(df, dependent_variable='taste rating')
print('Testing attribute coefficients...')
assert {
    key: round(regressor.coefficients[key], 8)
    for key in regressor.coefficients
} == {
    'constant': 0.19252336,
    'scoops of chocolate': -0.05981308,
regressor = LinearRegressor(df, dependent_variable='progress')

print('Does all the linear_regressor stuff work')
assert regressor.coefficients == [0.01667, 0.15], 'No, coefficients does not work'
assert regressor.predict({'hours worked': 4}) == 0.61667, 'No, predict does not work'
print('Yes they do', "\n")
'''

df = DataFrame.from_array(
    [[0, 0, 0.1], [1, 0, 0.2], [0, 2, 0.5], [4, 5, 0.6]],
    columns=['scoops of chocolate', 'scoops of vanilla', 'taste rating'])
regressor = LinearRegressor(df, dependent_variable='taste rating')

print('Does all the linear_regressor stuff work')
reg_coeff = regressor.coefficients.copy()
for (key, value) in reg_coeff.items():
    reg_coeff[key] = round(value, 8)
assert reg_coeff == {
    'constant': 0.19252336,
    'scoops of chocolate': -0.05981308,
    'scoops of vanilla': 0.13271028
}, 'No, coefficients does not work'
assert round(
    regressor.predict({
def solve_coefficients(self):
    linear_regressor = LinearRegressor(self.df, self.dependent_variable)
    self.coefficients = linear_regressor.coefficients