### Stepwise variable selection

def train_model(variables):
    """Fit an OLS model on the given predictor columns of X against y.

    Returns None when no predictors are supplied (the empty candidate set),
    which the scoring function treats as the mean-only baseline.
    """
    # len(...) == 0 rather than truthiness: a pandas Index raises on bool().
    if len(variables) == 0:
        return None
    fitted = LinearRegression()
    fitted.fit(X[variables], y)
    return fitted


def score_model(model, variables):
    """Score a candidate model by AIC; lower is better.

    With no predictors, score the intercept-only model (predicting the
    mean of y everywhere, df=1); otherwise score the fitted model's
    in-sample predictions.
    """
    if len(variables) == 0:
        return AIC_score(y, [y.mean()] * len(y), model, df=1)
    return AIC_score(y, model.predict(X[variables]), model)


# Search the predictor space, adding/dropping one variable at a time.
best_model, best_variables = stepwise_selection(X.columns, train_model,
                                                score_model, verbose=True)

print()
print(f'Intercept: {best_model.intercept_:.3f}')
print('Coefficients:')
for name, coef in zip(best_variables, best_model.coef_):
    print(f' {name}: {coef}')

### Weighted regression
# We can calculate the Year from the date column using either a list
# comprehension or the data frame's `apply` method (both shown; the second
# assignment overwrites the first with the same values).
house['Year'] = [int(date.split('-')[0]) for date in house.DocumentDate]
house['Year'] = house.DocumentDate.apply(lambda d: int(d.split('-')[0]))
house['Weight'] = house.Year - 2005
def score_model(model, variables):
    """AIC of a candidate model on the training split; lower is better.

    An empty variable set is scored as the intercept-only baseline
    (predict the training mean everywhere, df=1).
    """
    # len(...) == 0 rather than truthiness: a pandas Index raises on bool().
    if len(variables) == 0:
        baseline = [train_y.mean()] * len(train_y)
        return AIC_score(train_y, baseline, model, df=1)
    return AIC_score(train_y, model.predict(train_X[variables]), model)


# Run all three search strategies over the same candidate set so their
# selected models can be compared.
allVars = train_X.columns
best_model, best_vars = backward_elimination(allVars, train_model,
                                             score_model, verbose=True)
best_model1, best_vars1 = stepwise_selection(allVars, train_model,
                                             score_model, verbose=True)
best_model2, best_vars2 = forward_selection(allVars, train_model,
                                            score_model, verbose=True)

## REGULARIZATION

# NOTE(review): `normalize=True` was deprecated in scikit-learn 1.0 and
# removed in 1.2 — confirm the pinned sklearn version still supports it.
bayR = BayesianRidge(normalize=True)
bayR.fit(train_X, train_y)

# Evaluate on the held-out validation split.
regressionSummary(valid_y, bayR.predict(valid_X))

# Effective regularization strength chosen by the evidence maximization:
# ratio of the weight precision (lambda_) to the noise precision (alpha_).
alpha = bayR.lambda_ / bayR.alpha_
print('Bayesian ridge chosen Regularization: ', alpha)