house1.info() imputable_cat_features = get_categorical_features(house1) cat_imputer = get_categorical_imputers(house1, imputable_cat_features) house1[imputable_cat_features] = cat_imputer.transform( house1[imputable_cat_features]) imputable_cont_features = get_continuous_features(house1) cont_imputer = get_continuous_imputers(house1, imputable_cont_features) house1[imputable_cont_features] = cont_imputer.transform( house1[imputable_cont_features]) house1.info() house2 = ohe(house1, imputable_cat_features) scaler = get_scaler(house2) house3 = scaler.transform(house2) house3 = pd.DataFrame(house3, columns=house2.columns) X_train = house3[:house_train.shape[0]] y_train = house_train['SalePrice'] sns.distplot(y_train, hist=True) y_trans = np.log1p(y_train) sns.distplot(y_trans, hist=True) scoring = metrics.make_scorer(log_rmse, greater_is_better=False) #union of 3 feature selectors lasso_estimator = linear_model.Lasso() lasso_grid = {'alpha': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5]} lasso_selector = get_best_model(lasso_estimator,
# Impute, encode and scale the `house1` frame, fit a Lasso model on the
# training rows, keep the features it selects, and prepare a PCA.

# Column summary before imputation.
house1.info()

# Non-continuous columns: overwrite them with the categorical imputer's
# output.
imputable_cat_features = utils.get_non_continuous_features(house1)
cat_imputer = utils.get_categorical_imputers(house1, imputable_cat_features)
house1[imputable_cat_features] = cat_imputer.transform(
    house1[imputable_cat_features]
)

# Continuous columns: same treatment with the continuous imputer.
imputable_cont_features = utils.get_continuous_features(house1)
cont_imputer = utils.get_continuous_imputers(house1, imputable_cont_features)
house1[imputable_cont_features] = cont_imputer.transform(
    house1[imputable_cont_features]
)

# Column summary after imputation, for comparison with the first one.
house1.info()

# Encode the non-continuous columns (presumably one-hot, going by the
# helper's name -- confirm in `utils`), then scale every column.
house2 = utils.ohe(house1, imputable_cat_features)
scaler = utils.get_scaler(house2)
# Re-wrap the scaler output in a DataFrame so the column labels of
# `house2` are kept.
house3 = pd.DataFrame(scaler.transform(house2), columns=house2.columns)

# The first `house_train.shape[0]` rows of the scaled frame are used as the
# training features; the target comes straight from `house_train`.
X_train = house3.iloc[:house_train.shape[0]]
y_train = house_train['SalePrice']

# Fit a default-parameter Lasso on the raw target and show its coefficients.
lasso_selector = linear_model.Lasso().fit(X_train, y_train)
print(lasso_selector.coef_)
utils.plot_feature_importances(lasso_selector, X_train, 40)

# Reduce the training frame using the fitted Lasso, then plot the
# correlation heatmap of what is left.
X_train1 = utils.select_features(lasso_selector, X_train)
utils.corr_heatmap(X_train1)

# A float n_components in (0, 1) asks PCA to keep enough components to
# explain that fraction of the variance (here 95%).
lpca = decomposition.PCA(n_components=0.95)