Exemplo n.º 1
0
# Print the column / dtype / non-null summary before imputation so it can be
# compared with the second info() call below.
# NOTE(review): house1 is presumably a pandas DataFrame built earlier - confirm.
house1.info()

# Categorical columns: pick the columns to impute, obtain an imputer for them,
# and overwrite those columns in place with the imputer's output.
# NOTE(review): get_categorical_imputers presumably returns an already-fitted
# imputer, since only transform() is called here - confirm against the helper.
imputable_cat_features = get_categorical_features(house1)
cat_imputer = get_categorical_imputers(house1, imputable_cat_features)
house1[imputable_cat_features] = cat_imputer.transform(
    house1[imputable_cat_features])

# Continuous columns: same pattern as the categorical columns above.
imputable_cont_features = get_continuous_features(house1)
cont_imputer = get_continuous_imputers(house1, imputable_cont_features)
house1[imputable_cont_features] = cont_imputer.transform(
    house1[imputable_cont_features])
# Second summary, to inspect the non-null counts after imputation.
house1.info()

# Encode the categorical columns identified above.
# NOTE(review): `ohe` presumably performs one-hot encoding - confirm.
house2 = ohe(house1, imputable_cat_features)

# Scale every column of the encoded frame, then re-wrap the result in a
# DataFrame so the column labels of house2 are kept.
# NOTE(review): if transform() returns a plain array, the new frame gets a
# fresh 0..n-1 index rather than house2's index - confirm that is intended.
scaler = get_scaler(house2)
house3 = scaler.transform(house2)
house3 = pd.DataFrame(house3, columns=house2.columns)

# Training features are the first len(house_train) rows of the processed frame.
# NOTE(review): assumes the training rows come first in house1 - confirm.
X_train = house3[:house_train.shape[0]]
y_train = house_train['SalePrice']
# Plot the distribution of the raw target.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases
# (histplot/displot are the replacements) - check the pinned seaborn version.
sns.distplot(y_train, hist=True)
# log1p(x) = log(1 + x); plot the transformed target for comparison.
y_trans = np.log1p(y_train)
sns.distplot(y_trans, hist=True)

# Wrap log_rmse (defined elsewhere) as a scorer. greater_is_better=False marks
# it as a loss, so lower values are treated as better.
# NOTE(review): `metrics` is presumably sklearn.metrics, in which case the
# scorer returns the negated loss - confirm.
scoring = metrics.make_scorer(log_rmse, greater_is_better=False)

# Original note: union of 3 feature selectors. The Lasso-based selector is
# configured first: a default Lasso estimator plus the candidate alpha values.
lasso_estimator = linear_model.Lasso()
lasso_grid = {'alpha': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.5]}
lasso_selector = get_best_model(lasso_estimator,
Exemplo n.º 2
0
# Print the column / dtype / non-null summary before imputation so it can be
# compared with the second info() call below.
# NOTE(review): house1 is presumably a pandas DataFrame built earlier - confirm.
house1.info()

# Non-continuous columns: pick the columns to impute, obtain an imputer for
# them, and overwrite those columns in place with the imputer's output.
# NOTE(review): get_categorical_imputers presumably returns an already-fitted
# imputer, since only transform() is called here - confirm against utils.
imputable_cat_features = utils.get_non_continuous_features(house1)
cat_imputer = utils.get_categorical_imputers(house1, imputable_cat_features)
house1[imputable_cat_features] = cat_imputer.transform(
    house1[imputable_cat_features])

# Continuous columns: same pattern as the non-continuous columns above.
imputable_cont_features = utils.get_continuous_features(house1)
cont_imputer = utils.get_continuous_imputers(house1, imputable_cont_features)
house1[imputable_cont_features] = cont_imputer.transform(
    house1[imputable_cont_features])
# Second summary, to inspect the non-null counts after imputation.
house1.info()

# Encode the non-continuous columns identified above.
# NOTE(review): `utils.ohe` presumably performs one-hot encoding - confirm.
house2 = utils.ohe(house1, imputable_cat_features)

# Scale every column of the encoded frame, then re-wrap the result in a
# DataFrame so the column labels of house2 are kept.
# NOTE(review): if transform() returns a plain array, the new frame gets a
# fresh 0..n-1 index rather than house2's index - confirm that is intended.
scaler = utils.get_scaler(house2)
house3 = scaler.transform(house2)
house3 = pd.DataFrame(house3, columns=house2.columns)

# Training features are the first len(house_train) rows of the processed frame.
# NOTE(review): assumes the training rows come first in house1 - confirm.
X_train = house3[:house_train.shape[0]]
# Target is the raw SalePrice column; no transform is applied to it here.
y_train = house_train['SalePrice']

# Fit a Lasso model with default hyper-parameters on the training data and use
# its coefficients to rank and select features.
lasso_selector = linear_model.Lasso()
lasso_selector.fit(X_train, y_train)
print(lasso_selector.coef_)
# NOTE(review): the 40 is presumably the number of top features to plot -
# confirm against utils.plot_feature_importances.
utils.plot_feature_importances(lasso_selector, X_train, 40)

# Reduce X_train using the fitted model.
# NOTE(review): presumably keeps the columns the model deems important; the
# exact selection rule lives in utils.select_features - confirm.
X_train1 = utils.select_features(lasso_selector, X_train)

# Plot the correlations among the selected features.
utils.corr_heatmap(X_train1)
# Construct the PCA object only; it is not fitted here.
# NOTE(review): if `decomposition` is sklearn.decomposition, a float in (0, 1)
# asks for enough components to explain that fraction of variance (95% here) -
# confirm.
lpca = decomposition.PCA(0.95)