Example #1
# utils / rutils are project-specific helper modules; house1, house_train and
# imputable_cat_features come from earlier loading/imputation steps not shown here.
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import linear_model, decomposition, manifold, metrics

# One-hot encode the categorical features, then scale the combined frame.
house2 = utils.ohe(house1, imputable_cat_features)

scaler = utils.get_scaler(house2)
house3 = scaler.transform(house2)
house3 = pd.DataFrame(house3, columns=house2.columns)
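
utils.ohe and utils.get_scaler are not shown in the snippet; a minimal sketch of plausible equivalents, assuming ohe wraps pandas.get_dummies and get_scaler returns a StandardScaler fitted on the combined frame (both names and behaviours are assumptions, not the project's actual code):

# Hypothetical equivalents of utils.ohe / utils.get_scaler (assumptions).
import pandas as pd
from sklearn import preprocessing

def ohe_sketch(df, cat_features):
    # One-hot encode the listed categorical columns, leave the rest unchanged.
    return pd.get_dummies(df, columns=cat_features)

def get_scaler_sketch(df):
    # Fit a StandardScaler on the full (train + test) frame.
    return preprocessing.StandardScaler().fit(df)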

# The combined frame holds train and test rows; the first house_train.shape[0]
# rows are the training portion, and SalePrice is the regression target.
X_train = house3[:house_train.shape[0]]
y_train = house_train['SalePrice']

# Fit a Lasso model purely as a feature selector: its L1 penalty drives the
# coefficients of uninformative features to zero.
lasso_selector = linear_model.Lasso()
lasso_selector.fit(X_train, y_train)
print(lasso_selector.coef_)
utils.plot_feature_importances(lasso_selector, X_train, 40)

# Keep only the features the Lasso selector retained (see the sketch below).
X_train1 = utils.select_features(lasso_selector, X_train)
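
utils.select_features is a project helper whose code is not shown; a minimal sketch of a plausible equivalent, assuming it wraps scikit-learn's SelectFromModel around the already-fitted selector (the name and behaviour here are assumptions):

# Hypothetical equivalent of utils.select_features (assumption, not the project's code).
from sklearn.feature_selection import SelectFromModel

def select_features_sketch(fitted_estimator, X, threshold=None):
    # Keep only the columns whose coefficients/importances pass the threshold.
    selector = SelectFromModel(fitted_estimator, threshold=threshold, prefit=True)
    return X.loc[:, selector.get_support()]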

# Inspect correlations among the selected features, then apply PCA keeping
# enough components to explain 95% of the variance.
utils.corr_heatmap(X_train1)
lpca = decomposition.PCA(n_components=0.95)
lpca.fit(X_train1)
print(np.cumsum(lpca.explained_variance_ratio_))
pca_data = lpca.transform(X_train1)
print(pca_data.shape)

# Project the PCA output down to two dimensions with t-SNE and plot it
# against the target.
tsne = manifold.TSNE(n_components=2)
tsne_data = tsne.fit_transform(pca_data)
rutils.plot_data_3d_regression(tsne_data, y_train)

# Wrap the custom log-RMSE metric as a scorer; greater_is_better=False makes
# grid search minimise it (scores are reported negated).
scoring = metrics.make_scorer(log_rmse, greater_is_better=False)
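
log_rmse itself is defined elsewhere in the project; a plausible sketch, assuming it is the usual RMSE computed on log1p-transformed prices (the standard Kaggle house-prices metric) -- this definition is an assumption:

# Hypothetical log_rmse (assumption): root-mean-squared error on the log1p scale.
import numpy as np

def log_rmse(y_true, y_pred):
    return np.sqrt(np.mean((np.log1p(y_true) - np.log1p(y_pred)) ** 2))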

# distplot is deprecated in recent seaborn releases; histplot is the replacement.
sns.histplot(y_train, kde=True)
Example #2
# cutils / utils are project helper modules, tpot is the TPOT AutoML library;
# titanic and titanic_train come from earlier loading and encoding steps.
import pandas as pd
import tpot
from sklearn import ensemble, preprocessing

# Standardise the combined (train + test) Titanic frame.
scaler = preprocessing.StandardScaler()
tmp = scaler.fit_transform(titanic)
titanic = pd.DataFrame(tmp, columns=titanic.columns)

# The first titanic_train.shape[0] rows are the training portion; Survived is the target.
titanic_train1 = titanic[:titanic_train.shape[0]]
y_train = titanic_train['Survived']

# Grid-search a random forest (note: range(1, 300, 100) tries only 1, 101 and
# 201 trees) and keep the refit best estimator.
rf_estimator = ensemble.RandomForestClassifier()
rf_grid = {
    'max_depth': list(range(1, 9)),
    'n_estimators': list(range(1, 300, 100))
}
rf_final_estimator = cutils.grid_search_best_model(rf_estimator, rf_grid,
                                                   titanic_train1, y_train)
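
cutils.grid_search_best_model (and get_best_model in Example #3) are project helpers; a minimal sketch, assuming they simply wrap GridSearchCV and return the refit best estimator:

# Hypothetical grid-search helper (assumption, not the project's actual code).
from sklearn.model_selection import GridSearchCV

def grid_search_best_model_sketch(estimator, param_grid, X, y, scoring=None, cv=5):
    gs = GridSearchCV(estimator, param_grid, scoring=scoring, cv=cv, n_jobs=-1)
    gs.fit(X, y)
    print(gs.best_params_, gs.best_score_)
    return gs.best_estimator_
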
# Keep the features whose importance in the tuned forest is above the mean.
X_train = utils.select_features(rf_final_estimator,
                                titanic_train1,
                                threshold='mean')

# Let TPOT search for a full preprocessing + model pipeline on the selected
# features, checkpointing intermediate pipelines to disk.
tpot_estimator = tpot.TPOTClassifier(
    generations=10,
    population_size=40,
    verbosity=2,
    early_stop=3,
    random_state=100,
    cv=5,
    scoring='accuracy',
    periodic_checkpoint_folder='E:/checkpoint')
tpot_estimator.fit(X_train, y_train)
print(tpot_estimator.score(X_train, y_train))   # accuracy on the training data
print(tpot_estimator.fitted_pipeline_)          # best pipeline, refit on all training data
print(tpot_estimator._optimized_pipeline)       # internal representation (private attribute)
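The best pipeline found by TPOT can also be written out as a standalone script via its export method (the file name below is only illustrative):

# Persist the winning pipeline as generated Python code.
tpot_estimator.export('tpot_titanic_pipeline.py')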
Example #3
# The bare helper functions here (get_best_model, select_features,
# feature_reduction_*, plot_*) are project utilities imported elsewhere;
# X_train, y_train and y_trans come from earlier preprocessing.
from sklearn import ensemble, metrics, neighbors

# Same custom log-RMSE scorer as in Example #1.
scoring = metrics.make_scorer(log_rmse, greater_is_better=False)

# Tune a random forest regressor against the log-RMSE scorer, then inspect the
# resulting feature importances. y_trans is the (presumably log-transformed)
# training target prepared earlier.
rf_estimator = ensemble.RandomForestRegressor(random_state=100)
rf_grid = {
    'n_estimators': list(range(100, 501, 200)),
    'max_features': [14, 16, 18, 20],
    'max_depth': [3, 5, 7]
}
rf_selector = get_best_model(rf_estimator,
                             rf_grid,
                             X_train,
                             y_trans,
                             scoring=scoring)
plot_feature_importances(rf_selector, X_train, 50)

# Keep only the features the tuned forest considers important.
X_train1 = select_features(rf_selector, X_train)

# Reduce the selected features: PCA is asked for as many components as there
# are features (so presumably no real reduction), while t-SNE maps them to two
# dimensions for plotting against the target.
pca_data = feature_reduction_pca(X_train1, X_train1.shape[1])
tsne_data = feature_reduction_tsne(X_train1, 2)
plot_data_3d_regression(tsne_data, y_train)
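
feature_reduction_pca and feature_reduction_tsne mirror the explicit PCA / t-SNE calls in Example #1; rough sketches, assuming they just fit the scikit-learn transformers and return the reduced array:

# Hypothetical equivalents of the reduction helpers (assumptions).
from sklearn import decomposition, manifold

def feature_reduction_pca_sketch(X, n_components):
    return decomposition.PCA(n_components=n_components).fit_transform(X)

def feature_reduction_tsne_sketch(X, n_components):
    return manifold.TSNE(n_components=n_components).fit_transform(X)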

# Tune k for a k-nearest-neighbours regressor on the selected features: first
# plot the validation curve over n_neighbors, then refit the best model
# (the call below is completed to mirror the get_best_model call above, since
# the original snippet is cut off here).
knn_estimator = neighbors.KNeighborsRegressor()
knn_grid = {'n_neighbors': list(range(3, 20))}
grid_search_plot_one_parameter_curves(knn_estimator,
                                      knn_grid,
                                      X_train1,
                                      y_trans,
                                      scoring=scoring)
knn_model = get_best_model(knn_estimator,
                           knn_grid,
                           X_train1,
                           y_trans,
                           scoring=scoring)