# House-price workflow: encode and scale the features, select features with a
# Lasso model, reduce them with PCA and t-SNE, and plot the result.
#
# NOTE(review): these statements were fused onto one physical line, which
# Python cannot parse. They are restored here one per line, in the original
# order.

# Encode the categorical columns and scale the resulting frame.
# utils.ohe / utils.get_scaler are project helpers -- presumably one-hot
# encoding and an already-fitted scaler; confirm against utils.
house2 = utils.ohe(house1, imputable_cat_features)
scaler = utils.get_scaler(house2)
# Wrap the transformed values in a DataFrame again so that the column names
# of house2 are kept.
house3 = pd.DataFrame(scaler.transform(house2), columns=house2.columns)

# Keep the first house_train.shape[0] rows as the training matrix
# (assumes house1 holds the training rows first -- TODO confirm).
X_train = house3[:house_train.shape[0]]
y_train = house_train['SalePrice']

# Fit a Lasso model with default settings and use it for feature selection.
lasso_selector = linear_model.Lasso()
lasso_selector.fit(X_train, y_train)
print(lasso_selector.coef_)
utils.plot_feature_importances(lasso_selector, X_train, 40)
X_train1 = utils.select_features(lasso_selector, X_train)
utils.corr_heatmap(X_train1)

# A float n_components in (0, 1) makes PCA keep the smallest number of
# components whose cumulative explained variance exceeds that fraction.
lpca = decomposition.PCA(0.95)
lpca.fit(X_train1)
print(np.cumsum(lpca.explained_variance_ratio_))
pca_data = lpca.transform(X_train1)
print(pca_data.shape)

# t-SNE is stochastic; a fixed seed makes the embedding (and therefore the
# plot below) reproducible from run to run.
tsne = manifold.TSNE(n_components=2, random_state=100)
tsne_data = tsne.fit_transform(pca_data)
rutils.plot_data_3d_regression(tsne_data, y_train)

# greater_is_better=False tells scikit-learn that log_rmse is a loss, so the
# scorer negates it and model selection still maximises the score.
scoring = metrics.make_scorer(log_rmse, greater_is_better=False)

# NOTE(review): sns.distplot is deprecated in recent seaborn releases
# (histplot / displot are the replacements). The call is kept unchanged
# because the installed seaborn version is not visible from here.
sns.distplot(y_train)
# Titanic workflow: standardise the features, select features with a
# grid-searched random forest, then let TPOT search for a pipeline.
#
# NOTE(review): these statements were fused onto one physical line, which
# Python cannot parse. They are restored here one per line, in the original
# order.

# Standardise every column, then rebuild the DataFrame so the original column
# names are kept (fit_transform's result is re-wrapped with titanic.columns).
scaler = preprocessing.StandardScaler()
tmp = scaler.fit_transform(titanic)
titanic = pd.DataFrame(tmp, columns=titanic.columns)

# Keep the first titanic_train.shape[0] rows as the training matrix
# (assumes titanic holds the training rows first -- TODO confirm).
titanic_train1 = titanic[:titanic_train.shape[0]]
y_train = titanic_train['Survived']

# Seed the forest with the same value TPOT uses below. Without a seed, the
# selected feature subset (TPOT's input) can change between runs even though
# TPOT itself is seeded.
rf_estimator = ensemble.RandomForestClassifier(random_state=100)
rf_grid = {
    'max_depth': list(range(1, 9)),
    # NOTE(review): this evaluates to [1, 101, 201]; confirm that a
    # single-tree forest is an intended candidate.
    'n_estimators': list(range(1, 300, 100)),
}
rf_final_estimator = cutils.grid_search_best_model(
    rf_estimator, rf_grid, titanic_train1, y_train
)
X_train = utils.select_features(
    rf_final_estimator, titanic_train1, threshold='mean'
)

tpot_estimator = tpot.TPOTClassifier(
    generations=10,
    population_size=40,
    verbosity=2,
    early_stop=3,
    random_state=100,
    cv=5,
    scoring='accuracy',
    periodic_checkpoint_folder='E:/checkpoint',
)
tpot_estimator.fit(X_train, y_train)

# This score is computed on the same data the estimator was fitted on.
print(tpot_estimator.score(X_train, y_train))
print(tpot_estimator.fitted_pipeline_)
# NOTE(review): _optimized_pipeline is a private TPOT attribute and may change
# between TPOT versions.
print(tpot_estimator._optimized_pipeline)
scoring = metrics.make_scorer(log_rmse, greater_is_better=False) rf_estimator = ensemble.RandomForestRegressor(random_state=100) rf_grid = { 'n_estimators': list(range(100, 501, 200)), 'max_features': [14, 16, 18, 20], 'max_depth': [3, 5, 7] } rf_selector = get_best_model(rf_estimator, rf_grid, X_train, y_trans, scoring=scoring) plot_feature_importances(rf_selector, X_train, 50) X_train1 = select_features(rf_selector, X_train) pca_data = feature_reduction_pca(X_train1, X_train1.shape[1]) tsne_data = feature_reduction_tsne(X_train1, 2) plot_data_3d_regression(tsne_data, y_train) knn_estimator = neighbors.KNeighborsRegressor() knn_grid = {'n_neighbors': list(range(3, 20))} grid_search_plot_one_parameter_curves(knn_estimator, knn_grid, X_train1, y_trans, scoring=scoring) knn_model = get_best_model(knn_estimator, knn_grid, X_train1,