def _train(train_data: DataFrame, regressor: RegressorMixin, clusterer: Clustering, do_cv=False) -> dict:
    """Fit one regressor per non-empty cluster of ``train_data``.

    ``clusterer.cluster_data`` splits the training data; the result is indexed
    by cluster number in ``range(clusterer.n_clusters)``. For every cluster
    whose frame is not empty, a regressor is fitted on all columns except
    ``'label'``, with ``'label'`` as the target.

    Args:
        train_data: Training frame; each per-cluster frame must contain a
            ``'label'`` column.
        regressor: Estimator to fit. When ``do_cv`` is false this very instance
            is fitted and stored for the first non-empty cluster; subsequent
            clusters receive fresh clones.
        clusterer: Object providing ``cluster_data`` and ``n_clusters``.
        do_cv: When true, run ``cross_validate`` on the cluster and keep the
            fold estimator with the highest test score instead of a single fit.

    Returns:
        A dict mapping ``ModelType.CLUSTERER.value`` to ``clusterer`` and
        ``ModelType.REGRESSOR.value`` to a ``{cluster_index: fitted_regressor}``
        dict. Empty clusters have no entry.
    """
    models = dict()
    clustered_data = clusterer.cluster_data(train_data)

    for cluster in range(clusterer.n_clusters):
        cluster_train_df = clustered_data[cluster]
        if cluster_train_df.empty:
            continue

        # Keyword form: the positional ``axis`` argument of ``drop`` was
        # deprecated and later removed from pandas.
        cluster_features_df = cluster_train_df.drop(columns='label')
        cluster_targets = cluster_train_df['label'].values.ravel()

        if do_cv:
            cross_validation_result = cross_validate(
                regressor,
                cluster_features_df,
                cluster_targets,
                return_estimator=True,
                cv=10,  # TODO(Chiara): check whether 10 folds is what you want
            )
            validation_scores = cross_validation_result['test_score']
            regressors = cross_validation_result['estimator']
            # TODO(Chiara): check whether you want the max, the min or
            # something in between. Ties resolve to the earliest fold.
            best_fold = max(
                range(len(validation_scores)),
                key=lambda fold: validation_scores[fold],
            )
            regressor = regressors[best_fold]
        else:
            regressor.fit(cluster_features_df, cluster_targets)

        models[cluster] = regressor

        # Continue with a fresh copy so the next cluster does not refit the
        # estimator that was just stored.
        try:
            regressor = clone(regressor)
        except TypeError:
            # Strict cloning rejected the object; retry with safe=False.
            regressor = clone(regressor, safe=False)

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.REGRESSOR.value: models,
    }
def _train(train_data: DataFrame, regressor: RegressorMixin, clusterer: Clustering) -> dict:
    """Fit one regressor per non-empty cluster of ``train_data``.

    ``clusterer.cluster_data`` splits the training data; the result is indexed
    by cluster number in ``range(clusterer.n_clusters)``. For every cluster
    whose frame is not empty, a regressor is fitted on all columns except
    ``'label'``, with ``'label'`` as the target.

    Args:
        train_data: Training frame; each per-cluster frame must contain a
            ``'label'`` column.
        regressor: Estimator to fit. This very instance is fitted and stored
            for the first non-empty cluster; subsequent clusters receive fresh
            clones.
        clusterer: Object providing ``cluster_data`` and ``n_clusters``.

    Returns:
        A dict mapping ``'clusterer'`` to ``clusterer`` and
        ``PredictiveModels.REGRESSION.value`` to a
        ``{cluster_index: fitted_regressor}`` dict. Empty clusters have no
        entry.
    """
    models = dict()
    clustered_data = clusterer.cluster_data(train_data)

    for cluster in range(clusterer.n_clusters):
        cluster_train_df = clustered_data[cluster]
        if cluster_train_df.empty:
            continue

        # Keyword form: the positional ``axis`` argument of ``drop`` was
        # deprecated and later removed from pandas.
        cluster_features_df = cluster_train_df.drop(columns='label')
        cluster_targets = cluster_train_df['label'].values.ravel()

        regressor.fit(cluster_features_df, cluster_targets)
        models[cluster] = regressor

        # Continue with a fresh copy so the next cluster does not refit the
        # estimator that was just stored.
        try:
            regressor = clone(regressor)
        except TypeError:
            # Strict cloning rejected the object; retry with safe=False.
            regressor = clone(regressor, safe=False)

    return {'clusterer': clusterer, PredictiveModels.REGRESSION.value: models}
def _cv_estimate(model: RegressorMixin, train_data: pd.DataFrame, features: List[str], y: str,
                 n_splits: int) -> Tuple[pd.Series, List[RegressorMixin]]:
    """Produce out-of-fold predictions and one fitted model per fold.

    The rows of ``train_data`` are split with ``KFold(n_splits=n_splits)``.
    For each split, a copy of ``model`` is fitted on the training rows and used
    to predict the held-out rows.

    Args:
        model: Template estimator exposing ``fit`` and ``predict``. It is
            copied for every fold and is not fitted itself.
        train_data: Frame holding both the feature columns and the target.
        features: Names of the feature columns.
        y: Name of the target column.
        n_splits: Number of folds.

    Returns:
        A tuple ``(cv_pred, models)``: ``cv_pred`` is a Series indexed like
        ``train_data`` containing each row's held-out prediction, and
        ``models`` lists the fitted estimators in fold order.
    """
    # Function-scope import keeps this block self-contained.
    from copy import deepcopy

    cv = KFold(n_splits=n_splits)
    models = []
    cv_pred = pd.Series(np.nan, index=train_data.index)

    for train, test in cv.split(train_data):
        # Fit an independent copy per fold. Re-fitting the same instance
        # (scikit-learn's ``fit`` returns ``self``) would leave every list
        # entry pointing at one object trained on the last fold only.
        fold_model = deepcopy(model)
        fold_model.fit(train_data[features].iloc[train], train_data[y].iloc[train])
        cv_pred.iloc[test] = fold_model.predict(train_data[features].iloc[test])
        models.append(fold_model)

    return cv_pred, models